Pārlūkot izejas kodu

Use generator to deal with large patch stream

Matthew Wang 12 gadus atpakaļ
vecāks
revīzija
3fcd0625b9
1 mainītis faili ar 77 papildinājumiem un 40 dzēšanām
  1. 77
    40
      cdiff.py

+ 77
- 40
cdiff.py Parādīt failu

384
         return line.startswith(r'\ No newline at end of')
384
         return line.startswith(r'\ No newline at end of')
385
 
385
 
386
 
386
 
387
+class PatchStream(object):
388
+
389
+    def __init__(self, diff_hdl):
390
+        self._diff_hdl = diff_hdl
391
+        self._header_chunk_size = 0
392
+        self._header_chunk = []
393
+
394
+        # Test whether stream is empty by reading 1 line
395
+        line = self._diff_hdl.readline()
396
+        if line is None:
397
+            self._is_empty = True
398
+        else:
399
+            self._header_chunk.append(line)
400
+            self._header_chunk_size += 1
401
+            self._is_empty = False
402
+
403
+    def is_empty(self):
404
+        return self._is_empty
405
+
406
+    def read_header_chunks(self, header_chunk_size):
407
+        """Returns a small chunk for patch type detection; supposed to be called once"""
408
+        for i in range(1, header_chunk_size):
409
+            line = self._diff_hdl.readline()
410
+            if line is None:
411
+                break
412
+            self._header_chunk.append(line)
413
+            self._header_chunk_size += 1
414
+            yield line
415
+
416
+    def __iter__(self):
417
+        for line in self._header_chunk:
418
+            yield line
419
+        for line in self._diff_hdl:
420
+            yield line
421
+
422
+
387
 class DiffParser(object):
423
 class DiffParser(object):
388
 
424
 
389
     def __init__(self, stream):
425
     def __init__(self, stream):
390
         """Detect Udiff with 3 conditions, '## ' usually indicates svn property
426
         """Detect Udiff with 3 conditions, '## ' usually indicates svn property
391
         changes in output from `svn log --diff`
427
         changes in output from `svn log --diff`
392
         """
428
         """
429
+        self._stream = stream
430
+
393
         flag = 0
431
         flag = 0
394
-        for line in stream[:100]:
432
+        for line in self._stream.read_header_chunks(100):
433
+            line = decode(line)
395
             if line.startswith('--- '):
434
             if line.startswith('--- '):
396
                 flag |= 1
435
                 flag |= 1
397
             elif line.startswith('+++ '):
436
             elif line.startswith('+++ '):
404
         else:
443
         else:
405
             raise RuntimeError('unknown diff type')
444
             raise RuntimeError('unknown diff type')
406
 
445
 
446
+    def get_diff_generator(self):
407
         try:
447
         try:
408
-            self._diffs = self._parse(stream)
448
+            return self._parse()
409
         except (AssertionError, IndexError):
449
         except (AssertionError, IndexError):
410
             raise RuntimeError('invalid patch format')
450
             raise RuntimeError('invalid patch format')
411
 
451
 
412
-    def get_diffs(self):
413
-        return self._diffs
414
-
415
-    def _parse(self, stream):
452
+    def _parse(self):
416
         """parse all diff lines, construct a list of Diff objects"""
453
         """parse all diff lines, construct a list of Diff objects"""
417
         if self._type == 'udiff':
454
         if self._type == 'udiff':
418
             difflet = Udiff(None, None, None, None)
455
             difflet = Udiff(None, None, None, None)
419
         else:
456
         else:
420
             raise RuntimeError('unsupported diff format')
457
             raise RuntimeError('unsupported diff format')
421
 
458
 
422
-        out_diffs = []
459
+        diff = Diff([], None, None, [])
423
         headers = []
460
         headers = []
424
 
461
 
425
-        while stream:
426
-            if difflet.is_old_path(stream[0]):
427
-                old_path = stream.pop(0)
428
-                out_diffs.append(Diff(headers, old_path, None, []))
462
+        for line in self._stream:
463
+            line = decode(line)
464
+
465
+            if difflet.is_old_path(line):
466
+                if diff._old_path and diff._new_path and len(diff._hunks) > 0:
467
+                    # One diff constructed
468
+                    yield diff
469
+                    diff = Diff([], None, None, [])
470
+                diff = Diff(headers, line, None, [])
429
                 headers = []
471
                 headers = []
430
 
472
 
431
-            elif difflet.is_new_path(stream[0]):
432
-                new_path = stream.pop(0)
433
-                out_diffs[-1]._new_path = new_path
473
+            elif difflet.is_new_path(line):
474
+                diff._new_path = line
434
 
475
 
435
-            elif difflet.is_hunk_meta(stream[0]):
436
-                hunk_meta = stream.pop(0)
476
+            elif difflet.is_hunk_meta(line):
477
+                hunk_meta = line
437
                 old_addr, new_addr = difflet.parse_hunk_meta(hunk_meta)
478
                 old_addr, new_addr = difflet.parse_hunk_meta(hunk_meta)
438
                 hunk = Hunk(headers, hunk_meta, old_addr, new_addr)
479
                 hunk = Hunk(headers, hunk_meta, old_addr, new_addr)
439
                 headers = []
480
                 headers = []
440
-                out_diffs[-1]._hunks.append(hunk)
481
+                diff._hunks.append(hunk)
441
 
482
 
442
-            elif out_diffs and out_diffs[-1]._hunks and \
443
-                    (difflet.is_old(stream[0]) or difflet.is_new(stream[0]) or \
444
-                    difflet.is_common(stream[0])):
445
-                hunk_line = stream.pop(0)
446
-                out_diffs[-1]._hunks[-1].append(hunk_line[0], hunk_line[1:])
483
+            elif len(diff._hunks) > 0 and (difflet.is_old(line) or \
484
+                    difflet.is_new(line) or difflet.is_common(line)):
485
+                hunk_line = line
486
+                diff._hunks[-1].append(hunk_line[0], hunk_line[1:])
447
 
487
 
448
-            elif difflet.is_eof(stream[0]):
488
+            elif difflet.is_eof(line):
449
                 # ignore
489
                 # ignore
450
-                stream.pop(0)
490
+                pass
451
 
491
 
452
             else:
492
             else:
453
                 # All other non-recognized lines are considered as headers or
493
                 # All other non-recognized lines are considered as headers or
454
                 # hunk headers respectively
494
                 # hunk headers respectively
455
                 #
495
                 #
456
-                headers.append(stream.pop(0))
496
+                headers.append(line)
457
 
497
 
458
         if headers:
498
         if headers:
459
             raise RuntimeError('dangling header(s):\n%s' % ''.join(headers))
499
             raise RuntimeError('dangling header(s):\n%s' % ''.join(headers))
460
 
500
 
461
-        # Validate the last patch set
462
-        if out_diffs:
463
-            assert out_diffs[-1]._old_path is not None
464
-            assert out_diffs[-1]._new_path is not None
465
-            assert len(out_diffs[-1]._hunks) > 0
466
-            assert len(out_diffs[-1]._hunks[-1]._hunk_meta) > 0
467
-
468
-        return out_diffs
501
+        # Validate and yield the last patch set
502
+        assert diff._old_path is not None
503
+        assert diff._new_path is not None
504
+        assert len(diff._hunks) > 0
505
+        assert len(diff._hunks[-1]._hunk_meta) > 0
506
+        yield diff
469
 
507
 
470
 
508
 
471
 class DiffMarkup(object):
509
 class DiffMarkup(object):
472
 
510
 
473
     def __init__(self, stream):
511
     def __init__(self, stream):
474
-        self._diffs = DiffParser(stream).get_diffs()
512
+        self._diffs = DiffParser(stream).get_diff_generator()
475
 
513
 
476
     def markup(self, side_by_side=False, width=0):
514
     def markup(self, side_by_side=False, width=0):
477
         """Returns a generator"""
515
         """Returns a generator"""
572
     else:
610
     else:
573
         diff_hdl = sys.stdin
611
         diff_hdl = sys.stdin
574
 
612
 
575
-    # FIXME: can't use generator for now due to current implementation in parser
576
-    stream = [decode(line) for line in diff_hdl.readlines()]
577
-
578
-    if diff_hdl is not sys.stdin:
579
-        diff_hdl.close()
613
+    stream = PatchStream(diff_hdl)
580
 
614
 
581
     # Don't let empty diff pass thru
615
     # Don't let empty diff pass thru
582
-    if not stream:
616
+    if stream.is_empty():
583
         return 0
617
         return 0
584
 
618
 
585
     if opts.color == 'always' or (opts.color == 'auto' and sys.stdout.isatty()):
619
     if opts.color == 'always' or (opts.color == 'auto' and sys.stdout.isatty()):
593
         # pipe out stream untouched to make sure it is still a patch
627
         # pipe out stream untouched to make sure it is still a patch
594
         sys.stdout.write(''.join(stream))
628
         sys.stdout.write(''.join(stream))
595
 
629
 
630
+    if diff_hdl is not sys.stdin:
631
+        diff_hdl.close()
632
+
596
     return 0
633
     return 0
597
 
634
 
598
 
635