Quellcode durchsuchen

Use generator to deal with large patch stream

Matthew Wang vor 11 Jahren
Ursprung
Commit
3fcd0625b9
1 geänderte Dateien mit 77 neuen und 40 gelöschten Zeilen
  1. 77
    40
      cdiff.py

+ 77
- 40
cdiff.py Datei anzeigen

@@ -384,14 +384,53 @@ class Udiff(Diff):
384 384
         return line.startswith(r'\ No newline at end of')
385 385
 
386 386
 
387
+class PatchStream(object):
388
+
389
+    def __init__(self, diff_hdl):
390
+        self._diff_hdl = diff_hdl
391
+        self._header_chunk_size = 0
392
+        self._header_chunk = []
393
+
394
+        # Test whether the stream is empty by reading one line
395
+        line = self._diff_hdl.readline()
396
+        if line is None:
397
+            self._is_empty = True
398
+        else:
399
+            self._header_chunk.append(line)
400
+            self._header_chunk_size += 1
401
+            self._is_empty = False
402
+
403
+    def is_empty(self):
404
+        return self._is_empty
405
+
406
+    def read_header_chunks(self, header_chunk_size):
407
+        """Returns a small chunk for patch type detection; supposed to be called once"""
408
+        for i in range(1, header_chunk_size):
409
+            line = self._diff_hdl.readline()
410
+            if line is None:
411
+                break
412
+            self._header_chunk.append(line)
413
+            self._header_chunk_size += 1
414
+            yield line
415
+
416
+    def __iter__(self):
417
+        for line in self._header_chunk:
418
+            yield line
419
+        for line in self._diff_hdl:
420
+            yield line
421
+
422
+
387 423
 class DiffParser(object):
388 424
 
389 425
     def __init__(self, stream):
390 426
         """Detect Udiff with 3 conditions, '## ' usually indicates svn property
391 427
         changes in output from `svn log --diff`
392 428
         """
429
+        self._stream = stream
430
+
393 431
         flag = 0
394
-        for line in stream[:100]:
432
+        for line in self._stream.read_header_chunks(100):
433
+            line = decode(line)
395 434
             if line.startswith('--- '):
396 435
                 flag |= 1
397 436
             elif line.startswith('+++ '):
@@ -404,74 +443,73 @@ class DiffParser(object):
404 443
         else:
405 444
             raise RuntimeError('unknown diff type')
406 445
 
446
+    def get_diff_generator(self):
407 447
         try:
408
-            self._diffs = self._parse(stream)
448
+            return self._parse()
409 449
         except (AssertionError, IndexError):
410 450
             raise RuntimeError('invalid patch format')
411 451
 
412
-    def get_diffs(self):
413
-        return self._diffs
414
-
415
-    def _parse(self, stream):
452
+    def _parse(self):
416 453
         """parse all diff lines, construct a list of Diff objects"""
417 454
         if self._type == 'udiff':
418 455
             difflet = Udiff(None, None, None, None)
419 456
         else:
420 457
             raise RuntimeError('unsupported diff format')
421 458
 
422
-        out_diffs = []
459
+        diff = Diff([], None, None, [])
423 460
         headers = []
424 461
 
425
-        while stream:
426
-            if difflet.is_old_path(stream[0]):
427
-                old_path = stream.pop(0)
428
-                out_diffs.append(Diff(headers, old_path, None, []))
462
+        for line in self._stream:
463
+            line = decode(line)
464
+
465
+            if difflet.is_old_path(line):
466
+                if diff._old_path and diff._new_path and len(diff._hunks) > 0:
467
+                    # One diff constructed
468
+                    yield diff
469
+                    diff = Diff([], None, None, [])
470
+                diff = Diff(headers, line, None, [])
429 471
                 headers = []
430 472
 
431
-            elif difflet.is_new_path(stream[0]):
432
-                new_path = stream.pop(0)
433
-                out_diffs[-1]._new_path = new_path
473
+            elif difflet.is_new_path(line):
474
+                diff._new_path = line
434 475
 
435
-            elif difflet.is_hunk_meta(stream[0]):
436
-                hunk_meta = stream.pop(0)
476
+            elif difflet.is_hunk_meta(line):
477
+                hunk_meta = line
437 478
                 old_addr, new_addr = difflet.parse_hunk_meta(hunk_meta)
438 479
                 hunk = Hunk(headers, hunk_meta, old_addr, new_addr)
439 480
                 headers = []
440
-                out_diffs[-1]._hunks.append(hunk)
481
+                diff._hunks.append(hunk)
441 482
 
442
-            elif out_diffs and out_diffs[-1]._hunks and \
443
-                    (difflet.is_old(stream[0]) or difflet.is_new(stream[0]) or \
444
-                    difflet.is_common(stream[0])):
445
-                hunk_line = stream.pop(0)
446
-                out_diffs[-1]._hunks[-1].append(hunk_line[0], hunk_line[1:])
483
+            elif len(diff._hunks) > 0 and (difflet.is_old(line) or \
484
+                    difflet.is_new(line) or difflet.is_common(line)):
485
+                hunk_line = line
486
+                diff._hunks[-1].append(hunk_line[0], hunk_line[1:])
447 487
 
448
-            elif difflet.is_eof(stream[0]):
488
+            elif difflet.is_eof(line):
449 489
                 # ignore
450
-                stream.pop(0)
490
+                pass
451 491
 
452 492
             else:
453 493
                 # All other non-recognized lines are considered as headers or
454 494
                 # hunk headers respectively
455 495
                 #
456
-                headers.append(stream.pop(0))
496
+                headers.append(line)
457 497
 
458 498
         if headers:
459 499
             raise RuntimeError('dangling header(s):\n%s' % ''.join(headers))
460 500
 
461
-        # Validate the last patch set
462
-        if out_diffs:
463
-            assert out_diffs[-1]._old_path is not None
464
-            assert out_diffs[-1]._new_path is not None
465
-            assert len(out_diffs[-1]._hunks) > 0
466
-            assert len(out_diffs[-1]._hunks[-1]._hunk_meta) > 0
467
-
468
-        return out_diffs
501
+        # Validate and yield the last patch set
502
+        assert diff._old_path is not None
503
+        assert diff._new_path is not None
504
+        assert len(diff._hunks) > 0
505
+        assert len(diff._hunks[-1]._hunk_meta) > 0
506
+        yield diff
469 507
 
470 508
 
471 509
 class DiffMarkup(object):
472 510
 
473 511
     def __init__(self, stream):
474
-        self._diffs = DiffParser(stream).get_diffs()
512
+        self._diffs = DiffParser(stream).get_diff_generator()
475 513
 
476 514
     def markup(self, side_by_side=False, width=0):
477 515
         """Returns a generator"""
@@ -572,14 +610,10 @@ def main():
572 610
     else:
573 611
         diff_hdl = sys.stdin
574 612
 
575
-    # FIXME: can't use generator for now due to current implementation in parser
576
-    stream = [decode(line) for line in diff_hdl.readlines()]
577
-
578
-    if diff_hdl is not sys.stdin:
579
-        diff_hdl.close()
613
+    stream = PatchStream(diff_hdl)
580 614
 
581 615
     # Don't let empty diff pass thru
582
-    if not stream:
616
+    if stream.is_empty():
583 617
         return 0
584 618
 
585 619
     if opts.color == 'always' or (opts.color == 'auto' and sys.stdout.isatty()):
@@ -593,6 +627,9 @@ def main():
593 627
         # pipe out stream untouched to make sure it is still a patch
594 628
         sys.stdout.write(''.join(stream))
595 629
 
630
+    if diff_hdl is not sys.stdin:
631
+        diff_hdl.close()
632
+
596 633
     return 0
597 634
 
598 635