cdiff.py 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Term based tool to view **colored**, **incremental** diff in Git/Mercurial/Svn
  5. workspace, given patch or two files, or from stdin, with **side by side** and
  6. **auto pager** support. Requires python (>= 2.5.0) and ``less``.
  7. """
  8. META_INFO = {
  9. 'version' : '0.5.1',
  10. 'license' : 'BSD-3',
  11. 'author' : 'Matthew Wang',
  12. 'email' : 'mattwyl(@)gmail(.)com',
  13. 'url' : 'https://github.com/ymattw/cdiff',
  14. 'keywords' : 'colored incremental side-by-side diff',
  15. 'description' : ('View colored, incremental diff in workspace, given patch '
  16. 'or two files, or from stdin, with side by side and auto '
  17. 'pager support')
  18. }
  19. import sys
  20. if sys.hexversion < 0x02050000:
  21. raise SystemExit("*** Requires python >= 2.5.0")
  22. IS_PY3 = sys.hexversion >= 0x03000000
  23. import re
  24. import subprocess
  25. import errno
  26. import difflib
  27. COLORS = {
  28. 'reset' : '\x1b[0m',
  29. 'underline' : '\x1b[4m',
  30. 'reverse' : '\x1b[7m',
  31. 'red' : '\x1b[31m',
  32. 'green' : '\x1b[32m',
  33. 'yellow' : '\x1b[33m',
  34. 'blue' : '\x1b[34m',
  35. 'magenta' : '\x1b[35m',
  36. 'cyan' : '\x1b[36m',
  37. 'lightred' : '\x1b[1;31m',
  38. 'lightgreen' : '\x1b[1;32m',
  39. 'lightyellow' : '\x1b[1;33m',
  40. 'lightblue' : '\x1b[1;34m',
  41. 'lightmagenta' : '\x1b[1;35m',
  42. 'lightcyan' : '\x1b[1;36m',
  43. }
  44. # Keys for revision control probe, diff and log with diff
  45. VCS_INFO = {
  46. 'Git': {
  47. 'probe' : ['git', 'rev-parse'],
  48. 'diff' : ['git', 'diff'],
  49. 'log' : ['git', 'log', '--patch'],
  50. },
  51. 'Mercurial': {
  52. 'probe' : ['hg', 'summary'],
  53. 'diff' : ['hg', 'diff'],
  54. 'log' : ['hg', 'log', '--patch'],
  55. },
  56. 'Svn': {
  57. 'probe' : ['svn', 'info'],
  58. 'diff' : ['svn', 'diff'],
  59. 'log' : ['svn', 'log', '--diff'],
  60. },
  61. }
  62. def colorize(text, start_color, end_color='reset'):
  63. return COLORS[start_color] + text + COLORS[end_color]
  64. class Hunk(object):
  65. def __init__(self, hunk_headers, hunk_meta, old_addr, new_addr):
  66. self._hunk_headers = hunk_headers
  67. self._hunk_meta = hunk_meta
  68. self._old_addr = old_addr # tuple (start, offset)
  69. self._new_addr = new_addr # tuple (start, offset)
  70. self._hunk_list = [] # list of tuple (attr, line)
  71. def get_hunk_headers(self):
  72. return self._hunk_headers
  73. def get_hunk_meta(self):
  74. return self._hunk_meta
  75. def get_old_addr(self):
  76. return self._old_addr
  77. def get_new_addr(self):
  78. return self._new_addr
  79. def append(self, hunk_line):
  80. """hunk_line is a 2-element tuple: (attr, text), where attris : '-':
  81. old, '+': new, ' ': common"""
  82. self._hunk_list.append(hunk_line)
  83. def mdiff(self):
  84. r"""The difflib._mdiff() function returns an interator which returns a
  85. tuple: (from line tuple, to line tuple, boolean flag)
  86. from/to line tuple -- (line num, line text)
  87. line num -- integer or None (to indicate a context separation)
  88. line text -- original line text with following markers inserted:
  89. '\0+' -- marks start of added text
  90. '\0-' -- marks start of deleted text
  91. '\0^' -- marks start of changed text
  92. '\1' -- marks end of added/deleted/changed text
  93. boolean flag -- None indicates context separation, True indicates
  94. either "from" or "to" line contains a change, otherwise False.
  95. """
  96. return difflib._mdiff(self._get_old_text(), self._get_new_text())
  97. def _get_old_text(self):
  98. out = []
  99. for (attr, line) in self._hunk_list:
  100. if attr != '+':
  101. out.append(line)
  102. return out
  103. def _get_new_text(self):
  104. out = []
  105. for (attr, line) in self._hunk_list:
  106. if attr != '-':
  107. out.append(line)
  108. return out
  109. def __iter__(self):
  110. for hunk_line in self._hunk_list:
  111. yield hunk_line
  112. class Diff(object):
  113. def __init__(self, headers, old_path, new_path, hunks):
  114. self._headers = headers
  115. self._old_path = old_path
  116. self._new_path = new_path
  117. self._hunks = hunks
  118. # Following detectors, parse_hunk_meta() and parse_hunk_line() are suppose
  119. # to be overwritten by derived class. No is_header() anymore, all
  120. # non-recognized lines are considered as headers
  121. #
  122. def is_old_path(self, line):
  123. return False
  124. def is_new_path(self, line):
  125. return False
  126. def is_hunk_meta(self, line):
  127. return False
  128. def parse_hunk_meta(self, line):
  129. """Returns a 2-element tuple, each of them is a tuple in form of (start,
  130. offset)"""
  131. return None
  132. def parse_hunk_line(self, line):
  133. """Returns a 2-element tuple: (attr, text), where attr is: '-': old,
  134. '+': new, ' ': common"""
  135. return None
  136. def is_old(self, line):
  137. return False
  138. def is_new(self, line):
  139. return False
  140. def is_common(self, line):
  141. return False
  142. def is_eof(self, line):
  143. return False
  144. def markup_traditional(self):
  145. """Returns a generator"""
  146. for line in self._headers:
  147. yield self._markup_header(line)
  148. yield self._markup_old_path(self._old_path)
  149. yield self._markup_new_path(self._new_path)
  150. for hunk in self._hunks:
  151. for hunk_header in hunk.get_hunk_headers():
  152. yield self._markup_hunk_header(hunk_header)
  153. yield self._markup_hunk_meta(hunk.get_hunk_meta())
  154. for old, new, changed in hunk.mdiff():
  155. if changed:
  156. if not old[0]:
  157. # The '+' char after \x00 is kept
  158. # DEBUG: yield 'NEW: %s %s\n' % (old, new)
  159. line = new[1].strip('\x00\x01')
  160. yield self._markup_new(line)
  161. elif not new[0]:
  162. # The '-' char after \x00 is kept
  163. # DEBUG: yield 'OLD: %s %s\n' % (old, new)
  164. line = old[1].strip('\x00\x01')
  165. yield self._markup_old(line)
  166. else:
  167. # DEBUG: yield 'CHG: %s %s\n' % (old, new)
  168. yield self._markup_old('-') + \
  169. self._markup_old_mix(old[1])
  170. yield self._markup_new('+') + \
  171. self._markup_new_mix(new[1])
  172. else:
  173. yield self._markup_common(' ' + old[1])
  174. def markup_side_by_side(self, width):
  175. """Returns a generator"""
  176. wrap_char = colorize('>', 'lightmagenta')
  177. def _normalize(line):
  178. return line.replace('\t', ' '*8).replace('\n', '').replace('\r', '')
  179. def _fit_with_marker(text, markup_fn, width, pad=False):
  180. """Wrap or pad input pure text, then markup"""
  181. if len(text) > width:
  182. return markup_fn(text[:width-1]) + wrap_char
  183. elif pad:
  184. pad_len = width - len(text)
  185. return '%s%*s' % (markup_fn(text), pad_len, '')
  186. else:
  187. return markup_fn(text)
  188. def _fit_markup(markup, width, pad=False):
  189. """Fit input markup to given width, pad or wrap accordingly, str len
  190. does not count correctly if string contains ansi color code. Only
  191. left side need to set `pad`
  192. """
  193. out = []
  194. count = 0
  195. ansi_color_regex = r'\x1b\[(1;)?\d{1,2}m'
  196. patt = re.compile('^((%s)+)(.*)' % ansi_color_regex)
  197. repl = re.compile(ansi_color_regex)
  198. while markup and count < width:
  199. if patt.match(markup):
  200. # Extract longest ansi color code seq to target output and
  201. # remove the seq from input markup, no update on counter
  202. #
  203. out.append(patt.sub(r'\1', markup))
  204. markup = patt.sub(r'\4', markup)
  205. else:
  206. # FIXME: utf-8 wchar might break the rule here, e.g.
  207. # u'\u554a' takes double width of a single letter, also this
  208. # depends on your terminal font. I guess audience of this
  209. # tool never put that kind of symbol in their code :-)
  210. #
  211. out.append(markup[0])
  212. count += 1
  213. markup = markup[1:]
  214. if count == width and repl.sub('', markup):
  215. # Was stripped: output fulfil and still has ascii in markup
  216. out[-1] = COLORS['reset'] + wrap_char
  217. elif count < width and pad:
  218. pad_len = width - count
  219. out.append('%*s' % (pad_len, ''))
  220. return ''.join(out)
  221. # Setup line width and number width
  222. if width <= 0:
  223. width = 80
  224. (start, offset) = self._hunks[-1].get_old_addr()
  225. max1 = start + offset - 1
  226. (start, offset) = self._hunks[-1].get_new_addr()
  227. max2 = start + offset - 1
  228. num_width = max(len(str(max1)), len(str(max2)))
  229. left_num_fmt = colorize('%%(left_num)%ds' % num_width, 'yellow')
  230. right_num_fmt = colorize('%%(right_num)%ds' % num_width, 'yellow')
  231. line_fmt = left_num_fmt + ' %(left)s ' + COLORS['reset'] + \
  232. right_num_fmt + ' %(right)s\n'
  233. # yield header, old path and new path
  234. for line in self._headers:
  235. yield self._markup_header(line)
  236. yield self._markup_old_path(self._old_path)
  237. yield self._markup_new_path(self._new_path)
  238. # yield hunks
  239. for hunk in self._hunks:
  240. for hunk_header in hunk.get_hunk_headers():
  241. yield self._markup_hunk_header(hunk_header)
  242. yield self._markup_hunk_meta(hunk.get_hunk_meta())
  243. for old, new, changed in hunk.mdiff():
  244. if old[0]:
  245. left_num = str(hunk.get_old_addr()[0] + int(old[0]) - 1)
  246. else:
  247. left_num = ' '
  248. if new[0]:
  249. right_num = str(hunk.get_new_addr()[0] + int(new[0]) - 1)
  250. else:
  251. right_num = ' '
  252. left = _normalize(old[1])
  253. right = _normalize(new[1])
  254. if changed:
  255. if not old[0]:
  256. left = '%*s' % (width, ' ')
  257. right = right.lstrip('\x00+').rstrip('\x01')
  258. right = _fit_with_marker(right, self._markup_new, width)
  259. elif not new[0]:
  260. left = left.lstrip('\x00-').rstrip('\x01')
  261. left = _fit_with_marker(left, self._markup_old, width)
  262. right = ''
  263. else:
  264. left = _fit_markup(self._markup_old_mix(left), width, 1)
  265. right = _fit_markup(self._markup_new_mix(right), width)
  266. else:
  267. left = _fit_with_marker(left, self._markup_common, width, 1)
  268. right = _fit_with_marker(right, self._markup_common, width)
  269. yield line_fmt % {
  270. 'left_num': left_num,
  271. 'left': left,
  272. 'right_num': right_num,
  273. 'right': right
  274. }
  275. def _markup_header(self, line):
  276. return colorize(line, 'cyan')
  277. def _markup_old_path(self, line):
  278. return colorize(line, 'yellow')
  279. def _markup_new_path(self, line):
  280. return colorize(line, 'yellow')
  281. def _markup_hunk_header(self, line):
  282. return colorize(line, 'lightcyan')
  283. def _markup_hunk_meta(self, line):
  284. return colorize(line, 'lightblue')
  285. def _markup_common(self, line):
  286. return colorize(line, 'reset')
  287. def _markup_old(self, line):
  288. return colorize(line, 'lightred')
  289. def _markup_new(self, line):
  290. return colorize(line, 'lightgreen')
  291. def _markup_mix(self, line, base_color):
  292. del_code = COLORS['reverse'] + COLORS[base_color]
  293. add_code = COLORS['reverse'] + COLORS[base_color]
  294. chg_code = COLORS['underline'] + COLORS[base_color]
  295. rst_code = COLORS['reset'] + COLORS[base_color]
  296. line = line.replace('\x00-', del_code)
  297. line = line.replace('\x00+', add_code)
  298. line = line.replace('\x00^', chg_code)
  299. line = line.replace('\x01', rst_code)
  300. return colorize(line, base_color)
  301. def _markup_old_mix(self, line):
  302. return self._markup_mix(line, 'red')
  303. def _markup_new_mix(self, line):
  304. return self._markup_mix(line, 'green')
  305. class Udiff(Diff):
  306. def is_old_path(self, line):
  307. return line.startswith('--- ')
  308. def is_new_path(self, line):
  309. return line.startswith('+++ ')
  310. def is_hunk_meta(self, line):
  311. return line.startswith('@@ -') or line.startswith('## -')
  312. def parse_hunk_meta(self, hunk_meta):
  313. # @@ -3,7 +3,6 @@
  314. a = hunk_meta.split()[1].split(',') # -3 7
  315. if len(a) > 1:
  316. old_addr = (int(a[0][1:]), int(a[1]))
  317. else:
  318. # @@ -1 +1,2 @@
  319. old_addr = (int(a[0][1:]), 0)
  320. b = hunk_meta.split()[2].split(',') # +3 6
  321. if len(b) > 1:
  322. new_addr = (int(b[0][1:]), int(b[1]))
  323. else:
  324. # @@ -0,0 +1 @@
  325. new_addr = (int(b[0][1:]), 0)
  326. return (old_addr, new_addr)
  327. def parse_hunk_line(self, line):
  328. return (line[0], line[1:])
  329. def is_old(self, line):
  330. """Exclude header line from svn log --diff output"""
  331. return line.startswith('-') and not self.is_old_path(line) and \
  332. not re.match(r'^-{4,}$', line.rstrip())
  333. def is_new(self, line):
  334. return line.startswith('+') and not self.is_new_path(line)
  335. def is_common(self, line):
  336. return line.startswith(' ')
  337. def is_eof(self, line):
  338. # \ No newline at end of file
  339. # \ No newline at end of property
  340. return line.startswith(r'\ No newline at end of')
  341. class PatchStream(object):
  342. def __init__(self, diff_hdl):
  343. self._diff_hdl = diff_hdl
  344. self._stream_header_size = 0
  345. self._stream_header = []
  346. # Test whether stream is empty by read 1 line
  347. line = self._diff_hdl.readline()
  348. if not line:
  349. self._is_empty = True
  350. else:
  351. self._stream_header.append(line)
  352. self._stream_header_size += 1
  353. self._is_empty = False
  354. def is_empty(self):
  355. return self._is_empty
  356. def read_stream_header(self, stream_header_size):
  357. """Returns a small chunk for patch type detect, suppose to call once"""
  358. for i in range(1, stream_header_size):
  359. line = self._diff_hdl.readline()
  360. if not line:
  361. break
  362. self._stream_header.append(line)
  363. self._stream_header_size += 1
  364. return self._stream_header
  365. def __iter__(self):
  366. for line in self._stream_header:
  367. yield line
  368. for line in self._diff_hdl:
  369. yield line
  370. class DiffParser(object):
  371. def __init__(self, stream):
  372. """Detect Udiff with 3 conditions, '## ' uaually indicates svn property
  373. changes in output from `svn log --diff`
  374. """
  375. self._stream = stream
  376. flag = 0
  377. for line in self._stream.read_stream_header(100):
  378. line = decode(line)
  379. if line.startswith('--- '):
  380. flag |= 1
  381. elif line.startswith('+++ '):
  382. flag |= 2
  383. elif line.startswith('@@ ') or line.startswith('## '):
  384. flag |= 4
  385. if (flag & 7) == 7:
  386. self._type = 'udiff'
  387. break
  388. else:
  389. raise RuntimeError('unknown diff type')
  390. def get_diff_generator(self):
  391. try:
  392. return self._parse()
  393. except (AssertionError, IndexError):
  394. raise RuntimeError('invalid patch format')
  395. def _parse(self):
  396. """parse all diff lines, construct a list of Diff objects"""
  397. if self._type == 'udiff':
  398. difflet = Udiff(None, None, None, None)
  399. else:
  400. raise RuntimeError('unsupported diff format')
  401. diff = Diff([], None, None, [])
  402. headers = []
  403. for line in self._stream:
  404. line = decode(line)
  405. if difflet.is_old_path(line):
  406. if diff._old_path and diff._new_path and len(diff._hunks) > 0:
  407. # One diff constructed
  408. yield diff
  409. diff = Diff([], None, None, [])
  410. diff = Diff(headers, line, None, [])
  411. headers = []
  412. elif difflet.is_new_path(line):
  413. diff._new_path = line
  414. elif difflet.is_hunk_meta(line):
  415. hunk_meta = line
  416. old_addr, new_addr = difflet.parse_hunk_meta(hunk_meta)
  417. hunk = Hunk(headers, hunk_meta, old_addr, new_addr)
  418. headers = []
  419. diff._hunks.append(hunk)
  420. elif len(diff._hunks) > 0 and (difflet.is_old(line) or \
  421. difflet.is_new(line) or difflet.is_common(line)):
  422. diff._hunks[-1].append(difflet.parse_hunk_line(line))
  423. elif difflet.is_eof(line):
  424. # ignore
  425. pass
  426. else:
  427. # All other non-recognized lines are considered as headers or
  428. # hunk headers respectively
  429. #
  430. headers.append(line)
  431. if headers:
  432. raise RuntimeError('dangling header(s):\n%s' % ''.join(headers))
  433. # Validate and yield the last patch set
  434. assert diff._old_path is not None
  435. assert diff._new_path is not None
  436. assert len(diff._hunks) > 0
  437. assert len(diff._hunks[-1]._hunk_meta) > 0
  438. yield diff
  439. class DiffMarkup(object):
  440. def __init__(self, stream):
  441. self._diffs = DiffParser(stream).get_diff_generator()
  442. def markup(self, side_by_side=False, width=0):
  443. """Returns a generator"""
  444. if side_by_side:
  445. return self._markup_side_by_side(width)
  446. else:
  447. return self._markup_traditional()
  448. def _markup_traditional(self):
  449. for diff in self._diffs:
  450. for line in diff.markup_traditional():
  451. yield line
  452. def _markup_side_by_side(self, width):
  453. for diff in self._diffs:
  454. for line in diff.markup_side_by_side(width):
  455. yield line
  456. def markup_to_pager(stream, opts):
  457. markup = DiffMarkup(stream)
  458. color_diff = markup.markup(side_by_side=opts.side_by_side,
  459. width=opts.width)
  460. # args stolen fron git source: github.com/git/git/blob/master/pager.c
  461. pager = subprocess.Popen(['less', '-FRSX'],
  462. stdin=subprocess.PIPE, stdout=sys.stdout)
  463. try:
  464. for line in color_diff:
  465. pager.stdin.write(line.encode('utf-8'))
  466. except KeyboardInterrupt:
  467. pass
  468. pager.stdin.close()
  469. pager.wait()
  470. def check_command_status(arguments):
  471. """Return True if command returns 0."""
  472. try:
  473. return subprocess.call(
  474. arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
  475. except OSError:
  476. return False
  477. def revision_control_diff():
  478. """Return diff from revision control system."""
  479. for _, ops in VCS_INFO.items():
  480. if check_command_status(ops['probe']):
  481. return subprocess.Popen(ops['diff'], stdout=subprocess.PIPE).stdout
  482. def revision_control_log():
  483. """Return log from revision control system."""
  484. for _, ops in VCS_INFO.items():
  485. if check_command_status(ops['probe']):
  486. return subprocess.Popen(ops['log'], stdout=subprocess.PIPE).stdout
  487. def decode(line):
  488. """Decode UTF-8 if necessary."""
  489. try:
  490. return line.decode('utf-8')
  491. except AttributeError:
  492. return line
  493. def main():
  494. import optparse
  495. supported_vcs = sorted(VCS_INFO.keys())
  496. usage = """
  497. %prog [options]
  498. %prog [options] <patch>
  499. %prog [options] <file1> <file2>"""
  500. parser = optparse.OptionParser(usage=usage,
  501. description=META_INFO['description'],
  502. version='%%prog %s' % META_INFO['version'])
  503. parser.add_option('-s', '--side-by-side', action='store_true',
  504. help='show in side-by-side mode')
  505. parser.add_option('-w', '--width', type='int', default=80, metavar='N',
  506. help='set text width (side-by-side mode only), default is 80')
  507. parser.add_option('-l', '--log', action='store_true',
  508. help='show diff log from revision control')
  509. parser.add_option('-c', '--color', default='auto', metavar='X',
  510. help='colorize mode "auto" (default), "always", or "never"')
  511. opts, args = parser.parse_args()
  512. if opts.log:
  513. diff_hdl = revision_control_log()
  514. if not diff_hdl:
  515. sys.stderr.write(('*** Not in a supported workspace, supported '
  516. 'are: %s\n') % ', '.join(supported_vcs))
  517. return 1
  518. elif len(args) > 2:
  519. parser.print_help()
  520. return 1
  521. elif len(args) == 2:
  522. diff_hdl = subprocess.Popen(['diff', '-u', args[0], args[1]],
  523. stdout=subprocess.PIPE).stdout
  524. elif len(args) == 1:
  525. if IS_PY3:
  526. # Python3 needs the newline='' to keep '\r' (DOS format)
  527. diff_hdl = open(args[0], mode='rt', newline='')
  528. else:
  529. diff_hdl = open(args[0], mode='rt')
  530. elif sys.stdin.isatty():
  531. diff_hdl = revision_control_diff()
  532. if not diff_hdl:
  533. sys.stderr.write(('*** Not in a supported workspace, supported '
  534. 'are: %s\n\n') % ', '.join(supported_vcs))
  535. parser.print_help()
  536. return 1
  537. else:
  538. diff_hdl = sys.stdin
  539. stream = PatchStream(diff_hdl)
  540. # Don't let empty diff pass thru
  541. if stream.is_empty():
  542. return 0
  543. if opts.color == 'always' or (opts.color == 'auto' and sys.stdout.isatty()):
  544. try:
  545. markup_to_pager(stream, opts)
  546. except IOError:
  547. e = sys.exc_info()[1]
  548. if e.errno == errno.EPIPE:
  549. pass
  550. else:
  551. # pipe out stream untouched to make sure it is still a patch
  552. for line in stream:
  553. sys.stdout.write(decode(line))
  554. if diff_hdl is not sys.stdin:
  555. diff_hdl.close()
  556. return 0
  557. if __name__ == '__main__':
  558. sys.exit(main())
  559. # vim:set et sts=4 sw=4 tw=80: