cdiff.py 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Terminal-based tool to view **colored**, **incremental** diffs in a
*git/svn/hg* workspace, or a diff from a given file or stdin, with **side by
side** and **auto pager** support. Requires python (>= 2.5.0) and ``less``.

AUTHOR  : Matthew Wang <mattwyl(@)gmail(.)com>
LICENSE : BSD-3
HOMEPAGE: https://github.com/ymattw/cdiff
"""
  11. import sys
  12. if sys.hexversion < 0x02050000:
  13. sys.stderr.write("*** Requires python >= 2.5.0\n")
  14. sys.exit(1)
  15. IS_PY3 = sys.hexversion >= 0x03000000
  16. import os
  17. import re
  18. import subprocess
  19. import errno
  20. import difflib
# Remember to update ``CHANGES`` whenever this version string changes.
__version__ = '0.1'
  23. COLORS = {
  24. 'reset' : '\x1b[0m',
  25. 'underline' : '\x1b[4m',
  26. 'reverse' : '\x1b[7m',
  27. 'red' : '\x1b[31m',
  28. 'green' : '\x1b[32m',
  29. 'yellow' : '\x1b[33m',
  30. 'blue' : '\x1b[34m',
  31. 'magenta' : '\x1b[35m',
  32. 'cyan' : '\x1b[36m',
  33. 'lightred' : '\x1b[1;31m',
  34. 'lightgreen' : '\x1b[1;32m',
  35. 'lightyellow' : '\x1b[1;33m',
  36. 'lightblue' : '\x1b[1;34m',
  37. 'lightmagenta' : '\x1b[1;35m',
  38. 'lightcyan' : '\x1b[1;36m',
  39. }
  40. # Keys for checking and values for diffing.
  41. REVISION_CONTROL = (
  42. (['git', 'rev-parse'], ['git', 'diff']),
  43. (['svn', 'info'], ['svn', 'diff']),
  44. (['hg', 'summary'], ['hg', 'diff'])
  45. )
  46. def ansi_code(color):
  47. return COLORS.get(color, '')
  48. def colorize(text, start_color, end_color='reset'):
  49. return ansi_code(start_color) + text + ansi_code(end_color)
  50. class Hunk(object):
  51. def __init__(self, hunk_header, old_addr, new_addr):
  52. self._hunk_header = hunk_header
  53. self._old_addr = old_addr # tuple (start, offset)
  54. self._new_addr = new_addr # tuple (start, offset)
  55. self._hunk_list = [] # list of tuple (attr, line)
  56. def get_header(self):
  57. return self._hunk_header
  58. def get_old_addr(self):
  59. return self._old_addr
  60. def get_new_addr(self):
  61. return self._new_addr
  62. def append(self, attr, line):
  63. """attr: '-': old, '+': new, ' ': common"""
  64. self._hunk_list.append((attr, line))
  65. def mdiff(self):
  66. r"""The difflib._mdiff() function returns an interator which returns a
  67. tuple: (from line tuple, to line tuple, boolean flag)
  68. from/to line tuple -- (line num, line text)
  69. line num -- integer or None (to indicate a context separation)
  70. line text -- original line text with following markers inserted:
  71. '\0+' -- marks start of added text
  72. '\0-' -- marks start of deleted text
  73. '\0^' -- marks start of changed text
  74. '\1' -- marks end of added/deleted/changed text
  75. boolean flag -- None indicates context separation, True indicates
  76. either "from" or "to" line contains a change, otherwise False.
  77. """
  78. return difflib._mdiff(self._get_old_text(), self._get_new_text())
  79. def _get_old_text(self):
  80. out = []
  81. for (attr, line) in self._hunk_list:
  82. if attr != '+':
  83. out.append(line)
  84. return out
  85. def _get_new_text(self):
  86. out = []
  87. for (attr, line) in self._hunk_list:
  88. if attr != '-':
  89. out.append(line)
  90. return out
  91. def __iter__(self):
  92. for hunk_line in self._hunk_list:
  93. yield hunk_line
  94. class Diff(object):
  95. def __init__(self, headers, old_path, new_path, hunks):
  96. self._headers = headers
  97. self._old_path = old_path
  98. self._new_path = new_path
  99. self._hunks = hunks
  100. # Follow detector and the parse_hunk_header() are suppose to be overwritten
  101. # by derived class
  102. #
  103. def is_old_path(self, line):
  104. return False
  105. def is_new_path(self, line):
  106. return False
  107. def is_hunk_header(self, line):
  108. return False
  109. def parse_hunk_header(self, line):
  110. """Returns a 2-eliment tuple, each of them is a tuple in form of (start,
  111. offset)"""
  112. return False
  113. def is_old(self, line):
  114. return False
  115. def is_new(self, line):
  116. return False
  117. def is_common(self, line):
  118. return False
  119. def is_eof(self, line):
  120. return False
  121. def is_header(self, line):
  122. return False
  123. def markup_traditional(self):
  124. """Returns a generator"""
  125. for line in self._headers:
  126. yield self._markup_header(line)
  127. yield self._markup_old_path(self._old_path)
  128. yield self._markup_new_path(self._new_path)
  129. for hunk in self._hunks:
  130. yield self._markup_hunk_header(hunk.get_header())
  131. for old, new, changed in hunk.mdiff():
  132. if changed:
  133. if not old[0]:
  134. # The '+' char after \x00 is kept
  135. # DEBUG: yield 'NEW: %s %s\n' % (old, new)
  136. line = new[1].strip('\x00\x01')
  137. yield self._markup_new(line)
  138. elif not new[0]:
  139. # The '-' char after \x00 is kept
  140. # DEBUG: yield 'OLD: %s %s\n' % (old, new)
  141. line = old[1].strip('\x00\x01')
  142. yield self._markup_old(line)
  143. else:
  144. # DEBUG: yield 'CHG: %s %s\n' % (old, new)
  145. yield self._markup_old('-') + \
  146. self._markup_old_mix(old[1])
  147. yield self._markup_new('+') + \
  148. self._markup_new_mix(new[1])
  149. else:
  150. yield self._markup_common(' ' + old[1])
  151. def markup_side_by_side(self, width):
  152. """Returns a generator"""
  153. def _normalize(line):
  154. return line.replace('\t', ' '*8).replace('\n', '').replace('\r', '')
  155. def _fit_width(markup, width, pad=False):
  156. """str len does not count correctly if left column contains ansi
  157. color code. Only left side need to set `pad`
  158. """
  159. out = []
  160. count = 0
  161. ansi_color_regex = r'\x1b\[(1;)?\d{1,2}m'
  162. patt = re.compile('^(%s)(.*)' % ansi_color_regex)
  163. repl = re.compile(ansi_color_regex)
  164. while markup and count < width:
  165. if patt.match(markup):
  166. out.append(patt.sub(r'\1', markup))
  167. markup = patt.sub(r'\3', markup)
  168. else:
  169. # FIXME: utf-8 wchar might break the rule here, e.g.
  170. # u'\u554a' takes double width of a single letter, also this
  171. # depends on your terminal font. I guess audience of this
  172. # tool never put that kind of symbol in their code :-)
  173. #
  174. out.append(markup[0])
  175. count += 1
  176. markup = markup[1:]
  177. if count == width and repl.sub('', markup):
  178. # stripped: output fulfil and still have ascii in markup
  179. out[-1] = ansi_code('reset') + colorize('>', 'lightmagenta')
  180. elif count < width and pad:
  181. pad_len = width - count
  182. out.append('%*s' % (pad_len, ''))
  183. return ''.join(out)
  184. # Setup line width and number width
  185. if width <= 0:
  186. width = 80
  187. (start, offset) = self._hunks[-1].get_old_addr()
  188. max1 = start + offset - 1
  189. (start, offset) = self._hunks[-1].get_new_addr()
  190. max2 = start + offset - 1
  191. num_width = max(len(str(max1)), len(str(max2)))
  192. left_num_fmt = colorize('%%(left_num)%ds' % num_width, 'yellow')
  193. right_num_fmt = colorize('%%(right_num)%ds' % num_width, 'yellow')
  194. line_fmt = left_num_fmt + ' %(left)s ' + ansi_code('reset') + \
  195. right_num_fmt + ' %(right)s\n'
  196. # yield header, old path and new path
  197. for line in self._headers:
  198. yield self._markup_header(line)
  199. yield self._markup_old_path(self._old_path)
  200. yield self._markup_new_path(self._new_path)
  201. # yield hunks
  202. for hunk in self._hunks:
  203. yield self._markup_hunk_header(hunk.get_header())
  204. for old, new, changed in hunk.mdiff():
  205. if old[0]:
  206. left_num = str(hunk.get_old_addr()[0] + int(old[0]) - 1)
  207. else:
  208. left_num = ' '
  209. if new[0]:
  210. right_num = str(hunk.get_new_addr()[0] + int(new[0]) - 1)
  211. else:
  212. right_num = ' '
  213. left = _normalize(old[1])
  214. right = _normalize(new[1])
  215. if changed:
  216. if not old[0]:
  217. left = '%*s' % (width, ' ')
  218. right = right.lstrip('\x00+').rstrip('\x01')
  219. right = _fit_width(self._markup_new(right), width)
  220. elif not new[0]:
  221. left = left.lstrip('\x00-').rstrip('\x01')
  222. left = _fit_width(self._markup_old(left), width)
  223. right = ''
  224. else:
  225. left = _fit_width(self._markup_old_mix(left), width, 1)
  226. right = _fit_width(self._markup_new_mix(right), width)
  227. else:
  228. left = _fit_width(self._markup_common(left), width, 1)
  229. right = _fit_width(self._markup_common(right), width)
  230. yield line_fmt % {
  231. 'left_num': left_num,
  232. 'left': left,
  233. 'right_num': right_num,
  234. 'right': right
  235. }
  236. def _markup_header(self, line):
  237. return colorize(line, 'cyan')
  238. def _markup_old_path(self, line):
  239. return colorize(line, 'yellow')
  240. def _markup_new_path(self, line):
  241. return colorize(line, 'yellow')
  242. def _markup_hunk_header(self, line):
  243. return colorize(line, 'lightblue')
  244. def _markup_common(self, line):
  245. return colorize(line, 'reset')
  246. def _markup_old(self, line):
  247. return colorize(line, 'lightred')
  248. def _markup_new(self, line):
  249. return colorize(line, 'lightgreen')
  250. def _markup_mix(self, line, base_color):
  251. del_code = ansi_code('reverse') + ansi_code(base_color)
  252. add_code = ansi_code('reverse') + ansi_code(base_color)
  253. chg_code = ansi_code('underline') + ansi_code(base_color)
  254. rst_code = ansi_code('reset') + ansi_code(base_color)
  255. line = line.replace('\x00-', del_code)
  256. line = line.replace('\x00+', add_code)
  257. line = line.replace('\x00^', chg_code)
  258. line = line.replace('\x01', rst_code)
  259. return colorize(line, base_color)
  260. def _markup_old_mix(self, line):
  261. return self._markup_mix(line, 'red')
  262. def _markup_new_mix(self, line):
  263. return self._markup_mix(line, 'green')
  264. class Udiff(Diff):
  265. def is_old_path(self, line):
  266. return line.startswith('--- ')
  267. def is_new_path(self, line):
  268. return line.startswith('+++ ')
  269. def is_hunk_header(self, line):
  270. return line.startswith('@@ -')
  271. def parse_hunk_header(self, hunk_header):
  272. # @@ -3,7 +3,6 @@
  273. a = hunk_header.split()[1].split(',') # -3 7
  274. if len(a) > 1:
  275. old_addr = (int(a[0][1:]), int(a[1]))
  276. else:
  277. # @@ -1 +1,2 @@
  278. old_addr = (int(a[0][1:]), 0)
  279. b = hunk_header.split()[2].split(',') # +3 6
  280. if len(b) > 1:
  281. new_addr = (int(b[0][1:]), int(b[1]))
  282. else:
  283. # @@ -0,0 +1 @@
  284. new_addr = (int(b[0][1:]), 0)
  285. return (old_addr, new_addr)
  286. def is_old(self, line):
  287. return line.startswith('-') and not self.is_old_path(line)
  288. def is_new(self, line):
  289. return line.startswith('+') and not self.is_new_path(line)
  290. def is_common(self, line):
  291. return line.startswith(' ')
  292. def is_eof(self, line):
  293. # \ No newline at end of file
  294. return line.startswith('\\')
  295. def is_header(self, line):
  296. return re.match(r'^[^+@\\ -]', line)
  297. class DiffParser(object):
  298. def __init__(self, stream):
  299. """Detect Udiff with 3 conditions"""
  300. flag = 0
  301. for line in stream[:20]:
  302. if line.startswith('--- '):
  303. flag |= 1
  304. elif line.startswith('+++ '):
  305. flag |= 2
  306. elif line.startswith('@@ '):
  307. flag |= 4
  308. if flag & 7:
  309. self._type = 'udiff'
  310. else:
  311. raise RuntimeError('unknown diff type')
  312. try:
  313. self._diffs = self._parse(stream)
  314. except (AssertionError, IndexError):
  315. raise RuntimeError('invalid patch format')
  316. def get_diffs(self):
  317. return self._diffs
  318. def _parse(self, stream):
  319. """parse all diff lines, construct a list of Diff objects"""
  320. if self._type == 'udiff':
  321. difflet = Udiff(None, None, None, None)
  322. else:
  323. raise RuntimeError('unsupported diff format')
  324. out_diffs = []
  325. headers = []
  326. old_path = None
  327. new_path = None
  328. hunks = []
  329. hunk = None
  330. while stream:
  331. # 'common' line occurs before 'old_path' is considered as header
  332. # too, this happens with `git log -p` and `git show <commit>`
  333. #
  334. if difflet.is_header(stream[0]) or \
  335. (difflet.is_common(stream[0]) and old_path is None):
  336. if headers and old_path:
  337. # Encounter a new header
  338. assert new_path is not None
  339. assert hunk is not None
  340. hunks.append(hunk)
  341. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  342. headers = []
  343. old_path = None
  344. new_path = None
  345. hunks = []
  346. hunk = None
  347. else:
  348. headers.append(stream.pop(0))
  349. elif difflet.is_old_path(stream[0]):
  350. if old_path:
  351. # Encounter a new patch set
  352. assert new_path is not None
  353. assert hunk is not None
  354. hunks.append(hunk)
  355. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  356. headers = []
  357. old_path = None
  358. new_path = None
  359. hunks = []
  360. hunk = None
  361. else:
  362. old_path = stream.pop(0)
  363. elif difflet.is_new_path(stream[0]):
  364. assert old_path is not None
  365. assert new_path is None
  366. new_path = stream.pop(0)
  367. elif difflet.is_hunk_header(stream[0]):
  368. assert old_path is not None
  369. assert new_path is not None
  370. if hunk:
  371. # Encounter a new hunk header
  372. hunks.append(hunk)
  373. hunk = None
  374. else:
  375. hunk_header = stream.pop(0)
  376. old_addr, new_addr = difflet.parse_hunk_header(hunk_header)
  377. hunk = Hunk(hunk_header, old_addr, new_addr)
  378. elif difflet.is_old(stream[0]) or difflet.is_new(stream[0]) or \
  379. difflet.is_common(stream[0]):
  380. assert old_path is not None
  381. assert new_path is not None
  382. assert hunk is not None
  383. hunk_line = stream.pop(0)
  384. hunk.append(hunk_line[0], hunk_line[1:])
  385. elif difflet.is_eof(stream[0]):
  386. # ignore
  387. stream.pop(0)
  388. else:
  389. raise RuntimeError('unknown patch format: %s' % stream[0])
  390. # The last patch
  391. if hunk:
  392. hunks.append(hunk)
  393. if old_path:
  394. if new_path:
  395. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  396. else:
  397. raise RuntimeError('unknown patch format after "%s"' % old_path)
  398. elif headers:
  399. raise RuntimeError('unknown patch format: %s' % \
  400. ('\n'.join(headers)))
  401. return out_diffs
  402. class DiffMarkup(object):
  403. def __init__(self, stream):
  404. self._diffs = DiffParser(stream).get_diffs()
  405. def markup(self, side_by_side=False, width=0):
  406. """Returns a generator"""
  407. if side_by_side:
  408. return self._markup_side_by_side(width)
  409. else:
  410. return self._markup_traditional()
  411. def _markup_traditional(self):
  412. for diff in self._diffs:
  413. for line in diff.markup_traditional():
  414. yield line
  415. def _markup_side_by_side(self, width):
  416. for diff in self._diffs:
  417. for line in diff.markup_side_by_side(width):
  418. yield line
  419. def markup_to_pager(stream, opts):
  420. markup = DiffMarkup(stream)
  421. color_diff = markup.markup(side_by_side=opts.side_by_side,
  422. width=opts.width)
  423. # args stolen fron git source: github.com/git/git/blob/master/pager.c
  424. pager = subprocess.Popen(['less', '-FRSXK'],
  425. stdin=subprocess.PIPE, stdout=sys.stdout)
  426. for line in color_diff:
  427. pager.stdin.write(line.encode('utf-8'))
  428. pager.stdin.close()
  429. pager.wait()
  430. def check_command_status(arguments):
  431. """Return True if command returns 0."""
  432. try:
  433. return subprocess.call(
  434. arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
  435. except OSError:
  436. return False
  437. def revision_control_diff():
  438. """Return diff from revision control system."""
  439. for check, diff in REVISION_CONTROL:
  440. if check_command_status(check):
  441. return subprocess.Popen(diff, stdout=subprocess.PIPE).stdout
  442. def decode(line):
  443. """Decode UTF-8 if necessary."""
  444. try:
  445. return line.decode('utf-8')
  446. except AttributeError:
  447. return line
  448. def main():
  449. import optparse
  450. supported_vcs = [check[0] for check, _ in REVISION_CONTROL]
  451. usage = '%prog [options] [diff]'
  452. description= ('View colored, incremental diff in %s workspace, or diff '
  453. 'from given file or stdin, with side by side and auto '
  454. 'pager support') % '/'.join(supported_vcs)
  455. parser = optparse.OptionParser(usage=usage, description=description,
  456. version='%%prog %s' % __version__)
  457. parser.add_option('-s', '--side-by-side', action='store_true',
  458. help=('show in side-by-side mode'))
  459. parser.add_option('-w', '--width', type='int', default=80, metavar='N',
  460. help='set text width (side-by-side mode only), default is 80')
  461. opts, args = parser.parse_args()
  462. if len(args) >= 1:
  463. if IS_PY3:
  464. # Python3 needs the newline='' to keep '\r' (DOS format)
  465. diff_hdl = open(args[0], mode='rt', newline='')
  466. else:
  467. diff_hdl = open(args[0], mode='rt')
  468. elif sys.stdin.isatty():
  469. diff_hdl = revision_control_diff()
  470. if not diff_hdl:
  471. sys.stderr.write(('*** Not in a supported workspace, supported '
  472. 'are: %s\n\n') % ', '.join(supported_vcs))
  473. parser.print_help()
  474. return 1
  475. else:
  476. diff_hdl = sys.stdin
  477. # FIXME: can't use generator for now due to current implementation in parser
  478. stream = [decode(line) for line in diff_hdl.readlines()]
  479. # Don't let empty diff pass thru
  480. if not stream:
  481. return 0
  482. if diff_hdl is not sys.stdin:
  483. diff_hdl.close()
  484. if sys.stdout.isatty():
  485. try:
  486. markup_to_pager(stream, opts)
  487. except IOError:
  488. e = sys.exc_info()[1]
  489. if e.errno == errno.EPIPE:
  490. pass
  491. else:
  492. # pipe out stream untouched to make sure it is still a patch
  493. sys.stdout.write(''.join(stream))
  494. return 0
# When run as a script, call main() and use its return value as the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())
  497. # vim:set et sts=4 sw=4 tw=80: