cdiff.py 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. View colored, incremental diff in unified format or in side by side mode with
  5. auto pager. Requires Python (>= 2.5.0) and less.
  6. See demo at homepage: https://github.com/ymattw/cdiff
  7. """
  8. import sys
  9. if sys.hexversion < 0x02050000:
  10. sys.stderr.write("*** Requires python >= 2.5.0\n")
  11. sys.exit(1)
  12. IS_PY3 = sys.hexversion >= 0x03000000
  13. import os
  14. import re
  15. import subprocess
  16. import errno
  17. import difflib
  18. COLORS = {
  19. 'reset' : '\x1b[0m',
  20. 'underline' : '\x1b[4m',
  21. 'reverse' : '\x1b[7m',
  22. 'red' : '\x1b[31m',
  23. 'green' : '\x1b[32m',
  24. 'yellow' : '\x1b[33m',
  25. 'blue' : '\x1b[34m',
  26. 'magenta' : '\x1b[35m',
  27. 'cyan' : '\x1b[36m',
  28. 'lightred' : '\x1b[1;31m',
  29. 'lightgreen' : '\x1b[1;32m',
  30. 'lightyellow' : '\x1b[1;33m',
  31. 'lightblue' : '\x1b[1;34m',
  32. 'lightmagenta' : '\x1b[1;35m',
  33. 'lightcyan' : '\x1b[1;36m',
  34. }
  35. # Keys for checking and values for diffing.
  36. REVISION_CONTROL = (
  37. (['git', 'rev-parse'], ['git', 'diff']),
  38. (['svn', 'info'], ['svn', 'diff']),
  39. (['hg', 'summary'], ['hg', 'diff'])
  40. )
  41. def ansi_code(color):
  42. return COLORS.get(color, '')
  43. def colorize(text, start_color, end_color='reset'):
  44. return ansi_code(start_color) + text + ansi_code(end_color)
  45. class Hunk(object):
  46. def __init__(self, hunk_header, old_addr, new_addr):
  47. self._hunk_header = hunk_header
  48. self._old_addr = old_addr # tuple (start, offset)
  49. self._new_addr = new_addr # tuple (start, offset)
  50. self._hunk_list = [] # list of tuple (attr, line)
  51. def get_header(self):
  52. return self._hunk_header
  53. def get_old_addr(self):
  54. return self._old_addr
  55. def get_new_addr(self):
  56. return self._new_addr
  57. def append(self, attr, line):
  58. """attr: '-': old, '+': new, ' ': common"""
  59. self._hunk_list.append((attr, line))
  60. def mdiff(self):
  61. r"""The difflib._mdiff() function returns an interator which returns a
  62. tuple: (from line tuple, to line tuple, boolean flag)
  63. from/to line tuple -- (line num, line text)
  64. line num -- integer or None (to indicate a context separation)
  65. line text -- original line text with following markers inserted:
  66. '\0+' -- marks start of added text
  67. '\0-' -- marks start of deleted text
  68. '\0^' -- marks start of changed text
  69. '\1' -- marks end of added/deleted/changed text
  70. boolean flag -- None indicates context separation, True indicates
  71. either "from" or "to" line contains a change, otherwise False.
  72. """
  73. return difflib._mdiff(self._get_old_text(), self._get_new_text())
  74. def _get_old_text(self):
  75. out = []
  76. for (attr, line) in self._hunk_list:
  77. if attr != '+':
  78. out.append(line)
  79. return out
  80. def _get_new_text(self):
  81. out = []
  82. for (attr, line) in self._hunk_list:
  83. if attr != '-':
  84. out.append(line)
  85. return out
  86. def __iter__(self):
  87. for hunk_line in self._hunk_list:
  88. yield hunk_line
  89. class Diff(object):
  90. def __init__(self, headers, old_path, new_path, hunks):
  91. self._headers = headers
  92. self._old_path = old_path
  93. self._new_path = new_path
  94. self._hunks = hunks
  95. def markup_traditional(self):
  96. """Returns a generator"""
  97. for line in self._headers:
  98. yield self._markup_header(line)
  99. yield self._markup_old_path(self._old_path)
  100. yield self._markup_new_path(self._new_path)
  101. for hunk in self._hunks:
  102. yield self._markup_hunk_header(hunk.get_header())
  103. for old, new, changed in hunk.mdiff():
  104. if changed:
  105. if not old[0]:
  106. # The '+' char after \x00 is kept
  107. # DEBUG: yield 'NEW: %s %s\n' % (old, new)
  108. line = new[1].strip('\x00\x01')
  109. yield self._markup_new(line)
  110. elif not new[0]:
  111. # The '-' char after \x00 is kept
  112. # DEBUG: yield 'OLD: %s %s\n' % (old, new)
  113. line = old[1].strip('\x00\x01')
  114. yield self._markup_old(line)
  115. else:
  116. # DEBUG: yield 'CHG: %s %s\n' % (old, new)
  117. yield self._markup_old('-') + \
  118. self._markup_old_mix(old[1])
  119. yield self._markup_new('+') + \
  120. self._markup_new_mix(new[1])
  121. else:
  122. yield self._markup_common(' ' + old[1])
  123. def markup_side_by_side(self, width):
  124. """Returns a generator"""
  125. def _normalize(line):
  126. return line.replace('\t', ' '*8).replace('\n', '').replace('\r', '')
  127. def _fit_width(markup, width, pad=False):
  128. """str len does not count correctly if left column contains ansi
  129. color code. Only left side need to set `pad`
  130. """
  131. out = []
  132. count = 0
  133. ansi_color_regex = r'\x1b\[(1;)?\d{1,2}m'
  134. patt = re.compile('^(%s)(.*)' % ansi_color_regex)
  135. repl = re.compile(ansi_color_regex)
  136. while markup and count < width:
  137. if patt.match(markup):
  138. out.append(patt.sub(r'\1', markup))
  139. markup = patt.sub(r'\3', markup)
  140. else:
  141. # FIXME: utf-8 wchar broken here
  142. out.append(markup[0])
  143. markup = markup[1:]
  144. count += 1
  145. if count == width and repl.sub('', markup):
  146. # stripped: output fulfil and still have ascii in markup
  147. out[-1] = ansi_code('reset') + colorize('>', 'lightmagenta')
  148. elif count < width and pad:
  149. pad_len = width - count
  150. out.append('%*s' % (pad_len, ''))
  151. return ''.join(out)
  152. # Setup line width and number width
  153. if width <= 0:
  154. width = 80
  155. (start, offset) = self._hunks[-1].get_old_addr()
  156. max1 = start + offset - 1
  157. (start, offset) = self._hunks[-1].get_new_addr()
  158. max2 = start + offset - 1
  159. num_width = max(len(str(max1)), len(str(max2)))
  160. left_num_fmt = colorize('%%(left_num)%ds' % num_width, 'yellow')
  161. right_num_fmt = colorize('%%(right_num)%ds' % num_width, 'yellow')
  162. line_fmt = left_num_fmt + ' %(left)s ' + ansi_code('reset') + \
  163. right_num_fmt + ' %(right)s\n'
  164. # yield header, old path and new path
  165. for line in self._headers:
  166. yield self._markup_header(line)
  167. yield self._markup_old_path(self._old_path)
  168. yield self._markup_new_path(self._new_path)
  169. # yield hunks
  170. for hunk in self._hunks:
  171. yield self._markup_hunk_header(hunk.get_header())
  172. for old, new, changed in hunk.mdiff():
  173. if old[0]:
  174. left_num = str(hunk.get_old_addr()[0] + int(old[0]) - 1)
  175. else:
  176. left_num = ' '
  177. if new[0]:
  178. right_num = str(hunk.get_new_addr()[0] + int(new[0]) - 1)
  179. else:
  180. right_num = ' '
  181. left = _normalize(old[1])
  182. right = _normalize(new[1])
  183. if changed:
  184. if not old[0]:
  185. left = '%*s' % (width, ' ')
  186. right = right.lstrip('\x00+').rstrip('\x01')
  187. right = _fit_width(self._markup_new(right), width)
  188. elif not new[0]:
  189. left = left.lstrip('\x00-').rstrip('\x01')
  190. left = _fit_width(self._markup_old(left), width)
  191. right = ''
  192. else:
  193. left = _fit_width(self._markup_old_mix(left), width, 1)
  194. right = _fit_width(self._markup_new_mix(right), width)
  195. else:
  196. left = _fit_width(self._markup_common(left), width, 1)
  197. right = _fit_width(self._markup_common(right), width)
  198. yield line_fmt % {
  199. 'left_num': left_num,
  200. 'left': left,
  201. 'right_num': right_num,
  202. 'right': right
  203. }
  204. def _markup_header(self, line):
  205. return colorize(line, 'cyan')
  206. def _markup_old_path(self, line):
  207. return colorize(line, 'yellow')
  208. def _markup_new_path(self, line):
  209. return colorize(line, 'yellow')
  210. def _markup_hunk_header(self, line):
  211. return colorize(line, 'lightblue')
  212. def _markup_common(self, line):
  213. return colorize(line, 'reset')
  214. def _markup_old(self, line):
  215. return colorize(line, 'lightred')
  216. def _markup_new(self, line):
  217. return colorize(line, 'lightgreen')
  218. def _markup_mix(self, line, base_color):
  219. del_code = ansi_code('reverse') + ansi_code(base_color)
  220. add_code = ansi_code('reverse') + ansi_code(base_color)
  221. chg_code = ansi_code('underline') + ansi_code(base_color)
  222. rst_code = ansi_code('reset') + ansi_code(base_color)
  223. line = line.replace('\x00-', del_code)
  224. line = line.replace('\x00+', add_code)
  225. line = line.replace('\x00^', chg_code)
  226. line = line.replace('\x01', rst_code)
  227. return colorize(line, base_color)
  228. def _markup_old_mix(self, line):
  229. return self._markup_mix(line, 'red')
  230. def _markup_new_mix(self, line):
  231. return self._markup_mix(line, 'green')
  232. class Udiff(Diff):
  233. @staticmethod
  234. def is_old_path(line):
  235. return line.startswith('--- ')
  236. @staticmethod
  237. def is_new_path(line):
  238. return line.startswith('+++ ')
  239. @staticmethod
  240. def is_hunk_header(line):
  241. return line.startswith('@@ -')
  242. @staticmethod
  243. def is_old(line):
  244. return line.startswith('-') and not Udiff.is_old_path(line)
  245. @staticmethod
  246. def is_new(line):
  247. return line.startswith('+') and not Udiff.is_new_path(line)
  248. @staticmethod
  249. def is_common(line):
  250. return line.startswith(' ')
  251. @staticmethod
  252. def is_eof(line):
  253. # \ No newline at end of file
  254. return line.startswith('\\')
  255. @staticmethod
  256. def is_header(line):
  257. return re.match(r'^[^+@\\ -]', line)
  258. class DiffParser(object):
  259. def __init__(self, stream):
  260. for line in stream[:20]:
  261. if line.startswith('+++ '):
  262. self._type = 'udiff'
  263. break
  264. else:
  265. raise RuntimeError('unknown diff type')
  266. try:
  267. self._diffs = self._parse(stream)
  268. except (AssertionError, IndexError):
  269. raise RuntimeError('invalid patch format')
  270. def get_diffs(self):
  271. return self._diffs
  272. def _parse(self, stream):
  273. if self._type == 'udiff':
  274. return self._parse_udiff(stream)
  275. else:
  276. raise RuntimeError('unsupported diff format')
  277. def _parse_udiff(self, stream):
  278. """parse all diff lines here, construct a list of Udiff objects"""
  279. out_diffs = []
  280. headers = []
  281. old_path = None
  282. new_path = None
  283. hunks = []
  284. hunk = None
  285. while stream:
  286. # 'common' line occurs before 'old_path' is considered as header
  287. # too, this happens with `git log -p` and `git show <commit>`
  288. #
  289. if Udiff.is_header(stream[0]) or \
  290. (Udiff.is_common(stream[0]) and old_path is None):
  291. if headers and old_path:
  292. # Encounter a new header
  293. assert new_path is not None
  294. assert hunk is not None
  295. hunks.append(hunk)
  296. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  297. headers = []
  298. old_path = None
  299. new_path = None
  300. hunks = []
  301. hunk = None
  302. else:
  303. headers.append(stream.pop(0))
  304. elif Udiff.is_old_path(stream[0]):
  305. if old_path:
  306. # Encounter a new patch set
  307. assert new_path is not None
  308. assert hunk is not None
  309. hunks.append(hunk)
  310. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  311. headers = []
  312. old_path = None
  313. new_path = None
  314. hunks = []
  315. hunk = None
  316. else:
  317. old_path = stream.pop(0)
  318. elif Udiff.is_new_path(stream[0]):
  319. assert old_path is not None
  320. assert new_path is None
  321. new_path = stream.pop(0)
  322. elif Udiff.is_hunk_header(stream[0]):
  323. assert old_path is not None
  324. assert new_path is not None
  325. if hunk:
  326. # Encounter a new hunk header
  327. hunks.append(hunk)
  328. hunk = None
  329. else:
  330. # @@ -3,7 +3,6 @@
  331. hunk_header = stream.pop(0)
  332. a = hunk_header.split()[1].split(',') # -3 7
  333. if len(a) > 1:
  334. old_addr = (int(a[0][1:]), int(a[1]))
  335. else:
  336. # @@ -1 +1,2 @@
  337. old_addr = (int(a[0][1:]), 0)
  338. b = hunk_header.split()[2].split(',') # +3 6
  339. if len(b) > 1:
  340. new_addr = (int(b[0][1:]), int(b[1]))
  341. else:
  342. # @@ -0,0 +1 @@
  343. new_addr = (int(b[0][1:]), 0)
  344. hunk = Hunk(hunk_header, old_addr, new_addr)
  345. elif Udiff.is_old(stream[0]) or Udiff.is_new(stream[0]) or \
  346. Udiff.is_common(stream[0]):
  347. assert old_path is not None
  348. assert new_path is not None
  349. assert hunk is not None
  350. hunk_line = stream.pop(0)
  351. hunk.append(hunk_line[0], hunk_line[1:])
  352. elif Udiff.is_eof(stream[0]):
  353. # ignore
  354. stream.pop(0)
  355. else:
  356. raise RuntimeError('unknown patch format: %s' % stream[0])
  357. # The last patch
  358. if hunk:
  359. hunks.append(hunk)
  360. if old_path:
  361. if new_path:
  362. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  363. else:
  364. raise RuntimeError('unknown patch format after "%s"' % old_path)
  365. elif headers:
  366. raise RuntimeError('unknown patch format: %s' % \
  367. ('\n'.join(headers)))
  368. return out_diffs
  369. class DiffMarkup(object):
  370. def __init__(self, stream):
  371. self._diffs = DiffParser(stream).get_diffs()
  372. def markup(self, side_by_side=False, width=0):
  373. """Returns a generator"""
  374. if side_by_side:
  375. return self._markup_side_by_side(width)
  376. else:
  377. return self._markup_traditional()
  378. def _markup_traditional(self):
  379. for diff in self._diffs:
  380. for line in diff.markup_traditional():
  381. yield line
  382. def _markup_side_by_side(self, width):
  383. for diff in self._diffs:
  384. for line in diff.markup_side_by_side(width):
  385. yield line
  386. def markup_to_pager(stream, opts):
  387. markup = DiffMarkup(stream)
  388. color_diff = markup.markup(side_by_side=opts.side_by_side,
  389. width=opts.width)
  390. # args stolen fron git source: github.com/git/git/blob/master/pager.c
  391. pager = subprocess.Popen(['less', '-FRSXK'],
  392. stdin=subprocess.PIPE, stdout=sys.stdout)
  393. for line in color_diff:
  394. pager.stdin.write(line.encode('utf-8'))
  395. pager.stdin.close()
  396. pager.wait()
  397. def check_command_status(arguments):
  398. """Return True if command returns 0."""
  399. try:
  400. return subprocess.call(
  401. arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
  402. except OSError:
  403. return False
  404. def revision_control_diff():
  405. """Return diff from revision control system."""
  406. for check, diff in REVISION_CONTROL:
  407. if check_command_status(check):
  408. return subprocess.Popen(diff, stdout=subprocess.PIPE).stdout
  409. def decode(line):
  410. """Decode UTF-8 if necessary."""
  411. try:
  412. return line.decode('utf-8')
  413. except AttributeError:
  414. return line
  415. def main():
  416. import optparse
  417. supported_vcs = [check[0] for check, _ in REVISION_CONTROL]
  418. usage = '%s [options] [diff]' % os.path.basename(sys.argv[0])
  419. description= ('View colored, incremental diff in unified format or '
  420. 'side by side with auto pager. Read diff from diff '
  421. '(patch) file if given, or stdin if redirected, or '
  422. 'diff produced by revision tool if in a %s workspace') \
  423. % '/'.join(supported_vcs)
  424. parser = optparse.OptionParser(usage=usage, description=description)
  425. parser.add_option('-s', '--side-by-side', action='store_true',
  426. help=('show in side-by-side mode'))
  427. parser.add_option('-w', '--width', type='int', default=80, metavar='N',
  428. help='set text width (side-by-side mode only), default is 80')
  429. opts, args = parser.parse_args()
  430. if len(args) >= 1:
  431. if IS_PY3:
  432. # Python3 needs the newline='' to keep '\r' (DOS format)
  433. diff_hdl = open(args[0], mode='rt', newline='')
  434. else:
  435. diff_hdl = open(args[0], mode='rt')
  436. elif sys.stdin.isatty():
  437. diff_hdl = revision_control_diff()
  438. if not diff_hdl:
  439. sys.stderr.write(('*** Not in a supported workspace, supported '
  440. 'are: %s\n\n') % ', '.join(supported_vcs))
  441. parser.print_help()
  442. return 1
  443. else:
  444. diff_hdl = sys.stdin
  445. # FIXME: can't use generator for now due to current implementation in parser
  446. stream = [decode(line) for line in diff_hdl.readlines()]
  447. # Don't let empty diff pass thru
  448. if not stream:
  449. return 0
  450. if diff_hdl is not sys.stdin:
  451. diff_hdl.close()
  452. if sys.stdout.isatty():
  453. try:
  454. markup_to_pager(stream, opts)
  455. except IOError:
  456. e = sys.exc_info()[1]
  457. if e.errno == errno.EPIPE:
  458. pass
  459. else:
  460. # pipe out stream untouched to make sure it is still a patch
  461. sys.stdout.write(''.join(stream))
  462. return 0
  463. if __name__ == '__main__':
  464. sys.exit(main())
  465. # vim:set et sts=4 sw=4 tw=80: