cdiff.py 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. View colored diff in unified-diff format or side-by-side mode with auto pager.
  5. Requires Python (>= 2.5.0) and less.
  6. See demo at homepage: https://github.com/ymattw/cdiff
  7. """
  8. import sys
  9. if sys.hexversion < 0x02050000:
  10. sys.stderr.write("ERROR: requires python >= 2.5.0\n")
  11. sys.exit(1)
  12. import os
  13. import re
  14. import errno
  15. import difflib
  16. COLORS = {
  17. 'reset' : '\x1b[0m',
  18. 'underline' : '\x1b[4m',
  19. 'reverse' : '\x1b[7m',
  20. 'red' : '\x1b[31m',
  21. 'green' : '\x1b[32m',
  22. 'yellow' : '\x1b[33m',
  23. 'blue' : '\x1b[34m',
  24. 'magenta' : '\x1b[35m',
  25. 'cyan' : '\x1b[36m',
  26. 'lightred' : '\x1b[1;31m',
  27. 'lightgreen' : '\x1b[1;32m',
  28. 'lightyellow' : '\x1b[1;33m',
  29. 'lightblue' : '\x1b[1;34m',
  30. 'lightmagenta' : '\x1b[1;35m',
  31. 'lightcyan' : '\x1b[1;36m',
  32. }
  33. def ansi_code(color):
  34. return COLORS.get(color, '')
  35. def colorize(text, start_color, end_color='reset'):
  36. return ansi_code(start_color) + text + ansi_code(end_color)
  37. class Hunk(object):
  38. def __init__(self, hunk_header, old_addr, new_addr):
  39. self._hunk_header = hunk_header
  40. self._old_addr = old_addr # tuple (start, offset)
  41. self._new_addr = new_addr # tuple group (start, offset)
  42. self._hunk_list = [] # list of tuple (attr, line)
  43. def get_header(self):
  44. return self._hunk_header
  45. def get_old_addr(self):
  46. return self._old_addr
  47. def get_new_addr(self):
  48. return self._new_addr
  49. def append(self, attr, line):
  50. """attr: '-': old, '+': new, ' ': common"""
  51. self._hunk_list.append((attr, line))
  52. def mdiff(self):
  53. """The difflib._mdiff() function returns an interator which returns a
  54. tuple: (from line tuple, to line tuple, boolean flag)
  55. from/to line tuple -- (line num, line text)
  56. line num -- integer or None (to indicate a context separation)
  57. line text -- original line text with following markers inserted:
  58. '\0+' -- marks start of added text
  59. '\0-' -- marks start of deleted text
  60. '\0^' -- marks start of changed text
  61. '\1' -- marks end of added/deleted/changed text
  62. boolean flag -- None indicates context separation, True indicates
  63. either "from" or "to" line contains a change, otherwise False.
  64. """
  65. return difflib._mdiff(self._get_old_text(), self._get_new_text())
  66. def _get_old_text(self):
  67. out = []
  68. for (attr, line) in self._hunk_list:
  69. if attr != '+':
  70. out.append(line)
  71. return out
  72. def _get_new_text(self):
  73. out = []
  74. for (attr, line) in self._hunk_list:
  75. if attr != '-':
  76. out.append(line)
  77. return out
  78. def __iter__(self):
  79. for hunk_line in self._hunk_list:
  80. yield hunk_line
  81. class Diff(object):
  82. def __init__(self, headers, old_path, new_path, hunks):
  83. self._headers = headers
  84. self._old_path = old_path
  85. self._new_path = new_path
  86. self._hunks = hunks
  87. def markup_traditional(self):
  88. """Returns a generator"""
  89. for line in self._headers:
  90. yield self._markup_header(line)
  91. yield self._markup_old_path(self._old_path)
  92. yield self._markup_new_path(self._new_path)
  93. for hunk in self._hunks:
  94. yield self._markup_hunk_header(hunk.get_header())
  95. for old, new, changed in hunk.mdiff():
  96. if changed:
  97. if not old[0]:
  98. # The '+' char after \x00 is kept
  99. # DEBUG: yield 'NEW: %s %s\n' % (old, new)
  100. line = new[1].strip('\x00\x01')
  101. yield self._markup_new(line)
  102. elif not new[0]:
  103. # The '-' char after \x00 is kept
  104. # DEBUG: yield 'OLD: %s %s\n' % (old, new)
  105. line = old[1].strip('\x00\x01')
  106. yield self._markup_old(line)
  107. else:
  108. # DEBUG: yield 'CHG: %s %s\n' % (old, new)
  109. yield self._markup_old('-') + \
  110. self._markup_old_mix(old[1])
  111. yield self._markup_new('+') + \
  112. self._markup_new_mix(new[1])
  113. else:
  114. yield self._markup_common(' ' + old[1])
  115. def markup_side_by_side(self, width):
  116. """Returns a generator"""
  117. def _normalize(line):
  118. return line.replace('\t', ' '*8).replace('\n', '').replace('\r', '')
  119. def _fit_width(markup, width, pad=False):
  120. """str len does not count correctly if left column contains ansi
  121. color code. Only left side need to set `pad`
  122. """
  123. out = []
  124. count = 0
  125. ansi_color_regex = r'\x1b\[(1;)?\d{1,2}m'
  126. patt = re.compile('^(%s)(.*)' % ansi_color_regex)
  127. repl = re.compile(ansi_color_regex)
  128. while markup and count < width:
  129. if patt.match(markup):
  130. out.append(patt.sub(r'\1', markup))
  131. markup = patt.sub(r'\3', markup)
  132. else:
  133. # FIXME: utf-8 char broken here
  134. out.append(markup[0])
  135. markup = markup[1:]
  136. count += 1
  137. if count == width and repl.sub('', markup):
  138. # stripped: output fulfil and still have ascii in markup
  139. out[-1] = ansi_code('reset') + colorize('>', 'lightmagenta')
  140. elif count < width and pad:
  141. pad_len = width - count
  142. out.append('%*s' % (pad_len, ''))
  143. return ''.join(out)
  144. # Setup line width and number width
  145. if width <= 0:
  146. width = 80
  147. (start, offset) = self._hunks[-1].get_old_addr()
  148. max1 = start + offset - 1
  149. (start, offset) = self._hunks[-1].get_new_addr()
  150. max2 = start + offset - 1
  151. num_width = max(len(str(max1)), len(str(max2)))
  152. left_num_fmt = colorize('%%(left_num)%ds' % num_width, 'yellow')
  153. right_num_fmt = colorize('%%(right_num)%ds' % num_width, 'yellow')
  154. line_fmt = left_num_fmt + ' %(left)s ' + ansi_code('reset') + \
  155. right_num_fmt + ' %(right)s\n'
  156. # yield header, old path and new path
  157. for line in self._headers:
  158. yield self._markup_header(line)
  159. yield self._markup_old_path(self._old_path)
  160. yield self._markup_new_path(self._new_path)
  161. # yield hunks
  162. for hunk in self._hunks:
  163. yield self._markup_hunk_header(hunk.get_header())
  164. for old, new, changed in hunk.mdiff():
  165. if old[0]:
  166. left_num = str(hunk.get_old_addr()[0] + int(old[0]) - 1)
  167. else:
  168. left_num = ' '
  169. if new[0]:
  170. right_num = str(hunk.get_new_addr()[0] + int(new[0]) - 1)
  171. else:
  172. right_num = ' '
  173. left = _normalize(old[1])
  174. right = _normalize(new[1])
  175. if changed:
  176. if not old[0]:
  177. left = '%*s' % (width, ' ')
  178. right = right.lstrip('\x00+').rstrip('\x01')
  179. right = _fit_width(self._markup_new(right), width)
  180. elif not new[0]:
  181. left = left.lstrip('\x00-').rstrip('\x01')
  182. left = _fit_width(self._markup_old(left), width)
  183. right = ''
  184. else:
  185. left = _fit_width(self._markup_old_mix(left), width, 1)
  186. right = _fit_width(self._markup_new_mix(right), width)
  187. else:
  188. left = _fit_width(self._markup_common(left), width, 1)
  189. right = _fit_width(self._markup_common(right), width)
  190. yield line_fmt % {
  191. 'left_num': left_num,
  192. 'left': left,
  193. 'right_num': right_num,
  194. 'right': right
  195. }
  196. def _markup_header(self, line):
  197. return colorize(line, 'cyan')
  198. def _markup_old_path(self, line):
  199. return colorize(line, 'yellow')
  200. def _markup_new_path(self, line):
  201. return colorize(line, 'yellow')
  202. def _markup_hunk_header(self, line):
  203. return colorize(line, 'lightblue')
  204. def _markup_common(self, line):
  205. return colorize(line, 'reset')
  206. def _markup_old(self, line):
  207. return colorize(line, 'lightred')
  208. def _markup_new(self, line):
  209. return colorize(line, 'lightgreen')
  210. def _markup_mix(self, line, base_color):
  211. del_code = ansi_code('reverse') + ansi_code(base_color)
  212. add_code = ansi_code('reverse') + ansi_code(base_color)
  213. chg_code = ansi_code('underline') + ansi_code(base_color)
  214. rst_code = ansi_code('reset') + ansi_code(base_color)
  215. line = line.replace('\x00-', del_code)
  216. line = line.replace('\x00+', add_code)
  217. line = line.replace('\x00^', chg_code)
  218. line = line.replace('\x01', rst_code)
  219. return colorize(line, base_color)
  220. def _markup_old_mix(self, line):
  221. return self._markup_mix(line, 'red')
  222. def _markup_new_mix(self, line):
  223. return self._markup_mix(line, 'green')
  224. class Udiff(Diff):
  225. @staticmethod
  226. def is_old_path(line):
  227. return line.startswith('--- ')
  228. @staticmethod
  229. def is_new_path(line):
  230. return line.startswith('+++ ')
  231. @staticmethod
  232. def is_hunk_header(line):
  233. return line.startswith('@@ -')
  234. @staticmethod
  235. def is_old(line):
  236. return line.startswith('-') and not Udiff.is_old_path(line)
  237. @staticmethod
  238. def is_new(line):
  239. return line.startswith('+') and not Udiff.is_new_path(line)
  240. @staticmethod
  241. def is_common(line):
  242. return line.startswith(' ')
  243. @staticmethod
  244. def is_eof(line):
  245. # \ No newline at end of file
  246. return line.startswith('\\')
  247. @staticmethod
  248. def is_header(line):
  249. return re.match(r'^[^+@\\ -]', line)
  250. class DiffParser(object):
  251. def __init__(self, stream):
  252. for line in stream[:20]:
  253. if line.startswith('+++ '):
  254. self._type = 'udiff'
  255. break
  256. else:
  257. raise RuntimeError('unknown diff type')
  258. try:
  259. self._diffs = self._parse(stream)
  260. except (AssertionError, IndexError):
  261. raise RuntimeError('invalid patch format')
  262. def get_diffs(self):
  263. return self._diffs
  264. def _parse(self, stream):
  265. if self._type == 'udiff':
  266. return self._parse_udiff(stream)
  267. else:
  268. raise RuntimeError('unsupported diff format')
  269. def _parse_udiff(self, stream):
  270. """parse all diff lines here, construct a list of Udiff objects"""
  271. out_diffs = []
  272. headers = []
  273. old_path = None
  274. new_path = None
  275. hunks = []
  276. hunk = None
  277. while stream:
  278. # 'common' line occurs before 'old_path' is considered as header
  279. # too, this happens with `git log -p` and `git show <commit>`
  280. #
  281. if Udiff.is_header(stream[0]) or \
  282. (Udiff.is_common(stream[0]) and old_path is None):
  283. if headers and old_path:
  284. # Encounter a new header
  285. assert new_path is not None
  286. assert hunk is not None
  287. hunks.append(hunk)
  288. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  289. headers = []
  290. old_path = None
  291. new_path = None
  292. hunks = []
  293. hunk = None
  294. else:
  295. headers.append(stream.pop(0))
  296. elif Udiff.is_old_path(stream[0]):
  297. if old_path:
  298. # Encounter a new patch set
  299. assert new_path is not None
  300. assert hunk is not None
  301. hunks.append(hunk)
  302. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  303. headers = []
  304. old_path = None
  305. new_path = None
  306. hunks = []
  307. hunk = None
  308. else:
  309. old_path = stream.pop(0)
  310. elif Udiff.is_new_path(stream[0]):
  311. assert old_path is not None
  312. assert new_path is None
  313. new_path = stream.pop(0)
  314. elif Udiff.is_hunk_header(stream[0]):
  315. assert old_path is not None
  316. assert new_path is not None
  317. if hunk:
  318. # Encounter a new hunk header
  319. hunks.append(hunk)
  320. hunk = None
  321. else:
  322. # @@ -3,7 +3,6 @@
  323. hunk_header = stream.pop(0)
  324. a = hunk_header.split()[1].split(',') # -3 7
  325. old_addr = (int(a[0][1:]), int(a[1]))
  326. b = hunk_header.split()[2].split(',') # +3 6
  327. new_addr = (int(b[0][1:]), int(b[1]))
  328. hunk = Hunk(hunk_header, old_addr, new_addr)
  329. elif Udiff.is_old(stream[0]) or Udiff.is_new(stream[0]) or \
  330. Udiff.is_common(stream[0]):
  331. assert old_path is not None
  332. assert new_path is not None
  333. assert hunk is not None
  334. hunk_line = stream.pop(0)
  335. hunk.append(hunk_line[0], hunk_line[1:])
  336. elif Udiff.is_eof(stream[0]):
  337. # ignore
  338. stream.pop(0)
  339. else:
  340. raise RuntimeError('unknown patch format: %s' % stream[0])
  341. # The last patch
  342. if hunk:
  343. hunks.append(hunk)
  344. if old_path:
  345. if new_path:
  346. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  347. else:
  348. raise RuntimeError('unknown patch format after "%s"' % old_path)
  349. elif headers:
  350. raise RuntimeError('unknown patch format: %s' % \
  351. ('\n'.join(headers)))
  352. return out_diffs
  353. class DiffMarkup(object):
  354. def __init__(self, stream):
  355. self._diffs = DiffParser(stream).get_diffs()
  356. def markup(self, side_by_side=False, width=0):
  357. """Returns a generator"""
  358. if side_by_side:
  359. return self._markup_side_by_side(width)
  360. else:
  361. return self._markup_traditional()
  362. def _markup_traditional(self):
  363. for diff in self._diffs:
  364. for line in diff.markup_traditional():
  365. yield line
  366. def _markup_side_by_side(self, width):
  367. for diff in self._diffs:
  368. for line in diff.markup_side_by_side(width):
  369. yield line
  370. if __name__ == '__main__':
  371. import optparse
  372. import subprocess
  373. usage = '''
  374. %(prog)s [options] [diff]
  375. View diff (patch) file if given, otherwise read stdin''' % \
  376. {'prog': os.path.basename(sys.argv[0])}
  377. parser = optparse.OptionParser(usage)
  378. parser.add_option('-s', '--side-by-side', action='store_true',
  379. help=('show in side-by-side mode'))
  380. parser.add_option('-w', '--width', type='int', default=80,
  381. help='set line width (side-by-side mode only), default is 80')
  382. opts, args = parser.parse_args()
  383. if len(args) >= 1:
  384. diff_hdl = open(args[0], 'r')
  385. elif sys.stdin.isatty():
  386. sys.stderr.write('Try --help option for usage\n')
  387. sys.exit(1)
  388. else:
  389. diff_hdl = sys.stdin
  390. # FIXME: can't use generator for now due to current implementation in parser
  391. stream = diff_hdl.readlines()
  392. # Don't let empty diff pass thru
  393. if not stream:
  394. sys.exit(0)
  395. if diff_hdl is not sys.stdin:
  396. diff_hdl.close()
  397. if sys.stdout.isatty():
  398. markup = DiffMarkup(stream)
  399. color_diff = markup.markup(side_by_side=opts.side_by_side,
  400. width=opts.width)
  401. # args stolen fron git source: github.com/git/git/blob/master/pager.c
  402. pager = subprocess.Popen(['less', '-FRSXK'],
  403. stdin=subprocess.PIPE, stdout=sys.stdout)
  404. try:
  405. for line in color_diff:
  406. pager.stdin.write(line)
  407. except IOError as e:
  408. if e.errno == errno.EPIPE:
  409. pass
  410. pager.stdin.close()
  411. pager.wait()
  412. else:
  413. # pipe out stream untouched to make sure it is still a patch
  414. sys.stdout.write(''.join(stream))
  415. sys.exit(0)
  416. # vim:set et sts=4 sw=4 tw=80: