cdiff.py 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. View incremental, colored diff in unified format or in side by side mode with
  5. auto pager. Requires Python (>= 2.5.0) and less.
  6. See demo at homepage: https://github.com/ymattw/cdiff
  7. """
  8. import sys
  9. if sys.hexversion < 0x02050000:
  10. sys.stderr.write("ERROR: requires python >= 2.5.0\n")
  11. sys.exit(1)
  12. IS_PY3 = sys.hexversion >= 0x03000000
  13. import os
  14. import re
  15. import errno
  16. import difflib
  17. COLORS = {
  18. 'reset' : '\x1b[0m',
  19. 'underline' : '\x1b[4m',
  20. 'reverse' : '\x1b[7m',
  21. 'red' : '\x1b[31m',
  22. 'green' : '\x1b[32m',
  23. 'yellow' : '\x1b[33m',
  24. 'blue' : '\x1b[34m',
  25. 'magenta' : '\x1b[35m',
  26. 'cyan' : '\x1b[36m',
  27. 'lightred' : '\x1b[1;31m',
  28. 'lightgreen' : '\x1b[1;32m',
  29. 'lightyellow' : '\x1b[1;33m',
  30. 'lightblue' : '\x1b[1;34m',
  31. 'lightmagenta' : '\x1b[1;35m',
  32. 'lightcyan' : '\x1b[1;36m',
  33. }
  34. def ansi_code(color):
  35. return COLORS.get(color, '')
  36. def colorize(text, start_color, end_color='reset'):
  37. return ansi_code(start_color) + text + ansi_code(end_color)
  38. class Hunk(object):
  39. def __init__(self, hunk_header, old_addr, new_addr):
  40. self._hunk_header = hunk_header
  41. self._old_addr = old_addr # tuple (start, offset)
  42. self._new_addr = new_addr # tuple (start, offset)
  43. self._hunk_list = [] # list of tuple (attr, line)
  44. def get_header(self):
  45. return self._hunk_header
  46. def get_old_addr(self):
  47. return self._old_addr
  48. def get_new_addr(self):
  49. return self._new_addr
  50. def append(self, attr, line):
  51. """attr: '-': old, '+': new, ' ': common"""
  52. self._hunk_list.append((attr, line))
  53. def mdiff(self):
  54. """The difflib._mdiff() function returns an interator which returns a
  55. tuple: (from line tuple, to line tuple, boolean flag)
  56. from/to line tuple -- (line num, line text)
  57. line num -- integer or None (to indicate a context separation)
  58. line text -- original line text with following markers inserted:
  59. '\0+' -- marks start of added text
  60. '\0-' -- marks start of deleted text
  61. '\0^' -- marks start of changed text
  62. '\1' -- marks end of added/deleted/changed text
  63. boolean flag -- None indicates context separation, True indicates
  64. either "from" or "to" line contains a change, otherwise False.
  65. """
  66. return difflib._mdiff(self._get_old_text(), self._get_new_text())
  67. def _get_old_text(self):
  68. out = []
  69. for (attr, line) in self._hunk_list:
  70. if attr != '+':
  71. out.append(line)
  72. return out
  73. def _get_new_text(self):
  74. out = []
  75. for (attr, line) in self._hunk_list:
  76. if attr != '-':
  77. out.append(line)
  78. return out
  79. def __iter__(self):
  80. for hunk_line in self._hunk_list:
  81. yield hunk_line
  82. class Diff(object):
  83. def __init__(self, headers, old_path, new_path, hunks):
  84. self._headers = headers
  85. self._old_path = old_path
  86. self._new_path = new_path
  87. self._hunks = hunks
  88. def markup_traditional(self):
  89. """Returns a generator"""
  90. for line in self._headers:
  91. yield self._markup_header(line)
  92. yield self._markup_old_path(self._old_path)
  93. yield self._markup_new_path(self._new_path)
  94. for hunk in self._hunks:
  95. yield self._markup_hunk_header(hunk.get_header())
  96. for old, new, changed in hunk.mdiff():
  97. if changed:
  98. if not old[0]:
  99. # The '+' char after \x00 is kept
  100. # DEBUG: yield 'NEW: %s %s\n' % (old, new)
  101. line = new[1].strip('\x00\x01')
  102. yield self._markup_new(line)
  103. elif not new[0]:
  104. # The '-' char after \x00 is kept
  105. # DEBUG: yield 'OLD: %s %s\n' % (old, new)
  106. line = old[1].strip('\x00\x01')
  107. yield self._markup_old(line)
  108. else:
  109. # DEBUG: yield 'CHG: %s %s\n' % (old, new)
  110. yield self._markup_old('-') + \
  111. self._markup_old_mix(old[1])
  112. yield self._markup_new('+') + \
  113. self._markup_new_mix(new[1])
  114. else:
  115. yield self._markup_common(' ' + old[1])
  116. def markup_side_by_side(self, width):
  117. """Returns a generator"""
  118. def _normalize(line):
  119. return line.replace('\t', ' '*8).replace('\n', '').replace('\r', '')
  120. def _fit_width(markup, width, pad=False):
  121. """str len does not count correctly if left column contains ansi
  122. color code. Only left side need to set `pad`
  123. """
  124. out = []
  125. count = 0
  126. ansi_color_regex = r'\x1b\[(1;)?\d{1,2}m'
  127. patt = re.compile('^(%s)(.*)' % ansi_color_regex)
  128. repl = re.compile(ansi_color_regex)
  129. while markup and count < width:
  130. if patt.match(markup):
  131. out.append(patt.sub(r'\1', markup))
  132. markup = patt.sub(r'\3', markup)
  133. else:
  134. # FIXME: utf-8 wchar broken here
  135. out.append(markup[0])
  136. markup = markup[1:]
  137. count += 1
  138. if count == width and repl.sub('', markup):
  139. # stripped: output fulfil and still have ascii in markup
  140. out[-1] = ansi_code('reset') + colorize('>', 'lightmagenta')
  141. elif count < width and pad:
  142. pad_len = width - count
  143. out.append('%*s' % (pad_len, ''))
  144. return ''.join(out)
  145. # Setup line width and number width
  146. if width <= 0:
  147. width = 80
  148. (start, offset) = self._hunks[-1].get_old_addr()
  149. max1 = start + offset - 1
  150. (start, offset) = self._hunks[-1].get_new_addr()
  151. max2 = start + offset - 1
  152. num_width = max(len(str(max1)), len(str(max2)))
  153. left_num_fmt = colorize('%%(left_num)%ds' % num_width, 'yellow')
  154. right_num_fmt = colorize('%%(right_num)%ds' % num_width, 'yellow')
  155. line_fmt = left_num_fmt + ' %(left)s ' + ansi_code('reset') + \
  156. right_num_fmt + ' %(right)s\n'
  157. # yield header, old path and new path
  158. for line in self._headers:
  159. yield self._markup_header(line)
  160. yield self._markup_old_path(self._old_path)
  161. yield self._markup_new_path(self._new_path)
  162. # yield hunks
  163. for hunk in self._hunks:
  164. yield self._markup_hunk_header(hunk.get_header())
  165. for old, new, changed in hunk.mdiff():
  166. if old[0]:
  167. left_num = str(hunk.get_old_addr()[0] + int(old[0]) - 1)
  168. else:
  169. left_num = ' '
  170. if new[0]:
  171. right_num = str(hunk.get_new_addr()[0] + int(new[0]) - 1)
  172. else:
  173. right_num = ' '
  174. left = _normalize(old[1])
  175. right = _normalize(new[1])
  176. if changed:
  177. if not old[0]:
  178. left = '%*s' % (width, ' ')
  179. right = right.lstrip('\x00+').rstrip('\x01')
  180. right = _fit_width(self._markup_new(right), width)
  181. elif not new[0]:
  182. left = left.lstrip('\x00-').rstrip('\x01')
  183. left = _fit_width(self._markup_old(left), width)
  184. right = ''
  185. else:
  186. left = _fit_width(self._markup_old_mix(left), width, 1)
  187. right = _fit_width(self._markup_new_mix(right), width)
  188. else:
  189. left = _fit_width(self._markup_common(left), width, 1)
  190. right = _fit_width(self._markup_common(right), width)
  191. yield line_fmt % {
  192. 'left_num': left_num,
  193. 'left': left,
  194. 'right_num': right_num,
  195. 'right': right
  196. }
  197. def _markup_header(self, line):
  198. return colorize(line, 'cyan')
  199. def _markup_old_path(self, line):
  200. return colorize(line, 'yellow')
  201. def _markup_new_path(self, line):
  202. return colorize(line, 'yellow')
  203. def _markup_hunk_header(self, line):
  204. return colorize(line, 'lightblue')
  205. def _markup_common(self, line):
  206. return colorize(line, 'reset')
  207. def _markup_old(self, line):
  208. return colorize(line, 'lightred')
  209. def _markup_new(self, line):
  210. return colorize(line, 'lightgreen')
  211. def _markup_mix(self, line, base_color):
  212. del_code = ansi_code('reverse') + ansi_code(base_color)
  213. add_code = ansi_code('reverse') + ansi_code(base_color)
  214. chg_code = ansi_code('underline') + ansi_code(base_color)
  215. rst_code = ansi_code('reset') + ansi_code(base_color)
  216. line = line.replace('\x00-', del_code)
  217. line = line.replace('\x00+', add_code)
  218. line = line.replace('\x00^', chg_code)
  219. line = line.replace('\x01', rst_code)
  220. return colorize(line, base_color)
  221. def _markup_old_mix(self, line):
  222. return self._markup_mix(line, 'red')
  223. def _markup_new_mix(self, line):
  224. return self._markup_mix(line, 'green')
  225. class Udiff(Diff):
  226. @staticmethod
  227. def is_old_path(line):
  228. return line.startswith('--- ')
  229. @staticmethod
  230. def is_new_path(line):
  231. return line.startswith('+++ ')
  232. @staticmethod
  233. def is_hunk_header(line):
  234. return line.startswith('@@ -')
  235. @staticmethod
  236. def is_old(line):
  237. return line.startswith('-') and not Udiff.is_old_path(line)
  238. @staticmethod
  239. def is_new(line):
  240. return line.startswith('+') and not Udiff.is_new_path(line)
  241. @staticmethod
  242. def is_common(line):
  243. return line.startswith(' ')
  244. @staticmethod
  245. def is_eof(line):
  246. # \ No newline at end of file
  247. return line.startswith('\\')
  248. @staticmethod
  249. def is_header(line):
  250. return re.match(r'^[^+@\\ -]', line)
  251. class DiffParser(object):
  252. def __init__(self, stream):
  253. for line in stream[:20]:
  254. if line.startswith('+++ '):
  255. self._type = 'udiff'
  256. break
  257. else:
  258. raise RuntimeError('unknown diff type')
  259. try:
  260. self._diffs = self._parse(stream)
  261. except (AssertionError, IndexError):
  262. raise RuntimeError('invalid patch format')
  263. def get_diffs(self):
  264. return self._diffs
  265. def _parse(self, stream):
  266. if self._type == 'udiff':
  267. return self._parse_udiff(stream)
  268. else:
  269. raise RuntimeError('unsupported diff format')
  270. def _parse_udiff(self, stream):
  271. """parse all diff lines here, construct a list of Udiff objects"""
  272. out_diffs = []
  273. headers = []
  274. old_path = None
  275. new_path = None
  276. hunks = []
  277. hunk = None
  278. while stream:
  279. # 'common' line occurs before 'old_path' is considered as header
  280. # too, this happens with `git log -p` and `git show <commit>`
  281. #
  282. if Udiff.is_header(stream[0]) or \
  283. (Udiff.is_common(stream[0]) and old_path is None):
  284. if headers and old_path:
  285. # Encounter a new header
  286. assert new_path is not None
  287. assert hunk is not None
  288. hunks.append(hunk)
  289. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  290. headers = []
  291. old_path = None
  292. new_path = None
  293. hunks = []
  294. hunk = None
  295. else:
  296. headers.append(stream.pop(0))
  297. elif Udiff.is_old_path(stream[0]):
  298. if old_path:
  299. # Encounter a new patch set
  300. assert new_path is not None
  301. assert hunk is not None
  302. hunks.append(hunk)
  303. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  304. headers = []
  305. old_path = None
  306. new_path = None
  307. hunks = []
  308. hunk = None
  309. else:
  310. old_path = stream.pop(0)
  311. elif Udiff.is_new_path(stream[0]):
  312. assert old_path is not None
  313. assert new_path is None
  314. new_path = stream.pop(0)
  315. elif Udiff.is_hunk_header(stream[0]):
  316. assert old_path is not None
  317. assert new_path is not None
  318. if hunk:
  319. # Encounter a new hunk header
  320. hunks.append(hunk)
  321. hunk = None
  322. else:
  323. # @@ -3,7 +3,6 @@
  324. hunk_header = stream.pop(0)
  325. a = hunk_header.split()[1].split(',') # -3 7
  326. old_addr = (int(a[0][1:]), int(a[1]))
  327. b = hunk_header.split()[2].split(',') # +3 6
  328. if len(b) > 1:
  329. new_addr = (int(b[0][1:]), int(b[1]))
  330. else:
  331. # @@ -0,0 +1 @@
  332. new_addr = (int(b[0][1:]), 0)
  333. hunk = Hunk(hunk_header, old_addr, new_addr)
  334. elif Udiff.is_old(stream[0]) or Udiff.is_new(stream[0]) or \
  335. Udiff.is_common(stream[0]):
  336. assert old_path is not None
  337. assert new_path is not None
  338. assert hunk is not None
  339. hunk_line = stream.pop(0)
  340. hunk.append(hunk_line[0], hunk_line[1:])
  341. elif Udiff.is_eof(stream[0]):
  342. # ignore
  343. stream.pop(0)
  344. else:
  345. raise RuntimeError('unknown patch format: %s' % stream[0])
  346. # The last patch
  347. if hunk:
  348. hunks.append(hunk)
  349. if old_path:
  350. if new_path:
  351. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  352. else:
  353. raise RuntimeError('unknown patch format after "%s"' % old_path)
  354. elif headers:
  355. raise RuntimeError('unknown patch format: %s' % \
  356. ('\n'.join(headers)))
  357. return out_diffs
  358. class DiffMarkup(object):
  359. def __init__(self, stream):
  360. self._diffs = DiffParser(stream).get_diffs()
  361. def markup(self, side_by_side=False, width=0):
  362. """Returns a generator"""
  363. if side_by_side:
  364. return self._markup_side_by_side(width)
  365. else:
  366. return self._markup_traditional()
  367. def _markup_traditional(self):
  368. for diff in self._diffs:
  369. for line in diff.markup_traditional():
  370. yield line
  371. def _markup_side_by_side(self, width):
  372. for diff in self._diffs:
  373. for line in diff.markup_side_by_side(width):
  374. yield line
  375. def markup_to_pager(stream):
  376. markup = DiffMarkup(stream)
  377. color_diff = markup.markup(side_by_side=opts.side_by_side,
  378. width=opts.width)
  379. # args stolen fron git source: github.com/git/git/blob/master/pager.c
  380. pager = subprocess.Popen(['less', '-FRSXK'],
  381. stdin=subprocess.PIPE, stdout=sys.stdout)
  382. for line in color_diff:
  383. pager.stdin.write(line.encode('utf-8'))
  384. pager.stdin.close()
  385. pager.wait()
  386. if __name__ == '__main__':
  387. import optparse
  388. import subprocess
  389. usage = '''%s [options] [diff]''' % os.path.basename(sys.argv[0])
  390. description= ('''View incremental, colored diff in unified format or '''
  391. '''in side by side mode with auto pager, read stdin if '''
  392. '''diff (patch) file is not given''')
  393. parser = optparse.OptionParser(usage=usage, description=description)
  394. parser.add_option('-s', '--side-by-side', action='store_true',
  395. help=('show in side-by-side mode'))
  396. parser.add_option('-w', '--width', type='int', default=80, metavar='N',
  397. help='set text width (side-by-side mode only), default is 80')
  398. opts, args = parser.parse_args()
  399. if len(args) >= 1:
  400. if IS_PY3:
  401. # Python3 needs the newline='' to keep '\r' (DOS format)
  402. diff_hdl = open(args[0], mode='rt', newline='')
  403. else:
  404. diff_hdl = open(args[0], mode='rt')
  405. elif sys.stdin.isatty():
  406. parser.print_help()
  407. sys.exit(1)
  408. else:
  409. diff_hdl = sys.stdin
  410. # FIXME: can't use generator for now due to current implementation in parser
  411. stream = diff_hdl.readlines()
  412. # Don't let empty diff pass thru
  413. if not stream:
  414. sys.exit(0)
  415. if diff_hdl is not sys.stdin:
  416. diff_hdl.close()
  417. if sys.stdout.isatty():
  418. try:
  419. markup_to_pager(stream)
  420. except IOError:
  421. e = sys.exc_info()[1]
  422. if e.errno == errno.EPIPE:
  423. pass
  424. else:
  425. # pipe out stream untouched to make sure it is still a patch
  426. sys.stdout.write(''.join(stream))
  427. sys.exit(0)
  428. # vim:set et sts=4 sw=4 tw=80: