cdiff.py 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. View incremental, colored diff in unified format or in side by side mode with
  5. auto pager. Requires Python (>= 2.5.0) and less.
  6. See demo at homepage: https://github.com/ymattw/cdiff
  7. """
  8. import sys
  9. if sys.hexversion < 0x02050000:
  10. sys.stderr.write("ERROR: requires python >= 2.5.0\n")
  11. sys.exit(1)
  12. IS_PY3 = sys.hexversion >= 0x03000000
  13. import os
  14. import re
  15. import subprocess
  16. import errno
  17. import difflib
  18. COLORS = {
  19. 'reset' : '\x1b[0m',
  20. 'underline' : '\x1b[4m',
  21. 'reverse' : '\x1b[7m',
  22. 'red' : '\x1b[31m',
  23. 'green' : '\x1b[32m',
  24. 'yellow' : '\x1b[33m',
  25. 'blue' : '\x1b[34m',
  26. 'magenta' : '\x1b[35m',
  27. 'cyan' : '\x1b[36m',
  28. 'lightred' : '\x1b[1;31m',
  29. 'lightgreen' : '\x1b[1;32m',
  30. 'lightyellow' : '\x1b[1;33m',
  31. 'lightblue' : '\x1b[1;34m',
  32. 'lightmagenta' : '\x1b[1;35m',
  33. 'lightcyan' : '\x1b[1;36m',
  34. }
  35. def ansi_code(color):
  36. return COLORS.get(color, '')
  37. def colorize(text, start_color, end_color='reset'):
  38. return ansi_code(start_color) + text + ansi_code(end_color)
  39. class Hunk(object):
  40. def __init__(self, hunk_header, old_addr, new_addr):
  41. self._hunk_header = hunk_header
  42. self._old_addr = old_addr # tuple (start, offset)
  43. self._new_addr = new_addr # tuple (start, offset)
  44. self._hunk_list = [] # list of tuple (attr, line)
  45. def get_header(self):
  46. return self._hunk_header
  47. def get_old_addr(self):
  48. return self._old_addr
  49. def get_new_addr(self):
  50. return self._new_addr
  51. def append(self, attr, line):
  52. """attr: '-': old, '+': new, ' ': common"""
  53. self._hunk_list.append((attr, line))
  54. def mdiff(self):
  55. """The difflib._mdiff() function returns an interator which returns a
  56. tuple: (from line tuple, to line tuple, boolean flag)
  57. from/to line tuple -- (line num, line text)
  58. line num -- integer or None (to indicate a context separation)
  59. line text -- original line text with following markers inserted:
  60. '\0+' -- marks start of added text
  61. '\0-' -- marks start of deleted text
  62. '\0^' -- marks start of changed text
  63. '\1' -- marks end of added/deleted/changed text
  64. boolean flag -- None indicates context separation, True indicates
  65. either "from" or "to" line contains a change, otherwise False.
  66. """
  67. return difflib._mdiff(self._get_old_text(), self._get_new_text())
  68. def _get_old_text(self):
  69. out = []
  70. for (attr, line) in self._hunk_list:
  71. if attr != '+':
  72. out.append(line)
  73. return out
  74. def _get_new_text(self):
  75. out = []
  76. for (attr, line) in self._hunk_list:
  77. if attr != '-':
  78. out.append(line)
  79. return out
  80. def __iter__(self):
  81. for hunk_line in self._hunk_list:
  82. yield hunk_line
  83. class Diff(object):
  84. def __init__(self, headers, old_path, new_path, hunks):
  85. self._headers = headers
  86. self._old_path = old_path
  87. self._new_path = new_path
  88. self._hunks = hunks
  89. def markup_traditional(self):
  90. """Returns a generator"""
  91. for line in self._headers:
  92. yield self._markup_header(line)
  93. yield self._markup_old_path(self._old_path)
  94. yield self._markup_new_path(self._new_path)
  95. for hunk in self._hunks:
  96. yield self._markup_hunk_header(hunk.get_header())
  97. for old, new, changed in hunk.mdiff():
  98. if changed:
  99. if not old[0]:
  100. # The '+' char after \x00 is kept
  101. # DEBUG: yield 'NEW: %s %s\n' % (old, new)
  102. line = new[1].strip('\x00\x01')
  103. yield self._markup_new(line)
  104. elif not new[0]:
  105. # The '-' char after \x00 is kept
  106. # DEBUG: yield 'OLD: %s %s\n' % (old, new)
  107. line = old[1].strip('\x00\x01')
  108. yield self._markup_old(line)
  109. else:
  110. # DEBUG: yield 'CHG: %s %s\n' % (old, new)
  111. yield self._markup_old('-') + \
  112. self._markup_old_mix(old[1])
  113. yield self._markup_new('+') + \
  114. self._markup_new_mix(new[1])
  115. else:
  116. yield self._markup_common(' ' + old[1])
  117. def markup_side_by_side(self, width):
  118. """Returns a generator"""
  119. def _normalize(line):
  120. return line.replace('\t', ' '*8).replace('\n', '').replace('\r', '')
  121. def _fit_width(markup, width, pad=False):
  122. """str len does not count correctly if left column contains ansi
  123. color code. Only left side need to set `pad`
  124. """
  125. out = []
  126. count = 0
  127. ansi_color_regex = r'\x1b\[(1;)?\d{1,2}m'
  128. patt = re.compile('^(%s)(.*)' % ansi_color_regex)
  129. repl = re.compile(ansi_color_regex)
  130. while markup and count < width:
  131. if patt.match(markup):
  132. out.append(patt.sub(r'\1', markup))
  133. markup = patt.sub(r'\3', markup)
  134. else:
  135. # FIXME: utf-8 wchar broken here
  136. out.append(markup[0])
  137. markup = markup[1:]
  138. count += 1
  139. if count == width and repl.sub('', markup):
  140. # stripped: output fulfil and still have ascii in markup
  141. out[-1] = ansi_code('reset') + colorize('>', 'lightmagenta')
  142. elif count < width and pad:
  143. pad_len = width - count
  144. out.append('%*s' % (pad_len, ''))
  145. return ''.join(out)
  146. # Setup line width and number width
  147. if width <= 0:
  148. width = 80
  149. (start, offset) = self._hunks[-1].get_old_addr()
  150. max1 = start + offset - 1
  151. (start, offset) = self._hunks[-1].get_new_addr()
  152. max2 = start + offset - 1
  153. num_width = max(len(str(max1)), len(str(max2)))
  154. left_num_fmt = colorize('%%(left_num)%ds' % num_width, 'yellow')
  155. right_num_fmt = colorize('%%(right_num)%ds' % num_width, 'yellow')
  156. line_fmt = left_num_fmt + ' %(left)s ' + ansi_code('reset') + \
  157. right_num_fmt + ' %(right)s\n'
  158. # yield header, old path and new path
  159. for line in self._headers:
  160. yield self._markup_header(line)
  161. yield self._markup_old_path(self._old_path)
  162. yield self._markup_new_path(self._new_path)
  163. # yield hunks
  164. for hunk in self._hunks:
  165. yield self._markup_hunk_header(hunk.get_header())
  166. for old, new, changed in hunk.mdiff():
  167. if old[0]:
  168. left_num = str(hunk.get_old_addr()[0] + int(old[0]) - 1)
  169. else:
  170. left_num = ' '
  171. if new[0]:
  172. right_num = str(hunk.get_new_addr()[0] + int(new[0]) - 1)
  173. else:
  174. right_num = ' '
  175. left = _normalize(old[1])
  176. right = _normalize(new[1])
  177. if changed:
  178. if not old[0]:
  179. left = '%*s' % (width, ' ')
  180. right = right.lstrip('\x00+').rstrip('\x01')
  181. right = _fit_width(self._markup_new(right), width)
  182. elif not new[0]:
  183. left = left.lstrip('\x00-').rstrip('\x01')
  184. left = _fit_width(self._markup_old(left), width)
  185. right = ''
  186. else:
  187. left = _fit_width(self._markup_old_mix(left), width, 1)
  188. right = _fit_width(self._markup_new_mix(right), width)
  189. else:
  190. left = _fit_width(self._markup_common(left), width, 1)
  191. right = _fit_width(self._markup_common(right), width)
  192. yield line_fmt % {
  193. 'left_num': left_num,
  194. 'left': left,
  195. 'right_num': right_num,
  196. 'right': right
  197. }
  198. def _markup_header(self, line):
  199. return colorize(line, 'cyan')
  200. def _markup_old_path(self, line):
  201. return colorize(line, 'yellow')
  202. def _markup_new_path(self, line):
  203. return colorize(line, 'yellow')
  204. def _markup_hunk_header(self, line):
  205. return colorize(line, 'lightblue')
  206. def _markup_common(self, line):
  207. return colorize(line, 'reset')
  208. def _markup_old(self, line):
  209. return colorize(line, 'lightred')
  210. def _markup_new(self, line):
  211. return colorize(line, 'lightgreen')
  212. def _markup_mix(self, line, base_color):
  213. del_code = ansi_code('reverse') + ansi_code(base_color)
  214. add_code = ansi_code('reverse') + ansi_code(base_color)
  215. chg_code = ansi_code('underline') + ansi_code(base_color)
  216. rst_code = ansi_code('reset') + ansi_code(base_color)
  217. line = line.replace('\x00-', del_code)
  218. line = line.replace('\x00+', add_code)
  219. line = line.replace('\x00^', chg_code)
  220. line = line.replace('\x01', rst_code)
  221. return colorize(line, base_color)
  222. def _markup_old_mix(self, line):
  223. return self._markup_mix(line, 'red')
  224. def _markup_new_mix(self, line):
  225. return self._markup_mix(line, 'green')
  226. class Udiff(Diff):
  227. @staticmethod
  228. def is_old_path(line):
  229. return line.startswith('--- ')
  230. @staticmethod
  231. def is_new_path(line):
  232. return line.startswith('+++ ')
  233. @staticmethod
  234. def is_hunk_header(line):
  235. return line.startswith('@@ -')
  236. @staticmethod
  237. def is_old(line):
  238. return line.startswith('-') and not Udiff.is_old_path(line)
  239. @staticmethod
  240. def is_new(line):
  241. return line.startswith('+') and not Udiff.is_new_path(line)
  242. @staticmethod
  243. def is_common(line):
  244. return line.startswith(' ')
  245. @staticmethod
  246. def is_eof(line):
  247. # \ No newline at end of file
  248. return line.startswith('\\')
  249. @staticmethod
  250. def is_header(line):
  251. return re.match(r'^[^+@\\ -]', line)
  252. class DiffParser(object):
  253. def __init__(self, stream):
  254. for line in stream[:20]:
  255. if line.startswith('+++ '):
  256. self._type = 'udiff'
  257. break
  258. else:
  259. raise RuntimeError('unknown diff type')
  260. try:
  261. self._diffs = self._parse(stream)
  262. except (AssertionError, IndexError):
  263. raise RuntimeError('invalid patch format')
  264. def get_diffs(self):
  265. return self._diffs
  266. def _parse(self, stream):
  267. if self._type == 'udiff':
  268. return self._parse_udiff(stream)
  269. else:
  270. raise RuntimeError('unsupported diff format')
  271. def _parse_udiff(self, stream):
  272. """parse all diff lines here, construct a list of Udiff objects"""
  273. out_diffs = []
  274. headers = []
  275. old_path = None
  276. new_path = None
  277. hunks = []
  278. hunk = None
  279. while stream:
  280. # 'common' line occurs before 'old_path' is considered as header
  281. # too, this happens with `git log -p` and `git show <commit>`
  282. #
  283. if Udiff.is_header(stream[0]) or \
  284. (Udiff.is_common(stream[0]) and old_path is None):
  285. if headers and old_path:
  286. # Encounter a new header
  287. assert new_path is not None
  288. assert hunk is not None
  289. hunks.append(hunk)
  290. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  291. headers = []
  292. old_path = None
  293. new_path = None
  294. hunks = []
  295. hunk = None
  296. else:
  297. headers.append(stream.pop(0))
  298. elif Udiff.is_old_path(stream[0]):
  299. if old_path:
  300. # Encounter a new patch set
  301. assert new_path is not None
  302. assert hunk is not None
  303. hunks.append(hunk)
  304. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  305. headers = []
  306. old_path = None
  307. new_path = None
  308. hunks = []
  309. hunk = None
  310. else:
  311. old_path = stream.pop(0)
  312. elif Udiff.is_new_path(stream[0]):
  313. assert old_path is not None
  314. assert new_path is None
  315. new_path = stream.pop(0)
  316. elif Udiff.is_hunk_header(stream[0]):
  317. assert old_path is not None
  318. assert new_path is not None
  319. if hunk:
  320. # Encounter a new hunk header
  321. hunks.append(hunk)
  322. hunk = None
  323. else:
  324. # @@ -3,7 +3,6 @@
  325. hunk_header = stream.pop(0)
  326. a = hunk_header.split()[1].split(',') # -3 7
  327. old_addr = (int(a[0][1:]), int(a[1]))
  328. b = hunk_header.split()[2].split(',') # +3 6
  329. if len(b) > 1:
  330. new_addr = (int(b[0][1:]), int(b[1]))
  331. else:
  332. # @@ -0,0 +1 @@
  333. new_addr = (int(b[0][1:]), 0)
  334. hunk = Hunk(hunk_header, old_addr, new_addr)
  335. elif Udiff.is_old(stream[0]) or Udiff.is_new(stream[0]) or \
  336. Udiff.is_common(stream[0]):
  337. assert old_path is not None
  338. assert new_path is not None
  339. assert hunk is not None
  340. hunk_line = stream.pop(0)
  341. hunk.append(hunk_line[0], hunk_line[1:])
  342. elif Udiff.is_eof(stream[0]):
  343. # ignore
  344. stream.pop(0)
  345. else:
  346. raise RuntimeError('unknown patch format: %s' % stream[0])
  347. # The last patch
  348. if hunk:
  349. hunks.append(hunk)
  350. if old_path:
  351. if new_path:
  352. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  353. else:
  354. raise RuntimeError('unknown patch format after "%s"' % old_path)
  355. elif headers:
  356. raise RuntimeError('unknown patch format: %s' % \
  357. ('\n'.join(headers)))
  358. return out_diffs
  359. class DiffMarkup(object):
  360. def __init__(self, stream):
  361. self._diffs = DiffParser(stream).get_diffs()
  362. def markup(self, side_by_side=False, width=0):
  363. """Returns a generator"""
  364. if side_by_side:
  365. return self._markup_side_by_side(width)
  366. else:
  367. return self._markup_traditional()
  368. def _markup_traditional(self):
  369. for diff in self._diffs:
  370. for line in diff.markup_traditional():
  371. yield line
  372. def _markup_side_by_side(self, width):
  373. for diff in self._diffs:
  374. for line in diff.markup_side_by_side(width):
  375. yield line
  376. def markup_to_pager(stream):
  377. markup = DiffMarkup(stream)
  378. color_diff = markup.markup(side_by_side=opts.side_by_side,
  379. width=opts.width)
  380. # args stolen fron git source: github.com/git/git/blob/master/pager.c
  381. pager = subprocess.Popen(['less', '-FRSXK'],
  382. stdin=subprocess.PIPE, stdout=sys.stdout)
  383. for line in color_diff:
  384. pager.stdin.write(line.encode('utf-8'))
  385. pager.stdin.close()
  386. pager.wait()
  387. def check_command_status(arguments):
  388. """Return True if command returns 0."""
  389. try:
  390. return subprocess.call(
  391. arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
  392. except OSError:
  393. return False
  394. def revision_control_diff(path):
  395. """Return diff from revision control system."""
  396. if check_command_status(['git', 'rev-parse']):
  397. return subprocess.Popen(['git', 'diff'], stdout=subprocess.PIPE).stdout
  398. elif check_command_status(['svn', 'info']):
  399. return subprocess.Popen(['svn', 'diff'], stdout=subprocess.PIPE).stdout
  400. elif check_command_status(['hg', 'summary']):
  401. return subprocess.Popen(['hg', 'diff'], stdout=subprocess.PIPE).stdout
  402. def decode(line):
  403. """Decode UTF-8 if necessary."""
  404. try:
  405. return line.decode('utf-8')
  406. except AttributeError:
  407. return line
  408. if __name__ == '__main__':
  409. import optparse
  410. usage = '''%s [options] [diff]''' % os.path.basename(sys.argv[0])
  411. description= ('''View incremental, colored diff in unified format or '''
  412. '''in side by side mode with auto pager, read stdin if '''
  413. '''diff (patch) file is not given''')
  414. parser = optparse.OptionParser(usage=usage, description=description)
  415. parser.add_option('-s', '--side-by-side', action='store_true',
  416. help=('show in side-by-side mode'))
  417. parser.add_option('-w', '--width', type='int', default=80, metavar='N',
  418. help='set text width (side-by-side mode only), default is 80')
  419. opts, args = parser.parse_args()
  420. if len(args) >= 1:
  421. if IS_PY3:
  422. # Python3 needs the newline='' to keep '\r' (DOS format)
  423. diff_hdl = open(args[0], mode='rt', newline='')
  424. else:
  425. diff_hdl = open(args[0], mode='rt')
  426. elif sys.stdin.isatty():
  427. diff_hdl = revision_control_diff(os.getcwd())
  428. if not diff_hdl:
  429. parser.print_help()
  430. sys.exit(1)
  431. else:
  432. diff_hdl = sys.stdin
  433. # FIXME: can't use generator for now due to current implementation in parser
  434. stream = [decode(l) for l in diff_hdl.readlines()]
  435. # Don't let empty diff pass thru
  436. if not stream:
  437. sys.exit(0)
  438. if diff_hdl is not sys.stdin:
  439. diff_hdl.close()
  440. if sys.stdout.isatty():
  441. try:
  442. markup_to_pager(stream)
  443. except IOError:
  444. e = sys.exc_info()[1]
  445. if e.errno == errno.EPIPE:
  446. pass
  447. else:
  448. # pipe out stream untouched to make sure it is still a patch
  449. sys.stdout.write(''.join(stream))
  450. sys.exit(0)
  451. # vim:set et sts=4 sw=4 tw=80: