cdiff.py 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Term based tool to view **colored**, **incremental** diff in *git/svn/hg*
  5. workspace, given patch or two files, or from stdin, with **side by side** and
  6. **auto pager** support. Requires python (>= 2.5.0) and ``less``.
  7. """
  8. META_INFO = {
  9. 'version' : '0.5',
  10. 'license' : 'BSD-3',
  11. 'author' : 'Matthew Wang',
  12. 'email' : 'mattwyl(@)gmail(.)com',
  13. 'url' : 'https://github.com/ymattw/cdiff',
  14. 'keywords' : 'colored incremental side-by-side diff',
  15. 'description' : ('View colored, incremental diff in workspace, given patch '
  16. 'or two files, or from stdin, with side by side and auto '
  17. 'pager support')
  18. }
import sys

# Refuse to run on interpreters older than 2.5.0. This check comes right after
# importing sys, before the remaining imports.
if sys.hexversion < 0x02050000:
    raise SystemExit("*** Requires python >= 2.5.0")

# True when running on Python 3.0 or newer.
IS_PY3 = sys.hexversion >= 0x03000000

import os
import re
import subprocess
import errno
import difflib
  28. COLORS = {
  29. 'reset' : '\x1b[0m',
  30. 'underline' : '\x1b[4m',
  31. 'reverse' : '\x1b[7m',
  32. 'red' : '\x1b[31m',
  33. 'green' : '\x1b[32m',
  34. 'yellow' : '\x1b[33m',
  35. 'blue' : '\x1b[34m',
  36. 'magenta' : '\x1b[35m',
  37. 'cyan' : '\x1b[36m',
  38. 'lightred' : '\x1b[1;31m',
  39. 'lightgreen' : '\x1b[1;32m',
  40. 'lightyellow' : '\x1b[1;33m',
  41. 'lightblue' : '\x1b[1;34m',
  42. 'lightmagenta' : '\x1b[1;35m',
  43. 'lightcyan' : '\x1b[1;36m',
  44. }
  45. # Keys for checking and values for diffing.
  46. REVISION_CONTROL = (
  47. (['git', 'rev-parse'], ['git', 'diff']),
  48. (['svn', 'info'], ['svn', 'diff']),
  49. (['hg', 'summary'], ['hg', 'diff'])
  50. )
  51. def ansi_code(color):
  52. return COLORS.get(color, '')
  53. def colorize(text, start_color, end_color='reset'):
  54. return ansi_code(start_color) + text + ansi_code(end_color)
  55. class Hunk(object):
  56. def __init__(self, hunk_headers, hunk_meta, old_addr, new_addr):
  57. self._hunk_headers = hunk_headers
  58. self._hunk_meta = hunk_meta
  59. self._old_addr = old_addr # tuple (start, offset)
  60. self._new_addr = new_addr # tuple (start, offset)
  61. self._hunk_list = [] # list of tuple (attr, line)
  62. def get_hunk_headers(self):
  63. return self._hunk_headers
  64. def get_hunk_meta(self):
  65. return self._hunk_meta
  66. def get_old_addr(self):
  67. return self._old_addr
  68. def get_new_addr(self):
  69. return self._new_addr
  70. def append(self, attr, line):
  71. """attr: '-': old, '+': new, ' ': common"""
  72. self._hunk_list.append((attr, line))
  73. def mdiff(self):
  74. r"""The difflib._mdiff() function returns an interator which returns a
  75. tuple: (from line tuple, to line tuple, boolean flag)
  76. from/to line tuple -- (line num, line text)
  77. line num -- integer or None (to indicate a context separation)
  78. line text -- original line text with following markers inserted:
  79. '\0+' -- marks start of added text
  80. '\0-' -- marks start of deleted text
  81. '\0^' -- marks start of changed text
  82. '\1' -- marks end of added/deleted/changed text
  83. boolean flag -- None indicates context separation, True indicates
  84. either "from" or "to" line contains a change, otherwise False.
  85. """
  86. return difflib._mdiff(self._get_old_text(), self._get_new_text())
  87. def _get_old_text(self):
  88. out = []
  89. for (attr, line) in self._hunk_list:
  90. if attr != '+':
  91. out.append(line)
  92. return out
  93. def _get_new_text(self):
  94. out = []
  95. for (attr, line) in self._hunk_list:
  96. if attr != '-':
  97. out.append(line)
  98. return out
  99. def __iter__(self):
  100. for hunk_line in self._hunk_list:
  101. yield hunk_line
  102. class Diff(object):
  103. def __init__(self, headers, old_path, new_path, hunks):
  104. self._headers = headers
  105. self._old_path = old_path
  106. self._new_path = new_path
  107. self._hunks = hunks
  108. # Follow detector and the parse_hunk_meta() are suppose to be overwritten
  109. # by derived class. No is_header() anymore, all non-recognized lines are
  110. # considered as headers
  111. #
  112. def is_old_path(self, line):
  113. return False
  114. def is_new_path(self, line):
  115. return False
  116. def is_hunk_meta(self, line):
  117. return False
  118. def parse_hunk_meta(self, line):
  119. """Returns a 2-eliment tuple, each of them is a tuple in form of (start,
  120. offset)"""
  121. return False
  122. def is_old(self, line):
  123. return False
  124. def is_new(self, line):
  125. return False
  126. def is_common(self, line):
  127. return False
  128. def is_eof(self, line):
  129. return False
  130. def markup_traditional(self):
  131. """Returns a generator"""
  132. for line in self._headers:
  133. yield self._markup_header(line)
  134. yield self._markup_old_path(self._old_path)
  135. yield self._markup_new_path(self._new_path)
  136. for hunk in self._hunks:
  137. for hunk_header in hunk.get_hunk_headers():
  138. yield self._markup_hunk_header(hunk_header)
  139. yield self._markup_hunk_meta(hunk.get_hunk_meta())
  140. for old, new, changed in hunk.mdiff():
  141. if changed:
  142. if not old[0]:
  143. # The '+' char after \x00 is kept
  144. # DEBUG: yield 'NEW: %s %s\n' % (old, new)
  145. line = new[1].strip('\x00\x01')
  146. yield self._markup_new(line)
  147. elif not new[0]:
  148. # The '-' char after \x00 is kept
  149. # DEBUG: yield 'OLD: %s %s\n' % (old, new)
  150. line = old[1].strip('\x00\x01')
  151. yield self._markup_old(line)
  152. else:
  153. # DEBUG: yield 'CHG: %s %s\n' % (old, new)
  154. yield self._markup_old('-') + \
  155. self._markup_old_mix(old[1])
  156. yield self._markup_new('+') + \
  157. self._markup_new_mix(new[1])
  158. else:
  159. yield self._markup_common(' ' + old[1])
  160. def markup_side_by_side(self, width):
  161. """Returns a generator"""
  162. def _normalize(line):
  163. return line.replace('\t', ' '*8).replace('\n', '').replace('\r', '')
  164. def _fit_width(markup, width, pad=False):
  165. """str len does not count correctly if left column contains ansi
  166. color code. Only left side need to set `pad`
  167. """
  168. out = []
  169. count = 0
  170. ansi_color_regex = r'\x1b\[(1;)?\d{1,2}m'
  171. patt = re.compile('^(%s)(.*)' % ansi_color_regex)
  172. repl = re.compile(ansi_color_regex)
  173. while markup and count < width:
  174. if patt.match(markup):
  175. out.append(patt.sub(r'\1', markup))
  176. markup = patt.sub(r'\3', markup)
  177. else:
  178. # FIXME: utf-8 wchar might break the rule here, e.g.
  179. # u'\u554a' takes double width of a single letter, also this
  180. # depends on your terminal font. I guess audience of this
  181. # tool never put that kind of symbol in their code :-)
  182. #
  183. out.append(markup[0])
  184. count += 1
  185. markup = markup[1:]
  186. if count == width and repl.sub('', markup):
  187. # stripped: output fulfil and still have ascii in markup
  188. out[-1] = ansi_code('reset') + colorize('>', 'lightmagenta')
  189. elif count < width and pad:
  190. pad_len = width - count
  191. out.append('%*s' % (pad_len, ''))
  192. return ''.join(out)
  193. # Setup line width and number width
  194. if width <= 0:
  195. width = 80
  196. (start, offset) = self._hunks[-1].get_old_addr()
  197. max1 = start + offset - 1
  198. (start, offset) = self._hunks[-1].get_new_addr()
  199. max2 = start + offset - 1
  200. num_width = max(len(str(max1)), len(str(max2)))
  201. left_num_fmt = colorize('%%(left_num)%ds' % num_width, 'yellow')
  202. right_num_fmt = colorize('%%(right_num)%ds' % num_width, 'yellow')
  203. line_fmt = left_num_fmt + ' %(left)s ' + ansi_code('reset') + \
  204. right_num_fmt + ' %(right)s\n'
  205. # yield header, old path and new path
  206. for line in self._headers:
  207. yield self._markup_header(line)
  208. yield self._markup_old_path(self._old_path)
  209. yield self._markup_new_path(self._new_path)
  210. # yield hunks
  211. for hunk in self._hunks:
  212. for hunk_header in hunk.get_hunk_headers():
  213. yield self._markup_hunk_header(hunk_header)
  214. yield self._markup_hunk_meta(hunk.get_hunk_meta())
  215. for old, new, changed in hunk.mdiff():
  216. if old[0]:
  217. left_num = str(hunk.get_old_addr()[0] + int(old[0]) - 1)
  218. else:
  219. left_num = ' '
  220. if new[0]:
  221. right_num = str(hunk.get_new_addr()[0] + int(new[0]) - 1)
  222. else:
  223. right_num = ' '
  224. left = _normalize(old[1])
  225. right = _normalize(new[1])
  226. if changed:
  227. if not old[0]:
  228. left = '%*s' % (width, ' ')
  229. right = right.lstrip('\x00+').rstrip('\x01')
  230. right = _fit_width(self._markup_new(right), width)
  231. elif not new[0]:
  232. left = left.lstrip('\x00-').rstrip('\x01')
  233. left = _fit_width(self._markup_old(left), width)
  234. right = ''
  235. else:
  236. left = _fit_width(self._markup_old_mix(left), width, 1)
  237. right = _fit_width(self._markup_new_mix(right), width)
  238. else:
  239. left = _fit_width(self._markup_common(left), width, 1)
  240. right = _fit_width(self._markup_common(right), width)
  241. yield line_fmt % {
  242. 'left_num': left_num,
  243. 'left': left,
  244. 'right_num': right_num,
  245. 'right': right
  246. }
  247. def _markup_header(self, line):
  248. return colorize(line, 'cyan')
  249. def _markup_old_path(self, line):
  250. return colorize(line, 'yellow')
  251. def _markup_new_path(self, line):
  252. return colorize(line, 'yellow')
  253. def _markup_hunk_header(self, line):
  254. return colorize(line, 'lightcyan')
  255. def _markup_hunk_meta(self, line):
  256. return colorize(line, 'lightblue')
  257. def _markup_common(self, line):
  258. return colorize(line, 'reset')
  259. def _markup_old(self, line):
  260. return colorize(line, 'lightred')
  261. def _markup_new(self, line):
  262. return colorize(line, 'lightgreen')
  263. def _markup_mix(self, line, base_color):
  264. del_code = ansi_code('reverse') + ansi_code(base_color)
  265. add_code = ansi_code('reverse') + ansi_code(base_color)
  266. chg_code = ansi_code('underline') + ansi_code(base_color)
  267. rst_code = ansi_code('reset') + ansi_code(base_color)
  268. line = line.replace('\x00-', del_code)
  269. line = line.replace('\x00+', add_code)
  270. line = line.replace('\x00^', chg_code)
  271. line = line.replace('\x01', rst_code)
  272. return colorize(line, base_color)
  273. def _markup_old_mix(self, line):
  274. return self._markup_mix(line, 'red')
  275. def _markup_new_mix(self, line):
  276. return self._markup_mix(line, 'green')
  277. class Udiff(Diff):
  278. def is_old_path(self, line):
  279. return line.startswith('--- ')
  280. def is_new_path(self, line):
  281. return line.startswith('+++ ')
  282. def is_hunk_meta(self, line):
  283. return line.startswith('@@ -') or line.startswith('## -')
  284. def parse_hunk_meta(self, hunk_meta):
  285. # @@ -3,7 +3,6 @@
  286. a = hunk_meta.split()[1].split(',') # -3 7
  287. if len(a) > 1:
  288. old_addr = (int(a[0][1:]), int(a[1]))
  289. else:
  290. # @@ -1 +1,2 @@
  291. old_addr = (int(a[0][1:]), 0)
  292. b = hunk_meta.split()[2].split(',') # +3 6
  293. if len(b) > 1:
  294. new_addr = (int(b[0][1:]), int(b[1]))
  295. else:
  296. # @@ -0,0 +1 @@
  297. new_addr = (int(b[0][1:]), 0)
  298. return (old_addr, new_addr)
  299. def is_old(self, line):
  300. """Exclude header line from svn log --diff output"""
  301. return line.startswith('-') and not self.is_old_path(line) and \
  302. not re.match(r'^-{4,}$', line.rstrip())
  303. def is_new(self, line):
  304. return line.startswith('+') and not self.is_new_path(line)
  305. def is_common(self, line):
  306. return line.startswith(' ')
  307. def is_eof(self, line):
  308. # \ No newline at end of file
  309. # \ No newline at end of property
  310. return line.startswith(r'\ No newline at end of')
  311. class PatchStream(object):
  312. def __init__(self, diff_hdl):
  313. self._diff_hdl = diff_hdl
  314. self._header_chunk_size = 0
  315. self._header_chunk = []
  316. # Test whether stream is empty by read 1 line
  317. line = self._diff_hdl.readline()
  318. if line is None:
  319. self._is_empty = True
  320. else:
  321. self._header_chunk.append(line)
  322. self._header_chunk_size += 1
  323. self._is_empty = False
  324. def is_empty(self):
  325. return self._is_empty
  326. def read_header_chunks(self, header_chunk_size):
  327. """Returns a small chunk for patch type detect, suppose to call once"""
  328. for i in range(1, header_chunk_size):
  329. line = self._diff_hdl.readline()
  330. if line is None:
  331. break
  332. self._header_chunk.append(line)
  333. self._header_chunk_size += 1
  334. yield line
  335. def __iter__(self):
  336. for line in self._header_chunk:
  337. yield line
  338. for line in self._diff_hdl:
  339. yield line
  340. class DiffParser(object):
  341. def __init__(self, stream):
  342. """Detect Udiff with 3 conditions, '## ' uaually indicates svn property
  343. changes in output from `svn log --diff`
  344. """
  345. self._stream = stream
  346. flag = 0
  347. for line in self._stream.read_header_chunks(100):
  348. line = decode(line)
  349. if line.startswith('--- '):
  350. flag |= 1
  351. elif line.startswith('+++ '):
  352. flag |= 2
  353. elif line.startswith('@@ ') or line.startswith('## '):
  354. flag |= 4
  355. if flag & 7:
  356. self._type = 'udiff'
  357. break
  358. else:
  359. raise RuntimeError('unknown diff type')
  360. def get_diff_generator(self):
  361. try:
  362. return self._parse()
  363. except (AssertionError, IndexError):
  364. raise RuntimeError('invalid patch format')
  365. def _parse(self):
  366. """parse all diff lines, construct a list of Diff objects"""
  367. if self._type == 'udiff':
  368. difflet = Udiff(None, None, None, None)
  369. else:
  370. raise RuntimeError('unsupported diff format')
  371. diff = Diff([], None, None, [])
  372. headers = []
  373. for line in self._stream:
  374. line = decode(line)
  375. if difflet.is_old_path(line):
  376. if diff._old_path and diff._new_path and len(diff._hunks) > 0:
  377. # One diff constructed
  378. yield diff
  379. diff = Diff([], None, None, [])
  380. diff = Diff(headers, line, None, [])
  381. headers = []
  382. elif difflet.is_new_path(line):
  383. diff._new_path = line
  384. elif difflet.is_hunk_meta(line):
  385. hunk_meta = line
  386. old_addr, new_addr = difflet.parse_hunk_meta(hunk_meta)
  387. hunk = Hunk(headers, hunk_meta, old_addr, new_addr)
  388. headers = []
  389. diff._hunks.append(hunk)
  390. elif len(diff._hunks) > 0 and (difflet.is_old(line) or \
  391. difflet.is_new(line) or difflet.is_common(line)):
  392. hunk_line = line
  393. diff._hunks[-1].append(hunk_line[0], hunk_line[1:])
  394. elif difflet.is_eof(line):
  395. # ignore
  396. pass
  397. else:
  398. # All other non-recognized lines are considered as headers or
  399. # hunk headers respectively
  400. #
  401. headers.append(line)
  402. if headers:
  403. raise RuntimeError('dangling header(s):\n%s' % ''.join(headers))
  404. # Validate and yield the last patch set
  405. assert diff._old_path is not None
  406. assert diff._new_path is not None
  407. assert len(diff._hunks) > 0
  408. assert len(diff._hunks[-1]._hunk_meta) > 0
  409. yield diff
  410. class DiffMarkup(object):
  411. def __init__(self, stream):
  412. self._diffs = DiffParser(stream).get_diff_generator()
  413. def markup(self, side_by_side=False, width=0):
  414. """Returns a generator"""
  415. if side_by_side:
  416. return self._markup_side_by_side(width)
  417. else:
  418. return self._markup_traditional()
  419. def _markup_traditional(self):
  420. for diff in self._diffs:
  421. for line in diff.markup_traditional():
  422. yield line
  423. def _markup_side_by_side(self, width):
  424. for diff in self._diffs:
  425. for line in diff.markup_side_by_side(width):
  426. yield line
  427. def markup_to_pager(stream, opts):
  428. markup = DiffMarkup(stream)
  429. color_diff = markup.markup(side_by_side=opts.side_by_side,
  430. width=opts.width)
  431. # args stolen fron git source: github.com/git/git/blob/master/pager.c
  432. pager = subprocess.Popen(['less', '-FRSX'],
  433. stdin=subprocess.PIPE, stdout=sys.stdout)
  434. for line in color_diff:
  435. pager.stdin.write(line.encode('utf-8'))
  436. pager.stdin.close()
  437. pager.wait()
  438. def check_command_status(arguments):
  439. """Return True if command returns 0."""
  440. try:
  441. return subprocess.call(
  442. arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
  443. except OSError:
  444. return False
  445. def revision_control_diff():
  446. """Return diff from revision control system."""
  447. for check, diff in REVISION_CONTROL:
  448. if check_command_status(check):
  449. return subprocess.Popen(diff, stdout=subprocess.PIPE).stdout
  450. def decode(line):
  451. """Decode UTF-8 if necessary."""
  452. try:
  453. return line.decode('utf-8')
  454. except AttributeError:
  455. return line
  456. def main():
  457. import optparse
  458. supported_vcs = [check[0] for check, _ in REVISION_CONTROL]
  459. usage = """
  460. %prog [options]
  461. %prog [options] <patch>
  462. %prog [options] <file1> <file2>"""
  463. parser = optparse.OptionParser(usage=usage,
  464. description=META_INFO['description'],
  465. version='%%prog %s' % META_INFO['version'])
  466. parser.add_option('-c', '--color', default='auto', metavar='WHEN',
  467. help='colorize mode "auto" (default), "always", or "never"')
  468. parser.add_option('-s', '--side-by-side', action='store_true',
  469. help='show in side-by-side mode')
  470. parser.add_option('-w', '--width', type='int', default=80, metavar='N',
  471. help='set text width (side-by-side mode only), default is 80')
  472. opts, args = parser.parse_args()
  473. if len(args) > 2:
  474. parser.print_help()
  475. return 1
  476. elif len(args) == 2:
  477. diff_hdl = subprocess.Popen(['diff', '-u', args[0], args[1]],
  478. stdout=subprocess.PIPE).stdout
  479. elif len(args) == 1:
  480. if IS_PY3:
  481. # Python3 needs the newline='' to keep '\r' (DOS format)
  482. diff_hdl = open(args[0], mode='rt', newline='')
  483. else:
  484. diff_hdl = open(args[0], mode='rt')
  485. elif sys.stdin.isatty():
  486. diff_hdl = revision_control_diff()
  487. if not diff_hdl:
  488. sys.stderr.write(('*** Not in a supported workspace, supported '
  489. 'are: %s\n\n') % ', '.join(supported_vcs))
  490. parser.print_help()
  491. return 1
  492. else:
  493. diff_hdl = sys.stdin
  494. stream = PatchStream(diff_hdl)
  495. # Don't let empty diff pass thru
  496. if stream.is_empty():
  497. return 0
  498. if opts.color == 'always' or (opts.color == 'auto' and sys.stdout.isatty()):
  499. try:
  500. markup_to_pager(stream, opts)
  501. except IOError:
  502. e = sys.exc_info()[1]
  503. if e.errno == errno.EPIPE:
  504. pass
  505. else:
  506. # pipe out stream untouched to make sure it is still a patch
  507. sys.stdout.write(''.join(stream))
  508. if diff_hdl is not sys.stdin:
  509. diff_hdl.close()
  510. return 0
  511. if __name__ == '__main__':
  512. sys.exit(main())
  513. # vim:set et sts=4 sw=4 tw=80: