cdiff.py 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. #!/usr/bin/env python
  2. import sys
  3. import os
  4. import re
  5. import difflib
  6. COLORS = {
  7. 'reset' : '\x1b[0m',
  8. 'red' : '\x1b[31m',
  9. 'green' : '\x1b[32m',
  10. 'yellow' : '\x1b[33m',
  11. 'blue' : '\x1b[34m',
  12. 'magenta' : '\x1b[35m',
  13. 'cyan' : '\x1b[36m',
  14. 'lightred' : '\x1b[1;31m',
  15. 'lightgreen' : '\x1b[1;32m',
  16. 'lightyellow' : '\x1b[1;33m',
  17. 'lightblue ' : '\x1b[1;34m',
  18. 'lightmagenta' : '\x1b[1;35m',
  19. 'lightcyan' : '\x1b[1;36m',
  20. }
  21. def ansi_code(color):
  22. return COLORS.get(color, '')
  23. def colorize(text, start_color, end_color='reset'):
  24. return ansi_code(start_color) + text + ansi_code(end_color)
  25. class Hunk(object):
  26. def __init__(self, hunk_header, old_addr, old_offset, new_addr, new_offset):
  27. self._hunk_header = hunk_header
  28. self._old_addr = old_addr
  29. self._old_offset = old_offset
  30. self._new_addr = new_addr
  31. self._new_offset = new_offset
  32. self._hunk_list = [] # 2-element group (attr, line)
  33. def get_header(self):
  34. return self._hunk_header
  35. def get_old_addr(self):
  36. return (self._old_addr, self._old_offset)
  37. def get_new_addr(self):
  38. return (self._new_addr, self._new_offset)
  39. def append(self, attr, line):
  40. """attr: '-': old, '+': new, ' ': common"""
  41. self._hunk_list.append((attr, line))
  42. def mdiff(self):
  43. """The difflib._mdiff() function returns an interator which returns a
  44. tuple: (from line tuple, to line tuple, boolean flag)
  45. from/to line tuple -- (line num, line text)
  46. line num -- integer or None (to indicate a context separation)
  47. line text -- original line text with following markers inserted:
  48. '\0+' -- marks start of added text
  49. '\0-' -- marks start of deleted text
  50. '\0^' -- marks start of changed text
  51. '\1' -- marks end of added/deleted/changed text
  52. boolean flag -- None indicates context separation, True indicates
  53. either "from" or "to" line contains a change, otherwise False.
  54. """
  55. return difflib._mdiff(self._get_old_text(), self._get_new_text())
  56. def _get_old_text(self):
  57. out = []
  58. for (attr, line) in self._hunk_list:
  59. if attr != '+':
  60. out.append(line)
  61. return out
  62. def _get_new_text(self):
  63. out = []
  64. for (attr, line) in self._hunk_list:
  65. if attr != '-':
  66. out.append(line)
  67. return out
  68. def __iter__(self):
  69. for hunk_line in self._hunk_list:
  70. yield hunk_line
  71. class Diff(object):
  72. def __init__(self, headers, old_path, new_path, hunks):
  73. self._headers = headers
  74. self._old_path = old_path
  75. self._new_path = new_path
  76. self._hunks = hunks
  77. def markup_traditional(self):
  78. out = []
  79. for line in self._headers:
  80. out.append(self._markup_header(line))
  81. out.append(self._markup_old_path(self._old_path))
  82. out.append(self._markup_new_path(self._new_path))
  83. for hunk in self._hunks:
  84. out.append(self._markup_hunk_header(hunk.get_header()))
  85. save_line = ''
  86. for from_info, to_info, changed in hunk.mdiff():
  87. if changed:
  88. if not from_info[0]:
  89. line = to_info[1].strip('\x00\x01')
  90. out.append(self._markup_new(line))
  91. elif not to_info[0]:
  92. line = from_info[1].strip('\x00\x01')
  93. out.append(self._markup_old(line))
  94. else:
  95. out.append(self._markup_old('-' +
  96. self._markup_old_mix(from_info[1])))
  97. out.append(self._markup_new('+' +
  98. self._markup_new_mix(to_info[1])))
  99. else:
  100. out.append(self._markup_common(' ' + from_info[1]))
  101. return ''.join(out)
  102. def markup_side_by_side(self, show_number, width):
  103. """Do not really need to parse the hunks..."""
  104. return 'TODO: show_number=%s, width=%d' % (show_number, width)
  105. def _markup_header(self, line):
  106. return colorize(line, 'cyan')
  107. def _markup_old_path(self, line):
  108. return colorize(line, 'yellow')
  109. def _markup_new_path(self, line):
  110. return colorize(line, 'yellow')
  111. def _markup_hunk_header(self, line):
  112. return colorize(line, 'blue')
  113. def _markup_common(self, line):
  114. return colorize(line, 'reset')
  115. def _markup_old(self, line):
  116. return colorize(line, 'lightred')
  117. def _markup_new(self, line):
  118. return colorize(line, 'lightgreen')
  119. def _markup_mix(self, line, end_color):
  120. line = line.replace('\x00-', ansi_code('red'))
  121. line = line.replace('\x00+', ansi_code('green'))
  122. line = line.replace('\x00^', ansi_code('lightyellow'))
  123. line = line.replace('\x01', ansi_code(end_color))
  124. return colorize(line, end_color)
  125. def _markup_old_mix(self, line):
  126. return self._markup_mix(line, 'red')
  127. def _markup_new_mix(self, line):
  128. return self._markup_mix(line, 'green')
  129. class Udiff(Diff):
  130. @staticmethod
  131. def is_old_path(line):
  132. return line.startswith('--- ')
  133. @staticmethod
  134. def is_new_path(line):
  135. return line.startswith('+++ ')
  136. @staticmethod
  137. def is_hunk_header(line):
  138. return line.startswith('@@ -')
  139. @staticmethod
  140. def is_old(line):
  141. return line.startswith('-') and not Udiff.is_old_path(line)
  142. @staticmethod
  143. def is_new(line):
  144. return line.startswith('+') and not Udiff.is_new_path(line)
  145. @staticmethod
  146. def is_common(line):
  147. return line.startswith(' ')
  148. @staticmethod
  149. def is_eof(line):
  150. # \ No newline at end of file
  151. return line.startswith('\\')
  152. @staticmethod
  153. def is_header(line):
  154. return re.match(r'^[^+@\\ -]', line)
  155. class DiffParser(object):
  156. def __init__(self, stream):
  157. for line in stream[:10]:
  158. if line.startswith('+++ '):
  159. self._type = 'udiff'
  160. break
  161. else:
  162. raise RuntimeError('unknown diff type')
  163. try:
  164. self._diffs = self._parse(stream)
  165. except (AssertionError, IndexError):
  166. raise RuntimeError('invalid patch format')
  167. def get_diffs(self):
  168. return self._diffs
  169. def _parse(self, stream):
  170. if self._type == 'udiff':
  171. return self._parse_udiff(stream)
  172. else:
  173. raise RuntimeError('unsupported diff format')
  174. def _parse_udiff(self, stream):
  175. """parse all diff lines here, construct a list of Udiff objects"""
  176. out_diffs = []
  177. headers = []
  178. old_path = None
  179. new_path = None
  180. hunks = []
  181. hunk = None
  182. while stream:
  183. if Udiff.is_header(stream[0]):
  184. if headers and old_path:
  185. # Encounter a new header
  186. assert new_path is not None
  187. assert hunk is not None
  188. hunks.append(hunk)
  189. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  190. headers = []
  191. old_path = None
  192. new_path = None
  193. hunks = []
  194. hunk = None
  195. else:
  196. headers.append(stream.pop(0))
  197. elif Udiff.is_old_path(stream[0]):
  198. if old_path:
  199. # Encounter a new patch set
  200. assert new_path is not None
  201. assert hunk is not None
  202. hunks.append(hunk)
  203. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  204. headers = []
  205. old_path = None
  206. new_path = None
  207. hunks = []
  208. hunk = None
  209. else:
  210. old_path = stream.pop(0)
  211. elif Udiff.is_new_path(stream[0]):
  212. assert old_path is not None
  213. assert new_path is None
  214. new_path = stream.pop(0)
  215. elif Udiff.is_hunk_header(stream[0]):
  216. assert old_path is not None
  217. assert new_path is not None
  218. if hunk:
  219. # Encounter a new hunk header
  220. hunks.append(hunk)
  221. hunk = None
  222. else:
  223. # @@ -3,7 +3,6 @@
  224. hunk_header = stream.pop(0)
  225. addr_info = hunk_header.split()[1]
  226. assert addr_info.startswith('-')
  227. old_addr = addr_info.split(',')[0]
  228. old_offset = addr_info.split(',')[1]
  229. addr_info = hunk_header.split()[2]
  230. assert addr_info.startswith('+')
  231. new_addr = addr_info.split(',')[0]
  232. new_offset = addr_info.split(',')[1]
  233. hunk = Hunk(hunk_header, old_addr, old_offset, new_addr,
  234. new_offset)
  235. elif Udiff.is_old(stream[0]) or Udiff.is_new(stream[0]) or \
  236. Udiff.is_common(stream[0]):
  237. assert old_path is not None
  238. assert new_path is not None
  239. assert hunk is not None
  240. hunk_line = stream.pop(0)
  241. hunk.append(hunk_line[0], hunk_line[1:])
  242. elif Udiff.is_eof(stream[0]):
  243. # ignore
  244. stream.pop(0)
  245. else:
  246. raise RuntimeError('unknown patch format: %s' % stream[0])
  247. # The last patch
  248. if hunk:
  249. hunks.append(hunk)
  250. if old_path:
  251. if new_path:
  252. out_diffs.append(Diff(headers, old_path, new_path, hunks))
  253. else:
  254. raise RuntimeError('unknown patch format after "%s"' % old_path)
  255. elif headers:
  256. raise RuntimeError('unknown patch format: %s' % \
  257. ('\n'.join(headers)))
  258. return out_diffs
  259. class DiffMarkup(object):
  260. def __init__(self, stream):
  261. self._diffs = DiffParser(stream).get_diffs()
  262. def markup(self, side_by_side=False, show_number=False, width=0):
  263. if side_by_side:
  264. return self._markup_side_by_side(show_number, width)
  265. else:
  266. return self._markup_traditional()
  267. def _markup_traditional(self):
  268. out = []
  269. for diff in self._diffs:
  270. out.append(diff.markup_traditional())
  271. return out
  272. def _markup_side_by_side(self, show_number, width):
  273. """width of 0 or negative means auto detect terminal width"""
  274. out = []
  275. for diff in self._diffs:
  276. out.append(diff.markup_side_by_side(show_number, width))
  277. return out
  278. if __name__ == '__main__':
  279. import optparse
  280. import subprocess
  281. usage = '''
  282. %(prog)s [options] [diff]
  283. View diff (patch) file if given, otherwise read stdin''' % \
  284. {'prog': os.path.basename(sys.argv[0])}
  285. parser = optparse.OptionParser(usage)
  286. parser.add_option('-s', '--side-by-side', action='store_true',
  287. help=('show in side-by-side mode'))
  288. parser.add_option('-n', '--number', action='store_true',
  289. help='show line number')
  290. parser.add_option('-w', '--width', type='int', default=0,
  291. help='set line width (side-by-side mode only)')
  292. opts, args = parser.parse_args()
  293. if opts.width < 0:
  294. opts.width = 0
  295. if len(args) >= 1:
  296. diff_hdl = open(args[0], 'r')
  297. elif sys.stdin.isatty():
  298. sys.stderr.write('Try --help option for usage\n')
  299. sys.exit(1)
  300. else:
  301. diff_hdl = sys.stdin
  302. stream = diff_hdl.readlines()
  303. if diff_hdl is not sys.stdin:
  304. diff_hdl.close()
  305. if sys.stdout.isatty():
  306. markup = DiffMarkup(stream)
  307. color_diff = markup.markup(side_by_side=opts.side_by_side,
  308. show_number=opts.number, width=opts.width)
  309. # args stolen fron git source: github.com/git/git/blob/master/pager.c
  310. pager = subprocess.Popen(['less', '-FRSXK'],
  311. stdin=subprocess.PIPE, stdout=sys.stdout)
  312. pager.stdin.write(''.join(color_diff))
  313. pager.stdin.close()
  314. pager.wait()
  315. else:
  316. # pipe out stream untouched to make sure it is still a patch
  317. sys.stdout.write(''.join(stream))
  318. sys.exit(0)
  319. # vim:set et sts=4 sw=4 tw=80: