Unified Diff に色を付ける Python スクリプト

バックアップ代わりに。

cudiff.py

#!/usr/bin/python
# -*- encoding: utf-8 -*-

# cudiff - Coloring Unified DIFF
# 標準入力から unified diff を受け取って色を付ける

import sys
import difflib

import codecs
# Wrap the standard streams in UTF-8 codec reader/writer objects so that
# text read from stdin is decoded from UTF-8 and text written to stdout is
# encoded as UTF-8.
sys.stdin  = codecs.getreader('utf_8')(sys.stdin)
sys.stdout = codecs.getwriter('utf_8')(sys.stdout)

class TermColor:
    """ANSI SGR escape sequences for coloring terminal output.

    ``BG_*`` constants select a background color (SGR codes 40-47),
    ``FG_*`` constants select a foreground color (SGR codes 30-37), and
    ``DEFAULT`` resets all attributes.  Sequences can be concatenated to
    combine a background with a foreground.
    """

    # Background colors.
    BG_BLACK   = "\x1b[40m"
    BG_RED     = "\x1b[41m"
    BG_GREEN   = "\x1b[42m"
    BG_YELLOW  = "\x1b[43m"
    BG_BLUE    = "\x1b[44m"
    BG_MAGENTA = "\x1b[45m"
    BG_CYAN    = "\x1b[46m"
    BG_WHITE   = "\x1b[47m"

    # Foreground colors.
    FG_BLACK   = "\x1b[30m"
    FG_RED     = "\x1b[31m"
    FG_GREEN   = "\x1b[32m"
    FG_YELLOW  = "\x1b[33m"
    FG_BLUE    = "\x1b[34m"
    FG_MAGENTA = "\x1b[35m"
    FG_CYAN    = "\x1b[36m"
    FG_WHITE   = "\x1b[37m"

    # Reset every attribute back to the terminal default.
    DEFAULT    = "\x1b[0m"

class CUDiffer:  # modelled on the difflib.Differ class
    """Highlight the character-level differences between two strings.

    Each region reported by ``difflib.SequenceMatcher`` is wrapped in a
    start marker chosen by the kind of change and the side it belongs to,
    followed by a common end marker.  The markers are plain attributes
    named ``e_<operation>_<side>`` so another output mode can replace them.
    """

    def __init__(self):
        self.set_mode_terminal()

    def set_mode_terminal(self):
        """Select terminal escape sequences as the embedded markers."""
        white_on_red = TermColor.BG_RED + TermColor.FG_WHITE
        white_on_magenta = TermColor.BG_MAGENTA + TermColor.FG_WHITE
        black_on_white = TermColor.BG_WHITE + TermColor.FG_BLACK

        # Text that exists only in A (deleted or replaced).
        self.e_del_a = white_on_red
        self.e_rep_a = white_on_red
        # Text that exists only in B (inserted or replacing).
        self.e_ins_b = white_on_magenta
        self.e_rep_b = white_on_magenta
        # Text common to both sides.
        self.e_equ_a = black_on_white
        self.e_equ_b = black_on_white
        # Terminator appended after every marked region.
        self.e_term = TermColor.DEFAULT

    def compare(self, a, b):
        """Return ``(marked_a, marked_b)`` for the sequences *a* and *b*.

        Raises ValueError if the matcher reports an opcode tag other than
        'delete', 'insert', 'replace' or 'equal'.
        """
        # Start marker per opcode as (side A, side B); None means the
        # opcode contributes nothing to that side.
        markup = {
            'delete':  (self.e_del_a, None),
            'insert':  (None, self.e_ins_b),
            'replace': (self.e_rep_a, self.e_rep_b),
            'equal':   (self.e_equ_a, self.e_equ_b),
        }

        pieces_a = []
        pieces_b = []
        matcher = difflib.SequenceMatcher(None, a, b)
        for op, a_lo, a_hi, b_lo, b_hi in matcher.get_opcodes():
            starts = markup.get(op)
            if starts is None:
                raise ValueError('unknown tag %r' % (op,))
            start_a, start_b = starts
            if start_a is not None:
                pieces_a.append(start_a + a[a_lo:a_hi] + self.e_term)
            if start_b is not None:
                pieces_b.append(start_b + b[b_lo:b_hi] + self.e_term)

        return ("".join(pieces_a), "".join(pieces_b))

class Liner:
    """Consume a unified diff line by line and build its colored form.

    Consecutive removed ('-') and added ('+') lines are buffered.  When any
    other line arrives the buffers are flushed: the i-th removed line is
    compared with the i-th added line through ``self.differ`` and all
    colored removed lines are emitted before all colored added lines.
    Everything else is copied to ``self.output`` unchanged.

    Hunk headers (``@@ -start,count +start,count @@``) are parsed so that,
    inside a hunk, a changed line whose text happens to begin with
    ``-- `` or ``++ `` (and therefore shows up as ``--- ...`` / ``+++ ...``)
    is still treated as content rather than as a file header.  When no
    hunk header has been seen, lines starting with ``--- `` / ``+++ `` are
    assumed to be file headers.
    """

    # Number of lines still expected on the old / new side of the hunk
    # currently being read; both are 0 outside of a hunk.
    _remain_old = 0
    _remain_new = 0

    def __init__(self):
        self.differ = CUDiffer()
        self.buff_old = []   # pending removed lines
        self.buff_new = []   # pending added lines
        self.output = ""     # colored text produced so far

    @staticmethod
    def _hunk_counts(line):
        """Return (old_count, new_count) of a hunk header line, else None."""
        fields = line.split()
        if len(fields) < 4 or fields[0] != '@@' or fields[3] != '@@':
            return None
        counts = []
        for sign, field in (('-', fields[1]), ('+', fields[2])):
            if not field.startswith(sign):
                return None
            start, comma, length = field[1:].partition(',')
            if not comma:
                length = '1'  # a bare "start" is shorthand for "start,1"
            try:
                int(start)
                count = int(length)
            except ValueError:
                return None
            if count < 0:
                return None
            counts.append(count)
        return (counts[0], counts[1])

    def do(self, line):
        """Process one input line (including its line terminator)."""
        lead = line[:1]
        if lead == '-':
            if self._remain_old > 0:
                # Inside a hunk every '-' line is removed content.
                self._remain_old -= 1
                self.buff_old.append(line)
                return
            if not line.startswith('--- '):
                # No hunk information: fall back to the header heuristic.
                self.buff_old.append(line)
                return
        elif lead == '+':
            if self._remain_new > 0:
                # Inside a hunk every '+' line is added content.
                self._remain_new -= 1
                self.buff_new.append(line)
                return
            if not line.startswith('+++ '):
                self.buff_new.append(line)
                return
        elif line.startswith('@@'):
            counts = self._hunk_counts(line)
            if counts is None:
                counts = (0, 0)  # unrecognized header: use the heuristic
            self._remain_old, self._remain_new = counts
        elif lead in (' ', '\n', '\r', ''):
            # Context line (possibly with its leading blank stripped):
            # it counts toward both sides of the hunk.
            if self._remain_old > 0:
                self._remain_old -= 1
            if self._remain_new > 0:
                self._remain_new -= 1
        elif lead != '\\':
            # Anything but a "\ No newline at end of file" marker cannot be
            # part of a hunk, so the hunk has ended.
            self._remain_old = 0
            self._remain_new = 0

        self.clear_buffer()
        self.output += line

    def clear_buffer(self):
        """Flush buffered removed/added lines into ``self.output``."""
        if not self.buff_old and not self.buff_new:
            return
        count_old = len(self.buff_old)
        count_new = len(self.buff_new)

        colored_old = []
        colored_new = []
        for i in range(max(count_old, count_new)):
            # Pair lines by position; the shorter side is padded with ''.
            old_line = self.buff_old[i] if i < count_old else ''
            new_line = self.buff_new[i] if i < count_new else ''
            marked_old, marked_new = self.differ.compare(old_line, new_line)
            colored_old.append(marked_old)
            colored_new.append(marked_new)

        self.buff_old = []
        self.buff_new = []

        self.output += "".join(colored_old) + "".join(colored_new)

def main():
    """Read a unified diff from stdin and write the colored diff to stdout.

    Output is written as soon as it becomes available instead of being
    accumulated for the whole input, so memory use does not grow with the
    size of the diff and a downstream pager sees text right away.
    """
    liner = Liner()
    write = sys.stdout.write
    for line in sys.stdin:
        liner.do(line)
        if liner.output:
            write(liner.output)
            liner.output = ""
    # Flush changed lines still buffered when the input ends.
    liner.clear_buffer()
    write(liner.output)
    # The script has always ended its output with one extra newline;
    # keep doing so to leave the emitted bytes unchanged.
    write("\n")

main()