import re class _glob(object): def __repr__(self): return "" def __eq__(self, other): return False Glob = _glob() GLOB_REPR = '(.*)' escape = re.escape class GlobbedString(tuple): def as_re(self): def to_re(obj): if obj is Glob: return GLOB_REPR else: return escape(obj) return ''.join(to_re(x) for x in self) def as_backref(self): def enum_globs(l): count = 0 for item in l: if item is Glob: count += 1 yield '\\'+str(count) else: yield item return ''.join(enum_globs(self)) def generate(*lines): '''Returns a tuple-like object containing interlaced strings and Glob objects. Describing similarities between strings. >>> generate('line1','line2') ('line', ) >>> generate('1line','2line') (, 'line') >>> generate('1line','line2') (, 'line', ) >>> generate('3line1','4line2') (, 'line', ) >>> generate('object.LeftBorder = 17', 'object.RightBorder = 992') ('object.', , 'tBorder = ', ) >>> generate('object.LeftBorder = 17', 'object.RightBorder = 192', 'object.TopBorder = 300',) ('object.', , 'Border = ', ) ''' if len(lines) == 1: return GlobbedString(lines) first_two, rest = lines[:2], lines[2:] fit = _generate(*first_two) def globify(line): ret = [] for c in line: if isinstance(c, str): ret.extend(list(c)) else: ret.append(c) return ret for next in rest: fit = _generate(globify(fit), next) return GlobbedString(fit) def _generate(str1, str2): paths = {} # find all similar subsequences for index1, char1 in enumerate(str1): for index2, char2 in enumerate(str2): if char1 == char2: if 0 in (index1, index2) or paths.get((index1, index2)) is None: # create a path paths[index1+1, index2+1] = ((index1, index2), char1) else: # remove old path and set a new one start, string = paths[index1, index2] del paths[index1, index2] paths[index1+1, index2+1] = (start, string + char1) paths = [(string, start, end) for (end, (start, string)) in paths.iteritems()] def in_field((p_start, p_end), start, end): return ( p_start[0] >= start[0] and p_start[1] >= start[1] and p_end[0] <= end[0] and p_end[1] <= end[1]) def recurr(paths, start, end): if start == end: return tuple() my_paths = [p for p in paths if in_field((p[1],p[2]), start, end)] if not my_paths: return (Glob,) longest, l_start, l_end = sorted(my_paths, key=lambda p: len(p[0]))[-1] ret = list(recurr(my_paths, start, l_start)) ret.append(longest) ret.extend(list(recurr(my_paths, l_end, end))) return tuple(ret) return recurr(paths, (0,0), (len(str1), len(str2))) def _test(): import doctest doctest.testmod() if __name__ == '__main__': _test()