| module HTMLDiff |
| |
| Match = Struct.new(:start_in_old, :start_in_new, :size) |
| class Match |
| def end_in_old |
| self.start_in_old + self.size |
| end |
| |
| def end_in_new |
| self.start_in_new + self.size |
| end |
| end |
| |
| Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new) |
| |
| class DiffBuilder |
| |
| def initialize(old_version, new_version) |
| @old_version, @new_version = old_version, new_version |
| split_inputs_to_words |
| index_new_words |
| end |
| |
| def split_inputs_to_words |
| @old_words = explode(@old_version) |
| @new_words = explode(@new_version) |
| end |
| |
| def index_new_words |
| @word_indices = Hash.new { |h, word| h[word] = [] } |
| @new_words.each_with_index { |word, i| @word_indices[word] << i } |
| end |
| |
| def operations |
| position_in_old = position_in_new = 0 |
| operations = [] |
| |
| matches = matching_blocks |
| # an empty match at the end forces the loop below to handle the unmatched tails |
| # I'm sure it can be done more gracefully, but not at 23:52 |
| matches << Match.new(@old_words.length, @new_words.length, 0) |
| |
| matches.each_with_index do |match, i| |
| match_starts_at_current_position_in_old = (position_in_old == match.start_in_old) |
| match_starts_at_current_position_in_new = (position_in_new == match.start_in_new) |
| |
| action_upto_match_positions = |
| case [match_starts_at_current_position_in_old, match_starts_at_current_position_in_new] |
| when [false, false] |
| :replace |
| when [true, false] |
| :insert |
| when [false, true] |
| :delete |
| else |
| # this happens if the first few words are same in both versions |
| :none |
| end |
| |
| if action_upto_match_positions != :none |
| operation_upto_match_positions = |
| Operation.new(action_upto_match_positions, |
| position_in_old, match.start_in_old, |
| position_in_new, match.start_in_new) |
| operations << operation_upto_match_positions |
| end |
| if match.size != 0 |
| match_operation = Operation.new(:equal, |
| match.start_in_old, match.end_in_old, |
| match.start_in_new, match.end_in_new) |
| operations << match_operation |
| end |
| |
| position_in_old = match.end_in_old |
| position_in_new = match.end_in_new |
| end |
| |
| operations |
| end |
| |
| def matching_blocks |
| matching_blocks = [] |
| recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks) |
| matching_blocks |
| end |
| |
| def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks) |
| match = find_match(start_in_old, end_in_old, start_in_new, end_in_new) |
| if match |
| if start_in_old < match.start_in_old and start_in_new < match.start_in_new |
| recursively_find_matching_blocks( |
| start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks) |
| end |
| matching_blocks << match |
| if match.end_in_old < end_in_old and match.end_in_new < end_in_new |
| recursively_find_matching_blocks( |
| match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks) |
| end |
| end |
| end |
| |
| def find_match(start_in_old, end_in_old, start_in_new, end_in_new) |
| |
| best_match_in_old = start_in_old |
| best_match_in_new = start_in_new |
| best_match_size = 0 |
| |
| match_length_at = Hash.new { |h, index| h[index] = 0 } |
| |
| start_in_old.upto(end_in_old - 1) do |index_in_old| |
| |
| new_match_length_at = Hash.new { |h, index| h[index] = 0 } |
| |
| @word_indices[@old_words[index_in_old]].each do |index_in_new| |
| next if index_in_new < start_in_new |
| break if index_in_new >= end_in_new |
| |
| new_match_length = match_length_at[index_in_new - 1] + 1 |
| new_match_length_at[index_in_new] = new_match_length |
| |
| if new_match_length > best_match_size |
| best_match_in_old = index_in_old - new_match_length + 1 |
| best_match_in_new = index_in_new - new_match_length + 1 |
| best_match_size = new_match_length |
| end |
| end |
| match_length_at = new_match_length_at |
| end |
| |
| # best_match_in_old, best_match_in_new, best_match_size = add_matching_words_left( |
| # best_match_in_old, best_match_in_new, best_match_size, start_in_old, start_in_new) |
| # best_match_in_old, best_match_in_new, match_size = add_matching_words_right( |
| # best_match_in_old, best_match_in_new, best_match_size, end_in_old, end_in_new) |
| |
| return (best_match_size != 0 ? Match.new(best_match_in_old, best_match_in_new, best_match_size) : nil) |
| end |
| |
| def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new) |
| while match_in_old > start_in_old and |
| match_in_new > start_in_new and |
| @old_words[match_in_old - 1] == @new_words[match_in_new - 1] |
| match_in_old -= 1 |
| match_in_new -= 1 |
| match_size += 1 |
| end |
| [match_in_old, match_in_new, match_size] |
| end |
| |
| def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new) |
| while match_in_old + match_size < end_in_old and |
| match_in_new + match_size < end_in_new and |
| @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size] |
| match_size += 1 |
| end |
| [match_in_old, match_in_new, match_size] |
| end |
| |
| def explode(sequence) |
| sequence.is_a?(String) ? sequence.split(//) : sequence |
| end |
| |
| end # of class Diff Builder |
| |
| end |