// Package diff provides a diff implementation
// for finite, indexable sequences with comparable elements
package diff
import (
bits "0dev.org/bits"
)
// Interface abstracts the required knowledge to perform a diff
// on any two comparable, fixed-length sequences
type Interface interface {
// The sequences' lengths
Len() (int, int)
// True when the sequences' elements at those indices are equal
Equal(int, int) bool
}
// A Mark struct marks a length in sequence starting with an offset
type Mark struct {
From int
Length int
}
// A Delta struct is the result of a Diff operation
type Delta struct {
Added []Mark
Removed []Mark
}
// Diffs the provided data and returns e Delta struct
// with added entries' indices in the second sequence and removed from the first
func Diff(data Interface) Delta {
var len1, len2 = data.Len()
var mat matrix = matrix{v: bits.NewBit(uint(len1 * len2)), lenX: len1, lenY: len2}
for i := 0; i < len1; i++ {
for j := 0; j < len2; j++ {
mat.v.Poke(mat.at(i, j), data.Equal(i, j))
}
}
return recursiveDiff(box{0, 0, len1, len2}, mat)
}
type match struct {
x, y int
length int
}
type box struct {
x, y int
lenX, lenY int
}
// A helper structure that stores absolute dimension along a linear bit vector
// so that it can always properly translate (x, y) -> z on the vector
type matrix struct {
v bits.Vector
lenX, lenY int
}
// Translates (x, y) to an absolute position on the bit vector
func (m *matrix) at(x, y int) uint {
return uint(y + (x * m.lenY))
}
func recursiveDiff(bounds box, mat matrix) Delta {
var m match = largest(bounds, mat)
if m.length == 0 { // Recursion terminates
var immediate Delta
if bounds.lenY-bounds.y > 0 {
immediate.Added = []Mark{Mark{bounds.y, bounds.lenY}}
}
if bounds.lenX-bounds.x > 0 {
immediate.Removed = []Mark{Mark{bounds.x, bounds.lenX}}
}
return immediate
}
var left Delta = recursiveDiff(box{bounds.x, bounds.y, m.x, m.y}, mat)
var right Delta = recursiveDiff(box{m.x + m.length, m.y + m.length, bounds.lenX, bounds.lenY}, mat)
var result Delta
result.Added = append(left.Added, right.Added...)
result.Removed = append(left.Removed, right.Removed...)
return result
}
// Finds the largest common substring by looking at the provided match matrix
// starting from (bounds.x, bounds.y) with lengths bounds.lenX, bounds.lenY
func largest(bounds box, mat matrix) (result match) {
for i := bounds.x; i < bounds.lenX && result.length <= (bounds.lenX-bounds.x); i++ {
var m match = search(i, bounds.y, bounds.lenX, bounds.lenY, mat)
if m.length > result.length {
result = m
}
}
for j := bounds.y + 1; j < bounds.lenY && result.length <= (bounds.lenY-bounds.y); j++ {
var m match = search(bounds.x, j, bounds.lenX, bounds.lenY, mat)
if m.length > result.length {
result = m
}
}
return
}
// Searches the main diagonal for the longest sequential match line
func search(x, y, lenX, lenY int, mat matrix) (result match) {
var inMatch bool
var m match
for step := 0; step+x < lenX && step+y < lenY; step++ {
if mat.v.Peek(mat.at(step+x, step+y)) {
if !inMatch { // Create a new current record if there is none ...
inMatch, m.x, m.y, m.length = true, step+x, step+y, 1
} else { // ... otherwise just increment the existing
m.length++
}
if m.length > result.length {
result = m // Store it if it is longer ...
}
} else { // End of current of match
inMatch = false // ... and reset the current one
}
}
return
}