Index: src/0dev.org/predictor/predictor.go ================================================================== --- src/0dev.org/predictor/predictor.go +++ src/0dev.org/predictor/predictor.go @@ -10,10 +10,19 @@ type context struct { table [1 << 16]byte input []byte hash uint16 } + +// The following hash code is the heart of the algorithm: +// It builds a sliding hash sum of the previous 3-and-a-bit +// characters which will be used to index the guess table. +// A better hash function would result in additional compression, +// at the expense of time. +func (ctx *context) update(val uint16) { + ctx.hash = (ctx.hash << 4) ^ val +} type compressor func([]byte) error func (w compressor) Write(data []byte) (int, error) { return len(data), w(data) @@ -83,11 +92,11 @@ } else { // Guess was wrong, output char ctx.table[ctx.hash] = current buf = append(buf, current) } - ctx.hash = (ctx.hash << 4) ^ uint16(current) + ctx.update(uint16(current)) } if _, err := writer.Write(buf); err != nil { return err } @@ -123,13 +132,13 @@ var ctx context ctx.input = make([]byte, 0, 8) return decompressor(func(output []byte) (int, error) { var ( - err error - flags byte - rc, available, predicted, total int + err error + flags, predicted byte + rc, total, copied int ) // Sanity check for space to read into if len(output) == 0 { return 0, nil @@ -158,26 +167,25 @@ // Extend the buffer, copy the prediction header // and calculate the number of subsequent bytes to read ctx.input = ctx.input[:8] flags = ctx.input[0] - predicted = int(bits.Hamming(flags)) - available = 8 - predicted + predicted = bits.Hamming(flags) // Read the non-predicted bytes and place them in the end of the buffer rc, err = reader.Read(ctx.input[predicted:]) retryData: - if rc < int(available) && err == nil { + if rc < int(8-predicted) && err == nil { // Retry the read if we have fewer bytes than what the prediction header indicates var r int - r, err = reader.Read(ctx.input[predicted+rc:]) + r, err = reader.Read(ctx.input[int(predicted)+rc:]) rc += r goto retryData } // Continue on any error, try to decompress and return it along the result // rc now contains the amount of actual bytes in this cycle (usually 8) - rc += predicted + rc += int(predicted) // Walk the buffer, filling in the predicted blanks, // relocating read bytes and and updating the guess table for i, a := 0, predicted; i < rc; i++ { if (flags & (1 << uint(i))) > 0 { @@ -188,30 +196,30 @@ ctx.input[i], a = ctx.input[a], a+1 // Guess failed, update the table ctx.table[ctx.hash] = ctx.input[i] } // Update the hash - ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i]) + ctx.update(uint16(ctx.input[i])) } // Copy the decompressed data to the output ctx.input = ctx.input[:rc] - available = copy(output, ctx.input) + copied = copy(output, ctx.input) - total += available + total += copied // Check for remaining bytes that dont fit in the output buffer - if available < rc { - ctx.input = ctx.input[:copy(ctx.input, ctx.input[available:])] + if copied < rc { + ctx.input = ctx.input[:copy(ctx.input, ctx.input[copied:])] } else { // Clear the buffer ctx.input = ctx.input[:0] - output = output[available:] + output = output[copied:] if len(output) > 0 && err == nil { goto readHeader } } return total, err }) }