Index: src/0dev.org/bits/bits.go ================================================================== --- src/0dev.org/bits/bits.go +++ src/0dev.org/bits/bits.go @@ -1,11 +1,33 @@ -// Package bits provides a bit vector interface and several implementations +// Package bits provides various constructs that work with bits package bits import ( "strconv" ) + +var hamming = [256]byte{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8} + +// Returns the number of raised bits in the given byte +func Hamming(b byte) byte { + return hamming[b] +} // The Vector interface defines methods on the bit vector type Vector interface { // Retrieves the bit at the designated position Peek(uint) bool Index: src/0dev.org/bits/bits_test.go ================================================================== --- src/0dev.org/bits/bits_test.go +++ src/0dev.org/bits/bits_test.go @@ -2,10 +2,25 @@ import ( "strconv" "testing" ) + +func TestHamming(t *testing.T) { + for i := 0; i < 256; i++ { + b, result := i, byte(0) + for b > 0 { + if (b & 1) > 0 { + result++ + } + b = b >> 1 + } + if result != Hamming(byte(i)) { + t.Error("Invalid hamming weight reported for ", i) + } + } +} var sizes []uint = []uint{0, 31, 32, 33, 61, 63, 64, 127, 128, 129} func TestBitSize(t *testing.T) { for _, size := range sizes { Index: src/0dev.org/predictor/predictor.go ================================================================== --- src/0dev.org/predictor/predictor.go +++ src/0dev.org/predictor/predictor.go @@ -5,22 +5,23 @@ import ( "io" ) type context struct { - table [65536]byte - hash uint16 - input []byte + table [1 << 16]byte + buffer [1 << 3]byte + input []byte + hash uint16 } // Returns a closure over the provided writer that compresses data when called. // // It can buffer data as the predictor mandates 8-byte blocks with a header. // A call with no data will force a flush. func Compressor(writer io.Writer) func([]byte) error { var ctx context - ctx.input = make([]byte, 8) + ctx.input = ctx.buffer[:] // Forward declaration as it is required for recursion var write func(data []byte) error write = func(data []byte) error { @@ -96,5 +97,89 @@ return nil } return write } + +type reader func([]byte) (int, error) + +func (r reader) Read(output []byte) (int, error) { + return r(output) +} + +// TODO - document +func Decompressor(reader io.Reader) reader { + var ctx context + ctx.input = ctx.buffer[:0] + + return func(output []byte) (int, error) { + var ( + err error + flags byte + readCount int + ) + + // Sanity check for space to read into + if len(output) == 0 { + return 0, nil + } + + // Check whether we have leftover data in the buffer + if len(ctx.input) > 0 { + readCount = copy(output, ctx.input) + ctx.input = ctx.input[readCount:] + return readCount, nil + } + + // // The buffer will shrink as it empties, restore it if it is needed + // if len(ctx.input) == 0 { + // ctx.input = ctx.buffer[:1] + // } + + // Read the flags + readCount, err = reader.Read(ctx.buffer[:1]) + if readCount == 0 || err != nil { + return readCount, err + } + + // This is single-iteration only but it is fine according to io.Reader's contract ?! + // TODO - read all bytes from a block based on the hamming weight of the flag + // and just shuffle them for predictions instead of bite-sized reads ;) + + flags = ctx.buffer[0] + + var i uint = 0 + for ; i < 8; i++ { + if flags&(1< 0 { + // Guess was right + ctx.buffer[i] = ctx.table[ctx.hash] + } else { + readCount, err = reader.Read(ctx.buffer[i:(i + 1)]) + + if err == io.EOF { + break + } + + if err != nil { + return readCount, err + } + + if readCount == 0 { // treat as EoF + break + } + + ctx.table[ctx.hash] = ctx.buffer[i] + } + + ctx.hash = (ctx.hash << 4) ^ uint16(ctx.buffer[i]) + } + + readCount = copy(output, ctx.buffer[:i]) + + // Place any remaining bytes in the buffer + if uint(readCount) < i { + ctx.input = ctx.buffer[readCount:i] + } + + return readCount, nil + } +} Index: src/0dev.org/predictor/predictor_test.go ================================================================== --- src/0dev.org/predictor/predictor_test.go +++ src/0dev.org/predictor/predictor_test.go @@ -1,10 +1,11 @@ package predictor import ( diff "0dev.org/diff" "bytes" + "io/ioutil" "testing" ) func TestRFC(t *testing.T) { input := []byte{0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a, @@ -34,11 +35,21 @@ if err != nil { t.Error(err) } result := buf.Bytes() - delta := diff.Diff(diff.D{len(result), len(output), func(i, j int) bool { return result[i] == output[i] }}) + delta := diff.Diff(diff.D{len(result), len(output), func(i, j int) bool { return result[i] == output[j] }}) + + if len(delta.Added) > 0 || len(delta.Removed) > 0 { + t.Error("Unexpected compressed output", delta) + } + + data := bytes.NewBuffer(result) + in := Decompressor(data) + + result, err = ioutil.ReadAll(in) + delta = diff.Diff(diff.D{len(result), len(input), func(i, j int) bool { return result[i] == input[j] }}) if len(delta.Added) > 0 || len(delta.Removed) > 0 { t.Error("Unexpected compressed output", delta) } }