Index: src/0dev.org/predictor/predictor.go ================================================================== --- src/0dev.org/predictor/predictor.go +++ src/0dev.org/predictor/predictor.go @@ -1,10 +1,11 @@ // Package predictor implements the predictor compression/decompression algorithm // as specified by RFC1978 - PPP Predictor Compression Protocol package predictor import ( + bits "0dev.org/bits" "io" ) type context struct { table [1 << 16]byte @@ -70,11 +71,10 @@ // ... and stage the rest of the data in the buffer ctx.input = append(ctx.input, data[blockSize-bufferLength:]...) return nil } - // TODO allocate this on ctx.buffer ... var buf []byte = make([]byte, 1, blockSize+1) for block := 0; block < len(data)/blockSize; block++ { for i := 0; i < blockSize; i++ { var current byte = data[(block*blockSize)+i] if ctx.table[ctx.hash] == current { @@ -117,83 +117,99 @@ return r(output) } // Returns an io.Reader implementation that wraps the provided io.Reader // and decompresses data according to the predictor algorithm -func Decompressor(wrapped io.Reader) io.Reader { +func Decompressor(reader io.Reader) io.Reader { var ctx context ctx.input = make([]byte, 0, 8) return decompressor(func(output []byte) (int, error) { var ( - err error - flags byte - readCount int + err error + flags byte + rc, available, predicted, total int ) // Sanity check for space to read into if len(output) == 0 { return 0, nil } // Check whether we have leftover data in the buffer if len(ctx.input) > 0 { - readCount = copy(output, ctx.input) - - // Check whether we still have leftover data in the buffer :) - if readCount < len(ctx.input) { - ctx.input = ctx.input[:copy(ctx.input, ctx.input[readCount:])] - } - return readCount, nil - } - - // This is single-iteration only but it is fine according to io.Reader's contract ?! - // TODO - read all bytes from a block based on the hamming weight of the flag - // and just shuffle them for predictions instead of bite-sized reads ;) - - // Read the flags - readCount, err = wrapped.Read(ctx.input[:1]) - if readCount == 0 || err != nil { - return readCount, err - } - - ctx.input = ctx.input[:8] - flags = ctx.input[0] - - var i uint = 0 - for ; i < 8; i++ { - if flags&(1< 0 { - // Guess was right - ctx.input[i] = ctx.table[ctx.hash] - } else { - readCount, err = wrapped.Read(ctx.input[i:(i + 1)]) - - if err == io.EOF { - break - } - - if err != nil { - return readCount, err - } - - if readCount == 0 { // treat as EoF - break - } - - ctx.table[ctx.hash] = ctx.input[i] - } - + rc = copy(output, ctx.input) + + // Check whether we still have leftover data in the buffer :) + if rc < len(ctx.input) { + ctx.input = ctx.input[:copy(ctx.input, ctx.input[rc:])] + } + return rc, nil + } + + // Read the next prediction header + readHeader: + rc, err = reader.Read(ctx.input[:1]) + // Fail on error unless it is EOF + if err != nil && err != io.EOF { + return total, err + } else if rc == 0 { + return total, err + } + + // Extend the buffer, copy the prediction header + // and calculate the number of subsequent bytes to read + ctx.input = ctx.input[:8] + flags = ctx.input[0] + predicted = int(bits.Hamming(flags)) + available = 8 - predicted + + // Read the non-predicted bytes and place them in the end of the buffer + rc, err = reader.Read(ctx.input[predicted:]) + retryData: + if rc < int(available) && err == nil { + // Retry the read if we have fewer bytes than what the prediction header indicates + var r int + r, err = reader.Read(ctx.input[predicted+rc:]) + rc += r + goto retryData + } // Continue on any error, try to decompress and return it along the result + + // Walk the buffer, filling in the predicted blanks, + // relocating read bytes and and updating the guess table + for i, a := uint(0), predicted; i < 8; i++ { + if (flags & (1 << i)) > 0 { + // Guess succeeded, fill in from the table + ctx.input[i] = ctx.table[ctx.hash] + rc++ + } else { + // Relocate a read byte + ctx.input[i], a = ctx.input[a], a+1 + // Guess failed, update the table + ctx.table[ctx.hash] = ctx.input[i] + } + // Update the hash ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i]) } - readCount = copy(output, ctx.input[:i]) + // rc now contains the precise amount of populated data + ctx.input = ctx.input[:rc] + available = copy(output, ctx.input) + + total += available - // Place any remaining bytes in the buffer - if uint(readCount) < i { - ctx.input = ctx.input[readCount:i] + // Check for remaining bytes that dont fit in the output buffer + if available < rc { + ctx.input = ctx.input[:copy(ctx.input, ctx.input[available:])] } else { + // Clear the buffer ctx.input = ctx.input[:0] + + output = output[available:] + if len(output) > 0 && err == nil { + goto readHeader + } } - return readCount, nil + return total, err }) }