// Package predictor implements the predictor compression/decompression algorithm
// as specified by RFC1978 - PPP Predictor Compression Protocol
package predictor
import (
bits "0dev.org/bits"
"io"
)
type context struct {
table [1 << 16]byte
input []byte
hash uint16
}
type compressor func([]byte) error
func (w compressor) Write(data []byte) (int, error) {
return len(data), w(data)
}
// Returns an io.Writer implementation that wraps the provided io.Writer
// and compresses data according to the predictor algorithm
//
// It can buffer data as the predictor mandates 8-byte blocks with a header.
// A call with no data will force a flush.
func Compressor(writer io.Writer) io.Writer {
var ctx context
ctx.input = make([]byte, 0, 8)
// Forward declaration as it is required for recursion
var write compressor
write = func(data []byte) error {
var (
blockSize int = 8
bufferLength int = len(ctx.input)
)
// Force a flush if we are called with no data to write
if len(data) == 0 {
// Nothing to flush if the buffer is empty though
if len(ctx.input) == 0 {
return nil
}
// We can't have more than 7 bytes in the buffer so this is safe
data, blockSize, bufferLength = ctx.input, len(ctx.input), 0
}
// Check if there are pending bytes in the buffer
if len(data) < blockSize || bufferLength > 0 {
// If the current buffer + new data can fit into a block
if (len(data) + bufferLength) <= blockSize {
ctx.input = append(ctx.input, data...)
// Flush the block if the buffer fills it
if len(ctx.input) == blockSize {
return write(nil)
}
// ... otherwise just return
return nil
}
// The current buffer + new data overflow the block size
// Complete the block, flush it ...
ctx.input = append(ctx.input, data[:blockSize-bufferLength]...)
if err := write(nil); err != nil {
return err
}
// ... and stage the rest of the data in the buffer
ctx.input = append(ctx.input, data[blockSize-bufferLength:]...)
return nil
}
var buf []byte = make([]byte, 1, blockSize+1)
for block := 0; block < len(data)/blockSize; block++ {
for i := 0; i < blockSize; i++ {
var current byte = data[(block*blockSize)+i]
if ctx.table[ctx.hash] == current {
// Guess was right - don't output
buf[0] |= 1 << uint(i)
} else {
// Guess was wrong, output char
ctx.table[ctx.hash] = current
buf = append(buf, current)
}
ctx.hash = (ctx.hash << 4) ^ uint16(current)
}
if _, err := writer.Write(buf); err != nil {
return err
}
// Reset the flags and buffer for the next iteration
buf, buf[0] = buf[:1], 0
}
if remaining := len(data) % blockSize; remaining > 0 {
ctx.input = ctx.input[:remaining]
copy(ctx.input, data[len(data)-remaining:])
} else {
ctx.input = ctx.input[:0]
}
return nil
}
return write
}
// A function type alias so that it can have methods attached to it
type decompressor func([]byte) (int, error)
// Required to implement io.Reader
func (r decompressor) Read(output []byte) (int, error) {
return r(output)
}
// Returns an io.Reader implementation that wraps the provided io.Reader
// and decompresses data according to the predictor algorithm
func Decompressor(reader io.Reader) io.Reader {
var ctx context
ctx.input = make([]byte, 0, 8)
return decompressor(func(output []byte) (int, error) {
var (
err error
flags byte
readCount, available int
)
// Sanity check for space to read into
if len(output) == 0 {
return 0, nil
}
// Check whether we have leftover data in the buffer
if len(ctx.input) > 0 {
readCount = copy(output, ctx.input)
// Check whether we still have leftover data in the buffer :)
if readCount < len(ctx.input) {
ctx.input = ctx.input[:copy(ctx.input, ctx.input[readCount:])]
}
return readCount, nil
}
// Read the next prediction header
readCount, err = reader.Read(ctx.input[:1])
// Fail on error unless it is EOF
if err != nil && err != io.EOF {
return 0, err
} else if readCount == 0 {
return 0, err
}
// Extend the buffer, copy the prediction header
// and calculate the number of subsequent bytes to read
ctx.input = ctx.input[:8]
flags = ctx.input[0]
available = 8 - int(bits.Hamming(flags))
// Read the non-predicted bytes according to header.
readCount, err = reader.Read(ctx.input[:available])
retryData:
if readCount < int(available) && err == nil {
// Retry the read if we have fewer bytes than what the prediction header indicates
var rc int
rc, err = reader.Read(ctx.input[readCount:available])
readCount += rc
goto retryData
} // Continue on any error, try to decompress and return it along the result
// Spread the read bytes right to left to avoid overlapping
for i, a := 7, available-1; i >= 0; i-- {
if ((flags >> uint(i)) & 1) == 0 {
ctx.input[i] = ctx.input[a]
a--
}
}
// Walk the buffer, fill in the predicted blanks and update the guess table
for i := uint(0); i < 8; i++ {
if (flags & (1 << i)) > 0 {
// Guess succeeded, fill in from the table
ctx.input[i] = ctx.table[ctx.hash]
readCount++
} else {
// Guess failed, update the table
ctx.table[ctx.hash] = ctx.input[i]
}
// Update the hash
ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i])
}
// readCount now contains the precise amount of populated data
ctx.input = ctx.input[:readCount]
available = copy(output, ctx.input)
// Check for remaining bytes that dont fit in the output buffer
if available < readCount {
ctx.input = ctx.input[:copy(ctx.input, ctx.input[available:])]
} else {
// Clear the buffer
ctx.input = ctx.input[:0]
}
return available, err
})
}