// Package predictor implements the predictor compression/decompression algorithm
// as specified by RFC1978 - PPP Predictor Compression Protocol
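//
// The compressed stream is a sequence of blocks. Each block starts with a
// flag byte in which bit i set means that byte i of the block was correctly
// predicted by the guess table and is therefore omitted; the mispredicted
// bytes follow the flag byte as literals.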
package predictor

import (
	"bytes"
	"io"
)
// context holds the state shared by one compression or decompression
// stream: the predictor guess table, a scratch block buffer, and a slice
// over that buffer tracking buffered input.
type context struct {
	table  [1 << 16]byte // guess table, indexed by the rolling hash
	buffer [1 << 3]byte  // scratch space for one 8-byte block
	input  []byte        // buffered bytes pending processing
	hash   uint16        // rolling hash selecting the table entry
}
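
// The predictor's guess for the next byte is table[hash]; after each byte b
// the hash advances as hash = (hash << 4) ^ uint16(b). A single prediction
// step therefore looks like this (an illustrative sketch mirroring the
// loops below, not a separate API):
//
//	guess := ctx.table[ctx.hash]
//	if guess != current {
//		ctx.table[ctx.hash] = current // miss: remember the byte
//	} // a hit is encoded as a single flag bit instead of the byte
//	ctx.hash = (ctx.hash << 4) ^ uint16(current)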

// Compressor returns a closure over the provided writer that compresses
// the data it is called with.
//
// Data may be buffered internally, as the predictor operates on 8-byte
// blocks preceded by a flag byte. A call with no data flushes any buffered
// bytes as a final, possibly shorter, block.
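//
// A minimal usage sketch (a hypothetical caller; any io.Writer can serve
// as the destination, a bytes.Buffer is assumed here):
//
//	var compressed bytes.Buffer
//	write := Compressor(&compressed)
//	if err := write([]byte("some payload")); err != nil {
//		// handle the error
//	}
//	if err := write(nil); err != nil { // flush the trailing partial block
//		// handle the error
//	}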
func Compressor(writer io.Writer) func([]byte) error {
	var ctx context
	ctx.input = ctx.buffer[:0]

	// Forward declaration as it is required for recursion
	var write func(data []byte) error

	write = func(data []byte) error {
		var (
			err          error
			blockSize    int = 8
			bufferLength int = len(ctx.input)
		)

		// Force a flush if we are called with no data to write
		if len(data) == 0 {
			// An empty buffer makes the flush a no-op
			if bufferLength == 0 {
				return nil
			}
			// We can't have more than 7 bytes in the buffer, so the final
			// block is simply shorter; flush the buffer as that block
			blockSize = bufferLength
			data = ctx.input
			ctx.input = ctx.buffer[:0]
			goto write
		}

		// Check if there are pending bytes in the buffer
		if bufferLength > 0 && bufferLength < 8 {
			// Check whether we have enough bytes for a complete block; an
			// exact fill must flush as well, or the buffer would silently
			// grow to 8 bytes and be clobbered by a later call
			if len(data) >= 8-bufferLength {
				// Fill the buffer ...
				ctx.input = append(ctx.input, data[:8-bufferLength]...)
				// ... and recurse, calling ourselves with the full buffer
				err = write(ctx.input)
				if err != nil {
					return err
				}
				// Clear the buffer
				ctx.input = ctx.input[:0]
				// Recurse for the remaining bytes, if any, and return so
				// that the block loop below does not process data twice
				if remaining := data[8-bufferLength:]; len(remaining) > 0 {
					return write(remaining)
				}
				return nil
			}
			// Add the insufficient data to the buffer and return
			ctx.input = append(ctx.input, data...)
			return nil
		}

	write:
		var buf []byte = make([]byte, 1, blockSize+1)

		for block := 0; block < len(data)/blockSize; block++ {
			for i := 0; i < blockSize; i++ {
				var current byte = data[(block*blockSize)+i]
				if ctx.table[ctx.hash] == current {
					// Guess was right - don't output the byte, just set
					// its flag bit
					buf[0] |= 1 << uint(i)
				} else {
					// Guess was wrong - update the table and output the byte
					ctx.table[ctx.hash] = current
					buf = append(buf, current)
				}
				ctx.hash = (ctx.hash << 4) ^ uint16(current)
			}

			_, err = writer.Write(buf)
			if err != nil {
				return err
			}

			// Reset the flags and the block buffer for the next iteration
			buf[0] = 0
			buf = buf[:1]
		}

		// Buffer any trailing bytes that do not fill a complete block;
		// ctx.input is always empty when we reach this point
		if remainder := len(data) % blockSize; remainder > 0 {
			ctx.input = append(ctx.input, data[len(data)-remainder:]...)
		}

		return nil
	}

	return write
}

// reader adapts a plain read function to the io.Reader interface.
type reader func([]byte) (int, error)

func (r reader) Read(output []byte) (int, error) {
	return r(output)
}

// Decompressor returns an io.Reader that decompresses data produced by
// Compressor, reading the compressed stream from r on demand.
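//
// A minimal usage sketch (a hypothetical caller; the compressed stream is
// assumed to be in a bytes.Buffer filled by Compressor):
//
//	decompressed, err := io.ReadAll(Decompressor(&compressed))
//	if err != nil {
//		// handle the error
//	}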
func Decompressor(r io.Reader) io.Reader {
	var ctx context
	ctx.input = ctx.buffer[:0]

	return reader(func(output []byte) (int, error) {
		var (
			err       error
			flags     byte
			readCount int
		)

		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}

		// Serve any bytes left over from the previous block first
		if len(ctx.input) > 0 {
			readCount = copy(output, ctx.input)
			ctx.input = ctx.input[readCount:]
			return readCount, nil
		}

		// Read the flag byte that starts each block; ReadFull retries short
		// reads, so a plain io.EOF here cleanly marks the end of the stream
		_, err = io.ReadFull(r, ctx.buffer[:1])
		if err != nil {
			return 0, err
		}

		// TODO - read all literal bytes of a block at once, based on the
		// hamming weight of the flag byte, instead of byte-sized reads
		flags = ctx.buffer[0]

		var i uint
		for ; i < 8; i++ {
			if flags&(1<<i) > 0 {
				// Guess was right - take the predicted byte from the table
				ctx.buffer[i] = ctx.table[ctx.hash]
			} else {
				// Guess was wrong - the byte follows as a literal
				readCount, err = r.Read(ctx.buffer[i : i+1])
				if readCount == 0 {
					if err != nil && err != io.EOF {
						return 0, err
					}
					// A clean EOF (or an empty read, treated as one) just
					// means the final block is shorter than 8 bytes
					break
				}
				ctx.table[ctx.hash] = ctx.buffer[i]
			}
			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.buffer[i])
		}

		// A flag byte followed by no data at all means the stream was
		// truncated mid-block
		if i == 0 {
			return 0, io.ErrUnexpectedEOF
		}

		readCount = copy(output, ctx.buffer[:i])

		// Stash any decoded bytes that did not fit into output
		if uint(readCount) < i {
			ctx.input = ctx.buffer[readCount:i]
		}

		return readCount, nil
	})
}
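
// roundTrip is a usage sketch rather than part of the package API: it
// compresses payload and immediately decompresses the result. The function
// name and the bytes.Buffer destination are illustrative assumptions only.
func roundTrip(payload []byte) ([]byte, error) {
	var compressed bytes.Buffer

	write := Compressor(&compressed)
	if err := write(payload); err != nil {
		return nil, err
	}
	// An empty call flushes the buffered trailing block, if any
	if err := write(nil); err != nil {
		return nil, err
	}

	return io.ReadAll(Decompressor(&compressed))
}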