Check-in [b838653282]
Overview
Comment:Fixed issues with both compressor and decompressor, added more tests
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:b838653282d5752f8d5ba9f78bcf5064d4b22d35
User & Date: spaskalev on 2014-12-16 04:03:55
Other Links: manifest | tags
Context
2014-12-16
16:04
Removed goto from predictor's compressor, added more tests that invoke both compress/decompress check-in: 10013ae789 user: spaskalev tags: trunk
04:03
Fixed issues with both compressor and decompressor, added more tests check-in: b838653282 user: spaskalev tags: trunk
01:55
Added hamming weight lookup table for bytes in package bits. Added a PoC predictor decompressor implementation check-in: 60ca5b4b7b user: spaskalev tags: trunk
Changes

Modified src/0dev.org/predictor/predictor.go from [380067e183] to [f0ba5a860c].

    15     15   
    16     16   // Returns a closure over the provided writer that compresses data when called.
    17     17   //
    18     18   // It can buffer data as the predictor mandates 8-byte blocks with a header.
    19     19   // A call with no data will force a flush.
    20     20   func Compressor(writer io.Writer) func([]byte) error {
    21     21   	var ctx context
    22         -	ctx.input = ctx.buffer[:]
           22  +	ctx.input = ctx.buffer[:0]
    23     23   
    24     24   	// Forward declaration as it is required for recursion
    25     25   	var write func(data []byte) error
    26     26   
    27     27   	write = func(data []byte) error {
    28     28   		var (
    29     29   			err          error
    30     30   			blockSize    int = 8
    31     31   			bufferLength int = len(ctx.input)
    32     32   		)
    33     33   
    34     34   		// Force a flush if we are called with no data to write
    35     35   		if len(data) == 0 {
           36  +			if len(ctx.input) == 0 {
           37  +				return nil
           38  +			}
           39  +			data = ctx.input
           40  +
    36     41   			// We can't have more than 7 bytes in the buffer so this is safe
    37     42   			blockSize = len(ctx.input)
    38     43   			goto write
    39     44   		}
    40     45   
    41     46   		// Check if there are pending bytes in the buffer
    42         -		if bufferLength > 0 && bufferLength < 8 {
    43         -
           47  +		if len(data) < blockSize || bufferLength > 0 {
    44     48   			// Check whether we have enough bytes for a complete block
    45     49   			if len(data) > 8-bufferLength {
    46     50   				// Fill the buffer ...
    47     51   				ctx.input = append(ctx.input, data[:8-bufferLength]...)
    48     52   				// ... and recurse, calling ourselves with the full buffer
    49     53   				err = write(ctx.input)
    50     54   				if err != nil {
................................................................................
    68     72   				ctx.input = append(ctx.input, data...)
    69     73   				return nil
    70     74   			}
    71     75   		}
    72     76   
    73     77   	write:
    74     78   		var buf []byte = make([]byte, 1, blockSize+1)
    75         -		for block := 0; block < len(data)/blockSize; block++ {
           79  +
           80  +		var blocks int = len(data) / blockSize
           81  +		if blocks == 0 {
           82  +			blocks++
           83  +		}
           84  +
           85  +		for block := 0; block < blocks; block++ {
    76     86   			for i := 0; i < blockSize; i++ {
    77     87   				var current byte = data[(block*blockSize)+i]
    78     88   				if ctx.table[ctx.hash] == current {
    79     89   					// Guess was right - don't output
    80     90   					buf[0] |= 1 << uint(i)
    81     91   				} else {
    82     92   					// Guess was wrong, output char
................................................................................
    90    100   				return err
    91    101   			}
    92    102   
    93    103   			// Reset the flags and buffer for the next iteration
    94    104   			buf[0] ^= buf[0]
    95    105   			buf = buf[:1]
    96    106   		}
          107  +
          108  +		var remaining int = len(data) % blockSize
          109  +		if remaining > 0 {
          110  +			ctx.input = ctx.buffer[:remaining]
          111  +			copy(ctx.input, data[len(data)-remaining:])
          112  +		} else {
          113  +			ctx.input = ctx.buffer[:0]
          114  +		}
          115  +
    97    116   		return nil
    98    117   	}
    99    118   
   100    119   	return write
   101    120   }
   102    121   
   103    122   type reader func([]byte) (int, error)
   104    123   
   105    124   func (r reader) Read(output []byte) (int, error) {
   106    125   	return r(output)
   107    126   }
   108    127   
   109    128   // TODO - document
   110         -func Decompressor(reader io.Reader) reader {
          129  +func Decompressor(wrapped io.Reader) io.Reader {
   111    130   	var ctx context
   112    131   	ctx.input = ctx.buffer[:0]
   113    132   
   114         -	return func(output []byte) (int, error) {
          133  +	return reader(func(output []byte) (int, error) {
   115    134   		var (
   116    135   			err       error
   117    136   			flags     byte
   118    137   			readCount int
   119    138   		)
   120    139   
   121    140   		// Sanity check for space to read into
................................................................................
   126    145   		// Check whether we have leftover data in the buffer
   127    146   		if len(ctx.input) > 0 {
   128    147   			readCount = copy(output, ctx.input)
   129    148   			ctx.input = ctx.input[readCount:]
   130    149   			return readCount, nil
   131    150   		}
   132    151   
   133         -		// // The buffer will shrink as it empties, restore it if it is needed
   134         -		// if len(ctx.input) == 0 {
   135         -		// 	ctx.input = ctx.buffer[:1]
   136         -		// }
          152  +		// This is single-iteration only but it is fine according to io.Reader's contract ?!
          153  +		// TODO - read all bytes from a block based on the hamming weight of the flag
          154  +		// and just shuffle them for predictions instead of bite-sized reads ;)
   137    155   
   138    156   		// Read the flags
   139         -		readCount, err = reader.Read(ctx.buffer[:1])
          157  +		ctx.input = ctx.buffer[:1]
          158  +		readCount, err = wrapped.Read(ctx.input)
   140    159   		if readCount == 0 || err != nil {
   141    160   			return readCount, err
   142    161   		}
   143    162   
   144         -		// This is single-iteration only but it is fine according to io.Reader's contract ?!
   145         -		// TODO - read all bytes from a block based on the hamming weight of the flag
   146         -		// and just shuffle them for predictions instead of bite-sized reads ;)
   147         -
   148         -		flags = ctx.buffer[0]
          163  +		flags = ctx.input[0]
          164  +		ctx.input = ctx.buffer[:8]
   149    165   
   150    166   		var i uint = 0
   151    167   		for ; i < 8; i++ {
   152    168   			if flags&(1<<i) > 0 {
   153    169   				// Guess was right
   154         -				ctx.buffer[i] = ctx.table[ctx.hash]
          170  +				ctx.input[i] = ctx.table[ctx.hash]
   155    171   			} else {
   156         -				readCount, err = reader.Read(ctx.buffer[i:(i + 1)])
          172  +				readCount, err = wrapped.Read(ctx.input[i:(i + 1)])
   157    173   
   158    174   				if err == io.EOF {
   159    175   					break
   160    176   				}
   161    177   
   162    178   				if err != nil {
   163    179   					return readCount, err
   164    180   				}
   165    181   
   166    182   				if readCount == 0 { // treat as EoF
   167    183   					break
   168    184   				}
   169    185   
   170         -				ctx.table[ctx.hash] = ctx.buffer[i]
          186  +				ctx.table[ctx.hash] = ctx.input[i]
   171    187   			}
   172    188   
   173         -			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.buffer[i])
          189  +			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i])
   174    190   		}
   175    191   
   176         -		readCount = copy(output, ctx.buffer[:i])
          192  +		readCount = copy(output, ctx.input[:i])
   177    193   
   178    194   		// Place any remaining bytes in the buffer
   179    195   		if uint(readCount) < i {
   180         -			ctx.input = ctx.buffer[readCount:i]
          196  +			ctx.input = ctx.input[readCount:i]
          197  +		} else {
          198  +			ctx.input = ctx.buffer[:0]
   181    199   		}
   182    200   
   183    201   		return readCount, nil
   184         -	}
          202  +	})
   185    203   }

Modified src/0dev.org/predictor/predictor_test.go from [05441cad24] to [0cbbd433d4].

     3      3   import (
     4      4   	diff "0dev.org/diff"
     5      5   	"bytes"
     6      6   	"io/ioutil"
     7      7   	"testing"
     8      8   )
     9      9   
    10         -func TestRFC(t *testing.T) {
    11         -	input := []byte{0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
    12         -		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
    13         -		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
    14         -		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
    15         -		0x41, 0x42, 0x41, 0x42, 0x41, 0x42, 0x41, 0x0a,
    16         -		0x42, 0x41, 0x42, 0x41, 0x42, 0x41, 0x42, 0x0a,
    17         -		0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x0a}
           10  +// Sample input from RFC1978 - PPP Predictor Compression Protocol
           11  +var input = []byte{0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
           12  +	0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
           13  +	0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
           14  +	0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
           15  +	0x41, 0x42, 0x41, 0x42, 0x41, 0x42, 0x41, 0x0a,
           16  +	0x42, 0x41, 0x42, 0x41, 0x42, 0x41, 0x42, 0x0a,
           17  +	0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x0a}
    18     18   
    19         -	output := []byte{0x60, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a, 0x60,
    20         -		0x41, 0x41, 0x41, 0x41, 0x41, 0x0a, 0x6f, 0x41,
    21         -		0x0a, 0x6f, 0x41, 0x0a, 0x41, 0x42, 0x41, 0x42,
    22         -		0x41, 0x42, 0x0a, 0x60, 0x42, 0x41, 0x42, 0x41,
    23         -		0x42, 0x0a, 0x60, 0x78, 0x78, 0x78, 0x78, 0x78, 0x0a}
           19  +// Sample output from RFC1978 - PPP Predictor Compression Protocol
           20  +var output = []byte{0x60, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a, 0x60,
           21  +	0x41, 0x41, 0x41, 0x41, 0x41, 0x0a, 0x6f, 0x41,
           22  +	0x0a, 0x6f, 0x41, 0x0a, 0x41, 0x42, 0x41, 0x42,
           23  +	0x41, 0x42, 0x0a, 0x60, 0x42, 0x41, 0x42, 0x41,
           24  +	0x42, 0x0a, 0x60, 0x78, 0x78, 0x78, 0x78, 0x78, 0x0a}
    24     25   
    25         -	var buf bytes.Buffer
    26         -	var err error
           26  +func TestCompressor(t *testing.T) {
           27  +	var (
           28  +		buf bytes.Buffer
           29  +		err error
           30  +	)
    27     31   
    28     32   	out := Compressor(&buf)
    29     33   	err = out(input)
    30     34   	if err != nil {
    31     35   		t.Error(err)
    32     36   	}
    33     37   
    34         -	err = out([]byte{})
           38  +	err = out(nil)
    35     39   	if err != nil {
    36     40   		t.Error(err)
    37     41   	}
    38     42   
    39     43   	result := buf.Bytes()
    40     44   	delta := diff.Diff(diff.D{len(result), len(output), func(i, j int) bool { return result[i] == output[j] }})
    41     45   
    42     46   	if len(delta.Added) > 0 || len(delta.Removed) > 0 {
    43     47   		t.Error("Unexpected compressed output", delta)
    44     48   	}
           49  +}
           50  +
           51  +func TestDecompressor(t *testing.T) {
           52  +	in := Decompressor(bytes.NewReader(output))
           53  +	result, err := ioutil.ReadAll(in)
           54  +	if err != nil {
           55  +		t.Error("Unexpected error while decompressing", err)
           56  +	}
           57  +
           58  +	delta := diff.Diff(diff.D{len(result), len(input),
           59  +		func(i, j int) bool { return result[i] == input[j] }})
           60  +
           61  +	if len(delta.Added) > 0 || len(delta.Removed) > 0 {
           62  +		t.Error("Unexpected decompressed output", delta)
           63  +	}
           64  +}
           65  +
           66  +func TestPartial(t *testing.T) {
           67  +	var (
           68  +		input []byte = []byte{0, 1, 2, 3, 4, 5, 6}
           69  +		buf   bytes.Buffer
           70  +		err   error
           71  +	)
           72  +
           73  +	out := Compressor(&buf)
           74  +	err = out(input)
           75  +	if err != nil {
           76  +		t.Error(err)
           77  +	}
           78  +
           79  +	err = out(nil)
           80  +	if err != nil {
           81  +		t.Error(err)
           82  +	}
    45     83   
    46         -	data := bytes.NewBuffer(result)
    47         -	in := Decompressor(data)
           84  +	compressed := buf.Bytes()
           85  +	decompressed, err := ioutil.ReadAll(Decompressor(bytes.NewReader(compressed)))
    48     86   
    49         -	result, err = ioutil.ReadAll(in)
    50         -	delta = diff.Diff(diff.D{len(result), len(input), func(i, j int) bool { return result[i] == input[j] }})
           87  +	delta := diff.Diff(diff.D{len(input), len(decompressed),
           88  +		func(i, j int) bool { return input[i] == decompressed[j] }})
    51     89   
    52     90   	if len(delta.Added) > 0 || len(delta.Removed) > 0 {
    53         -		t.Error("Unexpected compressed output", delta)
           91  +		t.Error("Unexpected decompressed output", delta)
           92  +		t.Errorf("%#x", input)
           93  +		t.Errorf("%#x", decompressed)
    54     94   	}
    55     95   }