Check-in [b838653282]
Overview
Comment:Fixed issues with both compressor and decompressor, added more tests
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: b838653282d5752f8d5ba9f78bcf5064d4b22d35
User & Date: spaskalev on 2014-12-16 04:03:55
Other Links: manifest | tags
Context
2014-12-16
16:04
Removed goto from predictor's compressor, added more tests that invoke both compress/decompress check-in: 10013ae789 user: spaskalev tags: trunk
04:03
Fixed issues with both compressor and decompressor, added more tests check-in: b838653282 user: spaskalev tags: trunk
01:55
Added hamming weight lookup table for bytes in package bits. Added a PoC predictor decompressor implementation check-in: 60ca5b4b7b user: spaskalev tags: trunk
Changes

Modified src/0dev.org/predictor/predictor.go from [380067e183] to [f0ba5a860c].

15
16
17
18
19
20
21
22

23
24
25
26
27
28
29
30
31
32
33
34
35





36
37
38
39
40
41
42

43
44
45
46
47
48
49
50
15
16
17
18
19
20
21

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

47

48
49
50
51
52
53
54







-
+













+
+
+
+
+






-
+
-








// Returns a closure over the provided writer that compresses data when called.
//
// It can buffer data as the predictor mandates 8-byte blocks with a header.
// A call with no data will force a flush.
func Compressor(writer io.Writer) func([]byte) error {
	var ctx context
	ctx.input = ctx.buffer[:]
	ctx.input = ctx.buffer[:0]

	// Forward declaration as it is required for recursion
	var write func(data []byte) error

	write = func(data []byte) error {
		var (
			err          error
			blockSize    int = 8
			bufferLength int = len(ctx.input)
		)

		// Force a flush if we are called with no data to write
		if len(data) == 0 {
			if len(ctx.input) == 0 {
				return nil
			}
			data = ctx.input

			// We can't have more than 7 bytes in the buffer so this is safe
			blockSize = len(ctx.input)
			goto write
		}

		// Check if there are pending bytes in the buffer
		if bufferLength > 0 && bufferLength < 8 {
		if len(data) < blockSize || bufferLength > 0 {

			// Check whether we have enough bytes for a complete block
			if len(data) > 8-bufferLength {
				// Fill the buffer ...
				ctx.input = append(ctx.input, data[:8-bufferLength]...)
				// ... and recurse, calling ourselves with the full buffer
				err = write(ctx.input)
				if err != nil {
68
69
70
71
72
73
74






75

76
77
78
79
80
81
82
72
73
74
75
76
77
78
79
80
81
82
83
84

85
86
87
88
89
90
91
92







+
+
+
+
+
+
-
+







				ctx.input = append(ctx.input, data...)
				return nil
			}
		}

	write:
		var buf []byte = make([]byte, 1, blockSize+1)

		var blocks int = len(data) / blockSize
		if blocks == 0 {
			blocks++
		}

		for block := 0; block < len(data)/blockSize; block++ {
		for block := 0; block < blocks; block++ {
			for i := 0; i < blockSize; i++ {
				var current byte = data[(block*blockSize)+i]
				if ctx.table[ctx.hash] == current {
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)
				} else {
					// Guess was wrong, output char
90
91
92
93
94
95
96









97
98
99
100
101
102
103
104
105
106
107
108
109
110

111
112
113
114

115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135



136
137
138

139

140
141
142
143
144
145
146
147
148


149
150
151
152
153
154

155
156

157
158
159
160
161
162
163
164
165
166
167
168
169
170

171
172
173

174
175
176

177
178
179


180

181
182
183
184

185
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

129
130
131
132

133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151



152
153
154

155
156
157

158
159
160
161
162





163
164
165
166
167
168
169

170
171

172
173
174
175
176
177
178
179
180
181
182
183
184
185

186
187
188

189
190
191

192
193
194
195
196
197

198
199
200
201

202
203







+
+
+
+
+
+
+
+
+













-
+



-
+


















-
-
-
+
+
+
-


+
-
+




-
-
-
-
-
+
+





-
+

-
+













-
+


-
+


-
+



+
+
-
+



-
+

				return err
			}

			// Reset the flags and buffer for the next iteration
			buf[0] ^= buf[0]
			buf = buf[:1]
		}

		var remaining int = len(data) % blockSize
		if remaining > 0 {
			ctx.input = ctx.buffer[:remaining]
			copy(ctx.input, data[len(data)-remaining:])
		} else {
			ctx.input = ctx.buffer[:0]
		}

		return nil
	}

	return write
}

// reader is a function adapter: a func with the Read signature can be
// converted to reader and then used as an io.Reader.
type reader func([]byte) (int, error)

// Read hands output to the underlying function and returns the byte count
// and error it reports, unchanged.
func (r reader) Read(output []byte) (n int, err error) {
	n, err = r(output)
	return n, err
}

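// Returns an io.Reader that decompresses predictor-compressed data read from the provided reader.
//
// Each call to Read decodes at most one block (a flag byte followed by up to 8 bytes of output).
// Bytes that do not fit in the caller's slice are buffered and returned by subsequent calls.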
func Decompressor(reader io.Reader) reader {
func Decompressor(wrapped io.Reader) io.Reader {
	var ctx context
	ctx.input = ctx.buffer[:0]

	return func(output []byte) (int, error) {
	return reader(func(output []byte) (int, error) {
		var (
			err       error
			flags     byte
			readCount int
		)

		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}

		// Check whether we have leftover data in the buffer
		if len(ctx.input) > 0 {
			readCount = copy(output, ctx.input)
			ctx.input = ctx.input[readCount:]
			return readCount, nil
		}

		// // The buffer will shrink as it empties, restore it if it is needed
		// if len(ctx.input) == 0 {
		// 	ctx.input = ctx.buffer[:1]
		// This is single-iteration only but it is fine according to io.Reader's contract ?!
		// TODO - read all bytes from a block based on the hamming weight of the flag
		// and just shuffle them for predictions instead of bite-sized reads ;)
		// }

		// Read the flags
		ctx.input = ctx.buffer[:1]
		readCount, err = reader.Read(ctx.buffer[:1])
		readCount, err = wrapped.Read(ctx.input)
		if readCount == 0 || err != nil {
			return readCount, err
		}

		// This is single-iteration only but it is fine according to io.Reader's contract ?!
		// TODO - read all bytes from a block based on the hamming weight of the flag
		// and just shuffle them for predictions instead of bite-sized reads ;)

		flags = ctx.buffer[0]
		flags = ctx.input[0]
		ctx.input = ctx.buffer[:8]

		var i uint = 0
		for ; i < 8; i++ {
			if flags&(1<<i) > 0 {
				// Guess was right
				ctx.buffer[i] = ctx.table[ctx.hash]
				ctx.input[i] = ctx.table[ctx.hash]
			} else {
				readCount, err = reader.Read(ctx.buffer[i:(i + 1)])
				readCount, err = wrapped.Read(ctx.input[i:(i + 1)])

				if err == io.EOF {
					break
				}

				if err != nil {
					return readCount, err
				}

				if readCount == 0 { // treat as EoF
					break
				}

				ctx.table[ctx.hash] = ctx.buffer[i]
				ctx.table[ctx.hash] = ctx.input[i]
			}

			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.buffer[i])
			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i])
		}

		readCount = copy(output, ctx.buffer[:i])
		readCount = copy(output, ctx.input[:i])

		// Place any remaining bytes in the buffer
		if uint(readCount) < i {
			ctx.input = ctx.input[readCount:i]
		} else {
			ctx.input = ctx.buffer[readCount:i]
			ctx.input = ctx.buffer[:0]
		}

		return readCount, nil
	}
	})
}

Modified src/0dev.org/predictor/predictor_test.go from [05441cad24] to [0cbbd433d4].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17








18

19
20
21
22
23





24


25
26



27
28
29
30
31
32
33
34

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50











51
52
53
































54
55
1
2
3
4
5
6
7
8
9








10
11
12
13
14
15
16
17
18
19





20
21
22
23
24
25
26
27


28
29
30
31
32
33
34
35
36
37

38
39
40
41
42
43
44
45
46
47
48






49
50
51
52
53
54
55
56
57
58
59
60
61

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95









-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+

+
-
-
-
-
-
+
+
+
+
+

+
+
-
-
+
+
+







-
+










-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+


-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+


package predictor

import (
	diff "0dev.org/diff"
	"bytes"
	"io/ioutil"
	"testing"
)

func TestRFC(t *testing.T) {
	input := []byte{0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
		0x41, 0x42, 0x41, 0x42, 0x41, 0x42, 0x41, 0x0a,
		0x42, 0x41, 0x42, 0x41, 0x42, 0x41, 0x42, 0x0a,
		0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x0a}
// Sample input from RFC1978 - PPP Predictor Compression Protocol
var input = []byte{0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
	0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
	0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
	0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
	0x41, 0x42, 0x41, 0x42, 0x41, 0x42, 0x41, 0x0a,
	0x42, 0x41, 0x42, 0x41, 0x42, 0x41, 0x42, 0x0a,
	0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x0a}

// Sample output from RFC1978 - PPP Predictor Compression Protocol
	output := []byte{0x60, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a, 0x60,
		0x41, 0x41, 0x41, 0x41, 0x41, 0x0a, 0x6f, 0x41,
		0x0a, 0x6f, 0x41, 0x0a, 0x41, 0x42, 0x41, 0x42,
		0x41, 0x42, 0x0a, 0x60, 0x42, 0x41, 0x42, 0x41,
		0x42, 0x0a, 0x60, 0x78, 0x78, 0x78, 0x78, 0x78, 0x0a}
var output = []byte{0x60, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a, 0x60,
	0x41, 0x41, 0x41, 0x41, 0x41, 0x0a, 0x6f, 0x41,
	0x0a, 0x6f, 0x41, 0x0a, 0x41, 0x42, 0x41, 0x42,
	0x41, 0x42, 0x0a, 0x60, 0x42, 0x41, 0x42, 0x41,
	0x42, 0x0a, 0x60, 0x78, 0x78, 0x78, 0x78, 0x78, 0x0a}

func TestCompressor(t *testing.T) {
	var (
	var buf bytes.Buffer
	var err error
		buf bytes.Buffer
		err error
	)

	out := Compressor(&buf)
	err = out(input)
	if err != nil {
		t.Error(err)
	}

	err = out([]byte{})
	err = out(nil)
	if err != nil {
		t.Error(err)
	}

	result := buf.Bytes()
	delta := diff.Diff(diff.D{len(result), len(output), func(i, j int) bool { return result[i] == output[j] }})

	if len(delta.Added) > 0 || len(delta.Removed) > 0 {
		t.Error("Unexpected compressed output", delta)
	}

	data := bytes.NewBuffer(result)
	in := Decompressor(data)

	result, err = ioutil.ReadAll(in)
	delta = diff.Diff(diff.D{len(result), len(input), func(i, j int) bool { return result[i] == input[j] }})
}

func TestDecompressor(t *testing.T) {
	in := Decompressor(bytes.NewReader(output))
	result, err := ioutil.ReadAll(in)
	if err != nil {
		t.Error("Unexpected error while decompressing", err)
	}

	delta := diff.Diff(diff.D{len(result), len(input),
		func(i, j int) bool { return result[i] == input[j] }})

	if len(delta.Added) > 0 || len(delta.Removed) > 0 {
		t.Error("Unexpected compressed output", delta)
		t.Error("Unexpected decompressed output", delta)
	}
}

// TestPartial round-trips an input shorter than one 8-byte block through
// Compressor and Decompressor and checks that the original bytes come back.
func TestPartial(t *testing.T) {
	var (
		input []byte = []byte{0, 1, 2, 3, 4, 5, 6}
		buf   bytes.Buffer
		err   error
	)

	out := Compressor(&buf)
	err = out(input)
	if err != nil {
		t.Error(err)
	}

	// A call with no data forces the buffered partial block to be flushed.
	err = out(nil)
	if err != nil {
		t.Error(err)
	}

	compressed := buf.Bytes()
	decompressed, err := ioutil.ReadAll(Decompressor(bytes.NewReader(compressed)))
	// Report a decompression failure explicitly rather than letting it show
	// up only as a mismatch in the comparison below.
	if err != nil {
		t.Error("Unexpected error while decompressing", err)
	}

	delta := diff.Diff(diff.D{len(input), len(decompressed),
		func(i, j int) bool { return input[i] == decompressed[j] }})

	if len(delta.Added) > 0 || len(delta.Removed) > 0 {
		t.Error("Unexpected decompressed output", delta)
		t.Errorf("%#x", input)
		t.Errorf("%#x", decompressed)
	}
}