Changes On Branch 42ba1f458d7b62fd

Changes In Branch decompressor2 Excluding Merge-Ins

This is equivalent to a diff from 1a4bdf36e2 to 42ba1f458d

2014-12-22
16:41
Integrate the decompressor2 branch into trunk now that it is faster. check-in: 6d10a1d28f user: spaskalev tags: trunk
16:35
Decompressor - try to fill as much as possible in the output buffer in a single pass. Closed-Leaf check-in: 42ba1f458d user: spaskalev tags: decompressor2
15:34
Fixed a nasty variable shadowing bug :) check-in: e9b80a705b user: spaskalev tags: decompressor2
2014-12-21
22:52
Removed TODOs, renamed readCount->rc, wrapped->reader check-in: 630530df49 user: spaskalev tags: trunk
22:12
Check in the new decompressor implementation in a separate branch check-in: bd1368b81f user: spaskalev tags: decompressor2
19:38
Added debug/pprof to ease basic cpu profiling check-in: 1a4bdf36e2 user: spaskalev tags: trunk
17:23
Fixed a rare case of losing data from the decompressor's internal result buffer. check-in: 7b74fd57f8 user: spaskalev tags: trunk

Modified src/0dev.org/predictor/predictor.go from [d2a3bd9d21] to [71e92568a2].

1
2
3
4
5

6
7
8
9
10
11
12
1
2
3
4
5
6
7
8
9
10
11
12
13





+







// Package predictor implements the predictor compression/decompression algorithm
// as specified by RFC1978 - PPP Predictor Compression Protocol
package predictor

import (
	bits "0dev.org/bits"
	"io"
)

type context struct {
	table [1 << 16]byte
	input []byte
	hash  uint16
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
69
70
71
72
73
74
75

76
77
78
79
80
81
82







-







				return err
			}
			// ... and stage the rest of the data in the buffer
			ctx.input = append(ctx.input, data[blockSize-bufferLength:]...)
			return nil
		}

		// TODO allocate this on ctx.buffer ...
		var buf []byte = make([]byte, 1, blockSize+1)
		for block := 0; block < len(data)/blockSize; block++ {
			for i := 0; i < blockSize; i++ {
				var current byte = data[(block*blockSize)+i]
				if ctx.table[ctx.hash] == current {
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)
115
116
117
118
119
120
121
122

123
124
125
126
127
128
129
130



131
132
133
134
135
136
137
138
139
140

141
142
143
144


145
146

147
148
149
150
151
152
153
154
155
156








157
158


159
160


161





162
163
164
165











166

167
168
169

170
171
172
173

174
175
176
177

178
179
180
181
182
183
184

185
186
187


188

189

190
191
192




193

194
195
196
197








198
199
115
116
117
118
119
120
121

122
123
124
125
126
127



128
129
130
131
132
133
134
135
136
137
138
139

140
141
142


143
144
145

146
147
148








149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170




171
172
173
174
175
176
177
178
179
180
181
182
183
184


185




186




187




188
189

190
191
192
193
194
195

196
197
198



199
200
201
202
203
204
205



206
207
208
209
210
211
212
213
214
215







-
+





-
-
-
+
+
+









-
+


-
-
+
+

-
+


-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+


+
+


+
+

+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+

+

-
-
+
-
-
-
-
+
-
-
-
-
+
-
-
-
-


-
+



+
+
-
+

+
-
-
-
+
+
+
+

+

-
-
-
+
+
+
+
+
+
+
+


// Read satisfies io.Reader by invoking the function value that the
// decompressor type wraps, handing it the caller's buffer and passing
// its byte count and error back unchanged.
func (r decompressor) Read(output []byte) (int, error) {
	n, err := r(output)
	return n, err
}

// Decompressor returns an io.Reader that wraps the provided io.Reader
// and decompresses data according to the predictor algorithm.
func Decompressor(wrapped io.Reader) io.Reader {
func Decompressor(reader io.Reader) io.Reader {
	var ctx context
	ctx.input = make([]byte, 0, 8)

	return decompressor(func(output []byte) (int, error) {
		var (
			err       error
			flags     byte
			readCount int
			err                             error
			flags                           byte
			rc, available, predicted, total int
		)

		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}

		// Check whether we have leftover data in the buffer
		if len(ctx.input) > 0 {
			readCount = copy(output, ctx.input)
			rc = copy(output, ctx.input)

			// Check whether we still have leftover data in the buffer :)
			if readCount < len(ctx.input) {
				ctx.input = ctx.input[:copy(ctx.input, ctx.input[readCount:])]
			if rc < len(ctx.input) {
				ctx.input = ctx.input[:copy(ctx.input, ctx.input[rc:])]
			}
			return readCount, nil
			return rc, nil
		}

		// This is single-iteration only but it is fine according to io.Reader's contract ?!
		// TODO - read all bytes from a block based on the hamming weight of the flag
		// and just shuffle them for predictions instead of bite-sized reads ;)

		// Read the flags
		readCount, err = wrapped.Read(ctx.input[:1])
		if readCount == 0 || err != nil {
			return readCount, err
		// Read the next prediction header
	readHeader:
		rc, err = reader.Read(ctx.input[:1])
		// Fail on error unless it is EOF
		if err != nil && err != io.EOF {
			return total, err
		} else if rc == 0 {
			return total, err
		}

		// Extend the buffer, copy the prediction header
		//  and calculate the number of subsequent bytes to read
		ctx.input = ctx.input[:8]
		flags = ctx.input[0]
		predicted = int(bits.Hamming(flags))
		available = 8 - predicted

		// Read the non-predicted bytes and place them in the end of the buffer
		rc, err = reader.Read(ctx.input[predicted:])
	retryData:
		if rc < int(available) && err == nil {
			// Retry the read if we have fewer bytes than what the prediction header indicates
		var i uint = 0
		for ; i < 8; i++ {
			if flags&(1<<i) > 0 {
				// Guess was right
			var r int
			r, err = reader.Read(ctx.input[predicted+rc:])
			rc += r
			goto retryData
		} // Continue on any error, try to decompress and return it along the result

		// Walk the buffer, filling in the predicted blanks,
		// relocating read bytes and updating the guess table
		for i, a := uint(0), predicted; i < 8; i++ {
			if (flags & (1 << i)) > 0 {
				// Guess succeeded, fill in from the table
				ctx.input[i] = ctx.table[ctx.hash]
				rc++
			} else {
				readCount, err = wrapped.Read(ctx.input[i:(i + 1)])

				// Relocate a read byte
				if err == io.EOF {
					break
				}

				ctx.input[i], a = ctx.input[a], a+1
				if err != nil {
					return readCount, err
				}

				// Guess failed, update the table
				if readCount == 0 { // treat as EoF
					break
				}

				ctx.table[ctx.hash] = ctx.input[i]
			}

			// Update the hash
			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i])
		}

		// rc now contains the precise amount of populated data
		ctx.input = ctx.input[:rc]
		readCount = copy(output, ctx.input[:i])
		available = copy(output, ctx.input)

		total += available
		// Place any remaining bytes in the buffer
		if uint(readCount) < i {
			ctx.input = ctx.input[readCount:i]

		// Check for remaining bytes that don't fit in the output buffer
		if available < rc {
			ctx.input = ctx.input[:copy(ctx.input, ctx.input[available:])]
		} else {
			// Clear the buffer
			ctx.input = ctx.input[:0]
		}

		return readCount, nil

			output = output[available:]
			if len(output) > 0 && err == nil {
				goto readHeader
			}
		}

		return total, err
	})
}