Check-in [bd1368b81f]
Overview
Comment:Check in the new decompressor implementation in a separate branch
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | decompressor2
Files: files | file ages | folders
SHA1: bd1368b81facc3a0f23d6d04f2ed9aba7074e115
User & Date: spaskalev on 2014-12-21 22:12:43
Other Links: branch diff | manifest | tags
Context
2014-12-21
23:26
Closing the decompressor2 branch as this implementation is slower than the naive one. check-in: 52e14c83da user: spaskalev tags: decompressor2
22:12
Check in the new decompressor implementation in a separate branch check-in: bd1368b81f user: spaskalev tags: decompressor2
19:38
Added debug/pprof to ease basic cpu profiling check-in: 1a4bdf36e2 user: spaskalev tags: trunk
Changes

Modified src/0dev.org/predictor/predictor.go from [d2a3bd9d21] to [84146b7c8c].

1
2
3
4
5

6
7
8
9
10
11
12
// Package predictor implements the predictor compression/decompression algorithm
// as specified by RFC1978 - PPP Predictor Compression Protocol
package predictor

import (

	"io"
)

type context struct {
	table [1 << 16]byte
	input []byte
	hash  uint16





>







1
2
3
4
5
6
7
8
9
10
11
12
13
// Package predictor implements the predictor compression/decompression algorithm
// as specified by RFC1978 - PPP Predictor Compression Protocol
package predictor

import (
	bits "0dev.org/bits"
	"io"
)

type context struct {
	table [1 << 16]byte
	input []byte
	hash  uint16
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
				return err
			}
			// ... and stage the rest of the data in the buffer
			ctx.input = append(ctx.input, data[blockSize-bufferLength:]...)
			return nil
		}

		// TODO allocate this on ctx.buffer ...
		var buf []byte = make([]byte, 1, blockSize+1)
		for block := 0; block < len(data)/blockSize; block++ {
			for i := 0; i < blockSize; i++ {
				var current byte = data[(block*blockSize)+i]
				if ctx.table[ctx.hash] == current {
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)







<







69
70
71
72
73
74
75

76
77
78
79
80
81
82
				return err
			}
			// ... and stage the rest of the data in the buffer
			ctx.input = append(ctx.input, data[blockSize-bufferLength:]...)
			return nil
		}


		var buf []byte = make([]byte, 1, blockSize+1)
		for block := 0; block < len(data)/blockSize; block++ {
			for i := 0; i < blockSize; i++ {
				var current byte = data[(block*blockSize)+i]
				if ctx.table[ctx.hash] == current {
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154



155
156
157
158


159
160

161





162
163
164
165
166
167
168


169


170

171
172
173
174
175
176
177

178


179
180
181
182
183
184
185
186
187


188
189
190
191
192
193

194
195
196
197
198
199
// Read makes decompressor satisfy io.Reader: it invokes the receiver,
// which is itself a function value, with the caller's buffer and hands
// back the byte count and error it produced.
func (r decompressor) Read(output []byte) (int, error) {
	n, err := r(output)
	return n, err
}

// Decompressor returns an io.Reader that wraps the provided io.Reader
// and decompresses data according to the predictor algorithm.
//
// Each call to Read decodes at most one block: a flags byte followed by
// the literal bytes for every position whose flag bit is clear. Positions
// whose flag bit is set are filled in from the guess table. Decoded bytes
// that do not fit into the caller's buffer are kept and served by
// subsequent calls before any further input is consumed.
//
// An error from the wrapped reader (including io.EOF) is reported only
// together with, or after, the last decoded byte, so a caller that stops
// at the first error never loses buffered data.
func Decompressor(wrapped io.Reader) io.Reader {
	var (
		ctx context
		// pending holds a source error that arrived while decoded bytes
		// were still waiting in ctx.input; it is released once they drain.
		pending error
	)
	ctx.input = make([]byte, 0, 8)

	return decompressor(func(output []byte) (int, error) {
		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}

		// Serve leftover bytes from a previous block first.
		if len(ctx.input) > 0 {
			n := copy(output, ctx.input)
			// Shift the remainder to the front. When everything was consumed
			// this truncates the buffer to zero length, which is what stops
			// the same bytes from being served again on the next call.
			ctx.input = ctx.input[:copy(ctx.input, ctx.input[n:])]
			if len(ctx.input) > 0 {
				return n, nil
			}
			err := pending
			pending = nil
			return n, err
		}

		// Read the flags. io.ReadFull retries empty reads and yields
		// (0, io.EOF) on a clean end of stream.
		if _, err := io.ReadFull(wrapped, ctx.input[:1]); err != nil {
			return 0, err
		}

		ctx.input = ctx.input[:8]
		flags := ctx.input[0]

		var (
			err     error
			decoded uint
		)
		for ; decoded < 8; decoded++ {
			if flags&(1<<decoded) != 0 {
				// Guess was right
				ctx.input[decoded] = ctx.table[ctx.hash]
			} else {
				// Guess was wrong: the literal byte follows in the stream.
				// A failed read ends the block; bytes decoded so far are
				// still delivered and the error is reported with them.
				if _, err = io.ReadFull(wrapped, ctx.input[decoded:decoded+1]); err != nil {
					break
				}
				ctx.table[ctx.hash] = ctx.input[decoded]
			}

			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[decoded])
		}

		ctx.input = ctx.input[:decoded]
		n := copy(output, ctx.input)

		// Keep any remaining bytes at the start of the buffer. Shifting,
		// rather than re-slicing forward, preserves the 8-byte capacity
		// that the next block relies on.
		ctx.input = ctx.input[:copy(ctx.input, ctx.input[n:])]
		if len(ctx.input) > 0 {
			pending = err
			return n, nil
		}

		return n, err
	})
}







|





|
|
|


















<
<
<
<
|
|
>
>
>
|
|


>
>


>

>
>
>
>
>
|
<
<
<
|
<
|
>
>

>
>
|
>
|
|
|
<
<
|
|
>
|
>
>
|
|
|


|



>
>
|

|
|
|

>



|


115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148




149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169



170

171
172
173
174
175
176
177
178
179
180
181


182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
// Read makes decompressor satisfy io.Reader: it invokes the receiver,
// which is itself a function value, with the caller's buffer and hands
// back the byte count and error it produced.
func (r decompressor) Read(output []byte) (int, error) {
	n, err := r(output)
	return n, err
}

// Decompressor returns an io.Reader that wraps the provided io.Reader
// and decompresses data according to the predictor algorithm.
//
// Each call to Read decodes at most one block: a prediction header byte
// followed by the literal bytes for every position whose header bit is
// clear. The literals are read in bulk, spread out to their final
// positions, and the gaps are filled from the guess table. Decoded bytes
// that do not fit into the caller's buffer are kept and served by
// subsequent calls before any further input is consumed.
//
// An error from the wrapped reader (including io.EOF) is reported only
// together with, or after, the last decoded byte, so a caller that stops
// at the first error never loses buffered data.
func Decompressor(reader io.Reader) io.Reader {
	var (
		ctx context
		// pending holds a source error that arrived while decoded bytes
		// were still waiting in ctx.input; it is released once they drain.
		pending error
	)
	ctx.input = make([]byte, 0, 8)

	return decompressor(func(output []byte) (int, error) {
		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}

		// Serve leftover bytes from a previous block first.
		if len(ctx.input) > 0 {
			n := copy(output, ctx.input)
			// Shift the remainder to the front. When everything was consumed
			// this truncates the buffer to zero length, which is what stops
			// the same bytes from being served again on the next call.
			ctx.input = ctx.input[:copy(ctx.input, ctx.input[n:])]
			if len(ctx.input) > 0 {
				return n, nil
			}
			err := pending
			pending = nil
			return n, err
		}

		// Read the next prediction header.
		// Fail on error unless it is EOF delivered together with the byte.
		n, err := reader.Read(ctx.input[:1])
		if n == 0 || (err != nil && err != io.EOF) {
			return 0, err
		}

		// Extend the buffer, pick up the prediction header and calculate
		// the number of literal bytes that follow it.
		// NOTE(review): bits.Hamming is taken to be the count of set bits
		// in flags - confirm against the 0dev.org/bits package.
		ctx.input = ctx.input[:8]
		flags := ctx.input[0]
		unpredicted := 8 - int(bits.Hamming(flags))

		// Read the literal bytes, retrying short reads until the block is
		// complete or the source reports an error. Nothing is read when
		// every byte of the block was predicted.
		read := 0
		for read < unpredicted && err == nil {
			n, err = reader.Read(ctx.input[read:unpredicted])
			read += n
		}
		// Continue on any error: decode what arrived and report the error
		// along with the result.

		// Spread the literals to their final positions, right to left so
		// that no byte is overwritten before it has been moved.
		for i, src := 7, unpredicted-1; i >= 0; i-- {
			if (flags>>uint(i))&1 == 0 {
				ctx.input[i] = ctx.input[src]
				src--
			}
		}

		// Walk the buffer, fill in the predicted blanks and update the
		// guess table. Stop at the first literal position that received no
		// data (short final block or failed read) so that stale bytes are
		// neither emitted nor fed into the table and hash.
		decoded := 0
		for i := uint(0); i < 8; i++ {
			if flags&(1<<i) != 0 {
				// Guess succeeded, fill in from the table
				ctx.input[i] = ctx.table[ctx.hash]
			} else {
				if read == 0 {
					break
				}
				read--
				// Guess failed, update the table
				ctx.table[ctx.hash] = ctx.input[i]
			}
			// Update the hash
			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i])
			decoded++
		}

		// decoded is the precise amount of populated data
		ctx.input = ctx.input[:decoded]
		n = copy(output, ctx.input)

		// Keep bytes that do not fit in the output buffer at the start of
		// the internal buffer; this also clears it when everything fit.
		ctx.input = ctx.input[:copy(ctx.input, ctx.input[n:])]
		if len(ctx.input) > 0 {
			pending = err
			return n, nil
		}

		return n, err
	})
}