Check-in [ae0940d072]
Overview
Comment:Read available bytes at the end of the buffer and decompress in a single pass
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | decompressor2
Files: files | file ages | folders
SHA1: ae0940d072f9767465179b78fccf2e835b753f24
User & Date: spaskalev on 2014-12-22 14:23:31
Other Links: branch diff | manifest | tags
Context
2014-12-22
15:34
Fixed a nasty variable shadowing bug :) check-in: e9b80a705b user: spaskalev tags: decompressor2
14:23
Read available bytes at the end of the buffer and decompress in a single pass check-in: ae0940d072 user: spaskalev tags: decompressor2
2014-12-21
23:26
Closing the decompressor2 branch as this implementation is slower than the naive one. check-in: 52e14c83da user: spaskalev tags: decompressor2
Changes

Modified src/0dev.org/predictor/predictor.go from [84146b7c8c] to [c018a46296].

121
122
123
124
125
126
127
128
129
130



131
132
133
134
135
136
137
138
139
140

141
142
143
144


145
146

147
148
149
150

151
152
153
154

155
156
157
158
159
160
161

162

163
164
165


166
167

168
169
170
171


172
173
174
175
176
177
178
179
180
181
182
183
184



185
186
187
188

189


190
191
192
193
194
195
196
197
198


199
200
201
202

203
204
205
206
207
208
209
210
211
121
122
123
124
125
126
127



128
129
130
131
132
133
134
135
136
137
138
139

140
141
142


143
144
145

146
147
148
149

150
151
152
153

154
155
156
157
158
159
160
161
162

163
164


165
166
167

168
169
170


171
172
173
174
175










176
177
178
179
180
181

182
183
184
185
186
187
188
189
190
191
192


193
194
195
196
197

198
199
200
201
202
203
204
205
206
207







-
-
-
+
+
+









-
+


-
-
+
+

-
+



-
+



-
+







+
-
+

-
-
+
+

-
+


-
-
+
+



-
-
-
-
-
-
-
-
-
-
+
+
+



-
+

+
+







-
-
+
+



-
+









// and decompresses data according to the predictor algorithm
func Decompressor(reader io.Reader) io.Reader {
	var ctx context
	ctx.input = make([]byte, 0, 8) // zero length, capacity for one 8-byte block

	return decompressor(func(output []byte) (int, error) {
		var (
			err                  error
			flags                byte
			readCount, available int
			err                      error
			flags                    byte
			rc, available, predicted int
		)

		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}

		// Check whether we have leftover data in the buffer
		if len(ctx.input) > 0 {
			readCount = copy(output, ctx.input)
			rc = copy(output, ctx.input)

			// Check whether we still have leftover data in the buffer :)
			if readCount < len(ctx.input) {
				ctx.input = ctx.input[:copy(ctx.input, ctx.input[readCount:])]
			if rc < len(ctx.input) {
				ctx.input = ctx.input[:copy(ctx.input, ctx.input[rc:])]
			}
			return readCount, nil
			return rc, nil
		}

		// Read the next prediction header
		readCount, err = reader.Read(ctx.input[:1])
		rc, err = reader.Read(ctx.input[:1])
		// Fail on error unless it is EOF
		if err != nil && err != io.EOF {
			return 0, err
		} else if readCount == 0 {
		} else if rc == 0 {
			return 0, err
		}

		// Extend the buffer, copy the prediction header
		//  and calculate the number of subsequent bytes to read
		ctx.input = ctx.input[:8]
		flags = ctx.input[0]
		predicted = int(bits.Hamming(flags))
		available = 8 - int(bits.Hamming(flags))
		available = 8 - predicted

		// Read the non-predicted bytes according to header.
		readCount, err = reader.Read(ctx.input[:available])
		// Read the non-predicted bytes and place them at the end of the buffer
		rc, err = reader.Read(ctx.input[predicted:])
	retryData:
		if readCount < int(available) && err == nil {
		if rc < int(available) && err == nil {
			// Retry the read if we have fewer bytes than what the prediction header indicates
			var rc int // NOTE(review): where the outer rc is declared above, this inner rc shadows it, so the retry condition never sees the updated count
			rc, err = reader.Read(ctx.input[readCount:available])
			readCount += rc
			rc, err = reader.Read(ctx.input[predicted+rc:])
			rc += rc
			goto retryData
		} // Continue on any error, try to decompress and return it along the result

		// Spread the read bytes right to left to avoid overlapping
		for i, a := 7, available-1; i >= 0; i-- {
			if ((flags >> uint(i)) & 1) == 0 {
				ctx.input[i] = ctx.input[a]
				a--
			}
		}

		// Walk the buffer, fill in the predicted blanks and update the guess table
		for i := uint(0); i < 8; i++ {
		// Walk the buffer, filling in the predicted blanks,
		// relocating read bytes and updating the guess table
		for i, a := uint(0), predicted; i < 8; i++ {
			if (flags & (1 << i)) > 0 {
				// Guess succeeded, fill in from the table
				ctx.input[i] = ctx.table[ctx.hash]
				readCount++
				rc++
			} else {
				// Relocate a read byte
				ctx.input[i], a = ctx.input[a], a+1
				// Guess failed, update the table
				ctx.table[ctx.hash] = ctx.input[i]
			}
			// Update the hash
			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i])
		}

		// readCount now contains the precise amount of populated data
		ctx.input = ctx.input[:readCount]
		// rc now contains the precise amount of populated data
		ctx.input = ctx.input[:rc]
		available = copy(output, ctx.input)

		// Check for remaining bytes that don't fit in the output buffer
		if available < readCount {
		if available < rc {
			ctx.input = ctx.input[:copy(ctx.input, ctx.input[available:])]
		} else {
			// Clear the buffer
			ctx.input = ctx.input[:0]
		}

		return available, err
	})
}