Check-in [9dfd3cb1a2]
Overview
Comment:Extracted the predictor's hash function as a method of the context struct. Minor changes to the decompressor's variables.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 9dfd3cb1a202cf8747262b5d7e6dc243268f9b8e
User & Date: spaskalev on 2014-12-22 19:28:49
Other Links: manifest | tags
Context
2014-12-22
19:52
Added documentation for the decompressor check-in: 89bfe97384 user: spaskalev tags: trunk
19:28
Extracted the predictor's hash function as a method of the context struct. Minor changes to the decompressor's variables. check-in: 9dfd3cb1a2 user: spaskalev tags: trunk
17:15
Calculate the decompressed block length outside of the predictor loop check-in: 27ecac81d3 user: spaskalev tags: trunk
Changes

Modified src/0dev.org/predictor/predictor.go from [46cd9d5cef] to [a4885df9dd].

8
9
10
11
12
13
14









15
16
17
18
19
20
21
)

// context bundles the state shared by the predictor code in this file:
// the guess table, a small working buffer and the running hash.
type context struct {
	table [1 << 16]byte // guess table, indexed by hash; one slot per possible uint16 value
	input []byte        // working buffer; the decompressor also keeps bytes here that did not fit the caller's output
	hash  uint16        // running hash of recent bytes, used as the index into table
}










// compressor adapts a plain function that consumes a byte slice
// so that it can be used through a Write method.
type compressor func([]byte) error

// Write hands data to the wrapped function. It always reports the full
// length of data as written, together with whatever error the function
// returned.
func (w compressor) Write(data []byte) (int, error) {
	err := w(data)
	return len(data), err
}








>
>
>
>
>
>
>
>
>







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
)

// context bundles the predictor state: the guess table, a small
// working buffer and the running hash that indexes the table.
type context struct {
	// table is the guess table; it has one slot for every uint16 hash value.
	table [1 << 16]byte
	// input is the working buffer for the block being processed.
	input []byte
	// hash is the running hash of the most recent bytes.
	hash uint16
}

// update folds val into the running hash. This is the core of the
// algorithm: the previous hash is shifted left by four bits and XORed
// with the new value, so the 16-bit result reflects roughly the last
// three-and-a-bit characters and serves as the guess table index.
// A stronger hash would compress better, at the cost of speed.
func (ctx *context) update(val uint16) {
	ctx.hash <<= 4
	ctx.hash ^= val
}

// compressor lets an ordinary function taking a byte slice be used
// through a Write method.
type compressor func([]byte) error

// Write forwards data to the underlying function. The returned count is
// always len(data); the returned error is whatever the function produced.
func (w compressor) Write(data []byte) (int, error) {
	written := len(data)
	err := w(data)
	return written, err
}

81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)
				} else {
					// Guess was wrong, output char
					ctx.table[ctx.hash] = current
					buf = append(buf, current)
				}
				ctx.hash = (ctx.hash << 4) ^ uint16(current)
			}

			if _, err := writer.Write(buf); err != nil {
				return err
			}

			// Reset the flags and buffer for the next iteration







|







90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)
				} else {
					// Guess was wrong, output char
					ctx.table[ctx.hash] = current
					buf = append(buf, current)
				}
				ctx.update(uint16(current))
			}

			if _, err := writer.Write(buf); err != nil {
				return err
			}

			// Reset the flags and buffer for the next iteration
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
// and decompresses data according to the predictor algorithm
func Decompressor(reader io.Reader) io.Reader {
	var ctx context
	ctx.input = make([]byte, 0, 8)

	return decompressor(func(output []byte) (int, error) {
		var (
			err                             error
			flags                           byte
			rc, available, predicted, total int
		)

		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}








|
|
|







130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// and decompresses data according to the predictor algorithm
func Decompressor(reader io.Reader) io.Reader {
	var ctx context
	ctx.input = make([]byte, 0, 8)

	return decompressor(func(output []byte) (int, error) {
		var (
			err               error
			flags, predicted  byte
			rc, total, copied int
		)

		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}

156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
			return total, err
		}

		// Extend the buffer, copy the prediction header
		//  and calculate the number of subsequent bytes to read
		ctx.input = ctx.input[:8]
		flags = ctx.input[0]
		predicted = int(bits.Hamming(flags))
		available = 8 - predicted

		// Read the non-predicted bytes and place them in the end of the buffer
		rc, err = reader.Read(ctx.input[predicted:])
	retryData:
		if rc < int(available) && err == nil {
			// Retry the read if we have fewer bytes than what the prediction header indicates
			var r int
			r, err = reader.Read(ctx.input[predicted+rc:])
			rc += r
			goto retryData
		} // Continue on any error, try to decompress and return it along the result

		// rc now contains the amount of actual bytes in this cycle (usually 8)
		rc += predicted

		// Walk the buffer, filling in the predicted blanks,
		// relocating read bytes and updating the guess table
		for i, a := 0, predicted; i < rc; i++ {
			if (flags & (1 << uint(i))) > 0 {
				// Guess succeeded, fill in from the table
				ctx.input[i] = ctx.table[ctx.hash]
			} else {
				// Relocate a read byte
				ctx.input[i], a = ctx.input[a], a+1
				// Guess failed, update the table
				ctx.table[ctx.hash] = ctx.input[i]
			}
			// Update the hash
			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i])
		}

		// Copy the decompressed data to the output
		ctx.input = ctx.input[:rc]
		available = copy(output, ctx.input)

		total += available

		// Check for remaining bytes that don't fit in the output buffer
		if available < rc {
			ctx.input = ctx.input[:copy(ctx.input, ctx.input[available:])]
		} else {
			// Clear the buffer
			ctx.input = ctx.input[:0]

			output = output[available:]
			if len(output) > 0 && err == nil {
				goto readHeader
			}
		}

		return total, err
	})
}







|
<




|


|





|














|




|

|


|
|




|








165
166
167
168
169
170
171
172

173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
			return total, err
		}

		// Extend the buffer, copy the prediction header
		//  and calculate the number of subsequent bytes to read
		ctx.input = ctx.input[:8]
		flags = ctx.input[0]
		predicted = bits.Hamming(flags)


		// Read the non-predicted bytes and place them in the end of the buffer
		rc, err = reader.Read(ctx.input[predicted:])
	retryData:
		if rc < int(8-predicted) && err == nil {
			// Retry the read if we have fewer bytes than what the prediction header indicates
			var r int
			r, err = reader.Read(ctx.input[int(predicted)+rc:])
			rc += r
			goto retryData
		} // Continue on any error, try to decompress and return it along the result

		// rc now contains the amount of actual bytes in this cycle (usually 8)
		rc += int(predicted)

		// Walk the buffer, filling in the predicted blanks,
		// relocating read bytes and updating the guess table
		for i, a := 0, predicted; i < rc; i++ {
			if (flags & (1 << uint(i))) > 0 {
				// Guess succeeded, fill in from the table
				ctx.input[i] = ctx.table[ctx.hash]
			} else {
				// Relocate a read byte
				ctx.input[i], a = ctx.input[a], a+1
				// Guess failed, update the table
				ctx.table[ctx.hash] = ctx.input[i]
			}
			// Update the hash
			ctx.update(uint16(ctx.input[i]))
		}

		// Copy the decompressed data to the output
		ctx.input = ctx.input[:rc]
		copied = copy(output, ctx.input)

		total += copied

		// Check for remaining bytes that don't fit in the output buffer
		if copied < rc {
			ctx.input = ctx.input[:copy(ctx.input, ctx.input[copied:])]
		} else {
			// Clear the buffer
			ctx.input = ctx.input[:0]

			output = output[copied:]
			if len(output) > 0 && err == nil {
				goto readHeader
			}
		}

		return total, err
	})
}