Check-in [89bfe97384]
Overview
Comment:Added documentation for the decompressor
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 89bfe973847c1cc1e9bf81a1d583bbeabd4f444c
User & Date: spaskalev on 2014-12-22 19:52:05
Other Links: manifest | tags
Context
2014-12-23
07:52
Added package ioutil with io.Writer and io.Reader function wrappers check-in: 2bcd5307ea user: spaskalev tags: trunk
2014-12-22
19:52
Added documentation for the decompressor check-in: 89bfe97384 user: spaskalev tags: trunk
19:28
Extracted the predictor's hash function as a method of the context struct. Minor changes to the decompressor's variables. check-in: 9dfd3cb1a2 user: spaskalev tags: trunk
Changes

Modified src/0dev.org/predictor/predictor.go from [a4885df9dd] to [290c8bbbe5].

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
}

// The following hash code is the heart of the algorithm:
// It builds a sliding hash sum of the previous 3-and-a-bit
// characters which will be used to index the guess table.
// A better hash function would result in additional compression,
// at the expense of time.
func (ctx *context) update(val uint16) {
	ctx.hash = (ctx.hash << 4) ^ val
}

type compressor func([]byte) error

func (w compressor) Write(data []byte) (int, error) {
	return len(data), w(data)
}







|
|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
}

// The following hash code is the heart of the algorithm:
// It builds a sliding hash sum of the previous 3-and-a-bit
// characters which will be used to index the guess table.
// A better hash function would result in additional compression,
// at the expense of time.
func (ctx *context) update(val byte) {
	ctx.hash = (ctx.hash << 4) ^ uint16(val)
}

type compressor func([]byte) error

func (w compressor) Write(data []byte) (int, error) {
	return len(data), w(data)
}
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)
				} else {
					// Guess was wrong, output char
					ctx.table[ctx.hash] = current
					buf = append(buf, current)
				}
				ctx.update(uint16(current))
			}

			if _, err := writer.Write(buf); err != nil {
				return err
			}

			// Reset the flags and buffer for the next iteration







|







90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)
				} else {
					// Guess was wrong, output char
					ctx.table[ctx.hash] = current
					buf = append(buf, current)
				}
				ctx.update(current)
			}

			if _, err := writer.Write(buf); err != nil {
				return err
			}

			// Reset the flags and buffer for the next iteration
146
147
148
149
150
151
152


153
154
155
156
157
158
159

		// Check whether we have leftover data in the buffer
		if len(ctx.input) > 0 {
			rc = copy(output, ctx.input)

			// Check whether we still have leftover data in the buffer :)
			if rc < len(ctx.input) {


				ctx.input = ctx.input[:copy(ctx.input, ctx.input[rc:])]
			}
			return rc, nil
		}

		// Read the next prediction header
	readHeader:







>
>







146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161

		// Check whether we have leftover data in the buffer
		if len(ctx.input) > 0 {
			rc = copy(output, ctx.input)

			// Check whether we still have leftover data in the buffer :)
			if rc < len(ctx.input) {
				// Shift the remaining bytes at the start of the buffer
				//  and resize the buffer accordingly
				ctx.input = ctx.input[:copy(ctx.input, ctx.input[rc:])]
			}
			return rc, nil
		}

		// Read the next prediction header
	readHeader:
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211


212
213
214
215
216

217
218
219
220
221
222
223
224
225
		// Walk the buffer, filling in the predicted blanks,
		// relocating read bytes and and updating the guess table
		for i, a := 0, predicted; i < rc; i++ {
			if (flags & (1 << uint(i))) > 0 {
				// Guess succeeded, fill in from the table
				ctx.input[i] = ctx.table[ctx.hash]
			} else {
				// Relocate a read byte
				ctx.input[i], a = ctx.input[a], a+1
				// Guess failed, update the table
				ctx.table[ctx.hash] = ctx.input[i]
			}
			// Update the hash
			ctx.update(uint16(ctx.input[i]))
		}

		// Copy the decompressed data to the output
		ctx.input = ctx.input[:rc]
		copied = copy(output, ctx.input)

		total += copied

		// Check for remaining bytes that dont fit in the output buffer
		if copied < rc {


			ctx.input = ctx.input[:copy(ctx.input, ctx.input[copied:])]
		} else {
			// Clear the buffer
			ctx.input = ctx.input[:0]


			output = output[copied:]
			if len(output) > 0 && err == nil {
				goto readHeader
			}
		}

		return total, err
	})
}







|





|


|
<
|
<




>
>
|




>









190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206

207

208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
		// Walk the buffer, filling in the predicted blanks,
		// relocating read bytes and and updating the guess table
		for i, a := 0, predicted; i < rc; i++ {
			if (flags & (1 << uint(i))) > 0 {
				// Guess succeeded, fill in from the table
				ctx.input[i] = ctx.table[ctx.hash]
			} else {
				// Relocate a read byte and advance the read byte index
				ctx.input[i], a = ctx.input[a], a+1
				// Guess failed, update the table
				ctx.table[ctx.hash] = ctx.input[i]
			}
			// Update the hash
			ctx.update(ctx.input[i])
		}

		// Copy the decompressed data to the output and accumulate the count

		copied = copy(output, ctx.input[:rc])

		total += copied

		// Check for remaining bytes that dont fit in the output buffer
		if copied < rc {
			// Shift the remaining bytes at the start of the buffer
			//  and resize the buffer accordingly
			ctx.input = ctx.input[:copy(ctx.input, ctx.input[copied:rc])]
		} else {
			// Clear the buffer
			ctx.input = ctx.input[:0]

			// Loop for another pass if there is available space in the output
			output = output[copied:]
			if len(output) > 0 && err == nil {
				goto readHeader
			}
		}

		return total, err
	})
}