Check-in [630530df49]
Overview
Comment:Removed TODOs, renamed readCount->rc, wrapped->reader
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 630530df496a3b03f5655e2176c34952717d8779
User & Date: spaskalev on 2014-12-21 22:52:05
Other Links: manifest | tags
Context
2014-12-21
23:24
The decompressor now tries to fill in the whole output buffer in a single call. check-in: 1f73d90f65 user: spaskalev tags: trunk
22:52
Removed TODOs, renamed readCount->rc, wrapped->reader check-in: 630530df49 user: spaskalev tags: trunk
19:38
Added debug/pprof to ease basic cpu profiling check-in: 1a4bdf36e2 user: spaskalev tags: trunk
Changes

Modified src/0dev.org/predictor/predictor.go from [d2a3bd9d21] to [090d2f3fd4].

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
68
69
70
71
72
73
74

75
76
77
78
79
80
81







-







				return err
			}
			// ... and stage the rest of the data in the buffer
			ctx.input = append(ctx.input, data[blockSize-bufferLength:]...)
			return nil
		}

		// TODO allocate this on ctx.buffer ...
		var buf []byte = make([]byte, 1, blockSize+1)
		for block := 0; block < len(data)/blockSize; block++ {
			for i := 0; i < blockSize; i++ {
				var current byte = data[(block*blockSize)+i]
				if ctx.table[ctx.hash] == current {
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)
115
116
117
118
119
120
121
122

123
124
125
126
127
128
129
130

131
132
133
134
135
136
137
138
139
140

141
142
143
144


145
146

147
148
149
150
151
152
153
154
155
156



157
158
159
160
161
162
163
164
165
166
167
168

169
170
171
172
173
174
175

176
177
178

179
180
181
182
183
184
185
186
187
188

189


190
191
192


193
194
195
196
197

198
199
114
115
116
117
118
119
120

121
122
123
124
125
126
127
128

129
130
131
132
133
134
135
136
137
138

139
140
141


142
143
144

145
146
147




148



149
150
151
152
153
154
155
156
157
158
159
160
161
162

163
164
165
166
167
168
169

170
171
172

173
174
175
176
177
178
179
180
181
182

183
184
185
186
187


188
189
190
191
192
193

194
195
196







-
+







-
+









-
+


-
-
+
+

-
+


-
-
-
-

-
-
-
+
+
+











-
+






-
+


-
+









-
+

+
+

-
-
+
+




-
+


// Read makes decompressor satisfy io.Reader: it invokes the underlying
// function value with output and hands back that call's byte count and
// error unchanged.
func (r decompressor) Read(output []byte) (int, error) {
	n, err := r(output)
	return n, err
}

// Returns an io.Reader implementation that wraps the provided io.Reader
// and decompresses data according to the predictor algorithm
func Decompressor(wrapped io.Reader) io.Reader {
func Decompressor(reader io.Reader) io.Reader {
	var ctx context
	ctx.input = make([]byte, 0, 8)

	return decompressor(func(output []byte) (int, error) {
		var (
			err       error
			flags     byte
			readCount int
			rc, total int
		)

		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}

		// Check whether we have leftover data in the buffer
		if len(ctx.input) > 0 {
			readCount = copy(output, ctx.input)
			rc = copy(output, ctx.input)

			// Check whether we still have leftover data in the buffer :)
			if readCount < len(ctx.input) {
				ctx.input = ctx.input[:copy(ctx.input, ctx.input[readCount:])]
			if rc < len(ctx.input) {
				ctx.input = ctx.input[:copy(ctx.input, ctx.input[rc:])]
			}
			return readCount, nil
			return rc, nil
		}

		// This is single-iteration only but it is fine according to io.Reader's contract ?!
		// TODO - read all bytes from a block based on the hamming weight of the flag
		// and just shuffle them for predictions instead of bite-sized reads ;)

		// Read the flags
		readCount, err = wrapped.Read(ctx.input[:1])
		if readCount == 0 || err != nil {
			return readCount, err
		rc, err = reader.Read(ctx.input[:1])
		if rc == 0 || (err != nil && err != io.EOF) {
			return rc, err
		}

		ctx.input = ctx.input[:8]
		flags = ctx.input[0]

		var i uint = 0
		for ; i < 8; i++ {
			if flags&(1<<i) > 0 {
				// Guess was right
				ctx.input[i] = ctx.table[ctx.hash]
			} else {
				readCount, err = wrapped.Read(ctx.input[i:(i + 1)])
				rc, err = reader.Read(ctx.input[i:(i + 1)])

				if err == io.EOF {
					break
				}

				if err != nil {
					return readCount, err
					return rc, err
				}

				if readCount == 0 { // treat as EoF
				if rc == 0 { // treat as EoF
					break
				}

				ctx.table[ctx.hash] = ctx.input[i]
			}

			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i])
		}

		readCount = copy(output, ctx.input[:i])
		rc = copy(output, ctx.input[:i])

		total += rc

		// Place any remaining bytes in the buffer
		if uint(readCount) < i {
			ctx.input = ctx.input[readCount:i]
		if uint(rc) < i {
			ctx.input = ctx.input[:copy(ctx.input, ctx.input[rc:i])]
		} else {
			ctx.input = ctx.input[:0]
		}

		return readCount, nil
		return total, err
	})
}