Check-in [60ca5b4b7b]
Overview
Comment:Added hamming weight lookup table for bytes in package bits. Added a PoC predictor decompressor implementation
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:60ca5b4b7bd5302846cc8605ba4b844356c4b7a2
User & Date: spaskalev on 2014-12-16 01:55:53
Other Links: manifest | tags
Context
2014-12-16
04:03
Fixed issues with both compressor and decompressor, added more tests check-in: b838653282 user: spaskalev tags: trunk
01:55
Added hamming weight lookup table for bytes in package bits. Added a PoC predictor decompressor implementation check-in: 60ca5b4b7b user: spaskalev tags: trunk
2014-12-15
21:22
Added diff.D struct; Added an RFC1978 Predictor compressor implementation and a test check-in: 67794bdf14 user: spaskalev tags: trunk
Changes

Modified src/0dev.org/bits/bits.go from [415ad79153] to [3a25035ac5].

1
2
3
4
5
6






















7
8
9
10
11
12
13
// Package bits provides a bit vector interface and several implementations
package bits

import (
	"strconv"
)























// The Vector interface defines methods on the bit vector
type Vector interface {
	// Retrieves the bit at the designated position
	Peek(uint) bool
	// Sets the bit at the designated position
	Poke(uint, bool)
|





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
// Package bits provides various constructs that work with bits
package bits

import (
	"strconv"
)

var hamming = [256]byte{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}

// Returns the number of raised bits in the given byte
func Hamming(b byte) byte {
	return hamming[b]
}

// The Vector interface defines methods on the bit vector
type Vector interface {
	// Retrieves the bit at the designated position
	Peek(uint) bool
	// Sets the bit at the designated position
	Poke(uint, bool)

Modified src/0dev.org/bits/bits_test.go from [3c67937d6c] to [b5ed7a9a33].

1
2
3
4
5
6















7
8
9
10
11
12
13
package bits

import (
	"strconv"
	"testing"
)
















var sizes []uint = []uint{0, 31, 32, 33, 61, 63, 64, 127, 128, 129}

func TestBitSize(t *testing.T) {
	for _, size := range sizes {
		v := NewBit(size)
		if v.Len() < size || v.Len() > size+strconv.IntSize {






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
package bits

import (
	"strconv"
	"testing"
)

func TestHamming(t *testing.T) {
	for i := 0; i < 256; i++ {
		b, result := i, byte(0)
		for b > 0 {
			if (b & 1) > 0 {
				result++
			}
			b = b >> 1
		}
		if result != Hamming(byte(i)) {
			t.Error("Invalid hamming weight reported for ", i)
		}
	}
}

var sizes []uint = []uint{0, 31, 32, 33, 61, 63, 64, 127, 128, 129}

func TestBitSize(t *testing.T) {
	for _, size := range sizes {
		v := NewBit(size)
		if v.Len() < size || v.Len() > size+strconv.IntSize {

Modified src/0dev.org/predictor/predictor.go from [d4a72555aa] to [380067e183].

3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
..
94
95
96
97
98
99
100




















































































package predictor

import (
	"io"
)

type context struct {
	table [65536]byte
	hash  uint16
	input []byte

}

// Returns a closure over the provided writer that compresses data when called.
//
// It can buffer data as the predictor mandates 8-byte blocks with a header.
// A call with no data will force a flush.
func Compressor(writer io.Writer) func([]byte) error {
	var ctx context
	ctx.input = make([]byte, 8)

	// Forward declaration as it is required for recursion
	var write func(data []byte) error

	write = func(data []byte) error {
		var (
			err          error
................................................................................
			buf = buf[:1]
		}
		return nil
	}

	return write
}



























































































|
|
|
>








|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
..
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
package predictor

import (
	"io"
)

type context struct {
	table  [1 << 16]byte
	buffer [1 << 3]byte
	input  []byte
	hash   uint16
}

// Returns a closure over the provided writer that compresses data when called.
//
// It can buffer data as the predictor mandates 8-byte blocks with a header.
// A call with no data will force a flush.
func Compressor(writer io.Writer) func([]byte) error {
	var ctx context
	ctx.input = ctx.buffer[:]

	// Forward declaration as it is required for recursion
	var write func(data []byte) error

	write = func(data []byte) error {
		var (
			err          error
................................................................................
			buf = buf[:1]
		}
		return nil
	}

	return write
}

type reader func([]byte) (int, error)

func (r reader) Read(output []byte) (int, error) {
	return r(output)
}

// TODO - document
func Decompressor(reader io.Reader) reader {
	var ctx context
	ctx.input = ctx.buffer[:0]

	return func(output []byte) (int, error) {
		var (
			err       error
			flags     byte
			readCount int
		)

		// Sanity check for space to read into
		if len(output) == 0 {
			return 0, nil
		}

		// Check whether we have leftover data in the buffer
		if len(ctx.input) > 0 {
			readCount = copy(output, ctx.input)
			ctx.input = ctx.input[readCount:]
			return readCount, nil
		}

		// // The buffer will shrink as it empties, restore it if it is needed
		// if len(ctx.input) == 0 {
		// 	ctx.input = ctx.buffer[:1]
		// }

		// Read the flags
		readCount, err = reader.Read(ctx.buffer[:1])
		if readCount == 0 || err != nil {
			return readCount, err
		}

		// This is single-iteration only but it is fine according to io.Reader's contract ?!
		// TODO - read all bytes from a block based on the hamming weight of the flag
		// and just shuffle them for predictions instead of bite-sized reads ;)

		flags = ctx.buffer[0]

		var i uint = 0
		for ; i < 8; i++ {
			if flags&(1<<i) > 0 {
				// Guess was right
				ctx.buffer[i] = ctx.table[ctx.hash]
			} else {
				readCount, err = reader.Read(ctx.buffer[i:(i + 1)])

				if err == io.EOF {
					break
				}

				if err != nil {
					return readCount, err
				}

				if readCount == 0 { // treat as EoF
					break
				}

				ctx.table[ctx.hash] = ctx.buffer[i]
			}

			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.buffer[i])
		}

		readCount = copy(output, ctx.buffer[:i])

		// Place any remaining bytes in the buffer
		if uint(readCount) < i {
			ctx.input = ctx.buffer[readCount:i]
		}

		return readCount, nil
	}
}

Modified src/0dev.org/predictor/predictor_test.go from [f542b34031] to [05441cad24].

1
2
3
4
5

6
7
8
9
10
11
12
..
32
33
34
35
36
37
38
39










40
41
42
43
44
package predictor

import (
	diff "0dev.org/diff"
	"bytes"

	"testing"
)

func TestRFC(t *testing.T) {
	input := []byte{0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
................................................................................

	err = out([]byte{})
	if err != nil {
		t.Error(err)
	}

	result := buf.Bytes()
	delta := diff.Diff(diff.D{len(result), len(output), func(i, j int) bool { return result[i] == output[i] }})











	if len(delta.Added) > 0 || len(delta.Removed) > 0 {
		t.Error("Unexpected compressed output", delta)
	}
}





>







 







|
>
>
>
>
>
>
>
>
>
>





1
2
3
4
5
6
7
8
9
10
11
12
13
..
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
package predictor

import (
	diff "0dev.org/diff"
	"bytes"
	"io/ioutil"
	"testing"
)

func TestRFC(t *testing.T) {
	input := []byte{0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
		0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0a,
................................................................................

	err = out([]byte{})
	if err != nil {
		t.Error(err)
	}

	result := buf.Bytes()
	delta := diff.Diff(diff.D{len(result), len(output), func(i, j int) bool { return result[i] == output[j] }})

	if len(delta.Added) > 0 || len(delta.Removed) > 0 {
		t.Error("Unexpected compressed output", delta)
	}

	data := bytes.NewBuffer(result)
	in := Decompressor(data)

	result, err = ioutil.ReadAll(in)
	delta = diff.Diff(diff.D{len(result), len(input), func(i, j int) bool { return result[i] == input[j] }})

	if len(delta.Added) > 0 || len(delta.Removed) > 0 {
		t.Error("Unexpected compressed output", delta)
	}
}