Check-in [50507bd510]
Overview
Comment:Extracted predictor's compressor and decompressor code into separate structs that embed Sized{Writer,Reader}
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 50507bd510ea785ba85b12897d7c3be04f114166
User & Date: spaskalev on 2014-12-25 00:43:31
Other Links: manifest | tags
Context
2014-12-25
00:55
Predictor's compressor and decompressor structures now implement io.Writer/io.Reader in order to deal away with function indirection but they do not follow the required semantics. Those are provided by the SizedWriter/SizedReader wrappers returned by the constructor functions. check-in: 4dfcff962c user: spaskalev tags: trunk
00:43
Extracted predictor's compressor and decompressor code into separate structs that embed Sized{Writer,Reader} check-in: 50507bd510 user: spaskalev tags: trunk
00:26
Extracted SizedWriter to a sizedWriter struct with a Write() method. check-in: 46da7a6ae9 user: spaskalev tags: trunk
Changes

Modified src/0dev.org/predictor/predictor.go from [43e7f751ed] to [cb877636af].

1
2
3
4
5
6
7
8
9
10


11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31







32


33

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78


79

80
81








82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// Package predictor implements the predictor compression/decompression algorithm
// as specified by RFC1978 - PPP Predictor Compression Protocol
package predictor

import (
	bits "0dev.org/bits"
	iou "0dev.org/ioutil"
	"io"
)



type context struct {
	table [1 << 16]byte
	input []byte
	hash  uint16
}

// The following hash code is the heart of the algorithm:
// It builds a sliding hash sum of the previous 3-and-a-bit
// characters which will be used to index the guess table.
// A better hash function would result in additional compression,
// at the expense of time.
func (ctx *context) update(val byte) {
	ctx.hash = (ctx.hash << 4) ^ uint16(val)
}

// Returns an io.Writer implementation that wraps the provided io.Writer
// and compresses data according to the predictor algorithm
//
// It can buffer data as the predictor mandates 8-byte blocks with a header.
// A call with no data will force a flush.
func Compressor(writer io.Writer) io.Writer {







	var ctx context




	return iou.SizedWriter(iou.WriterFunc(func(data []byte) (int, error) {
		var (
			blockSize  int = 8
			datalength int = len(data)
		)

		if datalength == 0 {
			return 0, nil
		}

		if datalength < blockSize {
			blockSize = datalength
		}

		var buf []byte = make([]byte, 1, blockSize+1)
		for block := 0; block < datalength/blockSize; block++ {
			for i := 0; i < blockSize; i++ {
				var current byte = data[(block*blockSize)+i]
				if ctx.table[ctx.hash] == current {
					// Guess was right - don't output
					buf[0] |= 1 << uint(i)
				} else {
					// Guess was wrong, output char
					ctx.table[ctx.hash] = current
					buf = append(buf, current)
				}
				ctx.update(current)
			}

			if c, err := writer.Write(buf); err != nil {
				return (block * blockSize) + c, err
			}

			// Reset the flags and buffer for the next iteration
			buf, buf[0] = buf[:1], 0
		}

		return datalength, nil
	}), 8)
}

// Returns an io.Reader implementation that wraps the provided io.Reader
// and decompresses data according to the predictor algorithm
func Decompressor(reader io.Reader) io.Reader {
	var ctx context


	ctx.input = make([]byte, 0, 8)


	return iou.SizedReader(iou.ReaderFunc(func(output []byte) (int, error) {








		var (
			err               error
			flags, predicted  byte
			rc, total, copied int
		)

		// Read the next prediction header
	readHeader:
		rc, err = reader.Read(ctx.input[:1])
		// Fail on error unless it is EOF
		if err != nil && err != io.EOF {
			return total, err
		} else if rc == 0 {
			return total, err
		}

		// Extend the buffer, copy the prediction header
		//  and calculate the number of subsequent bytes to read
		ctx.input = ctx.input[:8]
		flags = ctx.input[0]
		predicted = bits.Hamming(flags)

		// Read the non-predicted bytes and place them in the end of the buffer
		rc, err = reader.Read(ctx.input[predicted:])
	retryData:
		if rc < int(8-predicted) && err == nil {
			// Retry the read if we have fewer bytes than what the prediction header indicates
			var r int
			r, err = reader.Read(ctx.input[int(predicted)+rc:])
			rc += r
			goto retryData
		} // Continue on any error, try to decompress and return it along the result

		// rc now contains the amount of actual bytes in this cycle (usually 8)
		rc += int(predicted)

		// Walk the buffer, filling in the predicted blanks,
		// relocating read bytes and and updating the guess table
		for i, a := 0, predicted; i < rc; i++ {
			if (flags & (1 << uint(i))) > 0 {
				// Guess succeeded, fill in from the table
				ctx.input[i] = ctx.table[ctx.hash]
			} else {
				// Relocate a read byte and advance the read byte index
				ctx.input[i], a = ctx.input[a], a+1
				// Guess failed, update the table
				ctx.table[ctx.hash] = ctx.input[i]
			}
			// Update the hash
			ctx.update(ctx.input[i])
		}

		// Copy the decompressed data to the output and accumulate the count
		copied = copy(output, ctx.input[:rc])
		total += copied

		// Clear the buffer
		ctx.input = ctx.input[:0]

		// Loop for another pass if there is available space in the output
		output = output[copied:]
		if len(output) > 0 && err == nil {
			goto readHeader
		}

		return total, err
	}), 8)
}










>
>


<


















>
>
>
>
>
>
>
|
>
>
|
>
|
|
|
|
|

|
|
|

|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|

|
|
|

|
<





|
>
>
|
>
|
|
>
>
>
>
>
>
>
>
|
|
|
|
|

|
|
|
|
|
|
|
|
|

|
|
|
|
|

|
|
|
|
|
|
|
|
|
|

|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|

|
|

|
|
|
|
|

|
<

1
2
3
4
5
6
7
8
9
10
11
12
13
14

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168

169
// Package predictor implements the predictor compression/decompression algorithm
// as specified by RFC1978 - PPP Predictor Compression Protocol
package predictor

import (
	bits "0dev.org/bits"
	iou "0dev.org/ioutil"
	"io"
)

// The context struct contains the predictor's algorithm guess table
// and the current value of its input/output hash
type context struct {
	table [1 << 16]byte

	hash  uint16
}

// The following hash code is the heart of the algorithm:
// It builds a sliding hash sum of the previous 3-and-a-bit
// characters which will be used to index the guess table.
// A better hash function would result in additional compression,
// at the expense of time.
func (ctx *context) update(val byte) {
	ctx.hash = (ctx.hash << 4) ^ uint16(val)
}

// Returns an io.Writer implementation that wraps the provided io.Writer
// and compresses data according to the predictor algorithm
//
// It can buffer data as the predictor mandates 8-byte blocks with a header.
// A call with no data will force a flush.
func Compressor(writer io.Writer) io.Writer {
	var cmp compressor
	cmp.Writer = iou.SizedWriter(iou.WriterFunc(cmp.compress), 8)
	cmp.target = writer
	return &cmp
}

type compressor struct {
	context
	io.Writer
	target io.Writer
}

func (ctx *compressor) compress(data []byte) (int, error) {
	var (
		blockSize  int = 8
		datalength int = len(data)
	)

	if datalength == 0 {
		return 0, nil
	}

	if datalength < blockSize {
		blockSize = datalength
	}

	var buf []byte = make([]byte, 1, blockSize+1)
	for block := 0; block < datalength/blockSize; block++ {
		for i := 0; i < blockSize; i++ {
			var current byte = data[(block*blockSize)+i]
			if ctx.table[ctx.hash] == current {
				// Guess was right - don't output
				buf[0] |= 1 << uint(i)
			} else {
				// Guess was wrong, output char
				ctx.table[ctx.hash] = current
				buf = append(buf, current)
			}
			ctx.update(current)
		}

		if c, err := ctx.target.Write(buf); err != nil {
			return (block * blockSize) + c, err
		}

		// Reset the flags and buffer for the next iteration
		buf, buf[0] = buf[:1], 0
	}

	return datalength, nil

}

// Returns an io.Reader implementation that wraps the provided io.Reader
// and decompresses data according to the predictor algorithm
func Decompressor(reader io.Reader) io.Reader {
	var dcmp decompressor
	dcmp.Reader = iou.SizedReader(iou.ReaderFunc(dcmp.decompress), 8)
	dcmp.source = reader
	dcmp.input = make([]byte, 0, 8)
	return &dcmp
}

type decompressor struct {
	context
	io.Reader
	source io.Reader
	input  []byte
}

func (ctx *decompressor) decompress(output []byte) (int, error) {
	var (
		err               error
		flags, predicted  byte
		rc, total, copied int
	)

	// Read the next prediction header
readHeader:
	rc, err = ctx.source.Read(ctx.input[:1])
	// Fail on error unless it is EOF
	if err != nil && err != io.EOF {
		return total, err
	} else if rc == 0 {
		return total, err
	}

	// Extend the buffer, copy the prediction header
	//  and calculate the number of subsequent bytes to read
	ctx.input = ctx.input[:8]
	flags = ctx.input[0]
	predicted = bits.Hamming(flags)

	// Read the non-predicted bytes and place them in the end of the buffer
	rc, err = ctx.source.Read(ctx.input[predicted:])
retryData:
	if rc < int(8-predicted) && err == nil {
		// Retry the read if we have fewer bytes than what the prediction header indicates
		var r int
		r, err = ctx.source.Read(ctx.input[int(predicted)+rc:])
		rc += r
		goto retryData
	} // Continue on any error, try to decompress and return it along the result

	// rc now contains the amount of actual bytes in this cycle (usually 8)
	rc += int(predicted)

	// Walk the buffer, filling in the predicted blanks,
	// relocating read bytes and and updating the guess table
	for i, a := 0, predicted; i < rc; i++ {
		if (flags & (1 << uint(i))) > 0 {
			// Guess succeeded, fill in from the table
			ctx.input[i] = ctx.table[ctx.hash]
		} else {
			// Relocate a read byte and advance the read byte index
			ctx.input[i], a = ctx.input[a], a+1
			// Guess failed, update the table
			ctx.table[ctx.hash] = ctx.input[i]
		}
		// Update the hash
		ctx.update(ctx.input[i])
	}

	// Copy the decompressed data to the output and accumulate the count
	copied = copy(output, ctx.input[:rc])
	total += copied

	// Clear the buffer
	ctx.input = ctx.input[:0]

	// Loop for another pass if there is available space in the output
	output = output[copied:]
	if len(output) > 0 && err == nil {
		goto readHeader
	}

	return total, err

}