Check-in [a06f897887]
Overview
Comment:a testable bpe implementation that encodes and decodes in a single execution
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | bpe
Files: files | file ages | folders
SHA1: a06f897887882bb945b3ab8927f728411a202617
User & Date: spaskalev on 2014-12-28 15:54:10
Other Links: branch diff | manifest | tags
Context
2014-12-28
17:16
Closing the branch, this particular implementation is unfruitful :) Closed-Leaf check-in: 4fe28f11ac user: spaskalev tags: bpe
15:54
a testable bpe implementation that encodes and decodes in a single execution check-in: a06f897887 user: spaskalev tags: bpe
15:21
bpe encoding flow implementation check-in: 7733ef9df8 user: spaskalev tags: bpe
Changes

Modified src/0dev.org/commands/short/main.go from [c980bb124e] to [0cc7fcc383].

1
2
3
4
5
6
7

8
9
10
11
12
13
14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15







+







package main

import (
	iou "0dev.org/ioutil"
	"fmt"
	"io"
	// "io/ioutil"
	"bytes"
	"os"
	"sort"
)

func main() {
	f, err := os.Open(os.Args[1])
	if err != nil {
27
28
29
30
31
32
33

34
35
36
37
38






39
40
41








42
43
44
45
46
47
48
49
50
51
















52
53
54
55
56
57
58
28
29
30
31
32
33
34
35





36
37
38
39
40
41



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79







+
-
-
-
-
-
+
+
+
+
+
+
-
-
-
+
+
+
+
+
+
+
+







-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








	reader, err := apply(rec, f)
	if err != nil {
		os.Stderr.WriteString("Error while constructing application reader. " + err.Error())
		os.Exit(1)
	}

	var buf bytes.Buffer
	// count, err := io.Copy(ioutil.Discard, reader)
	// if err != nil {
	// 	os.Stderr.WriteString("Error while applying recommendations. " + err.Error())
	// 	os.Exit(1)
	// }
	_, err = io.Copy(&buf, reader)
	if err != nil {
		os.Stderr.WriteString("Error while applying recommendations. " + err.Error())
		os.Exit(1)
	}

	// fmt.Println(count)

	_, err = io.Copy(os.Stdout, reader)
	var in *bytes.Reader = bytes.NewReader(buf.Bytes())
	rev, err := apply(rec.reverse(), in)
	if err != nil {
		os.Stderr.WriteString("Error while constructing application reader. " + err.Error())
		os.Exit(1)
	}

	_, err = io.Copy(os.Stdout, rev)
	if err != nil {
		os.Stderr.WriteString("Error while applying recommendations. " + err.Error())
		os.Exit(1)
	}
}

type recommendation struct {
	p2s      map[uint16]byte
	s2p      map[byte]uint16
	unmapped byte
	p2s map[uint16]byte
	s2p map[byte]uint16
}

// reverse builds the inverse of r: every symbol-to-pair entry of r becomes a
// pair-to-symbol entry of the result, and every pair-to-symbol entry becomes a
// symbol-to-pair entry. r itself is not modified, and both maps of the
// returned value are always allocated, even when r's maps are empty or nil.
func (r *recommendation) reverse() *recommendation {
	inverted := &recommendation{
		p2s: make(map[uint16]byte),
		s2p: make(map[byte]uint16),
	}
	// Symbols that expanded to pairs now collapse those pairs back.
	for symbol, pair := range r.s2p {
		inverted.p2s[pair] = symbol
	}
	// Pairs that collapsed to symbols now expand from those symbols.
	for pair, symbol := range r.p2s {
		inverted.s2p[symbol] = pair
	}
	return inverted
}

// Returns an io.Reader that reads from the underlying one while applying the given recommendations
func apply(rec *recommendation, reader io.Reader) (io.Reader, error) {
	// The symbol reader replaces symbols with pairs according to the s2p mapping
	symbolReader := iou.SizedReader(iou.ReaderFunc(func(output []byte) (int, error) {
		var (
115
116
117
118
119
120
121
122

123
124
125
126
127
128
129
136
137
138
139
140
141
142

143
144
145
146
147
148
149
150







-
+







	rec.p2s = make(map[uint16]byte) // Store pair to symbol mappings
	rec.s2p = make(map[byte]uint16) // Store symbol to pair mappings

	for i, pairsLength := 0, len(pairs); i < pairsLength; i++ {
		currentPair := pairs[i]

		// Termination condition for when we are out of symbols
		if len(symbols) == 1 {
		if len(symbols) == 1 { // TODO drop to zero ?
			break
		}

		gain := currentPair.count - 4 // 4 bytes for the default header
		currentSymbol := symbols[0]

		if currentSymbol.count == 0 {
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
169
170
171
172
173
174
175

176
177
178
179
180
181
182







-







			pairsLength--

			// Mark the current pair for replacement by the current symbol
			rec.p2s[currentPair.value] = currentSymbol.value
		}
		symbols = symbols[1:]
	}
	rec.unmapped = symbols[len(symbols)-1].value
	return &rec
}

// Reads the provided input and returns information about the available byte pair and used symbols
func analyze(reader io.Reader) (pairSlice, symbolSlice) {
	var (
		current uint16   // Stores a pair of bytes in it's high and low bits