Check-in [0083d7bfee]
Overview
Comment:Added a symbol-to-pair replacing reader for the bpe
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | bpe
Files: files | file ages | folders
SHA1: 0083d7bfee14fcf5e8d81e0e558366da3157abd4
User & Date: spaskalev on 2014-12-28 13:57:50
Other Links: branch diff | manifest | tags
Context
2014-12-28
14:14
Added 0dev.org/ioutil.ReadByte() function and a test for it. CC at 100% check-in: 879630c89c user: spaskalev tags: bpe
13:57
Added a symbol-to-pair replacing reader for the bpe check-in: 0083d7bfee user: spaskalev tags: bpe
12:44
Adding swaps recommendation for the bpe. check-in: a64b9a1ac3 user: spaskalev tags: bpe
Changes

Modified src/0dev.org/commands/short/main.go from [528d5e2695] to [a5a47b7f78].

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37



















38
39
40
41
42
43
44
17
18
19
20
21
22
23










24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53







-
-
-
-
-
-
-
-
-
-




+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








	pairs, symbols := analyze(f)

	rec := recommend(pairs, symbols)
	fmt.Println(*rec)
}

// pair couples a 16-bit value with a counter.
type pair struct {
	// Two bytes packed into one word: the high 8 bits and the low 8 bits are
	// treated as separate bytes wherever this file unpacks the value.
	value uint16
	// NOTE(review): presumably the number of times the pair was seen in the
	// analyzed input - confirm against analyze.
	count uint64
}

// symbol couples a single byte value with a counter.
type symbol struct {
	// The byte this entry describes.
	value byte
	// NOTE(review): presumably the number of times the byte was seen in the
	// analyzed input - confirm against analyze.
	count uint64
}

// recommendation holds the two lookup tables that relate packed byte pairs
// (uint16) to single-byte symbols.
type recommendation struct {
	// Keyed by packed pair, yields a single byte.
	// NOTE(review): presumably the inverse of s2p - confirm against recommend.
	p2s map[uint16]byte
	// Keyed by single byte, yields the packed pair that apply writes out in
	// place of that byte (high byte first, then low byte).
	s2p map[byte]uint16
}

// apply builds a reader over the given reader that replaces every byte found
// in rec.s2p with the two bytes of the mapped pair (high byte first, then low
// byte). Bytes without a mapping pass through unchanged.
//
// The constructed reader is not consumed anywhere yet and apply returns
// nothing; the blank assignment at the end only keeps the function compiling
// until a caller is wired in.
func apply(rec *recommendation, reader io.Reader) {
	// NOTE(review): the size argument of 2 presumably guarantees that the
	// callback is handed a buffer with room for a full pair - confirm against
	// iou.SizedReader.
	symbolReader := iou.SizedReader(iou.ReaderFunc(func(output []byte) (int, error) {
		// written counts the bytes placed in output so far. The loop stops one
		// slot short of the end so an expanded pair always fits.
		written := 0
		for written < len(output)-1 {
			// Read a single byte from the underlying reader
			count, err := reader.Read(output[written : written+1])

			// A reader may deliver its last byte together with an error such as
			// io.EOF, so the byte is translated before the error is reported.
			if count == 1 {
				// Convert the byte to a pair if there is a mapping for it
				if pair, ok := rec.s2p[output[written]]; ok {
					output[written] = byte(pair >> 8) // extract the high byte from the pair
					written++
					output[written] = byte(pair) // leave only the low byte from the pair
				}
				written++
			}

			if err != nil || count != 1 {
				return written, err
			}
		}
		return written, nil
	}), 2)

	_ = symbolReader
}

func recommend(pairs pairSlice, symbols symbolSlice) *recommendation {
	var (
		rec         recommendation
		pairsLength = len(pairs)
	)

129
130
131
132
133
134
135










136
137
138
139
140
141
142
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161







+
+
+
+
+
+
+
+
+
+







	for index, value := range symbols {
		allSymbols = append(allSymbols, symbol{value: byte(index), count: value})
	}
	sort.Sort(allSymbols)

	return availablePairs, allSymbols
}

// pair couples a 16-bit value with a counter.
type pair struct {
	// Two bytes packed into one word; String prints the high 8 bits and the
	// low 8 bits as separate numbers.
	value uint16
	// NOTE(review): presumably the number of times the pair was seen in the
	// analyzed input - confirm against analyze.
	count uint64
}

// symbol couples a single byte value with a counter.
type symbol struct {
	// The byte this entry describes.
	value byte
	// NOTE(review): presumably the number of times the byte was seen in the
	// analyzed input - confirm against analyze.
	count uint64
}

// String implements fmt.Stringer for debugging output. It renders the pair as
// "[ high low (count) ]", where high and low are the upper and lower 8 bits of
// the packed value printed as decimal numbers.
func (p pair) String() string {
	high := p.value >> 8
	low := p.value & 0x00ff
	return fmt.Sprintf("[ %d %d (%d) ]", high, low, p.count)
}

// pairSlice is a named slice of pair values.
type pairSlice []pair