Check-in [0083d7bfee]
Overview
Comment: Added a symbol-to-pair replacing reader for the bpe
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | bpe
Files: files | file ages | folders
SHA1: 0083d7bfee14fcf5e8d81e0e558366da3157abd4
User & Date: spaskalev on 2014-12-28 13:57:50
Other Links: branch diff | manifest | tags
Context
2014-12-28
14:14
Added 0dev.org/ioutil.ReadByte() function and a test for it. CC at 100% check-in: 879630c89c user: spaskalev tags: bpe
13:57
Added a symbol-to-pair replacing reader for the bpe check-in: 0083d7bfee user: spaskalev tags: bpe
12:44
Adding swaps recommendation for the bpe. check-in: a64b9a1ac3 user: spaskalev tags: bpe
Changes

Modified src/0dev.org/commands/short/main.go from [528d5e2695] to [a5a47b7f78].

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37



















38
39
40
41
42
43
44

	pairs, symbols := analyze(f)

	rec := recommend(pairs, symbols)
	fmt.Println(*rec)
}

// pair couples a 16-bit pair value with a counter.
type pair struct {
	// value packs two bytes into one uint16: the first byte sits in the
	// high 8 bits and the second byte in the low 8 bits.
	value uint16
	// count is the counter attached to this pair value
	// (presumably its number of occurrences in the input - confirm against analyze).
	count uint64
}

// symbol couples a single byte value with a counter.
type symbol struct {
	// value is the byte this entry describes.
	value byte
	// count is the counter attached to this byte
	// (presumably its number of occurrences in the input - confirm against analyze).
	count uint64
}

// recommendation holds the two lookup tables produced by recommend.
type recommendation struct {
	// p2s maps a 16-bit pair value to the single byte associated with it.
	p2s map[uint16]byte
	// s2p maps a single byte to the 16-bit pair value associated with it.
	s2p map[byte]uint16
}




















func recommend(pairs pairSlice, symbols symbolSlice) *recommendation {
	var (
		rec         recommendation
		pairsLength = len(pairs)
	)








<
<
<
<
<
<
<
<
<
<




>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







17
18
19
20
21
22
23










24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

	pairs, symbols := analyze(f)

	rec := recommend(pairs, symbols)
	fmt.Println(*rec)
}











// recommendation holds the two lookup tables produced by recommend.
type recommendation struct {
	// p2s maps a 16-bit pair value to the single byte associated with it.
	p2s map[uint16]byte
	// s2p maps a single byte to the 16-bit pair value associated with it;
	// apply consults it to expand a byte into the pair's two bytes.
	s2p map[byte]uint16
}

// apply builds a reader that pulls bytes one at a time from reader and
// expands every byte that has an entry in rec.s2p into the two bytes of its
// pair (high byte first, then low byte). Bytes without a mapping pass through
// unchanged.
//
// The read function only fetches a new byte while at least two free slots
// remain in the output buffer, so an expansion always fits; it is wrapped in
// iou.SizedReader with a size of 2 (presumably guaranteeing a minimum buffer
// of two bytes - confirm against the iou package).
func apply(rec *recommendation, reader io.Reader) {
	symbolReader := iou.SizedReader(iou.ReaderFunc(func(output []byte) (int, error) {
		written := 0

		// Stop as soon as fewer than two slots remain: the next byte might
		// expand into a pair and would not fit.
		for written < len(output)-1 {
			// Read a byte from the underlying reader
			count, err := reader.Read(output[written : written+1])

			// io.Reader may deliver a byte together with an error (e.g. io.EOF),
			// so the byte is processed before the error is examined.
			if count == 1 {
				// Convert the byte to a pair if there is a mapping for it
				if pair, ok := rec.s2p[output[written]]; ok {
					output[written] = byte(pair >> 8) // extract the high byte from the pair
					written++
					output[written] = byte(pair) // leave only the low byte from the pair
				}
				written++
			}

			if err != nil || count != 1 {
				return written, err
			}
		}

		return written, nil
	}), 2)

	// NOTE(review): the reader is not consumed anywhere yet; the blank
	// assignment keeps the function compiling until it is wired up.
	_ = symbolReader
}

func recommend(pairs pairSlice, symbols symbolSlice) *recommendation {
	var (
		rec         recommendation
		pairsLength = len(pairs)
	)

129
130
131
132
133
134
135










136
137
138
139
140
141
142
	for index, value := range symbols {
		allSymbols = append(allSymbols, symbol{value: byte(index), count: value})
	}
	sort.Sort(allSymbols)

	return availablePairs, allSymbols
}











// String satisfies fmt.Stringer and is meant for debugging output.
// It renders the pair as "[ high low (count) ]", where high and low are the
// upper and lower bytes of the 16-bit value.
func (p pair) String() string {
	high := p.value >> 8
	low := p.value & 0x00ff
	return fmt.Sprintf("[ %d %d (%d) ]", high, low, p.count)
}

// pairSlice is a named slice of pair values; recommend takes it as its pairs argument.
type pairSlice []pair







>
>
>
>
>
>
>
>
>
>







138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
	for index, value := range symbols {
		allSymbols = append(allSymbols, symbol{value: byte(index), count: value})
	}
	sort.Sort(allSymbols)

	return availablePairs, allSymbols
}

// pair couples a 16-bit pair value with a counter.
type pair struct {
	// value packs two bytes into one uint16: the first byte sits in the
	// high 8 bits and the second byte in the low 8 bits.
	value uint16
	// count is the counter attached to this pair value
	// (presumably its number of occurrences in the input - confirm against analyze).
	count uint64
}

// symbol couples a single byte value with a counter.
type symbol struct {
	// value is the byte this entry describes.
	value byte
	// count is the counter attached to this byte
	// (presumably its number of occurrences in the input - confirm against analyze).
	count uint64
}

// String satisfies fmt.Stringer and is meant for debugging output.
// It renders the pair as "[ high low (count) ]", where high and low are the
// upper and lower bytes of the 16-bit value.
func (p pair) String() string {
	high := p.value >> 8
	low := p.value & 0x00ff
	return fmt.Sprintf("[ %d %d (%d) ]", high, low, p.count)
}

// pairSlice is a named slice of pair values; recommend takes it as its pairs argument.
type pairSlice []pair