Overview
| Comment: | Added a symbol-to-pair replacing reader for the bpe |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | bpe |
| Files: | files | file ages | folders |
| SHA1: |
0083d7bfee14fcf5e8d81e0e558366da |
| User & Date: | spaskalev on 2014-12-28 13:57:50.850 |
| Other Links: | branch diff | manifest | tags |
Context
|
2014-12-28
| ||
| 14:14 | Added 0dev.org/ioutil.ReadByte() function and a test for it. CC at 100% check-in: 879630c89c user: spaskalev tags: bpe | |
| 13:57 | Added a symbol-to-pair replacing reader for the bpe check-in: 0083d7bfee user: spaskalev tags: bpe | |
| 12:44 | Adding swaps recommendation for the bpe. check-in: a64b9a1ac3 user: spaskalev tags: bpe | |
Changes
Modified src/0dev.org/commands/short/main.go
from [528d5e2695]
to [a5a47b7f78].
| ︙ | ︙ | |||
17 18 19 20 21 22 23 | pairs, symbols := analyze(f) rec := recommend(pairs, symbols) fmt.Println(*rec) } | < < < < < < < < < < > > > > > > > > > > > > > > > > > > > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
pairs, symbols := analyze(f)
rec := recommend(pairs, symbols)
fmt.Println(*rec)
}
// recommendation holds the two lookup tables produced by recommend.
type recommendation struct {
// p2s maps a 16-bit pair value to a single byte symbol.
// NOTE(review): presumably the inverse of s2p - confirm against recommend.
p2s map[uint16]byte
// s2p maps a single byte symbol to a 16-bit pair value; apply emits the
// pair's high byte first and its low byte second.
s2p map[byte]uint16
}
// apply wraps reader in a reader that expands symbols into pairs: every byte
// that has an entry in rec.s2p is replaced by the two bytes of its pair (high
// byte first, low byte second); bytes without an entry pass through unchanged.
//
// NOTE(review): the constructed reader is not yet returned or consumed by this
// function; it is explicitly discarded below so that the file compiles until
// the caller-facing wiring is added.
func apply(rec *recommendation, reader io.Reader) {
	// NOTE(review): the size argument 2 presumably guarantees that output can
	// always hold one expanded pair - confirm against iou.SizedReader.
	symbolReader := iou.SizedReader(iou.ReaderFunc(func(output []byte) (int, error) {
		written := 0

		// Keep at least two free bytes so that an expanded pair always fits.
		for written < len(output)-1 {
			// Read a single byte from the underlying reader.
			count, err := reader.Read(output[written : written+1])

			// A reader may hand back data together with an error, so the byte
			// is processed before the error is considered.
			if count == 1 {
				if pair, ok := rec.s2p[output[written]]; ok {
					output[written] = byte(pair >> 8) // high byte of the pair
					output[written+1] = byte(pair)    // low byte of the pair
					written += 2
				} else {
					written++
				}
			}

			if err != nil || count != 1 {
				return written, err
			}
		}

		// The buffer is full (or too small for another pair); report progress.
		return written, nil
	}), 2)

	// TODO: hand symbolReader to its consumer once one exists.
	_ = symbolReader
}
func recommend(pairs pairSlice, symbols symbolSlice) *recommendation {
var (
rec recommendation
pairsLength = len(pairs)
)
|
| ︙ | ︙ | |||
129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
for index, value := range symbols {
allSymbols = append(allSymbols, symbol{value: byte(index), count: value})
}
sort.Sort(allSymbols)
return availablePairs, allSymbols
}
// String implements fmt.Stringer for debugging output. It prints the high
// byte of the pair value, the low byte, and the count in parentheses.
func (p pair) String() string {
	high := p.value >> 8
	low := p.value & 0xff
	return fmt.Sprintf("[ %d %d (%d) ]", high, low, p.count)
}
// pairSlice is a slice of pair records; recommend accepts its pairs argument
// as this type.
type pairSlice []pair
| > > > > > > > > > > | 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
for index, value := range symbols {
allSymbols = append(allSymbols, symbol{value: byte(index), count: value})
}
sort.Sort(allSymbols)
return availablePairs, allSymbols
}
// pair describes a two-byte sequence together with a counter.
type pair struct {
// value packs the two bytes into 16 bits: first byte in the high 8 bits,
// second byte in the low 8 bits (this is how String splits it).
value uint16
// count is the counter printed in parentheses by String.
// NOTE(review): presumably the number of occurrences in the input - confirm.
count uint64
}
// symbol describes a single byte value together with a counter.
type symbol struct {
// value is the byte itself; analyze fills it from the index of the symbols
// collection it iterates over.
value byte
// count is the entry stored at that index.
// NOTE(review): presumably the number of occurrences in the input - confirm.
count uint64
}
// String satisfies fmt.Stringer and exists for debugging: it formats the pair
// as "[ first second (count) ]", where first and second are the high and low
// bytes of the packed value.
func (p pair) String() string {
	first, second := byte(p.value>>8), byte(p.value)
	return fmt.Sprintf("[ %d %d (%d) ]", first, second, p.count)
}
// pairSlice is a slice of pair records; recommend accepts its pairs argument
// as this type.
type pairSlice []pair
|
| ︙ | ︙ |