Overview
| Comment: | a testable bpe implementation that encodes and decodes in a single execution |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | bpe |
| Files: | files | file ages | folders |
| SHA1: |
a06f897887882bb945b3ab8927f72841 |
| User & Date: | spaskalev on 2014-12-28 15:54:10.422 |
| Other Links: | branch diff | manifest | tags |
Context
|
2014-12-28
| ||
| 17:16 | Closing the branch, this particular implementation is unfruitful :) Closed-Leaf check-in: 4fe28f11ac user: spaskalev tags: bpe | |
| 15:54 | a testable bpe implementation that encodes and decodes in a single execution check-in: a06f897887 user: spaskalev tags: bpe | |
| 15:21 | bpe encoding flow implementation check-in: 7733ef9df8 user: spaskalev tags: bpe | |
Changes
Modified src/0dev.org/commands/short/main.go
from [c980bb124e]
to [0cc7fcc383].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
package main
import (
iou "0dev.org/ioutil"
"fmt"
"io"
// "io/ioutil"
"os"
"sort"
)
func main() {
f, err := os.Open(os.Args[1])
if err != nil {
| > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
package main
import (
iou "0dev.org/ioutil"
"fmt"
"io"
// "io/ioutil"
"bytes"
"os"
"sort"
)
func main() {
f, err := os.Open(os.Args[1])
if err != nil {
|
| ︙ | ︙ | |||
27 28 29 30 31 32 33 |
reader, err := apply(rec, f)
if err != nil {
os.Stderr.WriteString("Error while constructing application reader. " + err.Error())
os.Exit(1)
}
| > | | | | > | < > > > > > | > | | | > | > > > > > > > > > > > > | 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
reader, err := apply(rec, f)
if err != nil {
os.Stderr.WriteString("Error while constructing application reader. " + err.Error())
os.Exit(1)
}
var buf bytes.Buffer
_, err = io.Copy(&buf, reader)
if err != nil {
os.Stderr.WriteString("Error while applying recommendations. " + err.Error())
os.Exit(1)
}
var in *bytes.Reader = bytes.NewReader(buf.Bytes())
rev, err := apply(rec.reverse(), in)
if err != nil {
os.Stderr.WriteString("Error while constructing application reader. " + err.Error())
os.Exit(1)
}
_, err = io.Copy(os.Stdout, rev)
if err != nil {
os.Stderr.WriteString("Error while applying recommendations. " + err.Error())
os.Exit(1)
}
}
// recommendation bundles the two substitution tables used when applying
// byte-pair replacements to a stream.
type recommendation struct {
	p2s map[uint16]byte // pair to symbol mappings
	s2p map[byte]uint16 // symbol to pair mappings
}

// reverse returns a freshly allocated recommendation whose tables are the
// inverses of the receiver's: the new p2s is built by swapping keys and
// values of the receiver's s2p, and the new s2p by swapping keys and values
// of the receiver's p2s. The receiver itself is not modified, and both maps
// of the result are always non-nil.
func (r *recommendation) reverse() *recommendation {
	inverted := &recommendation{
		p2s: make(map[uint16]byte),
		s2p: make(map[byte]uint16),
	}

	// Every symbol -> pair entry becomes a pair -> symbol entry.
	for symbol, pair := range r.s2p {
		inverted.p2s[pair] = symbol
	}

	// Every pair -> symbol entry becomes a symbol -> pair entry.
	for pair, symbol := range r.p2s {
		inverted.s2p[symbol] = pair
	}

	return inverted
}
// Returns an io.Reader that reads from the underlying one while applying the given recommendations
func apply(rec *recommendation, reader io.Reader) (io.Reader, error) {
// The symbol reader replaces symbols with pairs according to the s2p mapping
symbolReader := iou.SizedReader(iou.ReaderFunc(func(output []byte) (int, error) {
var (
|
| ︙ | ︙ | |||
115 116 117 118 119 120 121 |
rec.p2s = make(map[uint16]byte) // Store pair to symbol mappings
rec.s2p = make(map[byte]uint16) // Store symbol to pair mappings
for i, pairsLength := 0, len(pairs); i < pairsLength; i++ {
currentPair := pairs[i]
// Termination condition for when we are out of symbols
| | | 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
rec.p2s = make(map[uint16]byte) // Store pair to symbol mappings
rec.s2p = make(map[byte]uint16) // Store symbol to pair mappings
for i, pairsLength := 0, len(pairs); i < pairsLength; i++ {
currentPair := pairs[i]
// Termination condition for when we are out of symbols
if len(symbols) == 1 { // TODO drop to zero ?
break
}
gain := currentPair.count - 4 // 4 bytes for the default header
currentSymbol := symbols[0]
if currentSymbol.count == 0 {
|
| ︙ | ︙ | |||
148 149 150 151 152 153 154 | pairsLength-- // Mark the current pair for replacement by the current symbol rec.p2s[currentPair.value] = currentSymbol.value } symbols = symbols[1:] } | < | 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
pairsLength--
// Mark the current pair for replacement by the current symbol
rec.p2s[currentPair.value] = currentSymbol.value
}
symbols = symbols[1:]
}
return &rec
}
// Reads the provided input and returns information about the available byte pair and used symbols
func analyze(reader io.Reader) (pairSlice, symbolSlice) {
var (
current uint16 // Stores a pair of bytes in its high and low bits
|
| ︙ | ︙ |