Overview
Comment: | bpe encoding flow implementation |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | bpe |
Files: | files | file ages | folders |
SHA1: |
7733ef9df8da017acf08ded0cdc63f29 |
User & Date: | spaskalev on 2014-12-28 15:21:51 |
Other Links: | branch diff | manifest | tags |
Context
2014-12-28
| ||
15:54 | a testable bpe implementation that encodes and decodes in a single execution check-in: a06f897887 user: spaskalev tags: bpe | |
15:21 | bpe encoding flow implementation check-in: 7733ef9df8 user: spaskalev tags: bpe | |
14:14 | Added 0dev.org/ioutil.ReadByte() function and a test for it. CC at 100% check-in: 879630c89c user: spaskalev tags: bpe | |
Changes
Modified src/0dev.org/commands/short/main.go from [2e2860e831] to [c980bb124e].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | package main import ( iou "0dev.org/ioutil" "fmt" "io" "os" "sort" ) func main() { f, err := os.Open(os.Args[1]) if err != nil { os.Stderr.WriteString("Unable to open input file. " + err.Error()) os.Exit(1) } pairs, symbols := analyze(f) | > > > > > | > > > > > > > > > > > > > | | > > > > | > > | | > > | > > | > > | | | | | | > > > | | | | > > > > > > > > | < | | > > | > > > > | | > > | > | > < | < < < | | | < < | > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | package main import ( iou "0dev.org/ioutil" "fmt" "io" // "io/ioutil" "os" "sort" ) func main() { f, err := os.Open(os.Args[1]) if err != nil { os.Stderr.WriteString("Unable to open input file. " + err.Error()) os.Exit(1) } pairs, symbols := analyze(f) _, err = f.Seek(0, 0) if err != nil { os.Stderr.WriteString("Unable to reset input file position. " + err.Error()) os.Exit(1) } rec := recommend(pairs, symbols) reader, err := apply(rec, f) if err != nil { os.Stderr.WriteString("Error while constructing application reader. " + err.Error()) os.Exit(1) } // count, err := io.Copy(ioutil.Discard, reader) // if err != nil { // os.Stderr.WriteString("Error while applying recommendations. " + err.Error()) // os.Exit(1) // } // fmt.Println(count) _, err = io.Copy(os.Stdout, reader) if err != nil { os.Stderr.WriteString("Error while applying recommendations. " + err.Error()) os.Exit(1) } } type recommendation struct { p2s map[uint16]byte s2p map[byte]uint16 unmapped byte } // Returns an io.Reader that reads from the underlying one while applying the given recommendations func apply(rec *recommendation, reader io.Reader) (io.Reader, error) { // The symbol reader replaces symbols with pairs according to the s2p mapping symbolReader := iou.SizedReader(iou.ReaderFunc(func(output []byte) (int, error) { var ( i int = 0 err error ) for ; i < len(output)-1 && err == nil; i++ { // Read a byte from the underlying reader count, err := reader.Read(output[i : i+1]) // If we can't read anything else - return immediatelly if count == 0 { return i, err } // Convert the byte to a pair if there is a mapping for it if p, ok := rec.s2p[output[i]]; ok { output[i] = byte(p >> 8) // extract the high byte from the pair i++ output[i] = byte(p) // leave only the low byte from the pair } } return i, nil }), 2) currentByte, err := iou.ReadByte(symbolReader) if err != nil { return nil, err } // The pair reader replaces pairs with symbols according to the p2s mapping pairReader := iou.ReaderFunc(func(output []byte) (int, error) { var ( total int err error ) start: if len(output) <= 1 || err != nil { return total, err } nextByte, err := iou.ReadByte(symbolReader) if s, ok := rec.p2s[(uint16(currentByte)<<8 | uint16(nextByte))]; ok { output[0] = s currentByte, err = iou.ReadByte(symbolReader) } else { output[0] = currentByte currentByte = nextByte } output, total = output[1:], total+1 goto start }) return pairReader, nil } func recommend(pairs pairSlice, symbols symbolSlice) *recommendation { var rec recommendation rec.p2s = make(map[uint16]byte) // Store pair to symbol mappings rec.s2p = make(map[byte]uint16) // Store symbol to pair mappings for i, pairsLength := 0, len(pairs); i < pairsLength; i++ { currentPair := pairs[i] // Termination condition for when we are out of symbols if len(symbols) == 1 { break } gain := currentPair.count - 4 // 4 bytes for the default header currentSymbol := symbols[0] if currentSymbol.count == 0 { // Termination condition for possitive compression effect if gain <= 0 { break } // Mark the recommendation rec.p2s[currentPair.value] = currentSymbol.value } else { // if the current symbol is present in the data // Decrease the gain by a symbol -> p`, p -> symbol replacement gain = gain - 2 - currentSymbol.count // 2 more bytes for header + replacement cost // Termination condition for possitive compression effect if gain <= 0 { break } // Mark this symbol for replacement by the last unused pair rec.s2p[currentSymbol.value] = pairs[pairsLength-1].value pairsLength-- // Mark the current pair for replacement by the current symbol rec.p2s[currentPair.value] = currentSymbol.value } symbols = symbols[1:] } rec.unmapped = symbols[len(symbols)-1].value return &rec } // Reads the provided input and returns information about the available byte pair and used symbols func analyze(reader io.Reader) (pairSlice, symbolSlice) { var ( current uint16 // Stores a pair of bytes in it's high and low bits |
︙ | ︙ |