Index: src/0dev.org/commands/short/main.go ================================================================== --- src/0dev.org/commands/short/main.go +++ src/0dev.org/commands/short/main.go @@ -14,12 +14,79 @@ os.Stderr.WriteString("Unable to open input file. " + err.Error()) os.Exit(1) } pairs, symbols := analyze(f) - fmt.Println(pairs) - fmt.Println(symbols) + + rec := recommend(pairs, symbols) + fmt.Println(*rec) +} + +type pair struct { + value uint16 + count uint64 +} + +type symbol struct { + value byte + count uint64 +} + +type recommendation struct { + p2s map[uint16]byte + s2p map[byte]uint16 +} + +func recommend(pairs pairSlice, symbols symbolSlice) *recommendation { + var ( + rec recommendation + pairsLength = len(pairs) + ) + + rec.p2s = make(map[uint16]byte) // Store pair to symbol mappings + rec.s2p = make(map[byte]uint16) // Store symbol to pair mappings + + for i := 0; i < pairsLength; i++ { + currentPair := pairs[i] + + // Termination condition for when we are out of symbols + if len(symbols) == 0 { + break + } + + gain := currentPair.count - 4 // 4 bytes for the default header + currentSymbol := symbols[0] + + if currentSymbol.count == 0 { + // Termination condition for possitive compression effect + if gain <= 0 { + break + } + + // Mark the recommendation and proceed with the next pair + rec.p2s[currentPair.value] = currentSymbol.value + continue + } else { // if the current symbol is present in the data + // Decrease the gain by a symbol -> p`, p -> symbol replacement + gain -= 2 // Additional 2 bytes for the more complex header + gain -= currentSymbol.count // Account for swaping the symbol to a pair in order to free it + + // Termination condition for possitive compression effect + if gain <= 0 { + break + } + + // Mark this symbol for replacement by the last unused pair + rec.s2p[currentSymbol.value] = pairs[pairsLength-1].value + pairsLength-- + + // Mark the current pair for replacement by the current symbol + rec.p2s[currentPair.value] = currentSymbol.value + } + } + + return &rec } // Reads the provided input and returns information about the available byte pair and used symbols func analyze(reader io.Reader) (pairSlice, symbolSlice) { var ( @@ -48,16 +115,14 @@ symbols[value]++ } return len(data), nil }), reader) - // Extract and sort all available byte pairs + // Extract and sort all byte pairs availablePairs := make(pairSlice, 0) for index, value := range pairs { - if value > 0 { - availablePairs = append(availablePairs, pair{value: uint16(index), count: value}) - } + availablePairs = append(availablePairs, pair{value: uint16(index), count: value}) } sort.Sort(availablePairs) // Extract and sort all symbols (including the ones with zero counts) allSymbols := make(symbolSlice, 0) @@ -67,15 +132,10 @@ sort.Sort(allSymbols) return availablePairs, allSymbols } -type pair struct { - value uint16 - count uint64 -} - // Implements fmt.Stringer, used for debugging func (p pair) String() string { return fmt.Sprintf("[ %d %d (%d) ]", (p.value >> 8), ((p.value << 8) >> 8), p.count) } @@ -92,24 +152,19 @@ func (s pairSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -type symbol struct { - value byte - count uint64 -} - type symbolSlice []symbol func (s symbolSlice) Len() int { return len(s) } func (s symbolSlice) Less(i, j int) bool { - // Sort in descending order - return s[i].count > s[j].count + // Sort in ascending order + return s[i].count < s[j].count } func (s symbolSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }