ADDED src/0dev.org/commands/short/main.go Index: src/0dev.org/commands/short/main.go ================================================================== --- src/0dev.org/commands/short/main.go +++ src/0dev.org/commands/short/main.go @@ -0,0 +1,115 @@ +package main + +import ( + iou "0dev.org/ioutil" + "fmt" + "io" + "os" + "sort" +) + +func main() { + f, err := os.Open(os.Args[1]) + if err != nil { + os.Stderr.WriteString("Unable to open input file. " + err.Error()) + os.Exit(1) + } + + pairs, symbols := analyze(f) + fmt.Println(pairs) + fmt.Println(symbols) +} + +// Reads the provided input and returns information about the available byte pair and used symbols +func analyze(reader io.Reader) (pairSlice, symbolSlice) { + var ( + current uint16 // Stores a pair of bytes in it's high and low bits + buffer []byte = make([]byte, 1) + pairs []uint64 = make([]uint64, 65536) // all possible pairs, 512kb + symbols []uint64 = make([]uint64, 256) // all possible characters, 2kb + ) + + // Read the first byte and store in the low bits of the current pair + if c, err := reader.Read(buffer); err != nil || c != 1 { + os.Stderr.WriteString("Error reading input.") + os.Exit(1) + } + current = uint16(buffer[0]) + + // Read all of the data and note the counts of bytes and byte pairs + io.Copy(iou.WriterFunc(func(data []byte) (int, error) { + for _, value := range data { + // Store pairs frequency + current <<= 8 // Shift the previous byte from low to high + current |= uint16(value) // Add the current byte to low + pairs[current]++ + + // Store bytes frequency + symbols[value]++ + } + return len(data), nil + }), reader) + + // Extract and sort all available byte pairs + availablePairs := make(pairSlice, 0) + for index, value := range pairs { + if value > 0 { + availablePairs = append(availablePairs, pair{value: uint16(index), count: value}) + } + } + sort.Sort(availablePairs) + + // Extract and sort all symbols (including the ones with zero counts) + allSymbols := make(symbolSlice, 0) + for index, value := range symbols { + allSymbols = append(allSymbols, symbol{value: byte(index), count: value}) + } + sort.Sort(allSymbols) + + return availablePairs, allSymbols +} + +type pair struct { + value uint16 + count uint64 +} + +// Implements fmt.Stringer, used for debugging +func (p pair) String() string { + return fmt.Sprintf("[ %d %d (%d) ]", (p.value >> 8), ((p.value << 8) >> 8), p.count) +} + +type pairSlice []pair + +func (s pairSlice) Len() int { + return len(s) +} + +func (s pairSlice) Less(i, j int) bool { + // Sort in descending order + return s[i].count > s[j].count +} + +func (s pairSlice) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +type symbol struct { + value byte + count uint64 +} + +type symbolSlice []symbol + +func (s symbolSlice) Len() int { + return len(s) +} + +func (s symbolSlice) Less(i, j int) bool { + // Sort in descending order + return s[i].count > s[j].count +} + +func (s symbolSlice) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +}