Check-in [9dfd3cb1a2]
Overview
Comment:Extracted the predictor's hash function as a method of the context struct. Minor changes to the decompressor's variables.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 9dfd3cb1a202cf8747262b5d7e6dc243268f9b8e
User & Date: spaskalev on 2014-12-22 19:28:49
Other Links: manifest | tags
Context
2014-12-22
19:52
Added documentation for the decompressor check-in: 89bfe97384 user: spaskalev tags: trunk
19:28
Extracted the predictor's hash function as a method of the context struct. Minor changes to the decompressor's variables. check-in: 9dfd3cb1a2 user: spaskalev tags: trunk
17:15
Calculate the decompressed block length outside of the predictor loop check-in: 27ecac81d3 user: spaskalev tags: trunk
Changes

Modified src/0dev.org/predictor/predictor.go from [46cd9d5cef] to [a4885df9dd].

     8      8   )
     9      9   
    10     10   type context struct {
    11     11   	table [1 << 16]byte
    12     12   	input []byte
    13     13   	hash  uint16
    14     14   }
           15  +
           16  +// The following hash code is the heart of the algorithm:
           17  +// It builds a sliding hash sum of the previous 3-and-a-bit
           18  +// characters which will be used to index the guess table.
           19  +// A better hash function would result in additional compression,
           20  +// at the expense of time.
           21  +func (ctx *context) update(val uint16) {
           22  +	ctx.hash = (ctx.hash << 4) ^ val
           23  +}
    15     24   
    16     25   type compressor func([]byte) error
    17     26   
    18     27   func (w compressor) Write(data []byte) (int, error) {
    19     28   	return len(data), w(data)
    20     29   }
    21     30   
................................................................................
    81     90   					// Guess was right - don't output
    82     91   					buf[0] |= 1 << uint(i)
    83     92   				} else {
    84     93   					// Guess was wrong, output char
    85     94   					ctx.table[ctx.hash] = current
    86     95   					buf = append(buf, current)
    87     96   				}
    88         -				ctx.hash = (ctx.hash << 4) ^ uint16(current)
           97  +				ctx.update(uint16(current))
    89     98   			}
    90     99   
    91    100   			if _, err := writer.Write(buf); err != nil {
    92    101   				return err
    93    102   			}
    94    103   
    95    104   			// Reset the flags and buffer for the next iteration
................................................................................
   121    130   // and decompresses data according to the predictor algorithm
   122    131   func Decompressor(reader io.Reader) io.Reader {
   123    132   	var ctx context
   124    133   	ctx.input = make([]byte, 0, 8)
   125    134   
   126    135   	return decompressor(func(output []byte) (int, error) {
   127    136   		var (
   128         -			err                             error
   129         -			flags                           byte
   130         -			rc, available, predicted, total int
          137  +			err               error
          138  +			flags, predicted  byte
          139  +			rc, total, copied int
   131    140   		)
   132    141   
   133    142   		// Sanity check for space to read into
   134    143   		if len(output) == 0 {
   135    144   			return 0, nil
   136    145   		}
   137    146   
................................................................................
   156    165   			return total, err
   157    166   		}
   158    167   
   159    168   		// Extend the buffer, copy the prediction header
   160    169   		//  and calculate the number of subsequent bytes to read
   161    170   		ctx.input = ctx.input[:8]
   162    171   		flags = ctx.input[0]
   163         -		predicted = int(bits.Hamming(flags))
   164         -		available = 8 - predicted
          172  +		predicted = bits.Hamming(flags)
   165    173   
   166    174   		// Read the non-predicted bytes and place them in the end of the buffer
   167    175   		rc, err = reader.Read(ctx.input[predicted:])
   168    176   	retryData:
   169         -		if rc < int(available) && err == nil {
          177  +		if rc < int(8-predicted) && err == nil {
   170    178   			// Retry the read if we have fewer bytes than what the prediction header indicates
   171    179   			var r int
   172         -			r, err = reader.Read(ctx.input[predicted+rc:])
          180  +			r, err = reader.Read(ctx.input[int(predicted)+rc:])
   173    181   			rc += r
   174    182   			goto retryData
   175    183   		} // Continue on any error, try to decompress and return it along the result
   176    184   
   177    185   		// rc now contains the amount of actual bytes in this cycle (usually 8)
   178         -		rc += predicted
          186  +		rc += int(predicted)
   179    187   
   180    188   		// Walk the buffer, filling in the predicted blanks,
   181    189   		// relocating read bytes and and updating the guess table
   182    190   		for i, a := 0, predicted; i < rc; i++ {
   183    191   			if (flags & (1 << uint(i))) > 0 {
   184    192   				// Guess succeeded, fill in from the table
   185    193   				ctx.input[i] = ctx.table[ctx.hash]
................................................................................
   186    194   			} else {
   187    195   				// Relocate a read byte
   188    196   				ctx.input[i], a = ctx.input[a], a+1
   189    197   				// Guess failed, update the table
   190    198   				ctx.table[ctx.hash] = ctx.input[i]
   191    199   			}
   192    200   			// Update the hash
   193         -			ctx.hash = (ctx.hash << 4) ^ uint16(ctx.input[i])
          201  +			ctx.update(uint16(ctx.input[i]))
   194    202   		}
   195    203   
   196    204   		// Copy the decompressed data to the output
   197    205   		ctx.input = ctx.input[:rc]
   198         -		available = copy(output, ctx.input)
          206  +		copied = copy(output, ctx.input)
   199    207   
   200         -		total += available
          208  +		total += copied
   201    209   
   202    210   		// Check for remaining bytes that dont fit in the output buffer
   203         -		if available < rc {
   204         -			ctx.input = ctx.input[:copy(ctx.input, ctx.input[available:])]
          211  +		if copied < rc {
          212  +			ctx.input = ctx.input[:copy(ctx.input, ctx.input[copied:])]
   205    213   		} else {
   206    214   			// Clear the buffer
   207    215   			ctx.input = ctx.input[:0]
   208    216   
   209         -			output = output[available:]
          217  +			output = output[copied:]
   210    218   			if len(output) > 0 && err == nil {
   211    219   				goto readHeader
   212    220   			}
   213    221   		}
   214    222   
   215    223   		return total, err
   216    224   	})
   217    225   }