Check-in [50507bd510]
Overview
SHA1:50507bd510ea785ba85b12897d7c3be04f114166
Date: 2014-12-25 00:43:31
User: spaskalev
Comment:Extracted predictor's compressor and decompressor code into separate structs that embed Sized{Writer,Reader}
Timelines: family | ancestors | descendants | both | trunk
Downloads: Tarball | ZIP archive
Other Links: files | file ages | folders | manifest
Tags And Properties
Context
2014-12-25
00:55
[4dfcff962c] Predictor's compressor and decompressor structures now implement io.Writer/io.Reader in order to deal away with function indirection but they do not follow the required semantics. Those are provided by the SizedWriter/SizedReader wrappers returned by the constructor functions. (user: spaskalev, tags: trunk)
00:43
[50507bd510] Extracted predictor's compressor and decompressor code into separate structs that embed Sized{Writer,Reader} (user: spaskalev, tags: trunk)
00:26
[46da7a6ae9] Extracted SizedWriter to a sizedWriter struct with a Write() method. (user: spaskalev, tags: trunk)
Changes

Modified src/0dev.org/predictor/predictor.go from [43e7f751ed] to [cb877636af].

     4      4   
     5      5   import (
     6      6   	bits "0dev.org/bits"
     7      7   	iou "0dev.org/ioutil"
     8      8   	"io"
     9      9   )
    10     10   
           11  +// The context struct contains the predictor's algorithm guess table
           12  +// and the current value of its input/output hash
    11     13   type context struct {
    12     14   	table [1 << 16]byte
    13         -	input []byte
    14     15   	hash  uint16
    15     16   }
    16     17   
    17     18   // The following hash code is the heart of the algorithm:
    18     19   // It builds a sliding hash sum of the previous 3-and-a-bit
    19     20   // characters which will be used to index the guess table.
    20     21   // A better hash function would result in additional compression,
................................................................................
    25     26   
    26     27   // Returns an io.Writer implementation that wraps the provided io.Writer
    27     28   // and compresses data according to the predictor algorithm
    28     29   //
    29     30   // It can buffer data as the predictor mandates 8-byte blocks with a header.
    30     31   // A call with no data will force a flush.
    31     32   func Compressor(writer io.Writer) io.Writer {
    32         -	var ctx context
    33         -
    34         -	return iou.SizedWriter(iou.WriterFunc(func(data []byte) (int, error) {
    35         -		var (
    36         -			blockSize  int = 8
    37         -			datalength int = len(data)
    38         -		)
    39         -
    40         -		if datalength == 0 {
    41         -			return 0, nil
    42         -		}
    43         -
    44         -		if datalength < blockSize {
    45         -			blockSize = datalength
           33  +	var cmp compressor
           34  +	cmp.Writer = iou.SizedWriter(iou.WriterFunc(cmp.compress), 8)
           35  +	cmp.target = writer
           36  +	return &cmp
           37  +}
           38  +
           39  +type compressor struct {
           40  +	context
           41  +	io.Writer
           42  +	target io.Writer
           43  +}
           44  +
           45  +func (ctx *compressor) compress(data []byte) (int, error) {
           46  +	var (
           47  +		blockSize  int = 8
           48  +		datalength int = len(data)
           49  +	)
           50  +
           51  +	if datalength == 0 {
           52  +		return 0, nil
           53  +	}
           54  +
           55  +	if datalength < blockSize {
           56  +		blockSize = datalength
           57  +	}
           58  +
           59  +	var buf []byte = make([]byte, 1, blockSize+1)
           60  +	for block := 0; block < datalength/blockSize; block++ {
           61  +		for i := 0; i < blockSize; i++ {
           62  +			var current byte = data[(block*blockSize)+i]
           63  +			if ctx.table[ctx.hash] == current {
           64  +				// Guess was right - don't output
           65  +				buf[0] |= 1 << uint(i)
           66  +			} else {
           67  +				// Guess was wrong, output char
           68  +				ctx.table[ctx.hash] = current
           69  +				buf = append(buf, current)
           70  +			}
           71  +			ctx.update(current)
    46     72   		}
    47     73   
    48         -		var buf []byte = make([]byte, 1, blockSize+1)
    49         -		for block := 0; block < datalength/blockSize; block++ {
    50         -			for i := 0; i < blockSize; i++ {
    51         -				var current byte = data[(block*blockSize)+i]
    52         -				if ctx.table[ctx.hash] == current {
    53         -					// Guess was right - don't output
    54         -					buf[0] |= 1 << uint(i)
    55         -				} else {
    56         -					// Guess was wrong, output char
    57         -					ctx.table[ctx.hash] = current
    58         -					buf = append(buf, current)
    59         -				}
    60         -				ctx.update(current)
    61         -			}
    62         -
    63         -			if c, err := writer.Write(buf); err != nil {
    64         -				return (block * blockSize) + c, err
    65         -			}
    66         -
    67         -			// Reset the flags and buffer for the next iteration
    68         -			buf, buf[0] = buf[:1], 0
           74  +		if c, err := ctx.target.Write(buf); err != nil {
           75  +			return (block * blockSize) + c, err
    69     76   		}
    70     77   
    71         -		return datalength, nil
    72         -	}), 8)
           78  +		// Reset the flags and buffer for the next iteration
           79  +		buf, buf[0] = buf[:1], 0
           80  +	}
           81  +
           82  +	return datalength, nil
    73     83   }
    74     84   
    75     85   // Returns an io.Reader implementation that wraps the provided io.Reader
    76     86   // and decompresses data according to the predictor algorithm
    77     87   func Decompressor(reader io.Reader) io.Reader {
    78         -	var ctx context
    79         -	ctx.input = make([]byte, 0, 8)
           88  +	var dcmp decompressor
           89  +	dcmp.Reader = iou.SizedReader(iou.ReaderFunc(dcmp.decompress), 8)
           90  +	dcmp.source = reader
           91  +	dcmp.input = make([]byte, 0, 8)
           92  +	return &dcmp
           93  +}
           94  +
           95  +type decompressor struct {
           96  +	context
           97  +	io.Reader
           98  +	source io.Reader
           99  +	input  []byte
          100  +}
          101  +
          102  +func (ctx *decompressor) decompress(output []byte) (int, error) {
          103  +	var (
          104  +		err               error
          105  +		flags, predicted  byte
          106  +		rc, total, copied int
          107  +	)
          108  +
          109  +	// Read the next prediction header
          110  +readHeader:
          111  +	rc, err = ctx.source.Read(ctx.input[:1])
          112  +	// Fail on error unless it is EOF
          113  +	if err != nil && err != io.EOF {
          114  +		return total, err
          115  +	} else if rc == 0 {
          116  +		return total, err
          117  +	}
    80    118   
    81         -	return iou.SizedReader(iou.ReaderFunc(func(output []byte) (int, error) {
    82         -		var (
    83         -			err               error
    84         -			flags, predicted  byte
    85         -			rc, total, copied int
    86         -		)
          119  +	// Extend the buffer, copy the prediction header
          120  +	//  and calculate the number of subsequent bytes to read
          121  +	ctx.input = ctx.input[:8]
          122  +	flags = ctx.input[0]
          123  +	predicted = bits.Hamming(flags)
    87    124   
    88         -		// Read the next prediction header
    89         -	readHeader:
    90         -		rc, err = reader.Read(ctx.input[:1])
    91         -		// Fail on error unless it is EOF
    92         -		if err != nil && err != io.EOF {
    93         -			return total, err
    94         -		} else if rc == 0 {
    95         -			return total, err
    96         -		}
          125  +	// Read the non-predicted bytes and place them in the end of the buffer
          126  +	rc, err = ctx.source.Read(ctx.input[predicted:])
          127  +retryData:
          128  +	if rc < int(8-predicted) && err == nil {
          129  +		// Retry the read if we have fewer bytes than what the prediction header indicates
          130  +		var r int
          131  +		r, err = ctx.source.Read(ctx.input[int(predicted)+rc:])
          132  +		rc += r
          133  +		goto retryData
          134  +	} // Continue on any error, try to decompress and return it along the result
    97    135   
    98         -		// Extend the buffer, copy the prediction header
    99         -		//  and calculate the number of subsequent bytes to read
   100         -		ctx.input = ctx.input[:8]
   101         -		flags = ctx.input[0]
   102         -		predicted = bits.Hamming(flags)
   103         -
   104         -		// Read the non-predicted bytes and place them in the end of the buffer
   105         -		rc, err = reader.Read(ctx.input[predicted:])
   106         -	retryData:
   107         -		if rc < int(8-predicted) && err == nil {
   108         -			// Retry the read if we have fewer bytes than what the prediction header indicates
   109         -			var r int
   110         -			r, err = reader.Read(ctx.input[int(predicted)+rc:])
   111         -			rc += r
   112         -			goto retryData
   113         -		} // Continue on any error, try to decompress and return it along the result
          136  +	// rc now contains the amount of actual bytes in this cycle (usually 8)
          137  +	rc += int(predicted)
   114    138   
   115         -		// rc now contains the amount of actual bytes in this cycle (usually 8)
   116         -		rc += int(predicted)
   117         -
   118         -		// Walk the buffer, filling in the predicted blanks,
   119         -		// relocating read bytes and and updating the guess table
   120         -		for i, a := 0, predicted; i < rc; i++ {
   121         -			if (flags & (1 << uint(i))) > 0 {
   122         -				// Guess succeeded, fill in from the table
   123         -				ctx.input[i] = ctx.table[ctx.hash]
   124         -			} else {
   125         -				// Relocate a read byte and advance the read byte index
   126         -				ctx.input[i], a = ctx.input[a], a+1
   127         -				// Guess failed, update the table
   128         -				ctx.table[ctx.hash] = ctx.input[i]
   129         -			}
   130         -			// Update the hash
   131         -			ctx.update(ctx.input[i])
          139  +	// Walk the buffer, filling in the predicted blanks,
          140  +	// relocating read bytes and and updating the guess table
          141  +	for i, a := 0, predicted; i < rc; i++ {
          142  +		if (flags & (1 << uint(i))) > 0 {
          143  +			// Guess succeeded, fill in from the table
          144  +			ctx.input[i] = ctx.table[ctx.hash]
          145  +		} else {
          146  +			// Relocate a read byte and advance the read byte index
          147  +			ctx.input[i], a = ctx.input[a], a+1
          148  +			// Guess failed, update the table
          149  +			ctx.table[ctx.hash] = ctx.input[i]
   132    150   		}
          151  +		// Update the hash
          152  +		ctx.update(ctx.input[i])
          153  +	}
   133    154   
   134         -		// Copy the decompressed data to the output and accumulate the count
   135         -		copied = copy(output, ctx.input[:rc])
   136         -		total += copied
          155  +	// Copy the decompressed data to the output and accumulate the count
          156  +	copied = copy(output, ctx.input[:rc])
          157  +	total += copied
   137    158   
   138         -		// Clear the buffer
   139         -		ctx.input = ctx.input[:0]
          159  +	// Clear the buffer
          160  +	ctx.input = ctx.input[:0]
   140    161   
   141         -		// Loop for another pass if there is available space in the output
   142         -		output = output[copied:]
   143         -		if len(output) > 0 && err == nil {
   144         -			goto readHeader
   145         -		}
          162  +	// Loop for another pass if there is available space in the output
          163  +	output = output[copied:]
          164  +	if len(output) > 0 && err == nil {
          165  +		goto readHeader
          166  +	}
   146    167   
   147         -		return total, err
   148         -	}), 8)
          168  +	return total, err
   149    169   }