// Package matchfinder defines reusable components for data compression.
//
// Many compression libraries have two main parts:
//   - Something that looks for repeated sequences of bytes
//   - An encoder for the compressed data format (often an entropy coder)
//
// Although these are logically two separate steps, the implementations are
// usually closely tied together. You can't use flate's matcher with snappy's
// encoder, for example. This package defines interfaces and an intermediate
// representation to allow mixing and matching compression components.
package matchfinder
import "io"
// A Match is the basic unit of LZ77 compression. Each Match records the
// unmatched bytes that precede it together with the length and distance of
// the matched string that follows them.
type Match struct {
	// Unmatched is the number of unmatched bytes since the previous match.
	Unmatched int
	// Length is the number of bytes in the matched string; it may be 0 at
	// the end of the input.
	Length int
	// Distance is how far back in the stream to copy from.
	Distance int
}
// A MatchFinder performs the LZ77 stage of compression, looking for matches.
|
|
type MatchFinder interface {
|
|
// FindMatches looks for matches in src, appends them to dst, and returns dst.
|
|
FindMatches(dst []Match, src []byte) []Match
|
|
|
|
// Reset clears any internal state, preparing the MatchFinder to be used with
|
|
// a new stream.
|
|
Reset()
|
|
}
// An Encoder encodes the data in its final format.
|
|
type Encoder interface {
|
|
// Encode appends the encoded format of src to dst, using the match
|
|
// information from matches.
|
|
Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte
|
|
|
|
// Reset clears any internal state, preparing the Encoder to be used with
|
|
// a new stream.
|
|
Reset()
|
|
}
// A Writer uses MatchFinder and Encoder to write compressed data to Dest.
|
|
type Writer struct {
|
|
Dest io.Writer
|
|
MatchFinder MatchFinder
|
|
Encoder Encoder
|
|
|
|
// BlockSize is the number of bytes to compress at a time. If it is zero,
|
|
// each Write operation will be treated as one block.
|
|
BlockSize int
|
|
|
|
err error
|
|
inBuf []byte
|
|
outBuf []byte
|
|
matches []Match
|
|
}
func (w *Writer) Write(p []byte) (n int, err error) {
|
|
if w.err != nil {
|
|
return 0, w.err
|
|
}
|
|
|
|
if w.BlockSize == 0 {
|
|
return w.writeBlock(p, false)
|
|
}
|
|
|
|
w.inBuf = append(w.inBuf, p...)
|
|
var pos int
|
|
for pos = 0; pos+w.BlockSize <= len(w.inBuf) && w.err == nil; pos += w.BlockSize {
|
|
w.writeBlock(w.inBuf[pos:pos+w.BlockSize], false)
|
|
}
|
|
if pos > 0 {
|
|
n := copy(w.inBuf, w.inBuf[pos:])
|
|
w.inBuf = w.inBuf[:n]
|
|
}
|
|
|
|
return len(p), w.err
|
|
}
func (w *Writer) writeBlock(p []byte, lastBlock bool) (n int, err error) {
|
|
w.outBuf = w.outBuf[:0]
|
|
w.matches = w.MatchFinder.FindMatches(w.matches[:0], p)
|
|
w.outBuf = w.Encoder.Encode(w.outBuf, p, w.matches, lastBlock)
|
|
_, w.err = w.Dest.Write(w.outBuf)
|
|
return len(p), w.err
|
|
}
func (w *Writer) Close() error {
|
|
w.writeBlock(w.inBuf, true)
|
|
w.inBuf = w.inBuf[:0]
|
|
return w.err
|
|
}
func (w *Writer) Reset(newDest io.Writer) {
|
|
w.MatchFinder.Reset()
|
|
w.Encoder.Reset()
|
|
w.err = nil
|
|
w.inBuf = w.inBuf[:0]
|
|
w.outBuf = w.outBuf[:0]
|
|
w.matches = w.matches[:0]
|
|
w.Dest = newDest
|
|
}