well-goknown/vendor/github.com/greatroar/blobloom/io.go
2024-10-28 20:11:29 -04:00

247 lines
6.7 KiB
Go

// Copyright 2023 the Blobloom authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package blobloom
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"strings"
"sync/atomic"
)
// maxCommentLen is the maximum length in bytes of a dump comment: the
// 64-byte header minus the 20 bytes that precede the comment field.
const maxCommentLen = 44
// Dump serializes f to w in the binary format understood by a Loader,
// embedding the optional comment string in the header. It reports how many
// bytes were written to w.
//
// The comment may hold arbitrary data, subject to the limits laid out in
// the format description. A typical use is recording which hash function
// the Filter is meant to be used with.
func Dump(w io.Writer, f *Filter, comment string) (int64, error) {
	blocks, nhashes := f.b, f.k
	return dump(w, blocks, nhashes, comment)
}
// DumpSync is the SyncFilter counterpart of Dump.
//
// Modifications made to f by other goroutines while the dump is in
// progress may be missing from the output. Callers that need to rule
// this out must provide their own synchronization.
//
// The output format is identical to Dump's. Nothing in the dump records
// that it was produced from a SyncFilter.
func DumpSync(w io.Writer, f *SyncFilter, comment string) (n int64, err error) {
	n, err = dump(w, f.b, f.k, comment)
	return n, err
}
// dump writes the 64-byte header followed by the blocks of b to w and
// returns the total number of bytes written. It is the shared implementation
// behind Dump and DumpSync.
//
// Nothing is written, and an error is returned, when b is empty or nhashes
// is zero, or when comment is longer than maxCommentLen bytes or contains a
// zero byte. If a write to w fails, dump stops and returns the number of
// bytes written so far together with the write error.
func dump(w io.Writer, b []block, nhashes int, comment string) (n int64, err error) {
	switch {
	case len(b) == 0 || nhashes == 0:
		err = errors.New("blobloom: won't dump uninitialized Filter")
	case len(comment) > maxCommentLen:
		err = fmt.Errorf("blobloom: comment of length %d too long", len(comment))
	case strings.IndexByte(comment, 0) != -1:
		// Quote the comment itself. Passing its length to %q would print
		// a character literal instead of the offending text.
		err = fmt.Errorf("blobloom: comment %q contains zero byte", comment)
	}
	if err != nil {
		return 0, err
	}

	var buf [64]byte
	copy(buf[:8], "blobloom")
	// buf[8:12] is the version number, which stays zero.

	// As documented in the comment for Loader, we store one less than the
	// number of blocks. This way, we can use the otherwise invalid value 0
	// and store 2³² blocks instead of at most 2³²-1.
	binary.LittleEndian.PutUint32(buf[12:], uint32(len(b)-1))
	binary.LittleEndian.PutUint32(buf[16:], uint32(nhashes))
	copy(buf[20:], comment)

	k, err := w.Write(buf[:])
	n = int64(k)
	if err != nil {
		return n, err
	}

	// buf is reused as scratch space for each block. Limbs are read with
	// atomic loads because DumpSync may pass blocks that other goroutines
	// are modifying concurrently.
	for i := range b {
		for j := range b[i] {
			x := atomic.LoadUint32(&b[i][j])
			binary.LittleEndian.PutUint32(buf[4*j:], x)
		}
		k, err = w.Write(buf[:])
		n += int64(k)
		if err != nil {
			break
		}
	}

	return n, err
}
// A Loader reads a Filter or SyncFilter from an io.Reader.
//
// A Loader accepts the binary format produced by Dump. The format starts
// with a 64-byte header:
// - the string "blobloom", in ASCII;
// - a four-byte version number, which must be zero;
// - the number of Bloom filter blocks, minus one, as a 32-bit integer;
// - the number of hashes, as a 32-bit integer;
// - a comment of at most 44 non-zero bytes, padded to 44 bytes with zeros.
//
// After the header come the 512-bit blocks, divided into sixteen 32-bit limbs.
// All integers are little-endian.
type Loader struct {
// buf holds the most recently read 64-byte chunk of the dump: first the
// header, then one block at a time.
buf [64]byte
// r is the source the dump is read from.
r io.Reader
// NOTE(review): err is not referenced by any code visible here; confirm
// whether it is still needed.
err error
Comment string // Comment field. Filled in by NewLoader.
// nblocks is the number of blocks in the dump, i.e. the value stored in
// the header plus one.
nblocks uint64
// nhashes is the number of hashes recorded in the dump header.
nhashes int
}
// NewLoader reads the 64-byte format header from r, validates it, and
// returns a Loader from which the dumped Filter can subsequently be loaded.
//
// An error is returned if the header cannot be read in full, does not start
// with the "blobloom" magic string, has a non-zero version, records zero
// hashes, or carries a malformed comment field.
func NewLoader(r io.Reader) (*Loader, error) {
	ld := &Loader{r: r}
	if err := ld.fillbuf(); err != nil {
		return nil, err
	}

	header := ld.buf[:]
	magic := string(header[:8])
	version := binary.LittleEndian.Uint32(header[8:])
	// The dump stores the block count minus one; undo that here.
	ld.nblocks = uint64(binary.LittleEndian.Uint32(header[12:])) + 1
	ld.nhashes = int(binary.LittleEndian.Uint32(header[16:]))

	if magic != "blobloom" {
		return nil, errors.New("blobloom: not a Bloom filter dump")
	}
	if version != 0 {
		return nil, errors.New("blobloom: unsupported dump version")
	}
	if ld.nhashes == 0 {
		return nil, errors.New("blobloom: zero hashes in Bloom filter dump")
	}

	comment, err := checkComment(header[20:])
	if err != nil {
		return nil, err
	}
	ld.Comment = string(comment)
	return ld, nil
}
// Load merges the Loader's filter into f with a bitwise OR, so that f
// becomes the union of the two, and returns f. When f is nil, Load first
// constructs a new Filter with New, sized from the dump's block count and
// number of hashes.
//
// A non-nil f must have the same number of blocks and hashes as the dump;
// otherwise an error is returned. If reading from the Loader fails partway
// through, a non-nil f may be left in an inconsistent state.
func (l *Loader) Load(f *Filter) (*Filter, error) {
	if f != nil {
		if err := l.checkBitsAndHashes(len(f.b), f.k); err != nil {
			return nil, err
		}
	} else {
		total := BlockBits * l.nblocks
		if total > MaxBits {
			return nil, fmt.Errorf("blobloom: %d blocks is too large", l.nblocks)
		}
		f = New(total, l.nhashes)
	}

	for i := range f.b {
		// Each iteration consumes exactly one block from the reader.
		if err := l.fillbuf(); err != nil {
			return nil, err
		}

		limbs := f.b[i][:]
		for j := range limbs {
			limbs[j] |= binary.LittleEndian.Uint32(l.buf[4*j:])
		}
	}

	return f, nil
}
// LoadSync merges the Loader's filter into f with a bitwise OR, so that f
// becomes the union of the two, and returns f. When f is nil, LoadSync
// first constructs a new SyncFilter with NewSync, sized from the dump's
// block count and number of hashes. Otherwise, LoadSync may run
// concurrently with other modifications to f.
//
// A non-nil f must have the same number of blocks and hashes as the dump;
// otherwise an error is returned. If reading from the Loader fails partway
// through, a non-nil f may be left in an inconsistent state.
func (l *Loader) LoadSync(f *SyncFilter) (*SyncFilter, error) {
	if f != nil {
		if err := l.checkBitsAndHashes(len(f.b), f.k); err != nil {
			return nil, err
		}
	} else {
		total := BlockBits * l.nblocks
		if total > MaxBits {
			return nil, fmt.Errorf("blobloom: %d blocks is too large", l.nblocks)
		}
		f = NewSync(total, l.nhashes)
	}

	for i := range f.b {
		// Each iteration consumes exactly one block from the reader.
		if err := l.fillbuf(); err != nil {
			return nil, err
		}

		for j := range f.b[i] {
			word := &f.b[i][j]
			bits := binary.LittleEndian.Uint32(l.buf[4*j:])

			// Atomically OR bits into *word, retrying whenever another
			// goroutine changes the word between the load and the swap.
			for done := false; !done; {
				cur := atomic.LoadUint32(word)
				done = atomic.CompareAndSwapUint32(word, cur, cur|bits)
			}
		}
	}

	return f, nil
}
// checkBitsAndHashes returns an error if a filter with the given number of
// blocks and hashes does not match the dimensions recorded in the dump
// header, and nil if both match.
func (l *Loader) checkBitsAndHashes(nblocks, nhashes int) error {
	// Compare block counts as uint64: l.nblocks can be as large as 2³²,
	// which does not fit in an int on 32-bit platforms and would be
	// silently truncated by an int conversion.
	if uint64(nblocks) != l.nblocks {
		return fmt.Errorf("blobloom: Filter has %d blocks, but dump has %d", nblocks, l.nblocks)
	}
	if nhashes != l.nhashes {
		return fmt.Errorf("blobloom: Filter has %d hashes, but dump has %d", nhashes, l.nhashes)
	}
	return nil
}
// fillbuf reads the next 64 bytes from the underlying reader into l.buf.
// A clean end of input is reported as io.ErrUnexpectedEOF, since callers
// only invoke fillbuf when more data is required.
func (l *Loader) fillbuf() error {
	if _, err := io.ReadFull(l.r, l.buf[:]); err != nil {
		if err == io.EOF {
			return io.ErrUnexpectedEOF
		}
		return err
	}
	return nil
}
// checkComment validates the zero-padded comment field p and returns the
// comment with its padding removed. The comment ends at the first zero
// byte; every byte after that must also be zero, otherwise an error is
// returned. A field without any zero byte is returned unchanged.
func checkComment(p []byte) ([]byte, error) {
	end := bytes.IndexByte(p, 0)
	if end < 0 {
		return p, nil
	}

	// Anything non-zero after the terminator means a zero byte was
	// embedded inside the comment.
	for _, c := range p[end+1:] {
		if c != 0 {
			return nil, fmt.Errorf("blobloom: comment block %q contains zero byte", p)
		}
	}

	return p[:end], nil
}