well-goknown/vendor/github.com/greatroar/blobloom/io.go

// Copyright 2023 the Blobloom authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package blobloom

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"strings"
	"sync/atomic"
)

// maxCommentLen is the maximum length, in bytes, of the comment stored in a
// dump header: the 64-byte header minus the 20 bytes taken up by the magic
// string, version, block count and hash count.
const maxCommentLen = 44

// Dump serializes f to w in the binary format understood by Loader and
// reports how many bytes were written to w.
//
// The comment is optional and may hold arbitrary data, within the limits
// laid out by the format description (see Loader). A typical use is to
// record which hash function the Filter is meant to be used with.
func Dump(w io.Writer, f *Filter, comment string) (int64, error) {
	written, err := dump(w, f.b, f.k, comment)
	return written, err
}

// DumpSync is the SyncFilter counterpart of Dump.
//
// Modifications made to f by other goroutines while the dump is in
// progress may or may not show up in the output; callers that need a
// consistent snapshot must provide their own synchronization.
//
// The output format is identical to Dump's: nothing in the dump records
// that it was produced from a SyncFilter.
func DumpSync(w io.Writer, f *SyncFilter, comment string) (n int64, err error) {
	n, err = dump(w, f.b, f.k, comment)
	return n, err
}

// dump writes the 64-byte header followed by every block of b to w and
// returns the total number of bytes written.
//
// It refuses to write anything (returning 0 and an error) when the filter is
// uninitialized (no blocks or zero hashes), when comment is longer than
// maxCommentLen bytes, or when comment contains a zero byte. If a write to w
// fails, the byte count so far and the writer's error are returned.
//
// Limbs are read with atomic loads so that the same code can serve both
// Filter and SyncFilter.
func dump(w io.Writer, b []block, nhashes int, comment string) (n int64, err error) {
	switch {
	case len(b) == 0 || nhashes == 0:
		return 0, errors.New("blobloom: won't dump uninitialized Filter")
	case len(comment) > maxCommentLen:
		return 0, fmt.Errorf("blobloom: comment of length %d too long", len(comment))
	case strings.IndexByte(comment, 0) != -1:
		// Report the comment itself; %q escapes the embedded zero byte.
		return 0, fmt.Errorf("blobloom: comment %q contains zero byte", comment)
	}

	// The buffer starts zeroed, so the version field (bytes 8-11) and the
	// comment padding are zero without further work.
	var buf [64]byte
	copy(buf[:8], "blobloom")
	// As documented in the comment for Loader, we store one less than the
	// number of blocks. This way, we can use the otherwise invalid value 0
	// and store 2³² blocks instead of at most 2³²-1.
	binary.LittleEndian.PutUint32(buf[12:], uint32(len(b)-1))
	binary.LittleEndian.PutUint32(buf[16:], uint32(nhashes))
	copy(buf[20:], comment)

	k, err := w.Write(buf[:])
	n = int64(k)
	if err != nil {
		return n, err
	}

	// Reuse buf for the blocks: every limb of the block overwrites the
	// corresponding four bytes before the buffer is written out.
	for i := range b {
		for j := range b[i] {
			binary.LittleEndian.PutUint32(buf[4*j:], atomic.LoadUint32(&b[i][j]))
		}
		k, err = w.Write(buf[:])
		n += int64(k)
		if err != nil {
			return n, err
		}
	}

	return n, nil
}

// A Loader reads a Filter or SyncFilter from an io.Reader.
//
// A Loader accepts the binary format produced by Dump. The format starts
// with a 64-byte header:
//   - the string "blobloom", in ASCII;
//   - a four-byte version number, which must be zero;
//   - the number of Bloom filter blocks, minus one, as a 32-bit integer;
//   - the number of hashes, as a 32-bit integer;
//   - a comment of at most 44 non-zero bytes, padded to 44 bytes with zeros.
//
// After the header come the 512-bit blocks, divided into sixteen 32-bit limbs.
// All integers are little-endian.
type Loader struct {
	// buf holds the 64 bytes most recently read from r: first the header,
	// then one block at a time.
	buf [64]byte
	// r is the source the dump is read from.
	r   io.Reader
	// NOTE(review): err is not read or written by any code visible in this
	// file; confirm whether it is still needed.
	err error

	Comment string // Comment field. Filled in by NewLoader.
	// nblocks is the number of blocks announced by the header (the stored
	// value plus one).
	nblocks uint64
	// nhashes is the number of hashes announced by the header; never zero
	// for a Loader returned by NewLoader.
	nhashes int
}

// NewLoader reads the 64-byte format header from r, validates it, and
// returns a Loader from which a Filter or SyncFilter can be loaded.
//
// A nil Loader and an error are returned when the header cannot be read in
// full, does not start with the "blobloom" magic string, has a non-zero
// version, declares zero hashes, or carries a malformed comment.
func NewLoader(r io.Reader) (*Loader, error) {
	ld := &Loader{r: r}
	if err := ld.fillbuf(); err != nil {
		return nil, err
	}
	hdr := ld.buf[:]

	if string(hdr[:8]) != "blobloom" {
		return nil, errors.New("blobloom: not a Bloom filter dump")
	}
	if version := binary.LittleEndian.Uint32(hdr[8:]); version != 0 {
		return nil, errors.New("blobloom: unsupported dump version")
	}

	// The header stores the block count minus one; see the comment in dump.
	ld.nblocks = uint64(binary.LittleEndian.Uint32(hdr[12:])) + 1
	ld.nhashes = int(binary.LittleEndian.Uint32(hdr[16:]))
	if ld.nhashes == 0 {
		return nil, errors.New("blobloom: zero hashes in Bloom filter dump")
	}

	text, err := checkComment(hdr[20:])
	if err != nil {
		return nil, err
	}
	ld.Comment = string(text)

	return ld, nil
}

// Load merges the Loader's filter into f (bitwise OR of every block) and
// returns f. When f is nil, a fresh Filter sized according to the dump
// header is created first.
//
// An error is returned, together with a nil Filter, when the dump is too
// large, when a non-nil f does not match the dump's block or hash count,
// or when reading a block fails. In the last case a non-nil f may be left
// in an inconsistent state, since earlier blocks have already been merged.
func (l *Loader) Load(f *Filter) (*Filter, error) {
	if f != nil {
		if err := l.checkBitsAndHashes(len(f.b), f.k); err != nil {
			return nil, err
		}
	} else {
		nbits := BlockBits * l.nblocks
		if nbits > MaxBits {
			return nil, fmt.Errorf("blobloom: %d blocks is too large", l.nblocks)
		}
		f = New(nbits, l.nhashes)
	}

	for i := range f.b {
		// Each iteration consumes exactly one 64-byte block from the reader.
		if err := l.fillbuf(); err != nil {
			return nil, err
		}

		limbs := f.b[i][:]
		for j := range limbs {
			limbs[j] |= binary.LittleEndian.Uint32(l.buf[4*j:])
		}
	}

	return f, nil
}

// LoadSync merges the Loader's filter into f (bitwise OR of every block)
// and returns f. When f is nil, a fresh SyncFilter sized according to the
// dump header is created first. Otherwise, LoadSync may run concurrently
// with other modifications to f: every limb is updated with an atomic
// compare-and-swap loop.
//
// An error is returned, together with a nil SyncFilter, when the dump is
// too large, when a non-nil f does not match the dump's block or hash
// count, or when reading a block fails. In the last case a non-nil f may be
// left in an inconsistent state, since earlier blocks have already been
// merged.
func (l *Loader) LoadSync(f *SyncFilter) (*SyncFilter, error) {
	if f != nil {
		if err := l.checkBitsAndHashes(len(f.b), f.k); err != nil {
			return nil, err
		}
	} else {
		nbits := BlockBits * l.nblocks
		if nbits > MaxBits {
			return nil, fmt.Errorf("blobloom: %d blocks is too large", l.nblocks)
		}
		f = NewSync(nbits, l.nhashes)
	}

	for i := range f.b {
		// Each iteration consumes exactly one 64-byte block from the reader.
		if err := l.fillbuf(); err != nil {
			return nil, err
		}

		for j := range f.b[i] {
			addr := &f.b[i][j]
			bits := binary.LittleEndian.Uint32(l.buf[4*j:])

			// Atomic OR: retry until no other writer changed the limb
			// between the load and the swap.
			for merged := false; !merged; {
				cur := atomic.LoadUint32(addr)
				merged = atomic.CompareAndSwapUint32(addr, cur, cur|bits)
			}
		}
	}

	return f, nil
}

// checkBitsAndHashes verifies that an existing filter with the given block
// and hash counts is compatible with the dump described by l. It returns
// nil when both counts match; a block-count mismatch is reported in
// preference to a hash-count mismatch.
func (l *Loader) checkBitsAndHashes(nblocks, nhashes int) error {
	if nblocks != int(l.nblocks) {
		return fmt.Errorf("blobloom: Filter has %d blocks, but dump has %d", nblocks, l.nblocks)
	}
	if nhashes != l.nhashes {
		return fmt.Errorf("blobloom: Filter has %d hashes, but dump has %d", nhashes, l.nhashes)
	}
	return nil
}

// fillbuf reads exactly len(l.buf) bytes from l.r into l.buf. A reader that
// is already exhausted (io.EOF) is reported as io.ErrUnexpectedEOF, because
// callers only invoke fillbuf when more data is required; any other error
// from io.ReadFull is returned unchanged.
func (l *Loader) fillbuf() error {
	switch _, err := io.ReadFull(l.r, l.buf[:]); err {
	case nil:
		return nil
	case io.EOF:
		return io.ErrUnexpectedEOF
	default:
		return err
	}
}

// checkComment validates the zero-padded comment field p of a dump header
// and returns the comment with its padding stripped.
//
// If p contains no zero byte, it is returned unchanged. Otherwise everything
// after the first zero byte must be zero as well; if it is not, a nil slice
// and an error are returned. The result aliases p.
func checkComment(p []byte) ([]byte, error) {
	end := bytes.IndexByte(p, 0)
	if end < 0 {
		return p, nil
	}

	// Anything after the terminator must be padding.
	for _, c := range p[end+1:] {
		if c != 0 {
			return nil, fmt.Errorf("blobloom: comment block %q contains zero byte", p)
		}
	}
	return p[:end], nil
}
Add goreleaser 2024-10-29 00:11:29 +00:00			`// Copyright 2023 the Blobloom authors`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`package blobloom`

			`import (`
			`"bytes"`
			`"encoding/binary"`
			`"errors"`
			`"fmt"`
			`"io"`
			`"strings"`
			`"sync/atomic"`
			`)`

			`const maxCommentLen = 44`

			`// Dump writes f to w, with an optional comment string, in the binary format`
			`// that a Loader accepts. It returns the number of bytes written to w.`
			`//`
			`// The comment may contain arbitrary data, within the limits layed out by the`
			`// format description. It can be used to record the hash function to be used`
			`// with a Filter.`
			`func Dump(w io.Writer, f *Filter, comment string) (int64, error) {`
			`return dump(w, f.b, f.k, comment)`
			`}`

			`// DumpSync is like Dump, but for SyncFilters.`
			`//`
			`// If other goroutines are simultaneously modifying f,`
			`// their modifications may not be reflected in the dump.`
			`// Separate synchronization is required to prevent this.`
			`//`
			`// The format produced is the same as Dump's. The fact that`
			`// the argument is a SyncFilter is not encoded in the dump.`
			`func DumpSync(w io.Writer, f *SyncFilter, comment string) (n int64, err error) {`
			`return dump(w, f.b, f.k, comment)`
			`}`

			`func dump(w io.Writer, b []block, nhashes int, comment string) (n int64, err error) {`
			`switch {`
			`case len(b) == 0 \|\| nhashes == 0:`
			`err = errors.New("blobloom: won't dump uninitialized Filter")`
			`case len(comment) > maxCommentLen:`
			`err = fmt.Errorf("blobloom: comment of length %d too long", len(comment))`
			`case strings.IndexByte(comment, 0) != -1:`
			`err = fmt.Errorf("blobloom: comment %q contains zero byte", len(comment))`
			`}`
			`if err != nil {`
			`return 0, err`
			`}`

			`var buf [64]byte`
			`copy(buf[:8], "blobloom")`
			`// As documented in the comment for Loader, we store one less than the`
			`// number of blocks. This way, we can use the otherwise invalid value 0`
			`// and store 2³² blocks instead of at most 2³²-1.`
			`binary.LittleEndian.PutUint32(buf[12:], uint32(len(b)-1))`
			`binary.LittleEndian.PutUint32(buf[16:], uint32(nhashes))`
			`copy(buf[20:], comment)`

			`k, err := w.Write(buf[:])`
			`n = int64(k)`
			`if err != nil {`
			`return n, err`
			`}`

			`for i := range b {`
			`for j := range b[i] {`
			`x := atomic.LoadUint32(&b[i][j])`
			`binary.LittleEndian.PutUint32(buf[4*j:], x)`
			`}`
			`k, err = w.Write(buf[:])`
			`n += int64(k)`
			`if err != nil {`
			`break`
			`}`
			`}`

			`return n, err`
			`}`

			`// A Loader reads a Filter or SyncFilter from an io.Reader.`
			`//`
			`// A Loader accepts the binary format produced by Dump. The format starts`
			`// with a 64-byte header:`
			`// - the string "blobloom", in ASCII;`
			`// - a four-byte version number, which must be zero;`
			`// - the number of Bloom filter blocks, minus one, as a 32-bit integer;`
			`// - the number of hashes, as a 32-bit integer;`
			`// - a comment of at most 44 non-zero bytes, padded to 44 bytes with zeros.`
			`//`
			`// After the header come the 512-bit blocks, divided into sixteen 32-bit limbs.`
			`// All integers are little-endian.`
			`type Loader struct {`
			`buf [64]byte`
			`r io.Reader`
			`err error`

			`Comment string // Comment field. Filled in by NewLoader.`
			`nblocks uint64`
			`nhashes int`
			`}`

			`// NewLoader parses the format header from r and returns a Loader`
			`// that can be used to load a Filter from it.`
			`func NewLoader(r io.Reader) (*Loader, error) {`
			`l := &Loader{r: r}`

			`err := l.fillbuf()`
			`if err != nil {`
			`return nil, err`
			`}`

			`version := binary.LittleEndian.Uint32(l.buf[8:])`
			`// See comment in dump for the +1.`
			`l.nblocks = 1 + uint64(binary.LittleEndian.Uint32(l.buf[12:]))`
			`l.nhashes = int(binary.LittleEndian.Uint32(l.buf[16:]))`
			`comment := l.buf[20:]`

			`switch {`
			`case string(l.buf[:8]) != "blobloom":`
			`err = errors.New("blobloom: not a Bloom filter dump")`
			`case version != 0:`
			`err = errors.New("blobloom: unsupported dump version")`
			`case l.nhashes == 0:`
			`err = errors.New("blobloom: zero hashes in Bloom filter dump")`
			`}`
			`if err == nil {`
			`comment, err = checkComment(comment)`
			`l.Comment = string(comment)`
			`}`

			`if err != nil {`
			`l = nil`
			`}`
			`return l, err`
			`}`

			`// Load sets f to the union of f and the Loader's filter, then returns f.`
			`// If f is nil, a new Filter of the appropriate size is constructed.`
			`//`
			`// If f is not nil and an error occurs while reading from the Loader,`
			`// f may end up in an inconsistent state.`
			`func (l Loader) Load(f Filter) (*Filter, error) {`
			`if f == nil {`
			`nbits := BlockBits * l.nblocks`
			`if nbits > MaxBits {`
			`return nil, fmt.Errorf("blobloom: %d blocks is too large", l.nblocks)`
			`}`
			`f = New(nbits, int(l.nhashes))`
			`} else if err := l.checkBitsAndHashes(len(f.b), f.k); err != nil {`
			`return nil, err`
			`}`

			`for i := range f.b {`
			`if err := l.fillbuf(); err != nil {`
			`return nil, err`
			`}`

			`for j := range f.b[i] {`
			`f.b[i][j] \|= binary.LittleEndian.Uint32(l.buf[4*j:])`
			`}`
			`}`

			`return f, nil`
			`}`

			`// Load sets f to the union of f and the Loader's filter, then returns f.`
			`// If f is nil, a new SyncFilter of the appropriate size is constructed.`
			`// Else, LoadSync may run concurrently with other modifications to f.`
			`//`
			`// If f is not nil and an error occurs while reading from the Loader,`
			`// f may end up in an inconsistent state.`
			`func (l Loader) LoadSync(f SyncFilter) (*SyncFilter, error) {`
			`if f == nil {`
			`nbits := BlockBits * l.nblocks`
			`if nbits > MaxBits {`
			`return nil, fmt.Errorf("blobloom: %d blocks is too large", l.nblocks)`
			`}`
			`f = NewSync(nbits, int(l.nhashes))`
			`} else if err := l.checkBitsAndHashes(len(f.b), f.k); err != nil {`
			`return nil, err`
			`}`

			`for i := range f.b {`
			`if err := l.fillbuf(); err != nil {`
			`return nil, err`
			`}`

			`for j := range f.b[i] {`
			`p := &f.b[i][j]`
			`x := binary.LittleEndian.Uint32(l.buf[4*j:])`

			`for {`
			`old := atomic.LoadUint32(p)`
			`if atomic.CompareAndSwapUint32(p, old, old\|x) {`
			`break`
			`}`
			`}`
			`}`
			`}`

			`return f, nil`
			`}`

			`func (l *Loader) checkBitsAndHashes(nblocks, nhashes int) error {`
			`switch {`
			`case nblocks != int(l.nblocks):`
			`return fmt.Errorf("blobloom: Filter has %d blocks, but dump has %d", nblocks, l.nblocks)`
			`case nhashes != l.nhashes:`
			`return fmt.Errorf("blobloom: Filter has %d hashes, but dump has %d", nhashes, l.nhashes)`
			`}`
			`return nil`
			`}`

			`func (l *Loader) fillbuf() error {`
			`_, err := io.ReadFull(l.r, l.buf[:])`
			`if err == io.EOF {`
			`err = io.ErrUnexpectedEOF`
			`}`
			`return err`
			`}`

			`func checkComment(p []byte) ([]byte, error) {`
			`eos := bytes.IndexByte(p, 0)`
			`if eos != -1 {`
			`tail := p[eos+1:]`
			`if !bytes.Equal(tail, make([]byte, len(tail))) {`
			`return nil, fmt.Errorf("blobloom: comment block %q contains zero byte", p)`
			`}`
			`p = p[:eos]`
			`}`
			`return p, nil`
			`}`