115 lines
2.9 KiB
Go
115 lines
2.9 KiB
Go
package hyperloglog
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"encoding/hex"
|
|
"fmt"
|
|
)
|
|
|
|
// HyperLogLog is a cardinality sketch keyed by Nostr pubkeys.
//
// Everything is hardcoded to use precision 8, i.e. 256 registers.
type HyperLogLog struct {
	// offset is the byte position inside a pubkey at which the 8-byte
	// window read by Add and AddBytes begins.
	offset int

	// registers holds one byte per register. Each entry keeps the largest
	// "leading zero bits + 1" value recorded for that register address.
	registers []byte
}
|
|
|
|
func New(offset int) *HyperLogLog {
|
|
if offset < 0 || offset > 32-8 {
|
|
panic(fmt.Errorf("invalid offset %d", offset))
|
|
}
|
|
|
|
// precision is always 8
|
|
// the number of registers is always 256 (1<<8)
|
|
hll := &HyperLogLog{offset: offset}
|
|
hll.registers = make([]uint8, 256)
|
|
return hll
|
|
}
|
|
|
|
func NewWithRegisters(registers []byte, offset int) *HyperLogLog {
|
|
if offset < 0 || offset > 32-8 {
|
|
panic(fmt.Errorf("invalid offset %d", offset))
|
|
}
|
|
if len(registers) != 256 {
|
|
panic(fmt.Errorf("invalid number of registers %d", len(registers)))
|
|
}
|
|
return &HyperLogLog{registers: registers, offset: offset}
|
|
}
|
|
|
|
// GetRegisters returns the internal register slice itself, not a copy, so
// writes through the returned slice are visible to the HyperLogLog.
func (hll *HyperLogLog) GetRegisters() []byte {
	return hll.registers
}
|
|
// SetRegisters replaces the register slice with enc. The slice is stored as
// is: it is neither copied nor checked for length.
func (hll *HyperLogLog) SetRegisters(enc []byte) {
	hll.registers = enc
}
|
|
func (hll *HyperLogLog) MergeRegisters(other []byte) {
|
|
for i, v := range other {
|
|
if v > hll.registers[i] {
|
|
hll.registers[i] = v
|
|
}
|
|
}
|
|
}
|
|
|
|
func (hll *HyperLogLog) Clear() {
|
|
for i := range hll.registers {
|
|
hll.registers[i] = 0
|
|
}
|
|
}
|
|
|
|
// Add takes a Nostr event pubkey which will be used as the item "key" (that combined with the offset)
|
|
func (hll *HyperLogLog) Add(pubkey string) {
|
|
x, _ := hex.DecodeString(pubkey[hll.offset*2 : hll.offset*2+8*2])
|
|
j := x[0] // register address (first 8 bits, i.e. first byte)
|
|
|
|
w := binary.BigEndian.Uint64(x) // number that we will use
|
|
zeroBits := clz56(w) + 1 // count zeroes (skip the first byte, so only use 56 bits)
|
|
|
|
if zeroBits > hll.registers[j] {
|
|
hll.registers[j] = zeroBits
|
|
}
|
|
}
|
|
|
|
// AddBytes is like Add, but takes pubkey as bytes instead of as string
|
|
func (hll *HyperLogLog) AddBytes(pubkey []byte) {
|
|
x := pubkey[hll.offset : hll.offset+8]
|
|
j := x[0] // register address (first 8 bits, i.e. first byte)
|
|
|
|
w := binary.BigEndian.Uint64(x) // number that we will use
|
|
zeroBits := clz56(w) + 1 // count zeroes (skip the first byte, so only use 56 bits)
|
|
|
|
if zeroBits > hll.registers[j] {
|
|
hll.registers[j] = zeroBits
|
|
}
|
|
}
|
|
|
|
func (hll *HyperLogLog) Merge(other *HyperLogLog) {
|
|
for i, v := range other.registers {
|
|
if v > hll.registers[i] {
|
|
hll.registers[i] = v
|
|
}
|
|
}
|
|
}
|
|
|
|
func (hll *HyperLogLog) Count() uint64 {
|
|
v := countZeros(hll.registers)
|
|
|
|
if v != 0 {
|
|
lc := linearCounting(256 /* nregisters */, v)
|
|
|
|
if lc <= 220 /* threshold */ {
|
|
return uint64(lc)
|
|
}
|
|
}
|
|
|
|
est := hll.calculateEstimate()
|
|
if est <= 256 /* nregisters */ *3 {
|
|
if v != 0 {
|
|
return uint64(linearCounting(256 /* nregisters */, v))
|
|
}
|
|
}
|
|
|
|
return uint64(est)
|
|
}
|
|
|
|
func (hll HyperLogLog) calculateEstimate() float64 {
|
|
sum := 0.0
|
|
for _, val := range hll.registers {
|
|
sum += 1.0 / float64(uint64(1)<<val) // this is the same as 2^(-val)
|
|
}
|
|
|
|
return 0.7182725932495458 /* alpha for 256 registers */ * 256 /* nregisters */ * 256 /* nregisters */ / sum
|
|
}
|