package xsync

import (
	"math/bits"
	"runtime"
	_ "unsafe" // needed for the go:linkname directive on runtime_fastrand
)

// assertionsEnabled is a test-only, assert()-like flag.
var assertionsEnabled = false

// cacheLineSize is the padding size used to prevent false sharing.
// 64B is used instead of 128B as a compromise between memory
// footprint and performance; 128B may give ~30% improvement on
// NUMA machines.
const cacheLineSize = 64

// nextPowOf2 rounds the 32-bit value v up to the nearest power of 2.
// Powers of 2 are returned unchanged and 0 is mapped to 1. Inputs
// above 1<<31 have no representable answer and wrap around to 0.
func nextPowOf2(v uint32) uint32 {
	if v == 0 {
		return 1
	}
	// The bit length of v-1 is the exponent of the smallest power of 2
	// that is >= v; a shift by 32 yields 0, which is the wrap-around case.
	return uint32(1) << bits.Len32(v-1)
}

// parallelism returns the smaller of runtime.GOMAXPROCS(0) and
// runtime.NumCPU().
func parallelism() uint32 {
	procs := uint32(runtime.GOMAXPROCS(0))
	if cores := uint32(runtime.NumCPU()); cores < procs {
		return cores
	}
	return procs
}

// runtime_fastrand is declared without a body and bound to
// runtime.fastrand through go:linkname.
//
//go:noescape
//go:linkname runtime_fastrand runtime.fastrand
func runtime_fastrand() uint32

// broadcast returns a 64-bit word in which each of the eight bytes
// equals b.
func broadcast(b uint8) uint64 {
	const onePerByte = 0x0101010101010101
	return uint64(b) * onePerByte
}

// firstMarkedByteIndex returns the index of the byte that holds the
// lowest set bit of w, counting from the least significant byte.
// It returns 8 when w is zero.
func firstMarkedByteIndex(w uint64) int {
	lowestBit := bits.TrailingZeros64(w)
	return lowestBit / 8
}

// markZeroBytes is a SWAR byte search: the result has the high bit set
// in every byte position where w holds a zero byte. It may produce
// false positives, e.g. for 0x0100, so make sure to double-check bytes
// found by this function.
func markZeroBytes(w uint64) uint64 {
	const (
		lowBits  = 0x0101010101010101
		highBits = 0x8080808080808080
	)
	borrowed := w - lowBits
	return borrowed & ^w & highBits
}

// setByte returns w with the byte at index idx (0 is the least
// significant byte) replaced by b.
func setByte(w uint64, b uint8, idx int) uint64 {
	shift := idx * 8
	mask := uint64(0xff) << shift
	cleared := w &^ mask
	return cleared | uint64(b)<<shift
}