首次提交代码
This commit is contained in:
345
internal/bloom/bloom.go
Normal file
345
internal/bloom/bloom.go
Normal file
@@ -0,0 +1,345 @@
|
||||
package bloom
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/roaring64"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
"google.golang.org/protobuf/proto"
|
||||
)
|
||||
|
||||
/*
|
||||
主体代码来源于https://github.com/bits-and-blooms/bloom
|
||||
在此文件中,将bitmap的管理改用roaring bitmap实现
|
||||
algotao 2022-08-29
|
||||
*/
|
||||
|
||||
type BloomFilter struct {
|
||||
m uint64 // 存贮空间上限
|
||||
k uint64 // hash函数个数
|
||||
elementsMax uint64 // 元素数量上限
|
||||
elementsAdded uint64 // 已加入的元素数量
|
||||
falsePositiveRate float64 // 假阳率
|
||||
rb *roaring64.Bitmap // 位图
|
||||
chOne chan []uint64 // 接收每个插入的hash索引
|
||||
chInsert chan []uint64 // 接收排好序的hash索引进Bitmap
|
||||
chSortJobQuota chan int // 排序工作的配额控制
|
||||
buf []uint64 // 缓冲
|
||||
wgJobs sync.WaitGroup
|
||||
}
|
||||
|
||||
// BloomFilterStat is a plain-value snapshot of a filter's parameters and
// counters.
type BloomFilterStat struct {
	M                 uint64  // size of the bit space
	K                 uint64  // number of hash functions
	ElementsMax       uint64  // expected maximum number of elements
	ElementsAdded     uint64  // number of elements added so far
	FalsePositiveRate float64 // target false-positive rate
}
|
||||
|
||||
// headerVersion1 is the file-header version number used by this package.
const headerVersion1 = 1
|
||||
|
||||
// bitmapFileHeader is the fixed-size prefix of a saved filter file. It only
// carries the byte length of the protobuf-encoded header that follows it.
type bitmapFileHeader struct {
	Size uint64 // length in bytes of the protobuf header
}
|
||||
|
||||
// NewWithEstimates 创建一个BloomFilter,并期望有n个元素,<fp的误匹配率
|
||||
func NewWithEstimates(e uint64, fr float64) *BloomFilter {
|
||||
m, k := EstimateParameters(e, fr)
|
||||
return newBloomFilter(m, k, e, fr)
|
||||
}
|
||||
|
||||
// new 创建一个新的BloomFilter,具有 _m_ bits 和 _k_ hashing 函数
|
||||
func newBloomFilter(m uint64, k uint64, e uint64, fr float64) *BloomFilter {
|
||||
b := &BloomFilter{
|
||||
m: max(1, m),
|
||||
k: max(1, k),
|
||||
elementsMax: e,
|
||||
falsePositiveRate: fr,
|
||||
rb: roaring64.New(),
|
||||
chOne: make(chan []uint64, 1024*1024), // 索引缓冲区chan
|
||||
chInsert: make(chan []uint64, 2), // 插入队列
|
||||
chSortJobQuota: make(chan int, 8), // 排序队列
|
||||
}
|
||||
|
||||
for i := 0; i < cap(b.chSortJobQuota); i++ {
|
||||
b.chSortJobQuota <- 0
|
||||
}
|
||||
|
||||
//log.Printf("Init quota len(%v), cap(%v)\n", len(b.chSortJobQuota), cap(b.chSortJobQuota))
|
||||
|
||||
go b.consumeOne()
|
||||
go b.consumeInsert()
|
||||
|
||||
return b
|
||||
}
|
||||
|
||||
// EstimateParameters returns the number of bits m and the number of hash
// functions k for a filter expected to hold n elements at false-positive
// rate p.
//
// Degenerate input (n == 0, or p not strictly between 0 and 1, including
// NaN) yields the minimal filter (1, 1). Without this guard the formulas
// produce NaN, Inf or negative values, and converting those to uint64 is
// implementation-defined.
func EstimateParameters(n uint64, p float64) (m uint64, k uint64) {
	if n == 0 || !(p > 0 && p < 1) {
		return 1, 1
	}

	// m = ceil(-n * ln(p) / ln(2)^2)
	m = uint64(math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(math.Log(2), 2)))
	// k = ceil(ln(2) * m / n)
	k = uint64(math.Ceil(math.Log(2) * float64(m) / float64(n)))
	return m, k
}
|
||||
|
||||
// location 返回当前位置的hash值
|
||||
func (b *BloomFilter) location(h [4]uint64, i uint64) uint64 {
|
||||
return (h[i%2] + i*h[2+(((i+(i%2))%4)/2)]) % b.m
|
||||
//return 0
|
||||
}
|
||||
|
||||
// baseHashes 生成4个hash值,用于生产key
|
||||
func (b *BloomFilter) baseHashes(data []byte) [4]uint64 {
|
||||
h := New128()
|
||||
h.Write(data)
|
||||
|
||||
h1, h2 := h.Sum128()
|
||||
|
||||
h.Write([]byte{1})
|
||||
h3, h4 := h.Sum128()
|
||||
|
||||
return [4]uint64{
|
||||
h1, h2, h3, h4,
|
||||
}
|
||||
}
|
||||
|
||||
// 消费一个计算好的bloom bits对象,并填入缓冲。当缓冲半满时发送给排序队列处理
|
||||
func (b *BloomFilter) consumeOne() {
|
||||
batchSize := cap(b.chOne) * int(b.k) / 2 //一半buffer满了就开始处理,即一半个数的uint64。或是遇到Flush标志(bits长度0),则刷缓冲
|
||||
|
||||
for bits := range b.chOne {
|
||||
if len(bits) != 0 {
|
||||
b.elementsAdded++
|
||||
}
|
||||
|
||||
b.buf = append(b.buf, bits...)
|
||||
if len(b.buf) >= batchSize || len(bits) == 0 {
|
||||
|
||||
buf := b.buf[:]
|
||||
b.buf = []uint64{}
|
||||
b.wgJobs.Add(1)
|
||||
|
||||
// 如果接收到了Flush标志,则在处理最后buffer后,减一次waitgroup
|
||||
if len(bits) == 0 {
|
||||
b.wgJobs.Done()
|
||||
}
|
||||
|
||||
//等待有可用排序配额,如成功则消耗一个配额
|
||||
<-b.chSortJobQuota
|
||||
|
||||
go func() {
|
||||
sort.Slice(buf, func(i, j int) bool { return buf[i] < buf[j] })
|
||||
|
||||
//提交至插入任务
|
||||
b.chInsert <- buf
|
||||
|
||||
//恢复1个配额
|
||||
b.chSortJobQuota <- 1
|
||||
}()
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 将批量bits写到bitmap
|
||||
func (b *BloomFilter) consumeInsert() {
|
||||
for bitsBatch := range b.chInsert {
|
||||
b.rb.AddMany(bitsBatch)
|
||||
b.wgJobs.Done()
|
||||
}
|
||||
}
|
||||
|
||||
// Add 添加数据的Hash位图
|
||||
func (b *BloomFilter) Add(data []byte) *BloomFilter {
|
||||
h := b.baseHashes(data)
|
||||
bits := make([]uint64, b.k)
|
||||
for i := uint64(0); i < b.k; i++ {
|
||||
bits[i] = b.location(h, i)
|
||||
}
|
||||
b.chOne <- bits //将一个计算好的bloom bits发送到待处理队列
|
||||
|
||||
return b
|
||||
}
|
||||
|
||||
// AddString 添加字符串的Hash位图
|
||||
func (b *BloomFilter) AddString(data string) *BloomFilter {
|
||||
return b.Add([]byte(data))
|
||||
}
|
||||
|
||||
// Test 如果命中Hash位图,则返回真 (有误匹配率)
|
||||
func (b *BloomFilter) Test(data []byte) bool {
|
||||
h := b.baseHashes(data)
|
||||
for i := uint64(0); i < b.k; i++ {
|
||||
if !b.rb.Contains(b.location(h, i)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// TestString 如果命中字符串Hash位图,则返回真 (有误匹配率)
|
||||
func (b *BloomFilter) TestString(data string) bool {
|
||||
return b.Test([]byte(data))
|
||||
}
|
||||
|
||||
// Flush 将缓冲中的待处理Bit写入Bitmap
|
||||
func (b *BloomFilter) Flush() {
|
||||
b.wgJobs.Add(1)
|
||||
|
||||
//发出Flush指令
|
||||
b.chOne <- []uint64{}
|
||||
|
||||
b.wgJobs.Wait()
|
||||
}
|
||||
|
||||
// free 将缓冲中的待处理Bit写入Bitmap
|
||||
func (b *BloomFilter) free() {
|
||||
close(b.chOne)
|
||||
close(b.chInsert)
|
||||
b.rb.Clear()
|
||||
}
|
||||
|
||||
// Iterator 位图遍历器
|
||||
func (b *BloomFilter) Iterator() roaring64.IntPeekable64 {
|
||||
return b.rb.Iterator()
|
||||
}
|
||||
|
||||
// GetSizeInBytes 返回位图大小
|
||||
func (b *BloomFilter) GetSizeInBytes() uint64 {
|
||||
return b.rb.GetSerializedSizeInBytes()
|
||||
}
|
||||
|
||||
// 获得统计信息,主要用于运行期间获取状态
|
||||
func (b *BloomFilter) GetStat() BloomFilterStat {
|
||||
return BloomFilterStat{
|
||||
M: b.m,
|
||||
K: b.k,
|
||||
ElementsMax: b.elementsMax,
|
||||
ElementsAdded: b.elementsAdded,
|
||||
FalsePositiveRate: b.falsePositiveRate,
|
||||
}
|
||||
}
|
||||
|
||||
// SaveToFile 写入到文件
|
||||
func (b *BloomFilter) SaveToFile(filename string) (err error) {
|
||||
b.Flush()
|
||||
|
||||
headerPB := &Header{
|
||||
Version: headerVersion1,
|
||||
M: b.m,
|
||||
K: b.k,
|
||||
ElementsMax: b.elementsMax,
|
||||
ElementsAdded: b.elementsAdded,
|
||||
FalsePositiveRate: b.falsePositiveRate,
|
||||
}
|
||||
headerData, err := proto.Marshal(headerPB)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fi, fe := os.Create(filename)
|
||||
if fe != nil {
|
||||
return fe
|
||||
}
|
||||
|
||||
defer fi.Close()
|
||||
|
||||
fh := bitmapFileHeader{
|
||||
Size: uint64(len(headerData)),
|
||||
}
|
||||
|
||||
//写入文件头(字节数)
|
||||
fe = binary.Write(fi, binary.BigEndian, fh)
|
||||
if fe != nil {
|
||||
return fe
|
||||
}
|
||||
|
||||
//写入文件头(PB详细信息)
|
||||
fe = binary.Write(fi, binary.BigEndian, headerData)
|
||||
if fe != nil {
|
||||
return fe
|
||||
}
|
||||
|
||||
b.rb.RunOptimize()
|
||||
|
||||
zw, err := zstd.NewWriter(fi)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer zw.Close()
|
||||
|
||||
_, fe = b.rb.WriteTo(zw)
|
||||
|
||||
b.free()
|
||||
|
||||
return fe
|
||||
}
|
||||
|
||||
// LoadFromFile 从文件中读取
|
||||
func LoadFromFile(filename string, headerOnly bool) (bft *BloomFilter, err error) {
|
||||
fi, fe := os.Open(filename)
|
||||
|
||||
if fe != nil {
|
||||
return nil, fe
|
||||
}
|
||||
|
||||
defer fi.Close()
|
||||
|
||||
fh := bitmapFileHeader{}
|
||||
fe = binary.Read(fi, binary.BigEndian, &fh)
|
||||
if fe != nil {
|
||||
return nil, fe
|
||||
}
|
||||
|
||||
headerData := make([]byte, fh.Size)
|
||||
|
||||
n, err := fi.Read(headerData)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if n != len(headerData) {
|
||||
return nil, errors.New("unknown file format")
|
||||
}
|
||||
headerPB := &Header{}
|
||||
|
||||
err = proto.Unmarshal(headerData, headerPB)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if headerPB.Version != headerVersion1 {
|
||||
return nil, errors.New("unsupported version")
|
||||
}
|
||||
|
||||
bft = &BloomFilter{}
|
||||
bft.m = headerPB.GetM()
|
||||
bft.k = headerPB.GetK()
|
||||
bft.elementsMax = headerPB.GetElementsMax()
|
||||
bft.elementsAdded = headerPB.GetElementsAdded()
|
||||
bft.falsePositiveRate = headerPB.GetFalsePositiveRate()
|
||||
|
||||
if headerOnly {
|
||||
return bft, nil
|
||||
}
|
||||
|
||||
bft.rb = roaring64.New()
|
||||
|
||||
zr, err := zstd.NewReader(fi)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer zr.Close()
|
||||
|
||||
_, fe = bft.rb.ReadFrom(zr)
|
||||
if fe != nil {
|
||||
return nil, fe
|
||||
}
|
||||
|
||||
return bft, nil
|
||||
}
|
||||
127
internal/bloom/bloom_test.go
Normal file
127
internal/bloom/bloom_test.go
Normal file
@@ -0,0 +1,127 @@
|
||||
package bloom
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/roaring64"
|
||||
)
|
||||
|
||||
func TestBasic(t *testing.T) {
|
||||
f := NewWithEstimates(1000000000, 0.00001)
|
||||
n1 := "Bess"
|
||||
n2 := "Jane"
|
||||
n3 := "Tony"
|
||||
n4 := "Algo"
|
||||
f.AddString(n1)
|
||||
f.AddString(n2)
|
||||
f.AddString(n3)
|
||||
f.Flush()
|
||||
n1b := f.TestString(n1)
|
||||
n2b := f.TestString(n2)
|
||||
n3b := f.TestString(n3)
|
||||
n4b := f.TestString(n4)
|
||||
if !n1b {
|
||||
t.Errorf("%v should be in.", n1)
|
||||
}
|
||||
if !n2b {
|
||||
t.Errorf("%v should be in.", n2)
|
||||
}
|
||||
if !n3b {
|
||||
t.Errorf("%v should be in.", n3)
|
||||
}
|
||||
if n4b {
|
||||
t.Errorf("%v should be not in.", n4)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFile(t *testing.T) {
|
||||
f := NewWithEstimates(1000000000, 0.00001)
|
||||
n1 := "Bess"
|
||||
n2 := "Jane"
|
||||
n3 := "Tony"
|
||||
n4 := "Algo"
|
||||
f.AddString(n1)
|
||||
f.AddString(n2)
|
||||
f.AddString(n3)
|
||||
const tmpfile = "//tmp//bloomtest.bin"
|
||||
|
||||
err := f.SaveToFile(tmpfile)
|
||||
if err != nil {
|
||||
t.Errorf("save file error %v", err)
|
||||
}
|
||||
|
||||
f, err = LoadFromFile(tmpfile, false)
|
||||
if err != nil {
|
||||
t.Errorf("load file error %v", err)
|
||||
}
|
||||
|
||||
n1b := f.TestString(n1)
|
||||
n2b := f.TestString(n2)
|
||||
n3b := f.TestString(n3)
|
||||
n4b := f.TestString(n4)
|
||||
if !n1b {
|
||||
t.Errorf("%v should be in.", n1)
|
||||
}
|
||||
if !n2b {
|
||||
t.Errorf("%v should be in.", n2)
|
||||
}
|
||||
if !n3b {
|
||||
t.Errorf("%v should be in.", n3)
|
||||
}
|
||||
if n4b {
|
||||
t.Errorf("%v should be not in.", n4)
|
||||
}
|
||||
}
|
||||
|
||||
func Test10W(t *testing.T) {
|
||||
f := NewWithEstimates(100000, 0.00001)
|
||||
for i := uint64(0); i < 100000; i++ {
|
||||
f.AddString(strconv.FormatUint(i, 10))
|
||||
}
|
||||
|
||||
const tmpfile = "//tmp//bloomtest.bin"
|
||||
err := f.SaveToFile(tmpfile)
|
||||
if err != nil {
|
||||
t.Errorf("save file error %v", err)
|
||||
}
|
||||
|
||||
f, err = LoadFromFile(tmpfile, false)
|
||||
if err != nil {
|
||||
t.Errorf("load file error %v", err)
|
||||
}
|
||||
|
||||
for i := uint64(0); i < 100000; i++ {
|
||||
ns := f.TestString(strconv.FormatUint(i, 10))
|
||||
if !ns {
|
||||
t.Errorf("%v should be in.", ns)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestStat(t *testing.T) {
|
||||
f := NewWithEstimates(1000000000, 0.00000001)
|
||||
t.Errorf("%v", f.GetStat())
|
||||
}
|
||||
|
||||
func BenchmarkNormal(b *testing.B) {
|
||||
f := NewWithEstimates(1000000000, 0.00001)
|
||||
for n := 0; n < b.N; n++ {
|
||||
f.AddString(strconv.FormatUint(uint64(n), 10))
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkRoaringBitmap(b *testing.B) {
|
||||
f := roaring64.New()
|
||||
r := rand.New(rand.NewSource(99))
|
||||
x := uint64(0)
|
||||
|
||||
b.Run("Add", func(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
x = r.Uint64() % 23962645944
|
||||
f.Add(x)
|
||||
}
|
||||
})
|
||||
b.Errorf("%v aa\n", f.GetSizeInBytes())
|
||||
}
|
||||
167
internal/bloom/bloomfile.pb.go
Normal file
167
internal/bloom/bloomfile.pb.go
Normal file
@@ -0,0 +1,167 @@
|
||||
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||
// versions:
|
||||
// protoc-gen-go v1.36.10
|
||||
// protoc v5.29.4
|
||||
// source: bloomfile.proto
|
||||
|
||||
package bloom
|
||||
|
||||
import (
|
||||
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
|
||||
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
|
||||
reflect "reflect"
|
||||
sync "sync"
|
||||
unsafe "unsafe"
|
||||
)
|
||||
|
||||
const (
|
||||
// Verify that this generated code is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
|
||||
// Verify that runtime/protoimpl is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
|
||||
)
|
||||
|
||||
type Header struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
Version uint64 `protobuf:"varint,1,opt,name=Version,proto3" json:"Version,omitempty"` // 版本,当前为1
|
||||
M uint64 `protobuf:"varint,2,opt,name=M,proto3" json:"M,omitempty"` // 存贮空间上限
|
||||
K uint64 `protobuf:"varint,3,opt,name=K,proto3" json:"K,omitempty"` // hash函数个数
|
||||
ElementsMax uint64 `protobuf:"varint,4,opt,name=ElementsMax,proto3" json:"ElementsMax,omitempty"` // 创建空间元素数量
|
||||
ElementsAdded uint64 `protobuf:"varint,5,opt,name=ElementsAdded,proto3" json:"ElementsAdded,omitempty"` // 实际加入元素数量
|
||||
FalsePositiveRate float64 `protobuf:"fixed64,6,opt,name=FalsePositiveRate,proto3" json:"FalsePositiveRate,omitempty"` // 假阳率
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *Header) Reset() {
|
||||
*x = Header{}
|
||||
mi := &file_bloomfile_proto_msgTypes[0]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *Header) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*Header) ProtoMessage() {}
|
||||
|
||||
func (x *Header) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_bloomfile_proto_msgTypes[0]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use Header.ProtoReflect.Descriptor instead.
|
||||
func (*Header) Descriptor() ([]byte, []int) {
|
||||
return file_bloomfile_proto_rawDescGZIP(), []int{0}
|
||||
}
|
||||
|
||||
func (x *Header) GetVersion() uint64 {
|
||||
if x != nil {
|
||||
return x.Version
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *Header) GetM() uint64 {
|
||||
if x != nil {
|
||||
return x.M
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *Header) GetK() uint64 {
|
||||
if x != nil {
|
||||
return x.K
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *Header) GetElementsMax() uint64 {
|
||||
if x != nil {
|
||||
return x.ElementsMax
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *Header) GetElementsAdded() uint64 {
|
||||
if x != nil {
|
||||
return x.ElementsAdded
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *Header) GetFalsePositiveRate() float64 {
|
||||
if x != nil {
|
||||
return x.FalsePositiveRate
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
var File_bloomfile_proto protoreflect.FileDescriptor
|
||||
|
||||
const file_bloomfile_proto_rawDesc = "" +
|
||||
"\n" +
|
||||
"\x0fbloomfile.proto\x12\x05bloom\"\xb4\x01\n" +
|
||||
"\x06Header\x12\x18\n" +
|
||||
"\aVersion\x18\x01 \x01(\x04R\aVersion\x12\f\n" +
|
||||
"\x01M\x18\x02 \x01(\x04R\x01M\x12\f\n" +
|
||||
"\x01K\x18\x03 \x01(\x04R\x01K\x12 \n" +
|
||||
"\vElementsMax\x18\x04 \x01(\x04R\vElementsMax\x12$\n" +
|
||||
"\rElementsAdded\x18\x05 \x01(\x04R\rElementsAdded\x12,\n" +
|
||||
"\x11FalsePositiveRate\x18\x06 \x01(\x01R\x11FalsePositiveRateB\x16Z\x14internal/bloom;bloomb\x06proto3"
|
||||
|
||||
var (
|
||||
file_bloomfile_proto_rawDescOnce sync.Once
|
||||
file_bloomfile_proto_rawDescData []byte
|
||||
)
|
||||
|
||||
func file_bloomfile_proto_rawDescGZIP() []byte {
|
||||
file_bloomfile_proto_rawDescOnce.Do(func() {
|
||||
file_bloomfile_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_bloomfile_proto_rawDesc), len(file_bloomfile_proto_rawDesc)))
|
||||
})
|
||||
return file_bloomfile_proto_rawDescData
|
||||
}
|
||||
|
||||
var file_bloomfile_proto_msgTypes = make([]protoimpl.MessageInfo, 1)
|
||||
var file_bloomfile_proto_goTypes = []any{
|
||||
(*Header)(nil), // 0: bloom.Header
|
||||
}
|
||||
var file_bloomfile_proto_depIdxs = []int32{
|
||||
0, // [0:0] is the sub-list for method output_type
|
||||
0, // [0:0] is the sub-list for method input_type
|
||||
0, // [0:0] is the sub-list for extension type_name
|
||||
0, // [0:0] is the sub-list for extension extendee
|
||||
0, // [0:0] is the sub-list for field type_name
|
||||
}
|
||||
|
||||
func init() { file_bloomfile_proto_init() }
|
||||
func file_bloomfile_proto_init() {
|
||||
if File_bloomfile_proto != nil {
|
||||
return
|
||||
}
|
||||
type x struct{}
|
||||
out := protoimpl.TypeBuilder{
|
||||
File: protoimpl.DescBuilder{
|
||||
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
||||
RawDescriptor: unsafe.Slice(unsafe.StringData(file_bloomfile_proto_rawDesc), len(file_bloomfile_proto_rawDesc)),
|
||||
NumEnums: 0,
|
||||
NumMessages: 1,
|
||||
NumExtensions: 0,
|
||||
NumServices: 0,
|
||||
},
|
||||
GoTypes: file_bloomfile_proto_goTypes,
|
||||
DependencyIndexes: file_bloomfile_proto_depIdxs,
|
||||
MessageInfos: file_bloomfile_proto_msgTypes,
|
||||
}.Build()
|
||||
File_bloomfile_proto = out.File
|
||||
file_bloomfile_proto_goTypes = nil
|
||||
file_bloomfile_proto_depIdxs = nil
|
||||
}
|
||||
15
internal/bloom/bloomfile.proto
Normal file
15
internal/bloom/bloomfile.proto
Normal file
@@ -0,0 +1,15 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package bloom;
|
||||
|
||||
option go_package = "internal/bloom;bloom";
|
||||
|
||||
|
||||
message Header {
|
||||
uint64 Version = 1 ; // 版本,当前为1
|
||||
uint64 M = 2 ; // 存贮空间上限
|
||||
uint64 K = 3 ; // hash函数个数
|
||||
uint64 ElementsMax = 4 ; // 创建空间元素数量
|
||||
uint64 ElementsAdded = 5 ; // 实际加入元素数量
|
||||
double FalsePositiveRate = 6 ; // 假阳率
|
||||
}
|
||||
1
internal/bloom/makeproto.sh
Executable file
1
internal/bloom/makeproto.sh
Executable file
@@ -0,0 +1 @@
|
||||
protoc --proto_path=. *.proto --go_out=. --go_opt=paths=source_relative
|
||||
72
internal/bloom/murmur.go
Normal file
72
internal/bloom/murmur.go
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright 2013, Sébastien Paolacci. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package murmur3 provides an amd64 native (Go generic fallback)
|
||||
// implementation of the murmur3 hash algorithm for strings and slices.
|
||||
//
|
||||
// Assembly is provided for amd64 go1.5+; pull requests are welcome for other
|
||||
// architectures.
|
||||
package bloom
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// bmixer is the block-mixing backend that digest delegates to.
type bmixer interface {
	// bmix consumes p and returns the trailing bytes it did not consume.
	bmix(p []byte) (tail []byte)
	// Size returns the number of bytes of the resulting hash.
	Size() (n int)
	// reset restores the mixer's running state.
	reset()
}
|
||||
|
||||
type digest struct {
|
||||
clen int // Digested input cumulative length.
|
||||
tail []byte // 0 to Size()-1 bytes view of `buf'.
|
||||
buf [16]byte // Expected (but not required) to be Size() large.
|
||||
bmixer
|
||||
}
|
||||
|
||||
// BlockSize always returns 1.
func (d *digest) BlockSize() int {
	return 1
}
|
||||
|
||||
func (d *digest) Write(p []byte) (n int, err error) {
|
||||
n = len(p)
|
||||
d.clen += n
|
||||
|
||||
if len(d.tail) > 0 {
|
||||
// Stick back pending bytes.
|
||||
nfree := d.Size() - len(d.tail) // nfree ∈ [1, d.Size()-1].
|
||||
if nfree < len(p) {
|
||||
// One full block can be formed.
|
||||
block := append(d.tail, p[:nfree]...)
|
||||
p = p[nfree:]
|
||||
_ = d.bmix(block) // No tail.
|
||||
} else {
|
||||
// Tail's buf is large enough to prevent reallocs.
|
||||
p = append(d.tail, p...)
|
||||
}
|
||||
}
|
||||
|
||||
d.tail = d.bmix(p)
|
||||
|
||||
// Keep own copy of the 0 to Size()-1 pending bytes.
|
||||
nn := copy(d.buf[:], d.tail)
|
||||
d.tail = d.buf[:nn]
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func (d *digest) Reset() {
|
||||
d.clen = 0
|
||||
d.tail = nil
|
||||
d.bmixer.reset()
|
||||
}
|
||||
|
||||
// strslice returns a string that shares slice's memory instead of copying
// it. The result is only valid while slice's bytes are left unmodified.
//
// NOTE(review): reflect.SliceHeader and reflect.StringHeader are deprecated;
// unsafe.String(unsafe.SliceData(slice), len(slice)) is the modern form, but
// switching to it also means dropping this file's reflect import.
func strslice(slice []byte) string {
	var str string
	src := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
	dst := (*reflect.StringHeader)(unsafe.Pointer(&str))
	dst.Data, dst.Len = src.Data, src.Len
	return str
}
|
||||
182
internal/bloom/murmur128.go
Normal file
182
internal/bloom/murmur128.go
Normal file
@@ -0,0 +1,182 @@
|
||||
package bloom
|
||||
|
||||
import (
|
||||
"hash"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// Multiplication constants of the 128-bit mixing rounds.
const (
	c1_128 = 0x87c37b91114253d5
	c2_128 = 0x4cf5ad432745937f
)
|
||||
|
||||
// Make sure interfaces are correctly implemented.
|
||||
var (
|
||||
_ hash.Hash = new(digest128)
|
||||
_ Hash128 = new(digest128)
|
||||
_ bmixer = new(digest128)
|
||||
)
|
||||
|
||||
// Hash128 is a streaming hash that can also report its 128-bit sum as two
// uint64 halves.
type Hash128 interface {
	hash.Hash
	Sum128() (uint64, uint64)
}
|
||||
|
||||
// digest128 represents a partial evaluation of a 128 bites hash.
|
||||
type digest128 struct {
|
||||
digest
|
||||
seed1 uint64
|
||||
seed2 uint64
|
||||
h1 uint64 // Unfinalized running hash part 1.
|
||||
h2 uint64 // Unfinalized running hash part 2.
|
||||
}
|
||||
|
||||
// SeedNew128 returns a Hash128 for streaming 128 bit sums with its internal
|
||||
// digests initialized to seed1 and seed2.
|
||||
//
|
||||
// The canonical implementation allows one only uint32 seed; to imitate that
|
||||
// behavior, use the same, uint32-max seed for seed1 and seed2.
|
||||
func SeedNew128(seed1, seed2 uint64) Hash128 {
|
||||
d := &digest128{seed1: seed1, seed2: seed2}
|
||||
d.bmixer = d
|
||||
d.Reset()
|
||||
return d
|
||||
}
|
||||
|
||||
// New128 returns a Hash128 for streaming 128 bit sums.
|
||||
func New128() Hash128 {
|
||||
return SeedNew128(0, 0)
|
||||
}
|
||||
|
||||
// Size returns the hash length in bytes, which is 16.
func (d *digest128) Size() int {
	return 16
}
|
||||
|
||||
// reset restores the running hash halves to their seeds.
func (d *digest128) reset() {
	d.h1 = d.seed1
	d.h2 = d.seed2
}
|
||||
|
||||
func (d *digest128) Sum(b []byte) []byte {
|
||||
h1, h2 := d.Sum128()
|
||||
return append(b,
|
||||
byte(h1>>56), byte(h1>>48), byte(h1>>40), byte(h1>>32),
|
||||
byte(h1>>24), byte(h1>>16), byte(h1>>8), byte(h1),
|
||||
|
||||
byte(h2>>56), byte(h2>>48), byte(h2>>40), byte(h2>>32),
|
||||
byte(h2>>24), byte(h2>>16), byte(h2>>8), byte(h2),
|
||||
)
|
||||
}
|
||||
|
||||
func (d *digest128) bmix(p []byte) (tail []byte) {
|
||||
h1, h2 := d.h1, d.h2
|
||||
|
||||
for len(p) >= 16 {
|
||||
k1 := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 | uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
|
||||
k2 := uint64(p[8]) | uint64(p[9])<<8 | uint64(p[10])<<16 | uint64(p[11])<<24 | uint64(p[12])<<32 | uint64(p[13])<<40 | uint64(p[14])<<48 | uint64(p[15])<<56
|
||||
p = p[16:]
|
||||
|
||||
k1 *= c1_128
|
||||
k1 = bits.RotateLeft64(k1, 31)
|
||||
k1 *= c2_128
|
||||
h1 ^= k1
|
||||
|
||||
h1 = bits.RotateLeft64(h1, 27)
|
||||
h1 += h2
|
||||
h1 = h1*5 + 0x52dce729
|
||||
|
||||
k2 *= c2_128
|
||||
k2 = bits.RotateLeft64(k2, 33)
|
||||
k2 *= c1_128
|
||||
h2 ^= k2
|
||||
|
||||
h2 = bits.RotateLeft64(h2, 31)
|
||||
h2 += h1
|
||||
h2 = h2*5 + 0x38495ab5
|
||||
}
|
||||
d.h1, d.h2 = h1, h2
|
||||
return p
|
||||
}
|
||||
|
||||
func (d *digest128) Sum128() (h1, h2 uint64) {
|
||||
|
||||
h1, h2 = d.h1, d.h2
|
||||
|
||||
var k1, k2 uint64
|
||||
switch len(d.tail) & 15 {
|
||||
case 15:
|
||||
k2 ^= uint64(d.tail[14]) << 48
|
||||
fallthrough
|
||||
case 14:
|
||||
k2 ^= uint64(d.tail[13]) << 40
|
||||
fallthrough
|
||||
case 13:
|
||||
k2 ^= uint64(d.tail[12]) << 32
|
||||
fallthrough
|
||||
case 12:
|
||||
k2 ^= uint64(d.tail[11]) << 24
|
||||
fallthrough
|
||||
case 11:
|
||||
k2 ^= uint64(d.tail[10]) << 16
|
||||
fallthrough
|
||||
case 10:
|
||||
k2 ^= uint64(d.tail[9]) << 8
|
||||
fallthrough
|
||||
case 9:
|
||||
k2 ^= uint64(d.tail[8]) << 0
|
||||
|
||||
k2 *= c2_128
|
||||
k2 = bits.RotateLeft64(k2, 33)
|
||||
k2 *= c1_128
|
||||
h2 ^= k2
|
||||
|
||||
fallthrough
|
||||
|
||||
case 8:
|
||||
k1 ^= uint64(d.tail[7]) << 56
|
||||
fallthrough
|
||||
case 7:
|
||||
k1 ^= uint64(d.tail[6]) << 48
|
||||
fallthrough
|
||||
case 6:
|
||||
k1 ^= uint64(d.tail[5]) << 40
|
||||
fallthrough
|
||||
case 5:
|
||||
k1 ^= uint64(d.tail[4]) << 32
|
||||
fallthrough
|
||||
case 4:
|
||||
k1 ^= uint64(d.tail[3]) << 24
|
||||
fallthrough
|
||||
case 3:
|
||||
k1 ^= uint64(d.tail[2]) << 16
|
||||
fallthrough
|
||||
case 2:
|
||||
k1 ^= uint64(d.tail[1]) << 8
|
||||
fallthrough
|
||||
case 1:
|
||||
k1 ^= uint64(d.tail[0]) << 0
|
||||
k1 *= c1_128
|
||||
k1 = bits.RotateLeft64(k1, 31)
|
||||
k1 *= c2_128
|
||||
h1 ^= k1
|
||||
}
|
||||
|
||||
h1 ^= uint64(d.clen)
|
||||
h2 ^= uint64(d.clen)
|
||||
|
||||
h1 += h2
|
||||
h2 += h1
|
||||
|
||||
h1 = fmix64(h1)
|
||||
h2 = fmix64(h2)
|
||||
|
||||
h1 += h2
|
||||
h2 += h1
|
||||
|
||||
return h1, h2
|
||||
}
|
||||
|
||||
// fmix64 is the 64-bit finalization mix: three xor-shift steps by 33 bits
// with a multiplication between each pair of them.
func fmix64(k uint64) uint64 {
	const shift = 33
	for _, mul := range [...]uint64{0xff51afd7ed558ccd, 0xc4ceb9fe1a85ec53} {
		k ^= k >> shift
		k *= mul
	}
	return k ^ (k >> shift)
}
|
||||
247
internal/bloom/murmur128_amd64.s
Normal file
247
internal/bloom/murmur128_amd64.s
Normal file
@@ -0,0 +1,247 @@
|
||||
// +build go1.5,amd64
|
||||
|
||||
// SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64)
|
||||
TEXT ·SeedSum128(SB), $0-56
|
||||
MOVQ seed1+0(FP), R12
|
||||
MOVQ seed2+8(FP), R13
|
||||
MOVQ data_base+16(FP), SI
|
||||
MOVQ data_len+24(FP), R9
|
||||
LEAQ h1+40(FP), BX
|
||||
JMP sum128internal<>(SB)
|
||||
|
||||
// Sum128(data []byte) (h1 uint64, h2 uint64)
|
||||
TEXT ·Sum128(SB), $0-40
|
||||
XORQ R12, R12
|
||||
XORQ R13, R13
|
||||
MOVQ data_base+0(FP), SI
|
||||
MOVQ data_len+8(FP), R9
|
||||
LEAQ h1+24(FP), BX
|
||||
JMP sum128internal<>(SB)
|
||||
|
||||
// SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64)
|
||||
TEXT ·SeedStringSum128(SB), $0-48
|
||||
MOVQ seed1+0(FP), R12
|
||||
MOVQ seed2+8(FP), R13
|
||||
MOVQ data_base+16(FP), SI
|
||||
MOVQ data_len+24(FP), R9
|
||||
LEAQ h1+32(FP), BX
|
||||
JMP sum128internal<>(SB)
|
||||
|
||||
// StringSum128(data string) (h1 uint64, h2 uint64)
|
||||
TEXT ·StringSum128(SB), $0-32
|
||||
XORQ R12, R12
|
||||
XORQ R13, R13
|
||||
MOVQ data_base+0(FP), SI
|
||||
MOVQ data_len+8(FP), R9
|
||||
LEAQ h1+16(FP), BX
|
||||
JMP sum128internal<>(SB)
|
||||
|
||||
// Expects:
|
||||
// R12 == h1 uint64 seed
|
||||
// R13 == h2 uint64 seed
|
||||
// SI == &data
|
||||
// R9 == len(data)
|
||||
// BX == &[2]uint64 return
|
||||
TEXT sum128internal<>(SB), $0
|
||||
MOVQ $0x87c37b91114253d5, R14 // c1
|
||||
MOVQ $0x4cf5ad432745937f, R15 // c2
|
||||
|
||||
MOVQ R9, CX
|
||||
ANDQ $-16, CX // cx == data_len - (data_len % 16)
|
||||
|
||||
// for r10 = 0; r10 < cx; r10 += 16 {...
|
||||
XORQ R10, R10
|
||||
|
||||
loop:
|
||||
CMPQ R10, CX
|
||||
JE tail
|
||||
MOVQ (SI)(R10*1), AX
|
||||
MOVQ 8(SI)(R10*1), DX
|
||||
ADDQ $16, R10
|
||||
|
||||
IMULQ R14, AX
|
||||
IMULQ R15, DX
|
||||
|
||||
ROLQ $31, AX
|
||||
ROLQ $33, DX
|
||||
|
||||
IMULQ R15, AX
|
||||
IMULQ R14, DX
|
||||
|
||||
XORQ AX, R12
|
||||
ROLQ $27, R12
|
||||
ADDQ R13, R12
|
||||
XORQ DX, R13
|
||||
ROLQ $31, R13
|
||||
LEAQ 0x52dce729(R12)(R12*4), R12
|
||||
|
||||
ADDQ R12, R13
|
||||
LEAQ 0x38495ab5(R13)(R13*4), R13
|
||||
|
||||
JMP loop
|
||||
|
||||
tail:
|
||||
MOVQ R9, CX
|
||||
ANDQ $0xf, CX
|
||||
JZ finalize // if len % 16 == 0
|
||||
|
||||
XORQ AX, AX
|
||||
|
||||
// poor man's binary tree jump table
|
||||
SUBQ $8, CX
|
||||
JZ tail8
|
||||
JG over8
|
||||
ADDQ $4, CX
|
||||
JZ tail4
|
||||
JG over4
|
||||
ADDQ $2, CX
|
||||
JL tail1
|
||||
JZ tail2
|
||||
JMP tail3
|
||||
|
||||
over4:
|
||||
SUBQ $2, CX
|
||||
JL tail5
|
||||
JZ tail6
|
||||
JMP tail7
|
||||
|
||||
over8:
|
||||
SUBQ $4, CX
|
||||
JZ tail12
|
||||
JG over12
|
||||
ADDQ $2, CX
|
||||
JL tail9
|
||||
JZ tail10
|
||||
JMP tail11
|
||||
|
||||
over12:
|
||||
SUBQ $2, CX
|
||||
JL tail13
|
||||
JZ tail14
|
||||
|
||||
tail15:
|
||||
MOVBQZX 14(SI)(R10*1), AX
|
||||
SALQ $16, AX
|
||||
|
||||
tail14:
|
||||
MOVW 12(SI)(R10*1), AX
|
||||
SALQ $32, AX
|
||||
JMP tail12
|
||||
|
||||
tail13:
|
||||
MOVBQZX 12(SI)(R10*1), AX
|
||||
SALQ $32, AX
|
||||
|
||||
tail12:
|
||||
MOVL 8(SI)(R10*1), DX
|
||||
ORQ DX, AX
|
||||
JMP fintailhigh
|
||||
|
||||
tail11:
|
||||
MOVBQZX 10(SI)(R10*1), AX
|
||||
SALQ $16, AX
|
||||
|
||||
tail10:
|
||||
MOVW 8(SI)(R10*1), AX
|
||||
JMP fintailhigh
|
||||
|
||||
tail9:
|
||||
MOVB 8(SI)(R10*1), AL
|
||||
|
||||
fintailhigh:
|
||||
IMULQ R15, AX
|
||||
ROLQ $33, AX
|
||||
IMULQ R14, AX
|
||||
XORQ AX, R13
|
||||
|
||||
tail8:
|
||||
MOVQ (SI)(R10*1), AX
|
||||
JMP fintaillow
|
||||
|
||||
tail7:
|
||||
MOVBQZX 6(SI)(R10*1), AX
|
||||
SALQ $16, AX
|
||||
|
||||
tail6:
|
||||
MOVW 4(SI)(R10*1), AX
|
||||
SALQ $32, AX
|
||||
JMP tail4
|
||||
|
||||
tail5:
|
||||
MOVBQZX 4(SI)(R10*1), AX
|
||||
SALQ $32, AX
|
||||
|
||||
tail4:
|
||||
MOVL (SI)(R10*1), DX
|
||||
ORQ DX, AX
|
||||
JMP fintaillow
|
||||
|
||||
tail3:
|
||||
MOVBQZX 2(SI)(R10*1), AX
|
||||
SALQ $16, AX
|
||||
|
||||
tail2:
|
||||
MOVW (SI)(R10*1), AX
|
||||
JMP fintaillow
|
||||
|
||||
tail1:
|
||||
MOVB (SI)(R10*1), AL
|
||||
|
||||
fintaillow:
|
||||
IMULQ R14, AX
|
||||
ROLQ $31, AX
|
||||
IMULQ R15, AX
|
||||
XORQ AX, R12
|
||||
|
||||
finalize:
|
||||
XORQ R9, R12
|
||||
XORQ R9, R13
|
||||
|
||||
ADDQ R13, R12
|
||||
ADDQ R12, R13
|
||||
|
||||
// fmix128 (both interleaved)
|
||||
MOVQ R12, DX
|
||||
MOVQ R13, AX
|
||||
|
||||
SHRQ $33, DX
|
||||
SHRQ $33, AX
|
||||
|
||||
XORQ DX, R12
|
||||
XORQ AX, R13
|
||||
|
||||
MOVQ $0xff51afd7ed558ccd, CX
|
||||
|
||||
IMULQ CX, R12
|
||||
IMULQ CX, R13
|
||||
|
||||
MOVQ R12, DX
|
||||
MOVQ R13, AX
|
||||
|
||||
SHRQ $33, DX
|
||||
SHRQ $33, AX
|
||||
|
||||
XORQ DX, R12
|
||||
XORQ AX, R13
|
||||
|
||||
MOVQ $0xc4ceb9fe1a85ec53, CX
|
||||
|
||||
IMULQ CX, R12
|
||||
IMULQ CX, R13
|
||||
|
||||
MOVQ R12, DX
|
||||
MOVQ R13, AX
|
||||
|
||||
SHRQ $33, DX
|
||||
SHRQ $33, AX
|
||||
|
||||
XORQ DX, R12
|
||||
XORQ AX, R13
|
||||
|
||||
ADDQ R13, R12
|
||||
ADDQ R12, R13
|
||||
|
||||
MOVQ R12, (BX)
|
||||
MOVQ R13, 8(BX)
|
||||
RET
|
||||
|
||||
36
internal/bloom/murmur128_decl.go
Normal file
36
internal/bloom/murmur128_decl.go
Normal file
@@ -0,0 +1,36 @@
|
||||
//go:build go1.5 && amd64
|
||||
// +build go1.5,amd64
|
||||
|
||||
package bloom
|
||||
|
||||
//go:noescape
|
||||
|
||||
// Sum128 returns the murmur3 sum of data. It is equivalent to the following
|
||||
// sequence (without the extra burden and the extra allocation):
|
||||
//
|
||||
// hasher := New128()
|
||||
// hasher.Write(data)
|
||||
// return hasher.Sum128()
|
||||
func Sum128(data []byte) (h1 uint64, h2 uint64)
|
||||
|
||||
//go:noescape
|
||||
|
||||
// SeedSum128 returns the murmur3 sum of data with digests initialized to seed1
|
||||
// and seed2.
|
||||
//
|
||||
// The canonical implementation allows only one uint32 seed; to imitate that
|
||||
// behavior, use the same, uint32-max seed for seed1 and seed2.
|
||||
//
|
||||
// This reads and processes the data in chunks of little endian uint64s;
|
||||
// thus, the returned hashes are portable across architectures.
|
||||
func SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64)
|
||||
|
||||
//go:noescape

// StringSum128 is the string version of Sum128.
//
// h1 and h2 are the two 64-bit words of the 128-bit result.
//
// This is a declaration without a Go body, compiled only under the
// go1.5 && amd64 build constraint of this file; the implementation is
// presumably supplied by the package's amd64 assembly (confirm against the
// accompanying .s file). The go:noescape directive above tells the compiler
// that data does not escape through this call.
func StringSum128(data string) (h1 uint64, h2 uint64)
|
||||
|
||||
//go:noescape

// SeedStringSum128 is the string version of SeedSum128.
//
// seed1 and seed2 initialize the two digest words; h1 and h2 are the two
// 64-bit words of the 128-bit result.
//
// This is a declaration without a Go body, compiled only under the
// go1.5 && amd64 build constraint of this file; the implementation is
// presumably supplied by the package's amd64 assembly (confirm against the
// accompanying .s file). The go:noescape directive above tells the compiler
// that data does not escape through this call.
func SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64)
|
||||
137
internal/bloom/murmur128_gen.go
Normal file
137
internal/bloom/murmur128_gen.go
Normal file
@@ -0,0 +1,137 @@
|
||||
//go:build !go1.5 || !amd64
|
||||
// +build !go1.5 !amd64
|
||||
|
||||
package bloom
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// SeedSum128 returns the murmur3 sum of data with digests initialized to seed1
|
||||
// and seed2.
|
||||
//
|
||||
// The canonical implementation allows only one uint32 seed; to imitate that
|
||||
// behavior, use the same, uint32-max seed for seed1 and seed2.
|
||||
//
|
||||
// This reads and processes the data in chunks of little endian uint64s;
|
||||
// thus, the returned hashes are portable across architectures.
|
||||
func SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64) {
|
||||
return SeedStringSum128(seed1, seed2, strslice(data))
|
||||
}
|
||||
|
||||
// Sum128 returns the murmur3 sum of data. It is equivalent to the following
|
||||
// sequence (without the extra burden and the extra allocation):
|
||||
//
|
||||
// hasher := New128()
|
||||
// hasher.Write(data)
|
||||
// return hasher.Sum128()
|
||||
func Sum128(data []byte) (h1 uint64, h2 uint64) {
|
||||
return SeedStringSum128(0, 0, strslice(data))
|
||||
}
|
||||
|
||||
// StringSum128 is the string version of Sum128.
|
||||
func StringSum128(data string) (h1 uint64, h2 uint64) {
|
||||
return SeedStringSum128(0, 0, data)
|
||||
}
|
||||
|
||||
// SeedStringSum128 is the string version of SeedSum128.
|
||||
func SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64) {
|
||||
h1, h2 = seed1, seed2
|
||||
clen := len(data)
|
||||
for len(data) >= 16 {
|
||||
// yes, this is faster than using binary.LittleEndian.Uint64
|
||||
k1 := uint64(data[0]) | uint64(data[1])<<8 | uint64(data[2])<<16 | uint64(data[3])<<24 | uint64(data[4])<<32 | uint64(data[5])<<40 | uint64(data[6])<<48 | uint64(data[7])<<56
|
||||
k2 := uint64(data[8]) | uint64(data[9])<<8 | uint64(data[10])<<16 | uint64(data[11])<<24 | uint64(data[12])<<32 | uint64(data[13])<<40 | uint64(data[14])<<48 | uint64(data[15])<<56
|
||||
|
||||
data = data[16:]
|
||||
|
||||
k1 *= c1_128
|
||||
k1 = bits.RotateLeft64(k1, 31)
|
||||
k1 *= c2_128
|
||||
h1 ^= k1
|
||||
|
||||
h1 = bits.RotateLeft64(h1, 27)
|
||||
h1 += h2
|
||||
h1 = h1*5 + 0x52dce729
|
||||
|
||||
k2 *= c2_128
|
||||
k2 = bits.RotateLeft64(k2, 33)
|
||||
k2 *= c1_128
|
||||
h2 ^= k2
|
||||
|
||||
h2 = bits.RotateLeft64(h2, 31)
|
||||
h2 += h1
|
||||
h2 = h2*5 + 0x38495ab5
|
||||
}
|
||||
|
||||
var k1, k2 uint64
|
||||
switch len(data) {
|
||||
case 15:
|
||||
k2 ^= uint64(data[14]) << 48
|
||||
fallthrough
|
||||
case 14:
|
||||
k2 ^= uint64(data[13]) << 40
|
||||
fallthrough
|
||||
case 13:
|
||||
k2 ^= uint64(data[12]) << 32
|
||||
fallthrough
|
||||
case 12:
|
||||
k2 ^= uint64(data[11]) << 24
|
||||
fallthrough
|
||||
case 11:
|
||||
k2 ^= uint64(data[10]) << 16
|
||||
fallthrough
|
||||
case 10:
|
||||
k2 ^= uint64(data[9]) << 8
|
||||
fallthrough
|
||||
case 9:
|
||||
k2 ^= uint64(data[8]) << 0
|
||||
|
||||
k2 *= c2_128
|
||||
k2 = bits.RotateLeft64(k2, 33)
|
||||
k2 *= c1_128
|
||||
h2 ^= k2
|
||||
|
||||
fallthrough
|
||||
|
||||
case 8:
|
||||
k1 ^= uint64(data[7]) << 56
|
||||
fallthrough
|
||||
case 7:
|
||||
k1 ^= uint64(data[6]) << 48
|
||||
fallthrough
|
||||
case 6:
|
||||
k1 ^= uint64(data[5]) << 40
|
||||
fallthrough
|
||||
case 5:
|
||||
k1 ^= uint64(data[4]) << 32
|
||||
fallthrough
|
||||
case 4:
|
||||
k1 ^= uint64(data[3]) << 24
|
||||
fallthrough
|
||||
case 3:
|
||||
k1 ^= uint64(data[2]) << 16
|
||||
fallthrough
|
||||
case 2:
|
||||
k1 ^= uint64(data[1]) << 8
|
||||
fallthrough
|
||||
case 1:
|
||||
k1 ^= uint64(data[0]) << 0
|
||||
k1 *= c1_128
|
||||
k1 = bits.RotateLeft64(k1, 31)
|
||||
k1 *= c2_128
|
||||
h1 ^= k1
|
||||
}
|
||||
|
||||
h1 ^= uint64(clen)
|
||||
h2 ^= uint64(clen)
|
||||
|
||||
h1 += h2
|
||||
h2 += h1
|
||||
|
||||
h1 = fmix64(h1)
|
||||
h2 = fmix64(h2)
|
||||
|
||||
h1 += h2
|
||||
h2 += h1
|
||||
|
||||
return h1, h2
|
||||
}
|
||||
Reference in New Issue
Block a user