首次提交代码

2025-11-03 14:37:59 +08:00
parent e60f64721c
commit d76c196fb1
311 changed files with 81709 additions and 0 deletions
--- a/internal/bloom/bloom.go
+++ b/internal/bloom/bloom.go
@@ -0,0 +1,345 @@
+package bloom
+
+import (
+	"encoding/binary"
+	"errors"
+	"math"
+	"os"
+	"sort"
+	"sync"
+
+	"github.com/RoaringBitmap/roaring/roaring64"
+	"github.com/klauspost/compress/zstd"
+	"google.golang.org/protobuf/proto"
+)
+
+/*
+主体代码来源于https://github.com/bits-and-blooms/bloom
+在此文件中，将bitmap的管理改用roaring bitmap实现
+algotao 2022-08-29
+*/
+
+// BloomFilter is a Bloom filter whose bit set is kept in a 64-bit roaring
+// bitmap. Insertions are asynchronous: Add queues the hash positions of an
+// element on chOne, and background goroutines batch, sort and merge them
+// into rb.
+type BloomFilter struct {
+	m                 uint64            // number of bit positions; hash locations are reduced modulo m
+	k                 uint64            // number of hash functions (bit positions per element)
+	elementsMax       uint64            // expected maximum number of elements
+	elementsAdded     uint64            // number of elements added so far
+	falsePositiveRate float64           // target false-positive rate
+	rb                *roaring64.Bitmap // the bit set
+	chOne             chan []uint64     // receives the hash positions of each inserted element
+	chInsert          chan []uint64     // receives sorted batches of positions to merge into rb
+	chSortJobQuota    chan int          // token pool limiting the number of concurrent sort jobs
+	buf               []uint64          // positions accumulated for the next batch
+	wgJobs            sync.WaitGroup    // counts batches that have not been merged into rb yet
+}
+
+// BloomFilterStat is a snapshot of a BloomFilter's parameters and counters,
+// as returned by GetStat.
+type BloomFilterStat struct {
+	M                 uint64  // number of bit positions
+	K                 uint64  // number of hash functions
+	ElementsMax       uint64  // expected maximum number of elements
+	ElementsAdded     uint64  // number of elements added so far
+	FalsePositiveRate float64 // target false-positive rate
+}
+
+const (
+	// headerVersion1 is the Version value written to the file header by
+	// SaveToFile and the only one accepted by LoadFromFile.
+	headerVersion1 = 1
+)
+
+// bitmapFileHeader is the fixed-size prefix of a saved filter file. It is
+// written and read with encoding/binary in big-endian order.
+type bitmapFileHeader struct {
+	Size uint64 // length in bytes of the protobuf-encoded Header that follows
+}
+
+// NewWithEstimates creates a BloomFilter sized for e expected elements and a
+// target false-positive rate of fr. The bit count and hash count are derived
+// with EstimateParameters.
+func NewWithEstimates(e uint64, fr float64) *BloomFilter {
+	bitCount, hashCount := EstimateParameters(e, fr)
+	filter := newBloomFilter(bitCount, hashCount, e, fr)
+	return filter
+}
+
+// new 创建一个新的BloomFilter，具有 _m_ bits 和 _k_ hashing 函数
+func newBloomFilter(m uint64, k uint64, e uint64, fr float64) *BloomFilter {
+	b := &BloomFilter{
+		m:                 max(1, m),
+		k:                 max(1, k),
+		elementsMax:       e,
+		falsePositiveRate: fr,
+		rb:                roaring64.New(),
+		chOne:             make(chan []uint64, 1024*1024), // 索引缓冲区chan
+		chInsert:          make(chan []uint64, 2),         // 插入队列
+		chSortJobQuota:    make(chan int, 8),              // 排序队列
+	}
+
+	for i := 0; i < cap(b.chSortJobQuota); i++ {
+		b.chSortJobQuota <- 0
+	}
+
+	//log.Printf("Init quota len(%v), cap(%v)\n", len(b.chSortJobQuota), cap(b.chSortJobQuota))
+
+	go b.consumeOne()
+	go b.consumeInsert()
+
+	return b
+}
+
+// EstimateParameters returns the number of bits m and the number of hash
+// functions k for a Bloom filter expected to hold n elements with
+// false-positive rate p.
+//
+// n must be positive and p must lie strictly between 0 and 1. For any other
+// input the formulas yield NaN, infinite or negative values whose conversion
+// to uint64 is implementation-defined, so the minimal pair (1, 1) is
+// returned instead.
+func EstimateParameters(n uint64, p float64) (m uint64, k uint64) {
+	// The negated form also rejects NaN, which fails every comparison.
+	if n == 0 || !(p > 0 && p < 1) {
+		return 1, 1
+	}
+	m = uint64(math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(math.Log(2), 2)))
+	k = uint64(math.Ceil(math.Log(2) * float64(m) / float64(n)))
+	return m, k
+}
+
+// location maps probe number i of the four-word base hash h to a bit
+// position in [0, m).
+func (b *BloomFilter) location(h [4]uint64, i uint64) uint64 {
+	parity := i % 2
+	base := h[parity]
+	step := h[2+((i+parity)%4)/2]
+	return (base + i*step) % b.m
+}
+
+// baseHashes 生成4个hash值，用于生产key
+func (b *BloomFilter) baseHashes(data []byte) [4]uint64 {
+	h := New128()
+	h.Write(data)
+
+	h1, h2 := h.Sum128()
+
+	h.Write([]byte{1})
+	h3, h4 := h.Sum128()
+
+	return [4]uint64{
+		h1, h2, h3, h4,
+	}
+}
+
+// consumeOne drains chOne. Each received slice holds the bit positions of one
+// element; they are appended to b.buf. When the buffer reaches batchSize
+// positions, or an empty slice (the Flush marker) arrives, the buffer is
+// handed to a goroutine that sorts it and forwards it to chInsert.
+// The loop ends when chOne is closed.
+func (b *BloomFilter) consumeOne() {
+	// Half of chOne's capacity worth of elements, expressed in uint64 positions.
+	batchSize := cap(b.chOne) * int(b.k) / 2
+
+	for bits := range b.chOne {
+		// An empty slice is the Flush marker, not an element.
+		if len(bits) != 0 {
+			b.elementsAdded++
+		}
+
+		b.buf = append(b.buf, bits...)
+		if len(b.buf) >= batchSize || len(bits) == 0 {
+
+			// Detach the filled buffer and start a fresh one for later elements.
+			buf := b.buf[:]
+			b.buf = []uint64{}
+			// Released by consumeInsert once this batch has been merged.
+			b.wgJobs.Add(1)
+
+			// Flush did its own Add(1) before sending the marker; release that
+			// count here so only the batch's count above remains outstanding.
+			if len(bits) == 0 {
+				b.wgJobs.Done()
+			}
+
+			// Block until a sort token is available and take it.
+			<-b.chSortJobQuota
+
+			go func() {
+				sort.Slice(buf, func(i, j int) bool { return buf[i] < buf[j] })
+
+				// Hand the sorted batch to the insert goroutine.
+				b.chInsert <- buf
+
+				// Return the sort token.
+				b.chSortJobQuota <- 1
+			}()
+
+		}
+	}
+}
+
+// consumeInsert merges each sorted batch received on chInsert into the bitmap
+// and marks the batch as finished on wgJobs. It returns when chInsert is
+// closed.
+func (b *BloomFilter) consumeInsert() {
+	for {
+		batch, open := <-b.chInsert
+		if !open {
+			return
+		}
+		b.rb.AddMany(batch)
+		b.wgJobs.Done()
+	}
+}
+
+// Add computes the k bit positions of data and queues them on chOne for the
+// background goroutines; the bitmap itself is not updated by this call.
+// It returns the receiver so calls can be chained.
+func (b *BloomFilter) Add(data []byte) *BloomFilter {
+	hashes := b.baseHashes(data)
+
+	positions := make([]uint64, 0, b.k)
+	for probe := uint64(0); probe < b.k; probe++ {
+		positions = append(positions, b.location(hashes, probe))
+	}
+
+	b.chOne <- positions
+	return b
+}
+
+// AddString is Add for a string key.
+func (b *BloomFilter) AddString(data string) *BloomFilter {
+	raw := []byte(data)
+	return b.Add(raw)
+}
+
+// Test 如果命中Hash位图，则返回真 (有误匹配率)
+func (b *BloomFilter) Test(data []byte) bool {
+	h := b.baseHashes(data)
+	for i := uint64(0); i < b.k; i++ {
+		if !b.rb.Contains(b.location(h, i)) {
+			return false
+		}
+	}
+	return true
+}
+
+// TestString is Test for a string key.
+func (b *BloomFilter) TestString(data string) bool {
+	raw := []byte(data)
+	return b.Test(raw)
+}
+
+// Flush forces the buffered bit positions into the bitmap and blocks until
+// every outstanding batch has been merged.
+func (b *BloomFilter) Flush() {
+	// Keep the counter above zero until consumeOne has seen the marker;
+	// consumeOne releases this count when it receives the empty slice.
+	b.wgJobs.Add(1)
+
+	// An empty slice is the Flush marker.
+	b.chOne <- []uint64{}
+
+	b.wgJobs.Wait()
+}
+
+// free shuts the filter down: it closes chOne and chInsert, which ends the
+// consumeOne and consumeInsert loops, and clears the bitmap. Sending on the
+// closed channels afterwards (for example via Add or Flush) panics.
+func (b *BloomFilter) free() {
+	close(b.chOne)
+	close(b.chInsert)
+	b.rb.Clear()
+}
+
+// Iterator returns an iterator over the underlying bitmap.
+func (b *BloomFilter) Iterator() roaring64.IntPeekable64 {
+	return b.rb.Iterator()
+}
+
+// GetSizeInBytes returns the serialized size of the underlying bitmap, as
+// reported by its GetSerializedSizeInBytes method.
+func (b *BloomFilter) GetSizeInBytes() uint64 {
+	return b.rb.GetSerializedSizeInBytes()
+}
+
+// 获得统计信息，主要用于运行期间获取状态
+func (b *BloomFilter) GetStat() BloomFilterStat {
+	return BloomFilterStat{
+		M:                 b.m,
+		K:                 b.k,
+		ElementsMax:       b.elementsMax,
+		ElementsAdded:     b.elementsAdded,
+		FalsePositiveRate: b.falsePositiveRate,
+	}
+}
+
+// SaveToFile flushes pending insertions and writes the filter to filename.
+//
+// File layout: an 8-byte big-endian length, the protobuf-encoded Header of
+// that length, then the zstd-compressed roaring bitmap.
+//
+// Once the bitmap write has been attempted the filter is released with free,
+// whether or not the write succeeded, and must not be used afterwards.
+// Errors from closing the compressor and the file are reported, because a
+// failed close means the data on disk is incomplete.
+func (b *BloomFilter) SaveToFile(filename string) (err error) {
+	b.Flush()
+
+	headerPB := &Header{
+		Version:           headerVersion1,
+		M:                 b.m,
+		K:                 b.k,
+		ElementsMax:       b.elementsMax,
+		ElementsAdded:     b.elementsAdded,
+		FalsePositiveRate: b.falsePositiveRate,
+	}
+	headerData, err := proto.Marshal(headerPB)
+	if err != nil {
+		return err
+	}
+
+	fi, err := os.Create(filename)
+	if err != nil {
+		return err
+	}
+	// Report a close failure unless an earlier error is already being returned.
+	defer func() {
+		if cerr := fi.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	fh := bitmapFileHeader{
+		Size: uint64(len(headerData)),
+	}
+
+	// File header: length of the protobuf header.
+	if err = binary.Write(fi, binary.BigEndian, fh); err != nil {
+		return err
+	}
+
+	// Protobuf header bytes, written verbatim.
+	if _, err = fi.Write(headerData); err != nil {
+		return err
+	}
+
+	b.rb.RunOptimize()
+
+	zw, err := zstd.NewWriter(fi)
+	if err != nil {
+		return err
+	}
+
+	_, err = b.rb.WriteTo(zw)
+
+	// Close flushes the remaining compressed data; it must run before the
+	// file is closed and its error must not be lost.
+	if cerr := zw.Close(); err == nil {
+		err = cerr
+	}
+
+	b.free()
+
+	return err
+}
+
+// LoadFromFile reads a filter written by SaveToFile.
+//
+// When headerOnly is true only the header fields are populated and the
+// bitmap is left nil. In both modes the returned filter has no channels or
+// background goroutines, so it is suitable for Test/GetStat style reads only.
+//
+// An error is returned if the file cannot be read, the header is truncated or
+// larger than the file, the version is not headerVersion1, or the header
+// carries M or K equal to zero (which newBloomFilter never produces).
+func LoadFromFile(filename string, headerOnly bool) (bft *BloomFilter, err error) {
+	fi, err := os.Open(filename)
+	if err != nil {
+		return nil, err
+	}
+	defer fi.Close()
+
+	fh := bitmapFileHeader{}
+	if err = binary.Read(fi, binary.BigEndian, &fh); err != nil {
+		return nil, err
+	}
+
+	// The header length comes from the file itself; refuse lengths that
+	// cannot fit in the file before allocating a buffer for them.
+	st, err := fi.Stat()
+	if err != nil {
+		return nil, err
+	}
+	if st.Mode().IsRegular() && fh.Size > uint64(st.Size()) {
+		return nil, errors.New("unknown file format")
+	}
+
+	// binary.Read fills the whole slice or fails, unlike a single Read call,
+	// and mirrors how SaveToFile's header is laid out.
+	headerData := make([]byte, fh.Size)
+	if err = binary.Read(fi, binary.BigEndian, headerData); err != nil {
+		return nil, errors.Join(errors.New("unknown file format"), err)
+	}
+
+	headerPB := &Header{}
+	if err = proto.Unmarshal(headerData, headerPB); err != nil {
+		return nil, err
+	}
+
+	if headerPB.Version != headerVersion1 {
+		return nil, errors.New("unsupported version")
+	}
+
+	// location reduces modulo m, so a zero M would panic on the first Test.
+	if headerPB.GetM() == 0 || headerPB.GetK() == 0 {
+		return nil, errors.New("unknown file format")
+	}
+
+	bft = &BloomFilter{}
+	bft.m = headerPB.GetM()
+	bft.k = headerPB.GetK()
+	bft.elementsMax = headerPB.GetElementsMax()
+	bft.elementsAdded = headerPB.GetElementsAdded()
+	bft.falsePositiveRate = headerPB.GetFalsePositiveRate()
+
+	if headerOnly {
+		return bft, nil
+	}
+
+	bft.rb = roaring64.New()
+
+	zr, err := zstd.NewReader(fi)
+	if err != nil {
+		return nil, err
+	}
+	defer zr.Close()
+
+	if _, err = bft.rb.ReadFrom(zr); err != nil {
+		return nil, err
+	}
+
+	return bft, nil
+}
--- a/internal/bloom/bloom_test.go
+++ b/internal/bloom/bloom_test.go
@@ -0,0 +1,127 @@
+package bloom
+
+import (
+	"math/rand"
+	"strconv"
+	"testing"
+
+	"github.com/RoaringBitmap/roaring/roaring64"
+)
+
+// TestBasic adds three names, flushes, and checks that they are reported as
+// present while a name that was never added is reported as absent.
+func TestBasic(t *testing.T) {
+	f := NewWithEstimates(1000000000, 0.00001)
+
+	added := []string{"Bess", "Jane", "Tony"}
+	absent := "Algo"
+
+	for _, name := range added {
+		f.AddString(name)
+	}
+	f.Flush()
+
+	for _, name := range added {
+		if !f.TestString(name) {
+			t.Errorf("%v should be in.", name)
+		}
+	}
+	if f.TestString(absent) {
+		t.Errorf("%v should be not in.", absent)
+	}
+}
+
+func TestFile(t *testing.T) {
+	f := NewWithEstimates(1000000000, 0.00001)
+	n1 := "Bess"
+	n2 := "Jane"
+	n3 := "Tony"
+	n4 := "Algo"
+	f.AddString(n1)
+	f.AddString(n2)
+	f.AddString(n3)
+	const tmpfile = "//tmp//bloomtest.bin"
+
+	err := f.SaveToFile(tmpfile)
+	if err != nil {
+		t.Errorf("save file error %v", err)
+	}
+
+	f, err = LoadFromFile(tmpfile, false)
+	if err != nil {
+		t.Errorf("load file error %v", err)
+	}
+
+	n1b := f.TestString(n1)
+	n2b := f.TestString(n2)
+	n3b := f.TestString(n3)
+	n4b := f.TestString(n4)
+	if !n1b {
+		t.Errorf("%v should be in.", n1)
+	}
+	if !n2b {
+		t.Errorf("%v should be in.", n2)
+	}
+	if !n3b {
+		t.Errorf("%v should be in.", n3)
+	}
+	if n4b {
+		t.Errorf("%v should be not in.", n4)
+	}
+}
+
+func Test10W(t *testing.T) {
+	f := NewWithEstimates(100000, 0.00001)
+	for i := uint64(0); i < 100000; i++ {
+		f.AddString(strconv.FormatUint(i, 10))
+	}
+
+	const tmpfile = "//tmp//bloomtest.bin"
+	err := f.SaveToFile(tmpfile)
+	if err != nil {
+		t.Errorf("save file error %v", err)
+	}
+
+	f, err = LoadFromFile(tmpfile, false)
+	if err != nil {
+		t.Errorf("load file error %v", err)
+	}
+
+	for i := uint64(0); i < 100000; i++ {
+		ns := f.TestString(strconv.FormatUint(i, 10))
+		if !ns {
+			t.Errorf("%v should be in.", ns)
+		}
+	}
+}
+
+// TestStat prints the parameters chosen for a large, low-error filter.
+// The output is informational, so it is logged rather than reported as a
+// failure (visible with go test -v).
+func TestStat(t *testing.T) {
+	f := NewWithEstimates(1000000000, 0.00000001)
+	t.Logf("%v", f.GetStat())
+}
+
+// BenchmarkNormal measures AddString on a filter sized for 1e9 elements,
+// using the decimal form of the iteration counter as the key.
+func BenchmarkNormal(b *testing.B) {
+	filter := NewWithEstimates(1000000000, 0.00001)
+	for i := 0; i < b.N; i++ {
+		key := strconv.FormatUint(uint64(i), 10)
+		filter.AddString(key)
+	}
+}
+
+// BenchmarkRoaringBitmap measures raw insertion of pseudo-random values into
+// a roaring64 bitmap and then logs the bitmap's size. The size is
+// informational, so it is logged instead of being reported as a failure.
+func BenchmarkRoaringBitmap(b *testing.B) {
+	f := roaring64.New()
+	r := rand.New(rand.NewSource(99)) // fixed seed keeps runs comparable
+	x := uint64(0)
+
+	b.Run("Add", func(b *testing.B) {
+		for n := 0; n < b.N; n++ {
+			x = r.Uint64() % 23962645944
+			f.Add(x)
+		}
+	})
+	b.Logf("bitmap size in bytes: %v", f.GetSizeInBytes())
+}
--- a/internal/bloom/bloomfile.pb.go
+++ b/internal/bloom/bloomfile.pb.go
@@ -0,0 +1,167 @@
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// versions:
+// 	protoc-gen-go v1.36.10
+// 	protoc        v5.29.4
+// source: bloomfile.proto
+
+package bloom
+
+import (
+	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
+	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
+	reflect "reflect"
+	sync "sync"
+	unsafe "unsafe"
+)
+
+const (
+	// Verify that this generated code is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
+	// Verify that runtime/protoimpl is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
+)
+
+type Header struct {
+	state             protoimpl.MessageState `protogen:"open.v1"`
+	Version           uint64                 `protobuf:"varint,1,opt,name=Version,proto3" json:"Version,omitempty"`                      // 版本，当前为1
+	M                 uint64                 `protobuf:"varint,2,opt,name=M,proto3" json:"M,omitempty"`                                  // 存贮空间上限
+	K                 uint64                 `protobuf:"varint,3,opt,name=K,proto3" json:"K,omitempty"`                                  // hash函数个数
+	ElementsMax       uint64                 `protobuf:"varint,4,opt,name=ElementsMax,proto3" json:"ElementsMax,omitempty"`              // 创建空间元素数量
+	ElementsAdded     uint64                 `protobuf:"varint,5,opt,name=ElementsAdded,proto3" json:"ElementsAdded,omitempty"`          // 实际加入元素数量
+	FalsePositiveRate float64                `protobuf:"fixed64,6,opt,name=FalsePositiveRate,proto3" json:"FalsePositiveRate,omitempty"` // 假阳率
+	unknownFields     protoimpl.UnknownFields
+	sizeCache         protoimpl.SizeCache
+}
+
+func (x *Header) Reset() {
+	*x = Header{}
+	mi := &file_bloomfile_proto_msgTypes[0]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *Header) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*Header) ProtoMessage() {}
+
+func (x *Header) ProtoReflect() protoreflect.Message {
+	mi := &file_bloomfile_proto_msgTypes[0]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use Header.ProtoReflect.Descriptor instead.
+func (*Header) Descriptor() ([]byte, []int) {
+	return file_bloomfile_proto_rawDescGZIP(), []int{0}
+}
+
+func (x *Header) GetVersion() uint64 {
+	if x != nil {
+		return x.Version
+	}
+	return 0
+}
+
+func (x *Header) GetM() uint64 {
+	if x != nil {
+		return x.M
+	}
+	return 0
+}
+
+func (x *Header) GetK() uint64 {
+	if x != nil {
+		return x.K
+	}
+	return 0
+}
+
+func (x *Header) GetElementsMax() uint64 {
+	if x != nil {
+		return x.ElementsMax
+	}
+	return 0
+}
+
+func (x *Header) GetElementsAdded() uint64 {
+	if x != nil {
+		return x.ElementsAdded
+	}
+	return 0
+}
+
+func (x *Header) GetFalsePositiveRate() float64 {
+	if x != nil {
+		return x.FalsePositiveRate
+	}
+	return 0
+}
+
+var File_bloomfile_proto protoreflect.FileDescriptor
+
+const file_bloomfile_proto_rawDesc = "" +
+	"\n" +
+	"\x0fbloomfile.proto\x12\x05bloom\"\xb4\x01\n" +
+	"\x06Header\x12\x18\n" +
+	"\aVersion\x18\x01 \x01(\x04R\aVersion\x12\f\n" +
+	"\x01M\x18\x02 \x01(\x04R\x01M\x12\f\n" +
+	"\x01K\x18\x03 \x01(\x04R\x01K\x12 \n" +
+	"\vElementsMax\x18\x04 \x01(\x04R\vElementsMax\x12$\n" +
+	"\rElementsAdded\x18\x05 \x01(\x04R\rElementsAdded\x12,\n" +
+	"\x11FalsePositiveRate\x18\x06 \x01(\x01R\x11FalsePositiveRateB\x16Z\x14internal/bloom;bloomb\x06proto3"
+
+var (
+	file_bloomfile_proto_rawDescOnce sync.Once
+	file_bloomfile_proto_rawDescData []byte
+)
+
+func file_bloomfile_proto_rawDescGZIP() []byte {
+	file_bloomfile_proto_rawDescOnce.Do(func() {
+		file_bloomfile_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_bloomfile_proto_rawDesc), len(file_bloomfile_proto_rawDesc)))
+	})
+	return file_bloomfile_proto_rawDescData
+}
+
+var file_bloomfile_proto_msgTypes = make([]protoimpl.MessageInfo, 1)
+var file_bloomfile_proto_goTypes = []any{
+	(*Header)(nil), // 0: bloom.Header
+}
+var file_bloomfile_proto_depIdxs = []int32{
+	0, // [0:0] is the sub-list for method output_type
+	0, // [0:0] is the sub-list for method input_type
+	0, // [0:0] is the sub-list for extension type_name
+	0, // [0:0] is the sub-list for extension extendee
+	0, // [0:0] is the sub-list for field type_name
+}
+
+func init() { file_bloomfile_proto_init() }
+func file_bloomfile_proto_init() {
+	if File_bloomfile_proto != nil {
+		return
+	}
+	type x struct{}
+	out := protoimpl.TypeBuilder{
+		File: protoimpl.DescBuilder{
+			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
+			RawDescriptor: unsafe.Slice(unsafe.StringData(file_bloomfile_proto_rawDesc), len(file_bloomfile_proto_rawDesc)),
+			NumEnums:      0,
+			NumMessages:   1,
+			NumExtensions: 0,
+			NumServices:   0,
+		},
+		GoTypes:           file_bloomfile_proto_goTypes,
+		DependencyIndexes: file_bloomfile_proto_depIdxs,
+		MessageInfos:      file_bloomfile_proto_msgTypes,
+	}.Build()
+	File_bloomfile_proto = out.File
+	file_bloomfile_proto_goTypes = nil
+	file_bloomfile_proto_depIdxs = nil
+}
--- a/internal/bloom/bloomfile.proto
+++ b/internal/bloom/bloomfile.proto
@@ -0,0 +1,15 @@
+syntax = "proto3";
+
+package bloom;
+
+option go_package = "internal/bloom;bloom";
+
+
+message Header {
+	uint64 Version                    = 1 ;  // format version, currently 1
+    uint64 M                          = 2 ;  // number of bit positions
+    uint64 K                          = 3 ;  // number of hash functions
+	uint64 ElementsMax                = 4 ;  // element count the filter was sized for
+    uint64 ElementsAdded              = 5 ;  // number of elements actually added
+    double FalsePositiveRate          = 6 ;  // false-positive rate
+}
--- a/internal/bloom/makeproto.sh
+++ b/internal/bloom/makeproto.sh
@@ -0,0 +1 @@
+protoc --proto_path=. *.proto --go_out=. --go_opt=paths=source_relative
--- a/internal/bloom/murmur.go
+++ b/internal/bloom/murmur.go
@@ -0,0 +1,72 @@
+// Copyright 2013, Sébastien Paolacci. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package bloom includes an amd64 native (Go generic fallback)
+// implementation of the murmur3 hash algorithm for strings and slices.
+// (This hashing code originally carried the package name murmur3.)
+//
+// Assembly is provided for amd64 go1.5+; pull requests are welcome for other
+// architectures.
+package bloom
+
+import (
+	"reflect"
+	"unsafe"
+)
+
+// bmixer is the block-level part of a hash that digest delegates to.
+type bmixer interface {
+	// bmix consumes data from p and returns the bytes it did not consume.
+	bmix(p []byte) (tail []byte)
+	// Size returns the hash's block size in bytes, as used by digest.Write.
+	Size() (n int)
+	// reset restores the implementation's running hash state.
+	reset()
+}
+
+// digest holds the streaming state shared by hash implementations: the total
+// number of bytes written and the pending bytes that do not yet form a full
+// block. Block mixing is delegated to the embedded bmixer.
+type digest struct {
+	clen int      // Digested input cumulative length.
+	tail []byte   // 0 to Size()-1 bytes view of `buf'.
+	buf  [16]byte // Expected (but not required) to be Size() large.
+	bmixer
+}
+
+// BlockSize always returns 1.
+func (d *digest) BlockSize() int { return 1 }
+
+// Write adds p to the running hash. Pending bytes left over from earlier
+// calls are combined with the start of p, all complete blocks are passed to
+// bmix, and the remainder is kept in d.buf for the next call.
+// It always returns len(p) and a nil error.
+func (d *digest) Write(p []byte) (n int, err error) {
+	n = len(p)
+	d.clen += n
+
+	if len(d.tail) > 0 {
+		// Stick back pending bytes.
+		nfree := d.Size() - len(d.tail) // nfree ∈ [1, d.Size()-1].
+		if nfree < len(p) {
+			// One full block can be formed.
+			block := append(d.tail, p[:nfree]...)
+			p = p[nfree:]
+			_ = d.bmix(block) // No tail.
+		} else {
+			// Tail's buf is large enough to prevent reallocs.
+			p = append(d.tail, p...)
+		}
+	}
+
+	d.tail = d.bmix(p)
+
+	// Keep own copy of the 0 to Size()-1 pending bytes.
+	nn := copy(d.buf[:], d.tail)
+	d.tail = d.buf[:nn]
+
+	return n, nil
+}
+
+// Reset clears the byte count and pending bytes and resets the embedded
+// bmixer's running state.
+func (d *digest) Reset() {
+	d.clen = 0
+	d.tail = nil
+	d.bmixer.reset()
+}
+
+// strslice returns a string that shares slice's backing bytes instead of
+// copying them: the slice header's Data pointer and Len are copied into the
+// string's header. The result reflects later writes to slice, so it must
+// only be used while slice is left unmodified.
+//
+// NOTE(review): reflect.SliceHeader and reflect.StringHeader are deprecated
+// in recent Go releases in favor of unsafe.String and unsafe.SliceData;
+// consider migrating (the reflect import would then become unused).
+func strslice(slice []byte) string {
+	var str string
+	slicehdr := ((*reflect.SliceHeader)(unsafe.Pointer(&slice)))
+	strhdr := (*reflect.StringHeader)(unsafe.Pointer(&str))
+	strhdr.Data = slicehdr.Data
+	strhdr.Len = slicehdr.Len
+	return str
+}
--- a/internal/bloom/murmur128.go
+++ b/internal/bloom/murmur128.go
@@ -0,0 +1,182 @@
+package bloom
+
+import (
+	"hash"
+	"math/bits"
+)
+
+// Multiplication constants used when mixing the two 64-bit lanes of each
+// 16-byte block and of the final partial block.
+const (
+	c1_128 = 0x87c37b91114253d5
+	c2_128 = 0x4cf5ad432745937f
+)
+
+// Make sure interfaces are correctly implemented.
+var (
+	_ hash.Hash = new(digest128)
+	_ Hash128   = new(digest128)
+	_ bmixer    = new(digest128)
+)
+
+// Hash128 provides an interface for a streaming 128 bit hash.
+type Hash128 interface {
+	hash.Hash
+	Sum128() (uint64, uint64)
+}
+
+// digest128 represents a partial evaluation of a 128-bit hash.
+type digest128 struct {
+	digest
+	seed1 uint64 // Value h1 is restored to by reset.
+	seed2 uint64 // Value h2 is restored to by reset.
+	h1    uint64 // Unfinalized running hash part 1.
+	h2    uint64 // Unfinalized running hash part 2.
+}
+
+// SeedNew128 returns a Hash128 for streaming 128 bit sums with its internal
+// digests initialized to seed1 and seed2.
+//
+// The canonical implementation allows one only uint32 seed; to imitate that
+// behavior, use the same, uint32-max seed for seed1 and seed2.
+func SeedNew128(seed1, seed2 uint64) Hash128 {
+	d := &digest128{seed1: seed1, seed2: seed2}
+	// The embedded digest calls back into d for block mixing.
+	d.bmixer = d
+	// Reset loads the seeds into the running state.
+	d.Reset()
+	return d
+}
+
+// New128 returns a Hash128 for streaming 128 bit sums, with both seeds set
+// to zero.
+func New128() Hash128 {
+	return SeedNew128(0, 0)
+}
+
+// Size returns 16, the number of bytes Sum appends and the block length
+// consumed by bmix.
+func (d *digest128) Size() int { return 16 }
+
+// reset restores the running hash state to the two seeds.
+func (d *digest128) reset() { d.h1, d.h2 = d.seed1, d.seed2 }
+
+// Sum appends the current 128-bit hash to b as 16 bytes: h1 then h2, each
+// most significant byte first, and returns the extended slice.
+func (d *digest128) Sum(b []byte) []byte {
+	hi, lo := d.Sum128()
+	for _, word := range [2]uint64{hi, lo} {
+		for shift := 56; shift >= 0; shift -= 8 {
+			b = append(b, byte(word>>uint(shift)))
+		}
+	}
+	return b
+}
+
+// bmix folds every complete 16-byte block at the front of p into the running
+// state (d.h1, d.h2) and returns the unconsumed remainder, which is shorter
+// than 16 bytes.
+func (d *digest128) bmix(p []byte) (tail []byte) {
+	h1, h2 := d.h1, d.h2
+
+	for len(p) >= 16 {
+		// Read the block as two little-endian 64-bit words.
+		k1 := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 | uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
+		k2 := uint64(p[8]) | uint64(p[9])<<8 | uint64(p[10])<<16 | uint64(p[11])<<24 | uint64(p[12])<<32 | uint64(p[13])<<40 | uint64(p[14])<<48 | uint64(p[15])<<56
+		p = p[16:]
+
+		k1 *= c1_128
+		k1 = bits.RotateLeft64(k1, 31)
+		k1 *= c2_128
+		h1 ^= k1
+
+		h1 = bits.RotateLeft64(h1, 27)
+		h1 += h2
+		h1 = h1*5 + 0x52dce729
+
+		k2 *= c2_128
+		k2 = bits.RotateLeft64(k2, 33)
+		k2 *= c1_128
+		h2 ^= k2
+
+		h2 = bits.RotateLeft64(h2, 31)
+		h2 += h1
+		h2 = h2*5 + 0x38495ab5
+	}
+	d.h1, d.h2 = h1, h2
+	return p
+}
+
+// Sum128 returns the 128-bit hash of everything written so far as two 64-bit
+// halves. It works on copies of the running state and only reads d.tail and
+// d.clen, so more data can be written afterwards and summed again.
+func (d *digest128) Sum128() (h1, h2 uint64) {
+
+	h1, h2 = d.h1, d.h2
+
+	// Fold in the pending bytes: bytes 8..14 go into k2, bytes 0..7 into k1.
+	// Each case falls through so that all lower-indexed bytes are included.
+	var k1, k2 uint64
+	switch len(d.tail) & 15 {
+	case 15:
+		k2 ^= uint64(d.tail[14]) << 48
+		fallthrough
+	case 14:
+		k2 ^= uint64(d.tail[13]) << 40
+		fallthrough
+	case 13:
+		k2 ^= uint64(d.tail[12]) << 32
+		fallthrough
+	case 12:
+		k2 ^= uint64(d.tail[11]) << 24
+		fallthrough
+	case 11:
+		k2 ^= uint64(d.tail[10]) << 16
+		fallthrough
+	case 10:
+		k2 ^= uint64(d.tail[9]) << 8
+		fallthrough
+	case 9:
+		k2 ^= uint64(d.tail[8]) << 0
+
+		k2 *= c2_128
+		k2 = bits.RotateLeft64(k2, 33)
+		k2 *= c1_128
+		h2 ^= k2
+
+		fallthrough
+
+	case 8:
+		k1 ^= uint64(d.tail[7]) << 56
+		fallthrough
+	case 7:
+		k1 ^= uint64(d.tail[6]) << 48
+		fallthrough
+	case 6:
+		k1 ^= uint64(d.tail[5]) << 40
+		fallthrough
+	case 5:
+		k1 ^= uint64(d.tail[4]) << 32
+		fallthrough
+	case 4:
+		k1 ^= uint64(d.tail[3]) << 24
+		fallthrough
+	case 3:
+		k1 ^= uint64(d.tail[2]) << 16
+		fallthrough
+	case 2:
+		k1 ^= uint64(d.tail[1]) << 8
+		fallthrough
+	case 1:
+		k1 ^= uint64(d.tail[0]) << 0
+		k1 *= c1_128
+		k1 = bits.RotateLeft64(k1, 31)
+		k1 *= c2_128
+		h1 ^= k1
+	}
+
+	// Finalization: mix in the total input length, then cross-add and
+	// scramble both halves.
+	h1 ^= uint64(d.clen)
+	h2 ^= uint64(d.clen)
+
+	h1 += h2
+	h2 += h1
+
+	h1 = fmix64(h1)
+	h2 = fmix64(h2)
+
+	h1 += h2
+	h2 += h1
+
+	return h1, h2
+}
+
+// fmix64 is the 64-bit finalization mix: two rounds of xor-shift-right-33
+// followed by a multiplication, then one last xor-shift.
+func fmix64(k uint64) uint64 {
+	for _, mul := range [...]uint64{0xff51afd7ed558ccd, 0xc4ceb9fe1a85ec53} {
+		k ^= k >> 33
+		k *= mul
+	}
+	return k ^ k>>33
+}
--- a/internal/bloom/murmur128_amd64.s
+++ b/internal/bloom/murmur128_amd64.s
@@ -0,0 +1,247 @@
+// +build go1.5,amd64
+
+// SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64)
+//
+// Loads the seeds, the slice's base pointer and length, and the address of
+// the h1 result slot into the registers sum128internal expects, then jumps
+// to it; sum128internal stores both results and returns.
+TEXT ·SeedSum128(SB), $0-56
+	MOVQ seed1+0(FP), R12
+	MOVQ seed2+8(FP), R13
+	MOVQ data_base+16(FP), SI
+	MOVQ data_len+24(FP), R9
+	LEAQ h1+40(FP), BX
+	JMP  sum128internal<>(SB)
+
+// Sum128(data []byte) (h1 uint64, h2 uint64)
+//
+// Same as SeedSum128 with both seed registers cleared to zero.
+TEXT ·Sum128(SB), $0-40
+	XORQ R12, R12
+	XORQ R13, R13
+	MOVQ data_base+0(FP), SI
+	MOVQ data_len+8(FP), R9
+	LEAQ h1+24(FP), BX
+	JMP  sum128internal<>(SB)
+
+// SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64)
+//
+// String variant of SeedSum128: the argument has only a base pointer and a
+// length, so the result slot sits at offset 32 instead of 40.
+TEXT ·SeedStringSum128(SB), $0-48
+	MOVQ seed1+0(FP), R12
+	MOVQ seed2+8(FP), R13
+	MOVQ data_base+16(FP), SI
+	MOVQ data_len+24(FP), R9
+	LEAQ h1+32(FP), BX
+	JMP  sum128internal<>(SB)
+
+// StringSum128(data string) (h1 uint64, h2 uint64)
+//
+// String variant of Sum128: both seed registers are cleared to zero.
+TEXT ·StringSum128(SB), $0-32
+	XORQ R12, R12
+	XORQ R13, R13
+	MOVQ data_base+0(FP), SI
+	MOVQ data_len+8(FP), R9
+	LEAQ h1+16(FP), BX
+	JMP  sum128internal<>(SB)
+
+// sum128internal is the shared hashing routine reached by JMP from the four
+// exported entry points above. It keeps the two running hash halves in R12
+// and R13 and stores them through BX before returning.
+//
+// Expects:
+// R12 == h1 uint64 seed
+// R13 == h2 uint64 seed
+// SI  == &data
+// R9  == len(data)
+// BX  == &[2]uint64 return
+TEXT sum128internal<>(SB), $0
+	MOVQ $0x87c37b91114253d5, R14 // c1
+	MOVQ $0x4cf5ad432745937f, R15 // c2
+
+	MOVQ R9, CX
+	ANDQ $-16, CX // cx == data_len - (data_len % 16)
+
+	// for r10 = 0; r10 < cx; r10 += 16 {...
+	XORQ R10, R10
+
+	// Main loop: load 16 bytes as two 64-bit words (AX, DX), mix each with
+	// c1/c2, and fold them into R12 and R13.
+loop:
+	CMPQ R10, CX
+	JE   tail
+	MOVQ (SI)(R10*1), AX
+	MOVQ 8(SI)(R10*1), DX
+	ADDQ $16, R10
+
+	IMULQ R14, AX
+	IMULQ R15, DX
+
+	ROLQ  $31, AX
+	ROLQ  $33, DX
+
+	IMULQ R15, AX
+	IMULQ R14, DX
+
+	XORQ AX,  R12
+	ROLQ $27, R12
+	ADDQ R13, R12
+	XORQ DX,  R13
+	ROLQ $31, R13
+	LEAQ 0x52dce729(R12)(R12*4), R12
+
+	ADDQ R12, R13
+	LEAQ 0x38495ab5(R13)(R13*4), R13
+
+	JMP loop
+
+	// Tail: CX = len % 16. Dispatch on it to assemble the remaining bytes in
+	// AX. Bytes 8..14 are folded into R13 at fintailhigh, after which bytes
+	// 0..7 are folded into R12 at fintaillow.
+tail:
+	MOVQ R9, CX
+	ANDQ $0xf, CX
+	JZ   finalize // if len % 16 == 0
+
+	XORQ AX, AX
+
+	// poor man's binary tree jump table
+	SUBQ $8, CX
+	JZ   tail8
+	JG   over8
+	ADDQ $4, CX
+	JZ   tail4
+	JG   over4
+	ADDQ $2, CX
+	JL   tail1
+	JZ   tail2
+	JMP  tail3
+
+over4:
+	SUBQ $2, CX
+	JL   tail5
+	JZ   tail6
+	JMP  tail7
+
+over8:
+	SUBQ $4, CX
+	JZ   tail12
+	JG   over12
+	ADDQ $2, CX
+	JL   tail9
+	JZ   tail10
+	JMP  tail11
+
+over12:
+	SUBQ $2, CX
+	JL   tail13
+	JZ   tail14
+
+tail15:
+	MOVBQZX 14(SI)(R10*1), AX
+	SALQ    $16, AX
+
+tail14:
+	MOVW 12(SI)(R10*1), AX
+	SALQ $32, AX
+	JMP  tail12
+
+tail13:
+	MOVBQZX 12(SI)(R10*1), AX
+	SALQ    $32, AX
+
+tail12:
+	MOVL 8(SI)(R10*1), DX
+	ORQ  DX, AX
+	JMP  fintailhigh
+
+tail11:
+	MOVBQZX 10(SI)(R10*1), AX
+	SALQ    $16, AX
+
+tail10:
+	MOVW 8(SI)(R10*1), AX
+	JMP  fintailhigh
+
+tail9:
+	MOVB 8(SI)(R10*1), AL
+
+	// Mix the high tail word (bytes 8 and up) into R13, then fall through to
+	// load the full low 8 bytes.
+fintailhigh:
+	IMULQ R15, AX
+	ROLQ  $33, AX
+	IMULQ R14, AX
+	XORQ  AX, R13
+
+tail8:
+	MOVQ (SI)(R10*1), AX
+	JMP  fintaillow
+
+tail7:
+	MOVBQZX 6(SI)(R10*1), AX
+	SALQ    $16, AX
+
+tail6:
+	MOVW 4(SI)(R10*1), AX
+	SALQ $32, AX
+	JMP  tail4
+
+tail5:
+	MOVBQZX 4(SI)(R10*1), AX
+	SALQ    $32, AX
+
+tail4:
+	MOVL (SI)(R10*1), DX
+	ORQ  DX, AX
+	JMP  fintaillow
+
+tail3:
+	MOVBQZX 2(SI)(R10*1), AX
+	SALQ    $16, AX
+
+tail2:
+	MOVW (SI)(R10*1), AX
+	JMP  fintaillow
+
+tail1:
+	MOVB (SI)(R10*1), AL
+
+	// Mix the low tail word (bytes 0..7) into R12.
+fintaillow:
+	IMULQ R14, AX
+	ROLQ  $31, AX
+	IMULQ R15, AX
+	XORQ  AX, R12
+
+	// Finalization: XOR the input length into both halves, cross-add them,
+	// run the 64-bit final mix on each, cross-add again and store the result.
+finalize:
+	XORQ R9, R12
+	XORQ R9, R13
+
+	ADDQ R13, R12
+	ADDQ R12, R13
+
+	// fmix128 (both interleaved)
+	MOVQ  R12, DX
+	MOVQ  R13, AX
+
+	SHRQ  $33, DX
+	SHRQ  $33, AX
+
+	XORQ  DX, R12
+	XORQ  AX, R13
+
+	MOVQ  $0xff51afd7ed558ccd, CX
+
+	IMULQ CX, R12
+	IMULQ CX, R13
+
+	MOVQ  R12, DX
+	MOVQ  R13, AX
+
+	SHRQ  $33, DX
+	SHRQ  $33, AX
+
+	XORQ  DX, R12
+	XORQ  AX, R13
+
+	MOVQ  $0xc4ceb9fe1a85ec53, CX
+
+	IMULQ CX, R12
+	IMULQ CX, R13
+
+	MOVQ  R12, DX
+	MOVQ  R13, AX
+
+	SHRQ  $33, DX
+	SHRQ  $33, AX
+
+	XORQ  DX, R12
+	XORQ  AX, R13
+
+	ADDQ R13, R12
+	ADDQ R12, R13
+
+	MOVQ R12, (BX)
+	MOVQ R13, 8(BX)
+	RET
+	
--- a/internal/bloom/murmur128_decl.go
+++ b/internal/bloom/murmur128_decl.go
@@ -0,0 +1,36 @@
+//go:build go1.5 && amd64
+// +build go1.5,amd64
+
+package bloom
+
+//go:noescape
+
+// Sum128 returns the murmur3 sum of data. It is equivalent to the following
+// sequence (without the extra burden and the extra allocation):
+//
+//	hasher := New128()
+//	hasher.Write(data)
+//	return hasher.Sum128()
+func Sum128(data []byte) (h1 uint64, h2 uint64)
+
+//go:noescape
+
+// SeedSum128 returns the murmur3 sum of data with digests initialized to seed1
+// and seed2.
+//
+// The canonical implementation allows only one uint32 seed; to imitate that
+// behavior, use the same, uint32-max seed for seed1 and seed2.
+//
+// This reads and processes the data in chunks of little endian uint64s;
+// thus, the returned hashes are portable across architectures.
+func SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64)
+
+//go:noescape
+
+// StringSum128 is the string version of Sum128.
+func StringSum128(data string) (h1 uint64, h2 uint64)
+
+//go:noescape
+
+// SeedStringSum128 is the string version of SeedSum128.
+func SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64)
--- a/internal/bloom/murmur128_gen.go
+++ b/internal/bloom/murmur128_gen.go
@@ -0,0 +1,137 @@
+//go:build !go1.5 || !amd64
+// +build !go1.5 !amd64
+
+package bloom
+
+import "math/bits"
+
+// SeedSum128 returns the murmur3 sum of data with digests initialized to seed1
+// and seed2.
+//
+// The canonical implementation allows only one uint32 seed; to imitate that
+// behavior, use the same, uint32-max seed for seed1 and seed2.
+//
+// This reads and processes the data in chunks of little endian uint64s;
+// thus, the returned hashes are portable across architectures.
+func SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64) {
+	return SeedStringSum128(seed1, seed2, strslice(data))
+}
+
+// Sum128 returns the murmur3 sum of data. It is equivalent to the following
+// sequence (without the extra burden and the extra allocation):
+//
+//	hasher := New128()
+//	hasher.Write(data)
+//	return hasher.Sum128()
+func Sum128(data []byte) (h1 uint64, h2 uint64) {
+	return SeedStringSum128(0, 0, strslice(data))
+}
+
+// StringSum128 is the string version of Sum128.
+func StringSum128(data string) (h1 uint64, h2 uint64) {
+	return SeedStringSum128(0, 0, data)
+}
+
+// SeedStringSum128 is the string version of SeedSum128.
+//
+// It hashes data in one pass, starting from seed1 and seed2, and returns the
+// two 64-bit halves of the 128-bit result. This is the pure Go implementation
+// selected by the build constraint at the top of this file.
+func SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64) {
+	h1, h2 = seed1, seed2
+	// Remember the full length; data is re-sliced as blocks are consumed.
+	clen := len(data)
+	for len(data) >= 16 {
+		// yes, this is faster than using binary.LittleEndian.Uint64
+		k1 := uint64(data[0]) | uint64(data[1])<<8 | uint64(data[2])<<16 | uint64(data[3])<<24 | uint64(data[4])<<32 | uint64(data[5])<<40 | uint64(data[6])<<48 | uint64(data[7])<<56
+		k2 := uint64(data[8]) | uint64(data[9])<<8 | uint64(data[10])<<16 | uint64(data[11])<<24 | uint64(data[12])<<32 | uint64(data[13])<<40 | uint64(data[14])<<48 | uint64(data[15])<<56
+
+		data = data[16:]
+
+		k1 *= c1_128
+		k1 = bits.RotateLeft64(k1, 31)
+		k1 *= c2_128
+		h1 ^= k1
+
+		h1 = bits.RotateLeft64(h1, 27)
+		h1 += h2
+		h1 = h1*5 + 0x52dce729
+
+		k2 *= c2_128
+		k2 = bits.RotateLeft64(k2, 33)
+		k2 *= c1_128
+		h2 ^= k2
+
+		h2 = bits.RotateLeft64(h2, 31)
+		h2 += h1
+		h2 = h2*5 + 0x38495ab5
+	}
+
+	// Fold in the remaining 0..15 bytes: bytes 8..14 go into k2, bytes 0..7
+	// into k1. Each case falls through so all lower-indexed bytes are included.
+	var k1, k2 uint64
+	switch len(data) {
+	case 15:
+		k2 ^= uint64(data[14]) << 48
+		fallthrough
+	case 14:
+		k2 ^= uint64(data[13]) << 40
+		fallthrough
+	case 13:
+		k2 ^= uint64(data[12]) << 32
+		fallthrough
+	case 12:
+		k2 ^= uint64(data[11]) << 24
+		fallthrough
+	case 11:
+		k2 ^= uint64(data[10]) << 16
+		fallthrough
+	case 10:
+		k2 ^= uint64(data[9]) << 8
+		fallthrough
+	case 9:
+		k2 ^= uint64(data[8]) << 0
+
+		k2 *= c2_128
+		k2 = bits.RotateLeft64(k2, 33)
+		k2 *= c1_128
+		h2 ^= k2
+
+		fallthrough
+
+	case 8:
+		k1 ^= uint64(data[7]) << 56
+		fallthrough
+	case 7:
+		k1 ^= uint64(data[6]) << 48
+		fallthrough
+	case 6:
+		k1 ^= uint64(data[5]) << 40
+		fallthrough
+	case 5:
+		k1 ^= uint64(data[4]) << 32
+		fallthrough
+	case 4:
+		k1 ^= uint64(data[3]) << 24
+		fallthrough
+	case 3:
+		k1 ^= uint64(data[2]) << 16
+		fallthrough
+	case 2:
+		k1 ^= uint64(data[1]) << 8
+		fallthrough
+	case 1:
+		k1 ^= uint64(data[0]) << 0
+		k1 *= c1_128
+		k1 = bits.RotateLeft64(k1, 31)
+		k1 *= c2_128
+		h1 ^= k1
+	}
+
+	// Finalization: mix in the total input length, then cross-add and
+	// scramble both halves.
+	h1 ^= uint64(clen)
+	h2 ^= uint64(clen)
+
+	h1 += h2
+	h2 += h1
+
+	h1 = fmix64(h1)
+	h2 = fmix64(h2)
+
+	h1 += h2
+	h2 += h1
+
+	return h1, h2
+}
				`@@ -0,0 +1 @@`
				`protoc --proto_path=. *.proto --go_out=. --go_opt=paths=source_relative`