首次提交代码
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -25,3 +25,5 @@ go.work.sum
|
|||||||
# env file
|
# env file
|
||||||
.env
|
.env
|
||||||
|
|
||||||
|
# default build target
|
||||||
|
bloomtool
|
||||||
|
|||||||
14
go.mod
Normal file
14
go.mod
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
module git.algo.com.cn/public/bloomtool
|
||||||
|
|
||||||
|
go 1.23
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/RoaringBitmap/roaring v1.9.4
|
||||||
|
github.com/klauspost/compress v1.18.1
|
||||||
|
google.golang.org/protobuf v1.36.10
|
||||||
|
)
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/bits-and-blooms/bitset v1.12.0 // indirect
|
||||||
|
github.com/mschoch/smat v0.2.0 // indirect
|
||||||
|
)
|
||||||
22
go.sum
Normal file
22
go.sum
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ=
|
||||||
|
github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
|
||||||
|
github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA=
|
||||||
|
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||||
|
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||||
|
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||||
|
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
|
||||||
|
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
|
||||||
|
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
|
||||||
|
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
||||||
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
|
||||||
|
google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
|
||||||
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
26
help.go
Normal file
26
help.go
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RunHelp 帮助
|
||||||
|
func RunHelp(args ...string) error {
|
||||||
|
fmt.Println(strings.TrimSpace(usage))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
const usage = `
|
||||||
|
Usage: [[command] [arguments]]
|
||||||
|
|
||||||
|
The commands are:
|
||||||
|
|
||||||
|
makebloom Make bloom filter bitmap file
|
||||||
|
hittest Hittest text lines in bitmap
|
||||||
|
info Show bitmap file info
|
||||||
|
|
||||||
|
"help" is the default command.
|
||||||
|
|
||||||
|
Use "bloomtool [command] -help" for more information about a command.
|
||||||
|
`
|
||||||
88
hittest.go
Normal file
88
hittest.go
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"git.algo.com.cn/public/bloomtool/internal/bloom"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RunHitTest(args ...string) error {
|
||||||
|
fs := flag.NewFlagSet("hittest", flag.ExitOnError)
|
||||||
|
|
||||||
|
txtFile := fs.String("d", "", "device id filename")
|
||||||
|
bmpFile := fs.String("b", "", "bitmap filename")
|
||||||
|
stateFile := fs.String("s", "", "state filename for output")
|
||||||
|
filter := fs.Bool("f", false, "filter for hit only")
|
||||||
|
|
||||||
|
if err := fs.Parse(args); err != nil {
|
||||||
|
return err
|
||||||
|
} else if fs.NArg() > 0 || *txtFile == "" || *bmpFile == "" || *stateFile == "" {
|
||||||
|
fmt.Println(fs.NArg())
|
||||||
|
fs.Usage()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return hitTest(*txtFile, *bmpFile, *stateFile, *filter)
|
||||||
|
}
|
||||||
|
|
||||||
|
func hitTest(txtFile, bmpFile, stateFile string, filter bool) error {
|
||||||
|
|
||||||
|
slog.Info("load bitmap file", "filename", bmpFile)
|
||||||
|
bfile, err := bloom.LoadFromFile(bmpFile, false)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("open bitmap file error", "err", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Info("load text file", "filename", txtFile)
|
||||||
|
tfile, err := os.Open(txtFile)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("open text file error", "err", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer tfile.Close()
|
||||||
|
|
||||||
|
slog.Info("create state file", "filename", stateFile)
|
||||||
|
sfile, err := os.Create(stateFile)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("create state file error", "err", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer sfile.Close()
|
||||||
|
|
||||||
|
writer := bufio.NewWriter(sfile)
|
||||||
|
|
||||||
|
// 逐行读取
|
||||||
|
scanner := bufio.NewScanner(tfile)
|
||||||
|
lineCount := 1
|
||||||
|
lineText := ""
|
||||||
|
for scanner.Scan() {
|
||||||
|
if lineCount%100000 == 0 {
|
||||||
|
slog.Info("read line", "lineno", lineCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 撞库
|
||||||
|
lineText = scanner.Text()
|
||||||
|
hit := bfile.TestString(lineText)
|
||||||
|
intHit := 0
|
||||||
|
if hit {
|
||||||
|
intHit = 1
|
||||||
|
}
|
||||||
|
if filter {
|
||||||
|
if hit {
|
||||||
|
writer.WriteString(fmt.Sprintf("%v\n", lineText))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
writer.WriteString(fmt.Sprintf("%v\t%v\n", lineText, intHit))
|
||||||
|
}
|
||||||
|
|
||||||
|
lineCount++
|
||||||
|
}
|
||||||
|
writer.Flush()
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
38
info.go
Normal file
38
info.go
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"log/slog"
|
||||||
|
|
||||||
|
"git.algo.com.cn/public/bloomtool/internal/bloom"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RunInfo(args ...string) error {
|
||||||
|
fs := flag.NewFlagSet("info", flag.ExitOnError)
|
||||||
|
|
||||||
|
bmpFile := fs.String("b", "", "bitmap filename")
|
||||||
|
|
||||||
|
if err := fs.Parse(args); err != nil {
|
||||||
|
return err
|
||||||
|
} else if fs.NArg() > 0 || *bmpFile == "" {
|
||||||
|
fs.Usage()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return info(*bmpFile)
|
||||||
|
}
|
||||||
|
|
||||||
|
func info(bmpFile string) error {
|
||||||
|
|
||||||
|
slog.Info("load bitmap file", "filename", bmpFile)
|
||||||
|
bfile, err := bloom.LoadFromFile(bmpFile, true)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("open bitmap file error", "err", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stat := bfile.GetStat()
|
||||||
|
slog.Info("bitmap info", "stat", stat)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
345
internal/bloom/bloom.go
Normal file
345
internal/bloom/bloom.go
Normal file
@@ -0,0 +1,345 @@
|
|||||||
|
package bloom
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"sort"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/RoaringBitmap/roaring/roaring64"
|
||||||
|
"github.com/klauspost/compress/zstd"
|
||||||
|
"google.golang.org/protobuf/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
/*
|
||||||
|
主体代码来源于https://github.com/bits-and-blooms/bloom
|
||||||
|
在此文件中,将bitmap的管理改用roaring bitmap实现
|
||||||
|
algotao 2022-08-29
|
||||||
|
*/
|
||||||
|
|
||||||
|
type BloomFilter struct {
|
||||||
|
m uint64 // 存贮空间上限
|
||||||
|
k uint64 // hash函数个数
|
||||||
|
elementsMax uint64 // 元素数量上限
|
||||||
|
elementsAdded uint64 // 已加入的元素数量
|
||||||
|
falsePositiveRate float64 // 假阳率
|
||||||
|
rb *roaring64.Bitmap // 位图
|
||||||
|
chOne chan []uint64 // 接收每个插入的hash索引
|
||||||
|
chInsert chan []uint64 // 接收排好序的hash索引进Bitmap
|
||||||
|
chSortJobQuota chan int // 排序工作的配额控制
|
||||||
|
buf []uint64 // 缓冲
|
||||||
|
wgJobs sync.WaitGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
type BloomFilterStat struct {
|
||||||
|
M uint64
|
||||||
|
K uint64
|
||||||
|
ElementsMax uint64
|
||||||
|
ElementsAdded uint64
|
||||||
|
FalsePositiveRate float64
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
headerVersion1 = 1
|
||||||
|
)
|
||||||
|
|
||||||
|
// bitmapFileHeader 存贮文件头
|
||||||
|
type bitmapFileHeader struct {
|
||||||
|
Size uint64 // Header Protobuf size
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewWithEstimates 创建一个BloomFilter,并期望有n个元素,<fp的误匹配率
|
||||||
|
func NewWithEstimates(e uint64, fr float64) *BloomFilter {
|
||||||
|
m, k := EstimateParameters(e, fr)
|
||||||
|
return newBloomFilter(m, k, e, fr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// new 创建一个新的BloomFilter,具有 _m_ bits 和 _k_ hashing 函数
|
||||||
|
func newBloomFilter(m uint64, k uint64, e uint64, fr float64) *BloomFilter {
|
||||||
|
b := &BloomFilter{
|
||||||
|
m: max(1, m),
|
||||||
|
k: max(1, k),
|
||||||
|
elementsMax: e,
|
||||||
|
falsePositiveRate: fr,
|
||||||
|
rb: roaring64.New(),
|
||||||
|
chOne: make(chan []uint64, 1024*1024), // 索引缓冲区chan
|
||||||
|
chInsert: make(chan []uint64, 2), // 插入队列
|
||||||
|
chSortJobQuota: make(chan int, 8), // 排序队列
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < cap(b.chSortJobQuota); i++ {
|
||||||
|
b.chSortJobQuota <- 0
|
||||||
|
}
|
||||||
|
|
||||||
|
//log.Printf("Init quota len(%v), cap(%v)\n", len(b.chSortJobQuota), cap(b.chSortJobQuota))
|
||||||
|
|
||||||
|
go b.consumeOne()
|
||||||
|
go b.consumeInsert()
|
||||||
|
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// 生成 m 和 k
|
||||||
|
func EstimateParameters(n uint64, p float64) (m uint64, k uint64) {
|
||||||
|
m = uint64(math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(math.Log(2), 2)))
|
||||||
|
k = uint64(math.Ceil(math.Log(2) * float64(m) / float64(n)))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// location 返回当前位置的hash值
|
||||||
|
func (b *BloomFilter) location(h [4]uint64, i uint64) uint64 {
|
||||||
|
return (h[i%2] + i*h[2+(((i+(i%2))%4)/2)]) % b.m
|
||||||
|
//return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// baseHashes 生成4个hash值,用于生产key
|
||||||
|
func (b *BloomFilter) baseHashes(data []byte) [4]uint64 {
|
||||||
|
h := New128()
|
||||||
|
h.Write(data)
|
||||||
|
|
||||||
|
h1, h2 := h.Sum128()
|
||||||
|
|
||||||
|
h.Write([]byte{1})
|
||||||
|
h3, h4 := h.Sum128()
|
||||||
|
|
||||||
|
return [4]uint64{
|
||||||
|
h1, h2, h3, h4,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 消费一个计算好的bloom bits对象,并填入缓冲。当缓冲半满时发送给排序队列处理
|
||||||
|
func (b *BloomFilter) consumeOne() {
|
||||||
|
batchSize := cap(b.chOne) * int(b.k) / 2 //一半buffer满了就开始处理,即一半个数的uint64。或是遇到Flush标志(bits长度0),则刷缓冲
|
||||||
|
|
||||||
|
for bits := range b.chOne {
|
||||||
|
if len(bits) != 0 {
|
||||||
|
b.elementsAdded++
|
||||||
|
}
|
||||||
|
|
||||||
|
b.buf = append(b.buf, bits...)
|
||||||
|
if len(b.buf) >= batchSize || len(bits) == 0 {
|
||||||
|
|
||||||
|
buf := b.buf[:]
|
||||||
|
b.buf = []uint64{}
|
||||||
|
b.wgJobs.Add(1)
|
||||||
|
|
||||||
|
// 如果接收到了Flush标志,则在处理最后buffer后,减一次waitgroup
|
||||||
|
if len(bits) == 0 {
|
||||||
|
b.wgJobs.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
//等待有可用排序配额,如成功则消耗一个配额
|
||||||
|
<-b.chSortJobQuota
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
sort.Slice(buf, func(i, j int) bool { return buf[i] < buf[j] })
|
||||||
|
|
||||||
|
//提交至插入任务
|
||||||
|
b.chInsert <- buf
|
||||||
|
|
||||||
|
//恢复1个配额
|
||||||
|
b.chSortJobQuota <- 1
|
||||||
|
}()
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将批量bits写到bitmap
|
||||||
|
func (b *BloomFilter) consumeInsert() {
|
||||||
|
for bitsBatch := range b.chInsert {
|
||||||
|
b.rb.AddMany(bitsBatch)
|
||||||
|
b.wgJobs.Done()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add 添加数据的Hash位图
|
||||||
|
func (b *BloomFilter) Add(data []byte) *BloomFilter {
|
||||||
|
h := b.baseHashes(data)
|
||||||
|
bits := make([]uint64, b.k)
|
||||||
|
for i := uint64(0); i < b.k; i++ {
|
||||||
|
bits[i] = b.location(h, i)
|
||||||
|
}
|
||||||
|
b.chOne <- bits //将一个计算好的bloom bits发送到待处理队列
|
||||||
|
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddString 添加字符串的Hash位图
|
||||||
|
func (b *BloomFilter) AddString(data string) *BloomFilter {
|
||||||
|
return b.Add([]byte(data))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test 如果命中Hash位图,则返回真 (有误匹配率)
|
||||||
|
func (b *BloomFilter) Test(data []byte) bool {
|
||||||
|
h := b.baseHashes(data)
|
||||||
|
for i := uint64(0); i < b.k; i++ {
|
||||||
|
if !b.rb.Contains(b.location(h, i)) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestString 如果命中字符串Hash位图,则返回真 (有误匹配率)
|
||||||
|
func (b *BloomFilter) TestString(data string) bool {
|
||||||
|
return b.Test([]byte(data))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush 将缓冲中的待处理Bit写入Bitmap
|
||||||
|
func (b *BloomFilter) Flush() {
|
||||||
|
b.wgJobs.Add(1)
|
||||||
|
|
||||||
|
//发出Flush指令
|
||||||
|
b.chOne <- []uint64{}
|
||||||
|
|
||||||
|
b.wgJobs.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// free 将缓冲中的待处理Bit写入Bitmap
|
||||||
|
func (b *BloomFilter) free() {
|
||||||
|
close(b.chOne)
|
||||||
|
close(b.chInsert)
|
||||||
|
b.rb.Clear()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterator 位图遍历器
|
||||||
|
func (b *BloomFilter) Iterator() roaring64.IntPeekable64 {
|
||||||
|
return b.rb.Iterator()
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetSizeInBytes 返回位图大小
|
||||||
|
func (b *BloomFilter) GetSizeInBytes() uint64 {
|
||||||
|
return b.rb.GetSerializedSizeInBytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
// 获得统计信息,主要用于运行期间获取状态
|
||||||
|
func (b *BloomFilter) GetStat() BloomFilterStat {
|
||||||
|
return BloomFilterStat{
|
||||||
|
M: b.m,
|
||||||
|
K: b.k,
|
||||||
|
ElementsMax: b.elementsMax,
|
||||||
|
ElementsAdded: b.elementsAdded,
|
||||||
|
FalsePositiveRate: b.falsePositiveRate,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SaveToFile 写入到文件
|
||||||
|
func (b *BloomFilter) SaveToFile(filename string) (err error) {
|
||||||
|
b.Flush()
|
||||||
|
|
||||||
|
headerPB := &Header{
|
||||||
|
Version: headerVersion1,
|
||||||
|
M: b.m,
|
||||||
|
K: b.k,
|
||||||
|
ElementsMax: b.elementsMax,
|
||||||
|
ElementsAdded: b.elementsAdded,
|
||||||
|
FalsePositiveRate: b.falsePositiveRate,
|
||||||
|
}
|
||||||
|
headerData, err := proto.Marshal(headerPB)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fi, fe := os.Create(filename)
|
||||||
|
if fe != nil {
|
||||||
|
return fe
|
||||||
|
}
|
||||||
|
|
||||||
|
defer fi.Close()
|
||||||
|
|
||||||
|
fh := bitmapFileHeader{
|
||||||
|
Size: uint64(len(headerData)),
|
||||||
|
}
|
||||||
|
|
||||||
|
//写入文件头(字节数)
|
||||||
|
fe = binary.Write(fi, binary.BigEndian, fh)
|
||||||
|
if fe != nil {
|
||||||
|
return fe
|
||||||
|
}
|
||||||
|
|
||||||
|
//写入文件头(PB详细信息)
|
||||||
|
fe = binary.Write(fi, binary.BigEndian, headerData)
|
||||||
|
if fe != nil {
|
||||||
|
return fe
|
||||||
|
}
|
||||||
|
|
||||||
|
b.rb.RunOptimize()
|
||||||
|
|
||||||
|
zw, err := zstd.NewWriter(fi)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer zw.Close()
|
||||||
|
|
||||||
|
_, fe = b.rb.WriteTo(zw)
|
||||||
|
|
||||||
|
b.free()
|
||||||
|
|
||||||
|
return fe
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadFromFile 从文件中读取
|
||||||
|
func LoadFromFile(filename string, headerOnly bool) (bft *BloomFilter, err error) {
|
||||||
|
fi, fe := os.Open(filename)
|
||||||
|
|
||||||
|
if fe != nil {
|
||||||
|
return nil, fe
|
||||||
|
}
|
||||||
|
|
||||||
|
defer fi.Close()
|
||||||
|
|
||||||
|
fh := bitmapFileHeader{}
|
||||||
|
fe = binary.Read(fi, binary.BigEndian, &fh)
|
||||||
|
if fe != nil {
|
||||||
|
return nil, fe
|
||||||
|
}
|
||||||
|
|
||||||
|
headerData := make([]byte, fh.Size)
|
||||||
|
|
||||||
|
n, err := fi.Read(headerData)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if n != len(headerData) {
|
||||||
|
return nil, errors.New("unknown file format")
|
||||||
|
}
|
||||||
|
headerPB := &Header{}
|
||||||
|
|
||||||
|
err = proto.Unmarshal(headerData, headerPB)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if headerPB.Version != headerVersion1 {
|
||||||
|
return nil, errors.New("unsupported version")
|
||||||
|
}
|
||||||
|
|
||||||
|
bft = &BloomFilter{}
|
||||||
|
bft.m = headerPB.GetM()
|
||||||
|
bft.k = headerPB.GetK()
|
||||||
|
bft.elementsMax = headerPB.GetElementsMax()
|
||||||
|
bft.elementsAdded = headerPB.GetElementsAdded()
|
||||||
|
bft.falsePositiveRate = headerPB.GetFalsePositiveRate()
|
||||||
|
|
||||||
|
if headerOnly {
|
||||||
|
return bft, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
bft.rb = roaring64.New()
|
||||||
|
|
||||||
|
zr, err := zstd.NewReader(fi)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer zr.Close()
|
||||||
|
|
||||||
|
_, fe = bft.rb.ReadFrom(zr)
|
||||||
|
if fe != nil {
|
||||||
|
return nil, fe
|
||||||
|
}
|
||||||
|
|
||||||
|
return bft, nil
|
||||||
|
}
|
||||||
127
internal/bloom/bloom_test.go
Normal file
127
internal/bloom/bloom_test.go
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
package bloom
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math/rand"
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/RoaringBitmap/roaring/roaring64"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBasic(t *testing.T) {
|
||||||
|
f := NewWithEstimates(1000000000, 0.00001)
|
||||||
|
n1 := "Bess"
|
||||||
|
n2 := "Jane"
|
||||||
|
n3 := "Tony"
|
||||||
|
n4 := "Algo"
|
||||||
|
f.AddString(n1)
|
||||||
|
f.AddString(n2)
|
||||||
|
f.AddString(n3)
|
||||||
|
f.Flush()
|
||||||
|
n1b := f.TestString(n1)
|
||||||
|
n2b := f.TestString(n2)
|
||||||
|
n3b := f.TestString(n3)
|
||||||
|
n4b := f.TestString(n4)
|
||||||
|
if !n1b {
|
||||||
|
t.Errorf("%v should be in.", n1)
|
||||||
|
}
|
||||||
|
if !n2b {
|
||||||
|
t.Errorf("%v should be in.", n2)
|
||||||
|
}
|
||||||
|
if !n3b {
|
||||||
|
t.Errorf("%v should be in.", n3)
|
||||||
|
}
|
||||||
|
if n4b {
|
||||||
|
t.Errorf("%v should be not in.", n4)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFile(t *testing.T) {
|
||||||
|
f := NewWithEstimates(1000000000, 0.00001)
|
||||||
|
n1 := "Bess"
|
||||||
|
n2 := "Jane"
|
||||||
|
n3 := "Tony"
|
||||||
|
n4 := "Algo"
|
||||||
|
f.AddString(n1)
|
||||||
|
f.AddString(n2)
|
||||||
|
f.AddString(n3)
|
||||||
|
const tmpfile = "//tmp//bloomtest.bin"
|
||||||
|
|
||||||
|
err := f.SaveToFile(tmpfile)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("save file error %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err = LoadFromFile(tmpfile, false)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("load file error %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
n1b := f.TestString(n1)
|
||||||
|
n2b := f.TestString(n2)
|
||||||
|
n3b := f.TestString(n3)
|
||||||
|
n4b := f.TestString(n4)
|
||||||
|
if !n1b {
|
||||||
|
t.Errorf("%v should be in.", n1)
|
||||||
|
}
|
||||||
|
if !n2b {
|
||||||
|
t.Errorf("%v should be in.", n2)
|
||||||
|
}
|
||||||
|
if !n3b {
|
||||||
|
t.Errorf("%v should be in.", n3)
|
||||||
|
}
|
||||||
|
if n4b {
|
||||||
|
t.Errorf("%v should be not in.", n4)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test10W(t *testing.T) {
|
||||||
|
f := NewWithEstimates(100000, 0.00001)
|
||||||
|
for i := uint64(0); i < 100000; i++ {
|
||||||
|
f.AddString(strconv.FormatUint(i, 10))
|
||||||
|
}
|
||||||
|
|
||||||
|
const tmpfile = "//tmp//bloomtest.bin"
|
||||||
|
err := f.SaveToFile(tmpfile)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("save file error %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err = LoadFromFile(tmpfile, false)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("load file error %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := uint64(0); i < 100000; i++ {
|
||||||
|
ns := f.TestString(strconv.FormatUint(i, 10))
|
||||||
|
if !ns {
|
||||||
|
t.Errorf("%v should be in.", ns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStat(t *testing.T) {
|
||||||
|
f := NewWithEstimates(1000000000, 0.00000001)
|
||||||
|
t.Errorf("%v", f.GetStat())
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkNormal(b *testing.B) {
|
||||||
|
f := NewWithEstimates(1000000000, 0.00001)
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
f.AddString(strconv.FormatUint(uint64(n), 10))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkRoaringBitmap(b *testing.B) {
|
||||||
|
f := roaring64.New()
|
||||||
|
r := rand.New(rand.NewSource(99))
|
||||||
|
x := uint64(0)
|
||||||
|
|
||||||
|
b.Run("Add", func(b *testing.B) {
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
x = r.Uint64() % 23962645944
|
||||||
|
f.Add(x)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
b.Errorf("%v aa\n", f.GetSizeInBytes())
|
||||||
|
}
|
||||||
167
internal/bloom/bloomfile.pb.go
Normal file
167
internal/bloom/bloomfile.pb.go
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||||
|
// versions:
|
||||||
|
// protoc-gen-go v1.36.10
|
||||||
|
// protoc v5.29.4
|
||||||
|
// source: bloomfile.proto
|
||||||
|
|
||||||
|
package bloom
|
||||||
|
|
||||||
|
import (
|
||||||
|
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
|
||||||
|
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
|
||||||
|
reflect "reflect"
|
||||||
|
sync "sync"
|
||||||
|
unsafe "unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Verify that this generated code is sufficiently up-to-date.
|
||||||
|
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
|
||||||
|
// Verify that runtime/protoimpl is sufficiently up-to-date.
|
||||||
|
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
|
||||||
|
)
|
||||||
|
|
||||||
|
type Header struct {
|
||||||
|
state protoimpl.MessageState `protogen:"open.v1"`
|
||||||
|
Version uint64 `protobuf:"varint,1,opt,name=Version,proto3" json:"Version,omitempty"` // 版本,当前为1
|
||||||
|
M uint64 `protobuf:"varint,2,opt,name=M,proto3" json:"M,omitempty"` // 存贮空间上限
|
||||||
|
K uint64 `protobuf:"varint,3,opt,name=K,proto3" json:"K,omitempty"` // hash函数个数
|
||||||
|
ElementsMax uint64 `protobuf:"varint,4,opt,name=ElementsMax,proto3" json:"ElementsMax,omitempty"` // 创建空间元素数量
|
||||||
|
ElementsAdded uint64 `protobuf:"varint,5,opt,name=ElementsAdded,proto3" json:"ElementsAdded,omitempty"` // 实际加入元素数量
|
||||||
|
FalsePositiveRate float64 `protobuf:"fixed64,6,opt,name=FalsePositiveRate,proto3" json:"FalsePositiveRate,omitempty"` // 假阳率
|
||||||
|
unknownFields protoimpl.UnknownFields
|
||||||
|
sizeCache protoimpl.SizeCache
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *Header) Reset() {
|
||||||
|
*x = Header{}
|
||||||
|
mi := &file_bloomfile_proto_msgTypes[0]
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *Header) String() string {
|
||||||
|
return protoimpl.X.MessageStringOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*Header) ProtoMessage() {}
|
||||||
|
|
||||||
|
func (x *Header) ProtoReflect() protoreflect.Message {
|
||||||
|
mi := &file_bloomfile_proto_msgTypes[0]
|
||||||
|
if x != nil {
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
if ms.LoadMessageInfo() == nil {
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
return ms
|
||||||
|
}
|
||||||
|
return mi.MessageOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deprecated: Use Header.ProtoReflect.Descriptor instead.
|
||||||
|
func (*Header) Descriptor() ([]byte, []int) {
|
||||||
|
return file_bloomfile_proto_rawDescGZIP(), []int{0}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *Header) GetVersion() uint64 {
|
||||||
|
if x != nil {
|
||||||
|
return x.Version
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *Header) GetM() uint64 {
|
||||||
|
if x != nil {
|
||||||
|
return x.M
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *Header) GetK() uint64 {
|
||||||
|
if x != nil {
|
||||||
|
return x.K
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *Header) GetElementsMax() uint64 {
|
||||||
|
if x != nil {
|
||||||
|
return x.ElementsMax
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *Header) GetElementsAdded() uint64 {
|
||||||
|
if x != nil {
|
||||||
|
return x.ElementsAdded
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *Header) GetFalsePositiveRate() float64 {
|
||||||
|
if x != nil {
|
||||||
|
return x.FalsePositiveRate
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
var File_bloomfile_proto protoreflect.FileDescriptor
|
||||||
|
|
||||||
|
const file_bloomfile_proto_rawDesc = "" +
|
||||||
|
"\n" +
|
||||||
|
"\x0fbloomfile.proto\x12\x05bloom\"\xb4\x01\n" +
|
||||||
|
"\x06Header\x12\x18\n" +
|
||||||
|
"\aVersion\x18\x01 \x01(\x04R\aVersion\x12\f\n" +
|
||||||
|
"\x01M\x18\x02 \x01(\x04R\x01M\x12\f\n" +
|
||||||
|
"\x01K\x18\x03 \x01(\x04R\x01K\x12 \n" +
|
||||||
|
"\vElementsMax\x18\x04 \x01(\x04R\vElementsMax\x12$\n" +
|
||||||
|
"\rElementsAdded\x18\x05 \x01(\x04R\rElementsAdded\x12,\n" +
|
||||||
|
"\x11FalsePositiveRate\x18\x06 \x01(\x01R\x11FalsePositiveRateB\x16Z\x14internal/bloom;bloomb\x06proto3"
|
||||||
|
|
||||||
|
var (
|
||||||
|
file_bloomfile_proto_rawDescOnce sync.Once
|
||||||
|
file_bloomfile_proto_rawDescData []byte
|
||||||
|
)
|
||||||
|
|
||||||
|
func file_bloomfile_proto_rawDescGZIP() []byte {
|
||||||
|
file_bloomfile_proto_rawDescOnce.Do(func() {
|
||||||
|
file_bloomfile_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_bloomfile_proto_rawDesc), len(file_bloomfile_proto_rawDesc)))
|
||||||
|
})
|
||||||
|
return file_bloomfile_proto_rawDescData
|
||||||
|
}
|
||||||
|
|
||||||
|
var file_bloomfile_proto_msgTypes = make([]protoimpl.MessageInfo, 1)
|
||||||
|
var file_bloomfile_proto_goTypes = []any{
|
||||||
|
(*Header)(nil), // 0: bloom.Header
|
||||||
|
}
|
||||||
|
var file_bloomfile_proto_depIdxs = []int32{
|
||||||
|
0, // [0:0] is the sub-list for method output_type
|
||||||
|
0, // [0:0] is the sub-list for method input_type
|
||||||
|
0, // [0:0] is the sub-list for extension type_name
|
||||||
|
0, // [0:0] is the sub-list for extension extendee
|
||||||
|
0, // [0:0] is the sub-list for field type_name
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() { file_bloomfile_proto_init() }
|
||||||
|
func file_bloomfile_proto_init() {
|
||||||
|
if File_bloomfile_proto != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
type x struct{}
|
||||||
|
out := protoimpl.TypeBuilder{
|
||||||
|
File: protoimpl.DescBuilder{
|
||||||
|
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
||||||
|
RawDescriptor: unsafe.Slice(unsafe.StringData(file_bloomfile_proto_rawDesc), len(file_bloomfile_proto_rawDesc)),
|
||||||
|
NumEnums: 0,
|
||||||
|
NumMessages: 1,
|
||||||
|
NumExtensions: 0,
|
||||||
|
NumServices: 0,
|
||||||
|
},
|
||||||
|
GoTypes: file_bloomfile_proto_goTypes,
|
||||||
|
DependencyIndexes: file_bloomfile_proto_depIdxs,
|
||||||
|
MessageInfos: file_bloomfile_proto_msgTypes,
|
||||||
|
}.Build()
|
||||||
|
File_bloomfile_proto = out.File
|
||||||
|
file_bloomfile_proto_goTypes = nil
|
||||||
|
file_bloomfile_proto_depIdxs = nil
|
||||||
|
}
|
||||||
15
internal/bloom/bloomfile.proto
Normal file
15
internal/bloom/bloomfile.proto
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
package bloom;
|
||||||
|
|
||||||
|
option go_package = "internal/bloom;bloom";
|
||||||
|
|
||||||
|
|
||||||
|
message Header {
|
||||||
|
uint64 Version = 1 ; // 版本,当前为1
|
||||||
|
uint64 M = 2 ; // 存贮空间上限
|
||||||
|
uint64 K = 3 ; // hash函数个数
|
||||||
|
uint64 ElementsMax = 4 ; // 创建空间元素数量
|
||||||
|
uint64 ElementsAdded = 5 ; // 实际加入元素数量
|
||||||
|
double FalsePositiveRate = 6 ; // 假阳率
|
||||||
|
}
|
||||||
1
internal/bloom/makeproto.sh
Executable file
1
internal/bloom/makeproto.sh
Executable file
@@ -0,0 +1 @@
|
|||||||
|
protoc --proto_path=. *.proto --go_out=. --go_opt=paths=source_relative
|
||||||
72
internal/bloom/murmur.go
Normal file
72
internal/bloom/murmur.go
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
// Copyright 2013, Sébastien Paolacci. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package murmur3 provides an amd64 native (Go generic fallback)
|
||||||
|
// implementation of the murmur3 hash algorithm for strings and slices.
|
||||||
|
//
|
||||||
|
// Assembly is provided for amd64 go1.5+; pull requests are welcome for other
|
||||||
|
// architectures.
|
||||||
|
package bloom
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
type bmixer interface {
|
||||||
|
bmix(p []byte) (tail []byte)
|
||||||
|
Size() (n int)
|
||||||
|
reset()
|
||||||
|
}
|
||||||
|
|
||||||
|
type digest struct {
|
||||||
|
clen int // Digested input cumulative length.
|
||||||
|
tail []byte // 0 to Size()-1 bytes view of `buf'.
|
||||||
|
buf [16]byte // Expected (but not required) to be Size() large.
|
||||||
|
bmixer
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *digest) BlockSize() int { return 1 }
|
||||||
|
|
||||||
|
func (d *digest) Write(p []byte) (n int, err error) {
|
||||||
|
n = len(p)
|
||||||
|
d.clen += n
|
||||||
|
|
||||||
|
if len(d.tail) > 0 {
|
||||||
|
// Stick back pending bytes.
|
||||||
|
nfree := d.Size() - len(d.tail) // nfree ∈ [1, d.Size()-1].
|
||||||
|
if nfree < len(p) {
|
||||||
|
// One full block can be formed.
|
||||||
|
block := append(d.tail, p[:nfree]...)
|
||||||
|
p = p[nfree:]
|
||||||
|
_ = d.bmix(block) // No tail.
|
||||||
|
} else {
|
||||||
|
// Tail's buf is large enough to prevent reallocs.
|
||||||
|
p = append(d.tail, p...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
d.tail = d.bmix(p)
|
||||||
|
|
||||||
|
// Keep own copy of the 0 to Size()-1 pending bytes.
|
||||||
|
nn := copy(d.buf[:], d.tail)
|
||||||
|
d.tail = d.buf[:nn]
|
||||||
|
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *digest) Reset() {
|
||||||
|
d.clen = 0
|
||||||
|
d.tail = nil
|
||||||
|
d.bmixer.reset()
|
||||||
|
}
|
||||||
|
|
||||||
|
func strslice(slice []byte) string {
|
||||||
|
var str string
|
||||||
|
slicehdr := ((*reflect.SliceHeader)(unsafe.Pointer(&slice)))
|
||||||
|
strhdr := (*reflect.StringHeader)(unsafe.Pointer(&str))
|
||||||
|
strhdr.Data = slicehdr.Data
|
||||||
|
strhdr.Len = slicehdr.Len
|
||||||
|
return str
|
||||||
|
}
|
||||||
182
internal/bloom/murmur128.go
Normal file
182
internal/bloom/murmur128.go
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
package bloom
|
||||||
|
|
||||||
|
import (
|
||||||
|
"hash"
|
||||||
|
"math/bits"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
c1_128 = 0x87c37b91114253d5
|
||||||
|
c2_128 = 0x4cf5ad432745937f
|
||||||
|
)
|
||||||
|
|
||||||
|
// Make sure interfaces are correctly implemented.
|
||||||
|
var (
|
||||||
|
_ hash.Hash = new(digest128)
|
||||||
|
_ Hash128 = new(digest128)
|
||||||
|
_ bmixer = new(digest128)
|
||||||
|
)
|
||||||
|
|
||||||
|
// Hash128 provides an interface for a streaming 128 bit hash.
|
||||||
|
type Hash128 interface {
|
||||||
|
hash.Hash
|
||||||
|
Sum128() (uint64, uint64)
|
||||||
|
}
|
||||||
|
|
||||||
|
// digest128 represents a partial evaluation of a 128 bites hash.
|
||||||
|
type digest128 struct {
|
||||||
|
digest
|
||||||
|
seed1 uint64
|
||||||
|
seed2 uint64
|
||||||
|
h1 uint64 // Unfinalized running hash part 1.
|
||||||
|
h2 uint64 // Unfinalized running hash part 2.
|
||||||
|
}
|
||||||
|
|
||||||
|
// SeedNew128 returns a Hash128 for streaming 128 bit sums with its internal
|
||||||
|
// digests initialized to seed1 and seed2.
|
||||||
|
//
|
||||||
|
// The canonical implementation allows one only uint32 seed; to imitate that
|
||||||
|
// behavior, use the same, uint32-max seed for seed1 and seed2.
|
||||||
|
func SeedNew128(seed1, seed2 uint64) Hash128 {
|
||||||
|
d := &digest128{seed1: seed1, seed2: seed2}
|
||||||
|
d.bmixer = d
|
||||||
|
d.Reset()
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
// New128 returns a Hash128 for streaming 128 bit sums.
|
||||||
|
func New128() Hash128 {
|
||||||
|
return SeedNew128(0, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *digest128) Size() int { return 16 }
|
||||||
|
|
||||||
|
func (d *digest128) reset() { d.h1, d.h2 = d.seed1, d.seed2 }
|
||||||
|
|
||||||
|
func (d *digest128) Sum(b []byte) []byte {
|
||||||
|
h1, h2 := d.Sum128()
|
||||||
|
return append(b,
|
||||||
|
byte(h1>>56), byte(h1>>48), byte(h1>>40), byte(h1>>32),
|
||||||
|
byte(h1>>24), byte(h1>>16), byte(h1>>8), byte(h1),
|
||||||
|
|
||||||
|
byte(h2>>56), byte(h2>>48), byte(h2>>40), byte(h2>>32),
|
||||||
|
byte(h2>>24), byte(h2>>16), byte(h2>>8), byte(h2),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *digest128) bmix(p []byte) (tail []byte) {
|
||||||
|
h1, h2 := d.h1, d.h2
|
||||||
|
|
||||||
|
for len(p) >= 16 {
|
||||||
|
k1 := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 | uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
|
||||||
|
k2 := uint64(p[8]) | uint64(p[9])<<8 | uint64(p[10])<<16 | uint64(p[11])<<24 | uint64(p[12])<<32 | uint64(p[13])<<40 | uint64(p[14])<<48 | uint64(p[15])<<56
|
||||||
|
p = p[16:]
|
||||||
|
|
||||||
|
k1 *= c1_128
|
||||||
|
k1 = bits.RotateLeft64(k1, 31)
|
||||||
|
k1 *= c2_128
|
||||||
|
h1 ^= k1
|
||||||
|
|
||||||
|
h1 = bits.RotateLeft64(h1, 27)
|
||||||
|
h1 += h2
|
||||||
|
h1 = h1*5 + 0x52dce729
|
||||||
|
|
||||||
|
k2 *= c2_128
|
||||||
|
k2 = bits.RotateLeft64(k2, 33)
|
||||||
|
k2 *= c1_128
|
||||||
|
h2 ^= k2
|
||||||
|
|
||||||
|
h2 = bits.RotateLeft64(h2, 31)
|
||||||
|
h2 += h1
|
||||||
|
h2 = h2*5 + 0x38495ab5
|
||||||
|
}
|
||||||
|
d.h1, d.h2 = h1, h2
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *digest128) Sum128() (h1, h2 uint64) {
|
||||||
|
|
||||||
|
h1, h2 = d.h1, d.h2
|
||||||
|
|
||||||
|
var k1, k2 uint64
|
||||||
|
switch len(d.tail) & 15 {
|
||||||
|
case 15:
|
||||||
|
k2 ^= uint64(d.tail[14]) << 48
|
||||||
|
fallthrough
|
||||||
|
case 14:
|
||||||
|
k2 ^= uint64(d.tail[13]) << 40
|
||||||
|
fallthrough
|
||||||
|
case 13:
|
||||||
|
k2 ^= uint64(d.tail[12]) << 32
|
||||||
|
fallthrough
|
||||||
|
case 12:
|
||||||
|
k2 ^= uint64(d.tail[11]) << 24
|
||||||
|
fallthrough
|
||||||
|
case 11:
|
||||||
|
k2 ^= uint64(d.tail[10]) << 16
|
||||||
|
fallthrough
|
||||||
|
case 10:
|
||||||
|
k2 ^= uint64(d.tail[9]) << 8
|
||||||
|
fallthrough
|
||||||
|
case 9:
|
||||||
|
k2 ^= uint64(d.tail[8]) << 0
|
||||||
|
|
||||||
|
k2 *= c2_128
|
||||||
|
k2 = bits.RotateLeft64(k2, 33)
|
||||||
|
k2 *= c1_128
|
||||||
|
h2 ^= k2
|
||||||
|
|
||||||
|
fallthrough
|
||||||
|
|
||||||
|
case 8:
|
||||||
|
k1 ^= uint64(d.tail[7]) << 56
|
||||||
|
fallthrough
|
||||||
|
case 7:
|
||||||
|
k1 ^= uint64(d.tail[6]) << 48
|
||||||
|
fallthrough
|
||||||
|
case 6:
|
||||||
|
k1 ^= uint64(d.tail[5]) << 40
|
||||||
|
fallthrough
|
||||||
|
case 5:
|
||||||
|
k1 ^= uint64(d.tail[4]) << 32
|
||||||
|
fallthrough
|
||||||
|
case 4:
|
||||||
|
k1 ^= uint64(d.tail[3]) << 24
|
||||||
|
fallthrough
|
||||||
|
case 3:
|
||||||
|
k1 ^= uint64(d.tail[2]) << 16
|
||||||
|
fallthrough
|
||||||
|
case 2:
|
||||||
|
k1 ^= uint64(d.tail[1]) << 8
|
||||||
|
fallthrough
|
||||||
|
case 1:
|
||||||
|
k1 ^= uint64(d.tail[0]) << 0
|
||||||
|
k1 *= c1_128
|
||||||
|
k1 = bits.RotateLeft64(k1, 31)
|
||||||
|
k1 *= c2_128
|
||||||
|
h1 ^= k1
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 ^= uint64(d.clen)
|
||||||
|
h2 ^= uint64(d.clen)
|
||||||
|
|
||||||
|
h1 += h2
|
||||||
|
h2 += h1
|
||||||
|
|
||||||
|
h1 = fmix64(h1)
|
||||||
|
h2 = fmix64(h2)
|
||||||
|
|
||||||
|
h1 += h2
|
||||||
|
h2 += h1
|
||||||
|
|
||||||
|
return h1, h2
|
||||||
|
}
|
||||||
|
|
||||||
|
func fmix64(k uint64) uint64 {
|
||||||
|
k ^= k >> 33
|
||||||
|
k *= 0xff51afd7ed558ccd
|
||||||
|
k ^= k >> 33
|
||||||
|
k *= 0xc4ceb9fe1a85ec53
|
||||||
|
k ^= k >> 33
|
||||||
|
return k
|
||||||
|
}
|
||||||
247
internal/bloom/murmur128_amd64.s
Normal file
247
internal/bloom/murmur128_amd64.s
Normal file
@@ -0,0 +1,247 @@
|
|||||||
|
// +build go1.5,amd64
|
||||||
|
|
||||||
|
// SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64)
|
||||||
|
TEXT ·SeedSum128(SB), $0-56
|
||||||
|
MOVQ seed1+0(FP), R12
|
||||||
|
MOVQ seed2+8(FP), R13
|
||||||
|
MOVQ data_base+16(FP), SI
|
||||||
|
MOVQ data_len+24(FP), R9
|
||||||
|
LEAQ h1+40(FP), BX
|
||||||
|
JMP sum128internal<>(SB)
|
||||||
|
|
||||||
|
// Sum128(data []byte) (h1 uint64, h2 uint64)
|
||||||
|
TEXT ·Sum128(SB), $0-40
|
||||||
|
XORQ R12, R12
|
||||||
|
XORQ R13, R13
|
||||||
|
MOVQ data_base+0(FP), SI
|
||||||
|
MOVQ data_len+8(FP), R9
|
||||||
|
LEAQ h1+24(FP), BX
|
||||||
|
JMP sum128internal<>(SB)
|
||||||
|
|
||||||
|
// SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64)
|
||||||
|
TEXT ·SeedStringSum128(SB), $0-48
|
||||||
|
MOVQ seed1+0(FP), R12
|
||||||
|
MOVQ seed2+8(FP), R13
|
||||||
|
MOVQ data_base+16(FP), SI
|
||||||
|
MOVQ data_len+24(FP), R9
|
||||||
|
LEAQ h1+32(FP), BX
|
||||||
|
JMP sum128internal<>(SB)
|
||||||
|
|
||||||
|
// StringSum128(data string) (h1 uint64, h2 uint64)
|
||||||
|
TEXT ·StringSum128(SB), $0-32
|
||||||
|
XORQ R12, R12
|
||||||
|
XORQ R13, R13
|
||||||
|
MOVQ data_base+0(FP), SI
|
||||||
|
MOVQ data_len+8(FP), R9
|
||||||
|
LEAQ h1+16(FP), BX
|
||||||
|
JMP sum128internal<>(SB)
|
||||||
|
|
||||||
|
// Expects:
|
||||||
|
// R12 == h1 uint64 seed
|
||||||
|
// R13 == h2 uint64 seed
|
||||||
|
// SI == &data
|
||||||
|
// R9 == len(data)
|
||||||
|
// BX == &[2]uint64 return
|
||||||
|
TEXT sum128internal<>(SB), $0
|
||||||
|
MOVQ $0x87c37b91114253d5, R14 // c1
|
||||||
|
MOVQ $0x4cf5ad432745937f, R15 // c2
|
||||||
|
|
||||||
|
MOVQ R9, CX
|
||||||
|
ANDQ $-16, CX // cx == data_len - (data_len % 16)
|
||||||
|
|
||||||
|
// for r10 = 0; r10 < cx; r10 += 16 {...
|
||||||
|
XORQ R10, R10
|
||||||
|
|
||||||
|
loop:
|
||||||
|
CMPQ R10, CX
|
||||||
|
JE tail
|
||||||
|
MOVQ (SI)(R10*1), AX
|
||||||
|
MOVQ 8(SI)(R10*1), DX
|
||||||
|
ADDQ $16, R10
|
||||||
|
|
||||||
|
IMULQ R14, AX
|
||||||
|
IMULQ R15, DX
|
||||||
|
|
||||||
|
ROLQ $31, AX
|
||||||
|
ROLQ $33, DX
|
||||||
|
|
||||||
|
IMULQ R15, AX
|
||||||
|
IMULQ R14, DX
|
||||||
|
|
||||||
|
XORQ AX, R12
|
||||||
|
ROLQ $27, R12
|
||||||
|
ADDQ R13, R12
|
||||||
|
XORQ DX, R13
|
||||||
|
ROLQ $31, R13
|
||||||
|
LEAQ 0x52dce729(R12)(R12*4), R12
|
||||||
|
|
||||||
|
ADDQ R12, R13
|
||||||
|
LEAQ 0x38495ab5(R13)(R13*4), R13
|
||||||
|
|
||||||
|
JMP loop
|
||||||
|
|
||||||
|
tail:
|
||||||
|
MOVQ R9, CX
|
||||||
|
ANDQ $0xf, CX
|
||||||
|
JZ finalize // if len % 16 == 0
|
||||||
|
|
||||||
|
XORQ AX, AX
|
||||||
|
|
||||||
|
// poor man's binary tree jump table
|
||||||
|
SUBQ $8, CX
|
||||||
|
JZ tail8
|
||||||
|
JG over8
|
||||||
|
ADDQ $4, CX
|
||||||
|
JZ tail4
|
||||||
|
JG over4
|
||||||
|
ADDQ $2, CX
|
||||||
|
JL tail1
|
||||||
|
JZ tail2
|
||||||
|
JMP tail3
|
||||||
|
|
||||||
|
over4:
|
||||||
|
SUBQ $2, CX
|
||||||
|
JL tail5
|
||||||
|
JZ tail6
|
||||||
|
JMP tail7
|
||||||
|
|
||||||
|
over8:
|
||||||
|
SUBQ $4, CX
|
||||||
|
JZ tail12
|
||||||
|
JG over12
|
||||||
|
ADDQ $2, CX
|
||||||
|
JL tail9
|
||||||
|
JZ tail10
|
||||||
|
JMP tail11
|
||||||
|
|
||||||
|
over12:
|
||||||
|
SUBQ $2, CX
|
||||||
|
JL tail13
|
||||||
|
JZ tail14
|
||||||
|
|
||||||
|
tail15:
|
||||||
|
MOVBQZX 14(SI)(R10*1), AX
|
||||||
|
SALQ $16, AX
|
||||||
|
|
||||||
|
tail14:
|
||||||
|
MOVW 12(SI)(R10*1), AX
|
||||||
|
SALQ $32, AX
|
||||||
|
JMP tail12
|
||||||
|
|
||||||
|
tail13:
|
||||||
|
MOVBQZX 12(SI)(R10*1), AX
|
||||||
|
SALQ $32, AX
|
||||||
|
|
||||||
|
tail12:
|
||||||
|
MOVL 8(SI)(R10*1), DX
|
||||||
|
ORQ DX, AX
|
||||||
|
JMP fintailhigh
|
||||||
|
|
||||||
|
tail11:
|
||||||
|
MOVBQZX 10(SI)(R10*1), AX
|
||||||
|
SALQ $16, AX
|
||||||
|
|
||||||
|
tail10:
|
||||||
|
MOVW 8(SI)(R10*1), AX
|
||||||
|
JMP fintailhigh
|
||||||
|
|
||||||
|
tail9:
|
||||||
|
MOVB 8(SI)(R10*1), AL
|
||||||
|
|
||||||
|
fintailhigh:
|
||||||
|
IMULQ R15, AX
|
||||||
|
ROLQ $33, AX
|
||||||
|
IMULQ R14, AX
|
||||||
|
XORQ AX, R13
|
||||||
|
|
||||||
|
tail8:
|
||||||
|
MOVQ (SI)(R10*1), AX
|
||||||
|
JMP fintaillow
|
||||||
|
|
||||||
|
tail7:
|
||||||
|
MOVBQZX 6(SI)(R10*1), AX
|
||||||
|
SALQ $16, AX
|
||||||
|
|
||||||
|
tail6:
|
||||||
|
MOVW 4(SI)(R10*1), AX
|
||||||
|
SALQ $32, AX
|
||||||
|
JMP tail4
|
||||||
|
|
||||||
|
tail5:
|
||||||
|
MOVBQZX 4(SI)(R10*1), AX
|
||||||
|
SALQ $32, AX
|
||||||
|
|
||||||
|
tail4:
|
||||||
|
MOVL (SI)(R10*1), DX
|
||||||
|
ORQ DX, AX
|
||||||
|
JMP fintaillow
|
||||||
|
|
||||||
|
tail3:
|
||||||
|
MOVBQZX 2(SI)(R10*1), AX
|
||||||
|
SALQ $16, AX
|
||||||
|
|
||||||
|
tail2:
|
||||||
|
MOVW (SI)(R10*1), AX
|
||||||
|
JMP fintaillow
|
||||||
|
|
||||||
|
tail1:
|
||||||
|
MOVB (SI)(R10*1), AL
|
||||||
|
|
||||||
|
fintaillow:
|
||||||
|
IMULQ R14, AX
|
||||||
|
ROLQ $31, AX
|
||||||
|
IMULQ R15, AX
|
||||||
|
XORQ AX, R12
|
||||||
|
|
||||||
|
finalize:
|
||||||
|
XORQ R9, R12
|
||||||
|
XORQ R9, R13
|
||||||
|
|
||||||
|
ADDQ R13, R12
|
||||||
|
ADDQ R12, R13
|
||||||
|
|
||||||
|
// fmix128 (both interleaved)
|
||||||
|
MOVQ R12, DX
|
||||||
|
MOVQ R13, AX
|
||||||
|
|
||||||
|
SHRQ $33, DX
|
||||||
|
SHRQ $33, AX
|
||||||
|
|
||||||
|
XORQ DX, R12
|
||||||
|
XORQ AX, R13
|
||||||
|
|
||||||
|
MOVQ $0xff51afd7ed558ccd, CX
|
||||||
|
|
||||||
|
IMULQ CX, R12
|
||||||
|
IMULQ CX, R13
|
||||||
|
|
||||||
|
MOVQ R12, DX
|
||||||
|
MOVQ R13, AX
|
||||||
|
|
||||||
|
SHRQ $33, DX
|
||||||
|
SHRQ $33, AX
|
||||||
|
|
||||||
|
XORQ DX, R12
|
||||||
|
XORQ AX, R13
|
||||||
|
|
||||||
|
MOVQ $0xc4ceb9fe1a85ec53, CX
|
||||||
|
|
||||||
|
IMULQ CX, R12
|
||||||
|
IMULQ CX, R13
|
||||||
|
|
||||||
|
MOVQ R12, DX
|
||||||
|
MOVQ R13, AX
|
||||||
|
|
||||||
|
SHRQ $33, DX
|
||||||
|
SHRQ $33, AX
|
||||||
|
|
||||||
|
XORQ DX, R12
|
||||||
|
XORQ AX, R13
|
||||||
|
|
||||||
|
ADDQ R13, R12
|
||||||
|
ADDQ R12, R13
|
||||||
|
|
||||||
|
MOVQ R12, (BX)
|
||||||
|
MOVQ R13, 8(BX)
|
||||||
|
RET
|
||||||
|
|
||||||
36
internal/bloom/murmur128_decl.go
Normal file
36
internal/bloom/murmur128_decl.go
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
//go:build go1.5 && amd64
|
||||||
|
// +build go1.5,amd64
|
||||||
|
|
||||||
|
package bloom
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
// Sum128 returns the murmur3 sum of data. It is equivalent to the following
|
||||||
|
// sequence (without the extra burden and the extra allocation):
|
||||||
|
//
|
||||||
|
// hasher := New128()
|
||||||
|
// hasher.Write(data)
|
||||||
|
// return hasher.Sum128()
|
||||||
|
func Sum128(data []byte) (h1 uint64, h2 uint64)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
// SeedSum128 returns the murmur3 sum of data with digests initialized to seed1
|
||||||
|
// and seed2.
|
||||||
|
//
|
||||||
|
// The canonical implementation allows only one uint32 seed; to imitate that
|
||||||
|
// behavior, use the same, uint32-max seed for seed1 and seed2.
|
||||||
|
//
|
||||||
|
// This reads and processes the data in chunks of little endian uint64s;
|
||||||
|
// thus, the returned hashes are portable across architectures.
|
||||||
|
func SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
// StringSum128 is the string version of Sum128.
|
||||||
|
func StringSum128(data string) (h1 uint64, h2 uint64)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
// SeedStringSum128 is the string version of SeedSum128.
|
||||||
|
func SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64)
|
||||||
137
internal/bloom/murmur128_gen.go
Normal file
137
internal/bloom/murmur128_gen.go
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
//go:build !go1.5 || !amd64
|
||||||
|
// +build !go1.5 !amd64
|
||||||
|
|
||||||
|
package bloom
|
||||||
|
|
||||||
|
import "math/bits"
|
||||||
|
|
||||||
|
// SeedSum128 returns the murmur3 sum of data with digests initialized to seed1
|
||||||
|
// and seed2.
|
||||||
|
//
|
||||||
|
// The canonical implementation allows only one uint32 seed; to imitate that
|
||||||
|
// behavior, use the same, uint32-max seed for seed1 and seed2.
|
||||||
|
//
|
||||||
|
// This reads and processes the data in chunks of little endian uint64s;
|
||||||
|
// thus, the returned hashes are portable across architectures.
|
||||||
|
func SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64) {
|
||||||
|
return SeedStringSum128(seed1, seed2, strslice(data))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sum128 returns the murmur3 sum of data. It is equivalent to the following
|
||||||
|
// sequence (without the extra burden and the extra allocation):
|
||||||
|
//
|
||||||
|
// hasher := New128()
|
||||||
|
// hasher.Write(data)
|
||||||
|
// return hasher.Sum128()
|
||||||
|
func Sum128(data []byte) (h1 uint64, h2 uint64) {
|
||||||
|
return SeedStringSum128(0, 0, strslice(data))
|
||||||
|
}
|
||||||
|
|
||||||
|
// StringSum128 is the string version of Sum128.
|
||||||
|
func StringSum128(data string) (h1 uint64, h2 uint64) {
|
||||||
|
return SeedStringSum128(0, 0, data)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SeedStringSum128 is the string version of SeedSum128.
|
||||||
|
func SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64) {
|
||||||
|
h1, h2 = seed1, seed2
|
||||||
|
clen := len(data)
|
||||||
|
for len(data) >= 16 {
|
||||||
|
// yes, this is faster than using binary.LittleEndian.Uint64
|
||||||
|
k1 := uint64(data[0]) | uint64(data[1])<<8 | uint64(data[2])<<16 | uint64(data[3])<<24 | uint64(data[4])<<32 | uint64(data[5])<<40 | uint64(data[6])<<48 | uint64(data[7])<<56
|
||||||
|
k2 := uint64(data[8]) | uint64(data[9])<<8 | uint64(data[10])<<16 | uint64(data[11])<<24 | uint64(data[12])<<32 | uint64(data[13])<<40 | uint64(data[14])<<48 | uint64(data[15])<<56
|
||||||
|
|
||||||
|
data = data[16:]
|
||||||
|
|
||||||
|
k1 *= c1_128
|
||||||
|
k1 = bits.RotateLeft64(k1, 31)
|
||||||
|
k1 *= c2_128
|
||||||
|
h1 ^= k1
|
||||||
|
|
||||||
|
h1 = bits.RotateLeft64(h1, 27)
|
||||||
|
h1 += h2
|
||||||
|
h1 = h1*5 + 0x52dce729
|
||||||
|
|
||||||
|
k2 *= c2_128
|
||||||
|
k2 = bits.RotateLeft64(k2, 33)
|
||||||
|
k2 *= c1_128
|
||||||
|
h2 ^= k2
|
||||||
|
|
||||||
|
h2 = bits.RotateLeft64(h2, 31)
|
||||||
|
h2 += h1
|
||||||
|
h2 = h2*5 + 0x38495ab5
|
||||||
|
}
|
||||||
|
|
||||||
|
var k1, k2 uint64
|
||||||
|
switch len(data) {
|
||||||
|
case 15:
|
||||||
|
k2 ^= uint64(data[14]) << 48
|
||||||
|
fallthrough
|
||||||
|
case 14:
|
||||||
|
k2 ^= uint64(data[13]) << 40
|
||||||
|
fallthrough
|
||||||
|
case 13:
|
||||||
|
k2 ^= uint64(data[12]) << 32
|
||||||
|
fallthrough
|
||||||
|
case 12:
|
||||||
|
k2 ^= uint64(data[11]) << 24
|
||||||
|
fallthrough
|
||||||
|
case 11:
|
||||||
|
k2 ^= uint64(data[10]) << 16
|
||||||
|
fallthrough
|
||||||
|
case 10:
|
||||||
|
k2 ^= uint64(data[9]) << 8
|
||||||
|
fallthrough
|
||||||
|
case 9:
|
||||||
|
k2 ^= uint64(data[8]) << 0
|
||||||
|
|
||||||
|
k2 *= c2_128
|
||||||
|
k2 = bits.RotateLeft64(k2, 33)
|
||||||
|
k2 *= c1_128
|
||||||
|
h2 ^= k2
|
||||||
|
|
||||||
|
fallthrough
|
||||||
|
|
||||||
|
case 8:
|
||||||
|
k1 ^= uint64(data[7]) << 56
|
||||||
|
fallthrough
|
||||||
|
case 7:
|
||||||
|
k1 ^= uint64(data[6]) << 48
|
||||||
|
fallthrough
|
||||||
|
case 6:
|
||||||
|
k1 ^= uint64(data[5]) << 40
|
||||||
|
fallthrough
|
||||||
|
case 5:
|
||||||
|
k1 ^= uint64(data[4]) << 32
|
||||||
|
fallthrough
|
||||||
|
case 4:
|
||||||
|
k1 ^= uint64(data[3]) << 24
|
||||||
|
fallthrough
|
||||||
|
case 3:
|
||||||
|
k1 ^= uint64(data[2]) << 16
|
||||||
|
fallthrough
|
||||||
|
case 2:
|
||||||
|
k1 ^= uint64(data[1]) << 8
|
||||||
|
fallthrough
|
||||||
|
case 1:
|
||||||
|
k1 ^= uint64(data[0]) << 0
|
||||||
|
k1 *= c1_128
|
||||||
|
k1 = bits.RotateLeft64(k1, 31)
|
||||||
|
k1 *= c2_128
|
||||||
|
h1 ^= k1
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 ^= uint64(clen)
|
||||||
|
h2 ^= uint64(clen)
|
||||||
|
|
||||||
|
h1 += h2
|
||||||
|
h2 += h1
|
||||||
|
|
||||||
|
h1 = fmix64(h1)
|
||||||
|
h2 = fmix64(h2)
|
||||||
|
|
||||||
|
h1 += h2
|
||||||
|
h2 += h1
|
||||||
|
|
||||||
|
return h1, h2
|
||||||
|
}
|
||||||
33
main.go
Normal file
33
main.go
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
if err := Run(os.Args[1:]...); err != nil {
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Run(args ...string) error {
|
||||||
|
name, args := ParseCommandName(args)
|
||||||
|
|
||||||
|
// 从参数中解析出命令
|
||||||
|
switch name {
|
||||||
|
case "", "help":
|
||||||
|
return RunHelp(args...)
|
||||||
|
case "makebloom":
|
||||||
|
return RunMakeBloom(args...)
|
||||||
|
case "hittest":
|
||||||
|
return RunHitTest(args...)
|
||||||
|
case "info":
|
||||||
|
return RunInfo(args...)
|
||||||
|
default:
|
||||||
|
err := fmt.Errorf(`unknown command "%s"`+"\n"+`Run 'bloomtool help' for usage`, name)
|
||||||
|
slog.Warn(err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
90
makebloom.go
Normal file
90
makebloom.go
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"flag"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"git.algo.com.cn/public/bloomtool/internal/bloom"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
FalseRate = 0.00000001 // 误判率 千万分之一
|
||||||
|
)
|
||||||
|
|
||||||
|
func RunMakeBloom(args ...string) error {
|
||||||
|
fs := flag.NewFlagSet("makebloom", flag.ExitOnError)
|
||||||
|
|
||||||
|
txtFile := fs.String("d", "", "device id filename")
|
||||||
|
bmpFile := fs.String("b", "", "bitmap filename for output")
|
||||||
|
elements := fs.Uint64("e", 0, "max elements. (max 100 0000 0000). if 0 then auto")
|
||||||
|
falseRate := fs.Float64("r", FalseRate, "false rate (0.01--0.0000 0000 1)")
|
||||||
|
|
||||||
|
if err := fs.Parse(args); err != nil {
|
||||||
|
return err
|
||||||
|
} else if fs.NArg() > 0 || *txtFile == "" || *bmpFile == "" ||
|
||||||
|
*elements > 10000000000 ||
|
||||||
|
*falseRate > 0.01 || *falseRate < 0.000000001 {
|
||||||
|
fs.Usage()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return makeBloom(*txtFile, *bmpFile, *elements, *falseRate)
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeBloom(txtFile string, bmpFile string, elements uint64, falseRate float64) error {
|
||||||
|
// 打开设备号文件
|
||||||
|
slog.Info("open source file", "filename", txtFile)
|
||||||
|
tfile, err := os.Open(txtFile)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("open source file error", "err", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer tfile.Close()
|
||||||
|
|
||||||
|
fstat, err := tfile.Stat()
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("source file stat error", "err", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// 计算元素个数,并预留了一些空间。理论上单行md5为32字节,加回车(1个或2个字节)。
|
||||||
|
// 这里取30做安全系数。再加10000个保险
|
||||||
|
maxElements := uint64(0)
|
||||||
|
if elements == 0 {
|
||||||
|
maxElements = uint64((fstat.Size() / 30)) + 10000
|
||||||
|
} else {
|
||||||
|
maxElements = elements
|
||||||
|
}
|
||||||
|
|
||||||
|
// 新建布隆过滤器
|
||||||
|
bloombmp := bloom.NewWithEstimates(maxElements, falseRate)
|
||||||
|
|
||||||
|
// 逐行读取
|
||||||
|
scanner := bufio.NewScanner(tfile)
|
||||||
|
lineCount := 1
|
||||||
|
for scanner.Scan() {
|
||||||
|
if lineCount%100000 == 0 {
|
||||||
|
slog.Info("read line", "lineno", lineCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 转换成bloom bit 写入
|
||||||
|
bloombmp.AddString(scanner.Text())
|
||||||
|
|
||||||
|
lineCount++
|
||||||
|
}
|
||||||
|
|
||||||
|
// 保存文件
|
||||||
|
slog.Info("save bitmap file", "filename", bmpFile)
|
||||||
|
err = bloombmp.SaveToFile(bmpFile)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("save bitmap file error", "err", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Info("save bitmap file done")
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
23
parse.go
Normal file
23
parse.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
func ParseCommandName(args []string) (string, []string) {
|
||||||
|
var name string
|
||||||
|
if len(args) > 0 {
|
||||||
|
if !strings.HasPrefix(args[0], "-") {
|
||||||
|
name = args[0]
|
||||||
|
} else if args[0] == "-h" || args[0] == "-help" || args[0] == "--help" {
|
||||||
|
name = "help"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if name == "help" && len(args) > 2 && !strings.HasPrefix(args[1], "-") {
|
||||||
|
return args[1], []string{"-h"}
|
||||||
|
}
|
||||||
|
|
||||||
|
if name != "" {
|
||||||
|
return name, args[1:]
|
||||||
|
}
|
||||||
|
return "", args
|
||||||
|
}
|
||||||
19
vendor/github.com/RoaringBitmap/roaring/.drone.yml
generated
vendored
Normal file
19
vendor/github.com/RoaringBitmap/roaring/.drone.yml
generated
vendored
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
kind: pipeline
|
||||||
|
name: default
|
||||||
|
|
||||||
|
workspace:
|
||||||
|
base: /go
|
||||||
|
path: src/github.com/RoaringBitmap/roaring
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: test
|
||||||
|
image: golang
|
||||||
|
commands:
|
||||||
|
- go get -t
|
||||||
|
- go test
|
||||||
|
- go build -tags appengine
|
||||||
|
- go test -tags appengine
|
||||||
|
- GOARCH=386 go build
|
||||||
|
- GOARCH=386 go test
|
||||||
|
- GOARCH=arm go build
|
||||||
|
- GOARCH=arm64 go build
|
||||||
5
vendor/github.com/RoaringBitmap/roaring/.gitignore
generated
vendored
Normal file
5
vendor/github.com/RoaringBitmap/roaring/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
*~
|
||||||
|
roaring-fuzz.zip
|
||||||
|
workdir
|
||||||
|
coverage.out
|
||||||
|
testdata/all3.classic
|
||||||
0
vendor/github.com/RoaringBitmap/roaring/.gitmodules
generated
vendored
Normal file
0
vendor/github.com/RoaringBitmap/roaring/.gitmodules
generated
vendored
Normal file
11
vendor/github.com/RoaringBitmap/roaring/AUTHORS
generated
vendored
Normal file
11
vendor/github.com/RoaringBitmap/roaring/AUTHORS
generated
vendored
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# This is the official list of roaring authors for copyright purposes.
|
||||||
|
|
||||||
|
Todd Gruben (@tgruben),
|
||||||
|
Daniel Lemire (@lemire),
|
||||||
|
Elliot Murphy (@statik),
|
||||||
|
Bob Potter (@bpot),
|
||||||
|
Tyson Maly (@tvmaly),
|
||||||
|
Will Glynn (@willglynn),
|
||||||
|
Brent Pedersen (@brentp)
|
||||||
|
Maciej Biłas (@maciej),
|
||||||
|
Joe Nall (@joenall)
|
||||||
18
vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS
generated
vendored
Normal file
18
vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS
generated
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# This is the official list of roaring contributors
|
||||||
|
|
||||||
|
Todd Gruben (@tgruben),
|
||||||
|
Daniel Lemire (@lemire),
|
||||||
|
Elliot Murphy (@statik),
|
||||||
|
Bob Potter (@bpot),
|
||||||
|
Tyson Maly (@tvmaly),
|
||||||
|
Will Glynn (@willglynn),
|
||||||
|
Brent Pedersen (@brentp),
|
||||||
|
Jason E. Aten (@glycerine),
|
||||||
|
Vali Malinoiu (@0x4139),
|
||||||
|
Forud Ghafouri (@fzerorubigd),
|
||||||
|
Joe Nall (@joenall),
|
||||||
|
(@fredim),
|
||||||
|
Edd Robinson (@e-dard),
|
||||||
|
Alexander Petrov (@alldroll),
|
||||||
|
Guy Molinari (@guymolinari),
|
||||||
|
Ling Jin (@JinLingChristopher)
|
||||||
235
vendor/github.com/RoaringBitmap/roaring/LICENSE
generated
vendored
Normal file
235
vendor/github.com/RoaringBitmap/roaring/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright 2016 by the authors
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
================================================================================
|
||||||
|
|
||||||
|
Portions of runcontainer.go are from the Go standard library, which is licensed
|
||||||
|
under:
|
||||||
|
|
||||||
|
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
202
vendor/github.com/RoaringBitmap/roaring/LICENSE-2.0.txt
generated
vendored
Normal file
202
vendor/github.com/RoaringBitmap/roaring/LICENSE-2.0.txt
generated
vendored
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright 2016 by the authors
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
413
vendor/github.com/RoaringBitmap/roaring/README.md
generated
vendored
Normal file
413
vendor/github.com/RoaringBitmap/roaring/README.md
generated
vendored
Normal file
@@ -0,0 +1,413 @@
|
|||||||
|
# roaring
|
||||||
|
|
||||||
|
[](https://godoc.org/github.com/RoaringBitmap/roaring) [](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
|
||||||
|
|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|
=============
|
||||||
|
|
||||||
|
This is a go version of the Roaring bitmap data structure.
|
||||||
|
|
||||||
|
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
|
||||||
|
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [anacrolix/torrent][anacrolix/torrent], [Whoosh][whoosh], [Redpanda](https://github.com/redpanda-data/redpanda), [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
|
||||||
|
|
||||||
|
[lucene]: https://lucene.apache.org/
|
||||||
|
[solr]: https://lucene.apache.org/solr/
|
||||||
|
[elasticsearch]: https://www.elastic.co/products/elasticsearch
|
||||||
|
[druid]: https://druid.apache.org/
|
||||||
|
[spark]: https://spark.apache.org/
|
||||||
|
[opensearchserver]: http://www.opensearchserver.com
|
||||||
|
[anacrolix/torrent]: https://github.com/anacrolix/torrent
|
||||||
|
[whoosh]: https://bitbucket.org/mchaput/whoosh/wiki/Home
|
||||||
|
[pilosa]: https://www.pilosa.com/
|
||||||
|
[kylin]: http://kylin.apache.org/
|
||||||
|
[pinot]: http://github.com/linkedin/pinot/wiki
|
||||||
|
[vsts]: https://www.visualstudio.com/team-services/
|
||||||
|
[atlas]: https://github.com/Netflix/atlas
|
||||||
|
|
||||||
|
Roaring bitmaps are found to work well in many important applications:
|
||||||
|
|
||||||
|
> Use Roaring for bitmap compression whenever possible. Do not use other bitmap compression methods ([Wang et al., SIGMOD 2017](http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf))
|
||||||
|
|
||||||
|
|
||||||
|
The ``roaring`` Go library is used by
|
||||||
|
* [anacrolix/torrent]
|
||||||
|
* [InfluxDB](https://www.influxdata.com)
|
||||||
|
* [Pilosa](https://www.pilosa.com/)
|
||||||
|
* [Bleve](http://www.blevesearch.com)
|
||||||
|
* [Weaviate](https://github.com/weaviate/weaviate)
|
||||||
|
* [lindb](https://github.com/lindb/lindb)
|
||||||
|
* [Elasticell](https://github.com/deepfabric/elasticell)
|
||||||
|
* [SourceGraph](https://github.com/sourcegraph/sourcegraph)
|
||||||
|
* [M3](https://github.com/m3db/m3)
|
||||||
|
* [trident](https://github.com/NetApp/trident)
|
||||||
|
* [Husky](https://www.datadoghq.com/blog/engineering/introducing-husky/)
|
||||||
|
* [FrostDB](https://github.com/polarsignals/frostdb)
|
||||||
|
|
||||||
|
This library is used in production in several systems, it is part of the [Awesome Go collection](https://awesome-go.com).
|
||||||
|
|
||||||
|
|
||||||
|
There are also [Java](https://github.com/RoaringBitmap/RoaringBitmap) and [C/C++](https://github.com/RoaringBitmap/CRoaring) versions. The Java, C, C++ and Go version are binary compatible: e.g, you can save bitmaps
|
||||||
|
from a Java program and load them back in Go, and vice versa. We have a [format specification](https://github.com/RoaringBitmap/RoaringFormatSpec).
|
||||||
|
|
||||||
|
|
||||||
|
This code is licensed under Apache License, Version 2.0 (ASL2.0).
|
||||||
|
|
||||||
|
Copyright 2016-... by the authors.
|
||||||
|
|
||||||
|
When should you use a bitmap?
|
||||||
|
===================================
|
||||||
|
|
||||||
|
|
||||||
|
Sets are a fundamental abstraction in
|
||||||
|
software. They can be implemented in various
|
||||||
|
ways, as hash sets, as trees, and so forth.
|
||||||
|
In databases and search engines, sets are often an integral
|
||||||
|
part of indexes. For example, we may need to maintain a set
|
||||||
|
of all documents or rows (represented by numerical identifier)
|
||||||
|
that satisfy some property. Besides adding or removing
|
||||||
|
elements from the set, we need fast functions
|
||||||
|
to compute the intersection, the union, the difference between sets, and so on.
|
||||||
|
|
||||||
|
|
||||||
|
To implement a set
|
||||||
|
of integers, a particularly appealing strategy is the
|
||||||
|
bitmap (also called bitset or bit vector). Using n bits,
|
||||||
|
we can represent any set made of the integers from the range
|
||||||
|
[0,n): the ith bit is set to one if integer i is present in the set.
|
||||||
|
Commodity processors use words of W=32 or W=64 bits. By combining many such words, we can
|
||||||
|
support large values of n. Intersections, unions and differences can then be implemented
|
||||||
|
as bitwise AND, OR and ANDNOT operations.
|
||||||
|
More complicated set functions can also be implemented as bitwise operations.
|
||||||
|
|
||||||
|
When the bitset approach is applicable, it can be orders of
|
||||||
|
magnitude faster than other possible implementation of a set (e.g., as a hash set)
|
||||||
|
while using several times less memory.
|
||||||
|
|
||||||
|
However, a bitset, even a compressed one is not always applicable. For example, if
|
||||||
|
you have 1000 random-looking integers, then a simple array might be the best representation.
|
||||||
|
We refer to this case as the "sparse" scenario.
|
||||||
|
|
||||||
|
When should you use compressed bitmaps?
|
||||||
|
===================================
|
||||||
|
|
||||||
|
An uncompressed BitSet can use a lot of memory. For example, if you take a BitSet
|
||||||
|
and set the bit at position 1,000,000 to true and you have just over 100kB. That is over 100kB
|
||||||
|
to store the position of one bit. This is wasteful even if you do not care about memory:
|
||||||
|
suppose that you need to compute the intersection between this BitSet and another one
|
||||||
|
that has a bit at position 1,000,001 to true, then you need to go through all these zeroes,
|
||||||
|
whether you like it or not. That can become very wasteful.
|
||||||
|
|
||||||
|
This being said, there are definitively cases where attempting to use compressed bitmaps is wasteful.
|
||||||
|
For example, if you have a small universe size. E.g., your bitmaps represent sets of integers
|
||||||
|
from [0,n) where n is small (e.g., n=64 or n=128). If you can use uncompressed BitSet and
|
||||||
|
it does not blow up your memory usage, then compressed bitmaps are probably not useful
|
||||||
|
to you. In fact, if you do not need compression, then a BitSet offers remarkable speed.
|
||||||
|
|
||||||
|
The sparse scenario is another use case where compressed bitmaps should not be used.
|
||||||
|
Keep in mind that random-looking data is usually not compressible. E.g., if you have a small set of
|
||||||
|
32-bit random integers, it is not mathematically possible to use far less than 32 bits per integer,
|
||||||
|
and attempts at compression can be counterproductive.
|
||||||
|
|
||||||
|
How does Roaring compares with the alternatives?
|
||||||
|
==================================================
|
||||||
|
|
||||||
|
|
||||||
|
Most alternatives to Roaring are part of a larger family of compressed bitmaps that are run-length-encoded
|
||||||
|
bitmaps. They identify long runs of 1s or 0s and they represent them with a marker word.
|
||||||
|
If you have a local mix of 1s and 0, you use an uncompressed word.
|
||||||
|
|
||||||
|
There are many formats in this family:
|
||||||
|
|
||||||
|
* Oracle's BBC is an obsolete format at this point: though it may provide good compression,
|
||||||
|
it is likely much slower than more recent alternatives due to excessive branching.
|
||||||
|
* WAH is a patented variation on BBC that provides better performance.
|
||||||
|
* Concise is a variation on the patented WAH. It some specific instances, it can compress
|
||||||
|
much better than WAH (up to 2x better), but it is generally slower.
|
||||||
|
* EWAH is both free of patent, and it is faster than all the above. On the downside, it
|
||||||
|
does not compress quite as well. It is faster because it allows some form of "skipping"
|
||||||
|
over uncompressed words. So though none of these formats are great at random access, EWAH
|
||||||
|
is better than the alternatives.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
There is a big problem with these formats however that can hurt you badly in some cases: there is no random access. If you want to check whether a given value is present in the set, you have to start from the beginning and "uncompress" the whole thing. This means that if you want to intersect a big set with a large set, you still have to uncompress the whole big set in the worst case...
|
||||||
|
|
||||||
|
Roaring solves this problem. It works in the following manner. It divides the data into chunks of 2<sup>16</sup> integers
|
||||||
|
(e.g., [0, 2<sup>16</sup>), [2<sup>16</sup>, 2 x 2<sup>16</sup>), ...). Within a chunk, it can use an uncompressed bitmap, a simple list of integers,
|
||||||
|
or a list of runs. Whatever format it uses, they all allow you to check for the presence of any one value quickly
|
||||||
|
(e.g., with a binary search). The net result is that Roaring can compute many operations much faster than run-length-encoded
|
||||||
|
formats like WAH, EWAH, Concise... Maybe surprisingly, Roaring also generally offers better compression ratios.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### References
|
||||||
|
|
||||||
|
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
|
||||||
|
- Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
|
||||||
|
Better bitmap performance with Roaring bitmaps,
|
||||||
|
Software: Practice and Experience 46 (5), 2016.[arXiv:1402.6407](http://arxiv.org/abs/1402.6407) This paper used data from http://lemire.me/data/realroaring2014.html
|
||||||
|
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. [arXiv:1603.06549](http://arxiv.org/abs/1603.06549)
|
||||||
|
|
||||||
|
### Dependencies
|
||||||
|
|
||||||
|
Dependencies are fetched automatically by giving the `-t` flag to `go get`.
|
||||||
|
|
||||||
|
they include
|
||||||
|
- github.com/bits-and-blooms/bitset
|
||||||
|
- github.com/mschoch/smat
|
||||||
|
- github.com/glycerine/go-unsnap-stream
|
||||||
|
- github.com/philhofer/fwd
|
||||||
|
- github.com/jtolds/gls
|
||||||
|
|
||||||
|
Note that the smat library requires Go 1.6 or better.
|
||||||
|
|
||||||
|
#### Installation
|
||||||
|
|
||||||
|
- go get -t github.com/RoaringBitmap/roaring
|
||||||
|
|
||||||
|
### Instructions for contributors
|
||||||
|
|
||||||
|
Using bash or other common shells:
|
||||||
|
```
|
||||||
|
$ git clone git@github.com:RoaringBitmap/roaring.git
|
||||||
|
$ export GO111MODULE=on
|
||||||
|
$ go mod tidy
|
||||||
|
$ go test -v
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example
|
||||||
|
|
||||||
|
Here is a simplified but complete example:
|
||||||
|
|
||||||
|
```go
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/RoaringBitmap/roaring"
|
||||||
|
"bytes"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// example inspired by https://github.com/fzandona/goroar
|
||||||
|
fmt.Println("==roaring==")
|
||||||
|
rb1 := roaring.BitmapOf(1, 2, 3, 4, 5, 100, 1000)
|
||||||
|
fmt.Println(rb1.String())
|
||||||
|
|
||||||
|
rb2 := roaring.BitmapOf(3, 4, 1000)
|
||||||
|
fmt.Println(rb2.String())
|
||||||
|
|
||||||
|
rb3 := roaring.New()
|
||||||
|
fmt.Println(rb3.String())
|
||||||
|
|
||||||
|
fmt.Println("Cardinality: ", rb1.GetCardinality())
|
||||||
|
|
||||||
|
fmt.Println("Contains 3? ", rb1.Contains(3))
|
||||||
|
|
||||||
|
rb1.And(rb2)
|
||||||
|
|
||||||
|
rb3.Add(1)
|
||||||
|
rb3.Add(5)
|
||||||
|
|
||||||
|
rb3.Or(rb1)
|
||||||
|
|
||||||
|
// computes union of the three bitmaps in parallel using 4 workers
|
||||||
|
roaring.ParOr(4, rb1, rb2, rb3)
|
||||||
|
// computes intersection of the three bitmaps in parallel using 4 workers
|
||||||
|
roaring.ParAnd(4, rb1, rb2, rb3)
|
||||||
|
|
||||||
|
|
||||||
|
// prints 1, 3, 4, 5, 1000
|
||||||
|
i := rb3.Iterator()
|
||||||
|
for i.HasNext() {
|
||||||
|
fmt.Println(i.Next())
|
||||||
|
}
|
||||||
|
fmt.Println()
|
||||||
|
|
||||||
|
// next we include an example of serialization
|
||||||
|
buf := new(bytes.Buffer)
|
||||||
|
rb1.WriteTo(buf) // we omit error handling
|
||||||
|
newrb:= roaring.New()
|
||||||
|
newrb.ReadFrom(buf)
|
||||||
|
if rb1.Equals(newrb) {
|
||||||
|
fmt.Println("I wrote the content to a byte stream and read it back.")
|
||||||
|
}
|
||||||
|
// you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator()
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
If you wish to use serialization and handle errors, you might want to
|
||||||
|
consider the following sample of code:
|
||||||
|
|
||||||
|
```go
|
||||||
|
rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000)
|
||||||
|
buf := new(bytes.Buffer)
|
||||||
|
size,err:=rb.WriteTo(buf)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Failed writing")
|
||||||
|
}
|
||||||
|
newrb:= New()
|
||||||
|
size,err=newrb.ReadFrom(buf)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Failed reading")
|
||||||
|
}
|
||||||
|
if ! rb.Equals(newrb) {
|
||||||
|
t.Errorf("Cannot retrieve serialized version")
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Given N integers in [0,x), then the serialized size in bytes of
|
||||||
|
a Roaring bitmap should never exceed this bound:
|
||||||
|
|
||||||
|
`` 8 + 9 * ((long)x+65535)/65536 + 2 * N ``
|
||||||
|
|
||||||
|
That is, given a fixed overhead for the universe size (x), Roaring
|
||||||
|
bitmaps never use more than 2 bytes per integer. You can call
|
||||||
|
``BoundSerializedSizeInBytes`` for a more precise estimate.
|
||||||
|
|
||||||
|
### 64-bit Roaring
|
||||||
|
|
||||||
|
By default, roaring is used to stored unsigned 32-bit integers. However, we also offer
|
||||||
|
an extension dedicated to 64-bit integers. It supports roughly the same functions:
|
||||||
|
|
||||||
|
```go
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/RoaringBitmap/roaring/roaring64"
|
||||||
|
"bytes"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// example inspired by https://github.com/fzandona/goroar
|
||||||
|
fmt.Println("==roaring64==")
|
||||||
|
rb1 := roaring64.BitmapOf(1, 2, 3, 4, 5, 100, 1000)
|
||||||
|
fmt.Println(rb1.String())
|
||||||
|
|
||||||
|
rb2 := roaring64.BitmapOf(3, 4, 1000)
|
||||||
|
fmt.Println(rb2.String())
|
||||||
|
|
||||||
|
rb3 := roaring64.New()
|
||||||
|
fmt.Println(rb3.String())
|
||||||
|
|
||||||
|
fmt.Println("Cardinality: ", rb1.GetCardinality())
|
||||||
|
|
||||||
|
fmt.Println("Contains 3? ", rb1.Contains(3))
|
||||||
|
|
||||||
|
rb1.And(rb2)
|
||||||
|
|
||||||
|
rb3.Add(1)
|
||||||
|
rb3.Add(5)
|
||||||
|
|
||||||
|
rb3.Or(rb1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// prints 1, 3, 4, 5, 1000
|
||||||
|
i := rb3.Iterator()
|
||||||
|
for i.HasNext() {
|
||||||
|
fmt.Println(i.Next())
|
||||||
|
}
|
||||||
|
fmt.Println()
|
||||||
|
|
||||||
|
// next we include an example of serialization
|
||||||
|
buf := new(bytes.Buffer)
|
||||||
|
rb1.WriteTo(buf) // we omit error handling
|
||||||
|
newrb:= roaring64.New()
|
||||||
|
newrb.ReadFrom(buf)
|
||||||
|
if rb1.Equals(newrb) {
|
||||||
|
fmt.Println("I wrote the content to a byte stream and read it back.")
|
||||||
|
}
|
||||||
|
// you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator()
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Only the 32-bit roaring format is standard and cross-operable between Java, C++, C and Go. There is no guarantee that the 64-bit versions are compatible.
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
Current documentation is available at https://pkg.go.dev/github.com/RoaringBitmap/roaring and https://pkg.go.dev/github.com/RoaringBitmap/roaring/roaring64
|
||||||
|
|
||||||
|
### Goroutine safety
|
||||||
|
|
||||||
|
In general, it should not generally be considered safe to access
|
||||||
|
the same bitmaps using different goroutines--they are left
|
||||||
|
unsynchronized for performance. Should you want to access
|
||||||
|
a Bitmap from more than one goroutine, you should
|
||||||
|
provide synchronization. Typically this is done by using channels to pass
|
||||||
|
the *Bitmap around (in Go style; so there is only ever one owner),
|
||||||
|
or by using `sync.Mutex` to serialize operations on Bitmaps.
|
||||||
|
|
||||||
|
### Coverage
|
||||||
|
|
||||||
|
We test our software. For a report on our test coverage, see
|
||||||
|
|
||||||
|
https://coveralls.io/github/RoaringBitmap/roaring?branch=master
|
||||||
|
|
||||||
|
### Benchmark
|
||||||
|
|
||||||
|
Type
|
||||||
|
|
||||||
|
go test -bench Benchmark -run -
|
||||||
|
|
||||||
|
To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets)
|
||||||
|
run the following:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
go get github.com/RoaringBitmap/real-roaring-datasets
|
||||||
|
BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
|
||||||
|
```
|
||||||
|
|
||||||
|
### Iterative use
|
||||||
|
|
||||||
|
You can use roaring with gore:
|
||||||
|
|
||||||
|
- go get -u github.com/motemen/gore
|
||||||
|
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
|
||||||
|
- go get github.com/RoaringBitmap/roaring
|
||||||
|
|
||||||
|
```go
|
||||||
|
$ gore
|
||||||
|
gore version 0.2.6 :help for help
|
||||||
|
gore> :import github.com/RoaringBitmap/roaring
|
||||||
|
gore> x:=roaring.New()
|
||||||
|
gore> x.Add(1)
|
||||||
|
gore> x.String()
|
||||||
|
"{1}"
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Fuzzy testing
|
||||||
|
|
||||||
|
You can help us test further the library with fuzzy testing:
|
||||||
|
|
||||||
|
go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||||
|
go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||||
|
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||||
|
go-fuzz-build github.com/RoaringBitmap/roaring
|
||||||
|
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 -func FuzzSmat
|
||||||
|
|
||||||
|
Let it run, and if the # of crashers is > 0, check out the reports in
|
||||||
|
the workdir where you should be able to find the panic goroutine stack
|
||||||
|
traces.
|
||||||
|
|
||||||
|
You may also replace `-func FuzzSmat` by `-func FuzzSerializationBuffer` or `-func FuzzSerializationStream`.
|
||||||
|
|
||||||
|
### Alternative in Go
|
||||||
|
|
||||||
|
There is a Go version wrapping the C/C++ implementation https://github.com/RoaringBitmap/gocroaring
|
||||||
|
|
||||||
|
For an alternative implementation in Go, see https://github.com/fzandona/goroar
|
||||||
|
The two versions were written independently.
|
||||||
|
|
||||||
|
|
||||||
|
### Mailing list/discussion group
|
||||||
|
|
||||||
|
https://groups.google.com/forum/#!forum/roaring-bitmaps
|
||||||
1101
vendor/github.com/RoaringBitmap/roaring/arraycontainer.go
generated
vendored
Normal file
1101
vendor/github.com/RoaringBitmap/roaring/arraycontainer.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1236
vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go
generated
vendored
Normal file
1236
vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
13
vendor/github.com/RoaringBitmap/roaring/clz.go
generated
vendored
Normal file
13
vendor/github.com/RoaringBitmap/roaring/clz.go
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
//go:build go1.9
|
||||||
|
// +build go1.9
|
||||||
|
|
||||||
|
// "go1.9", from Go version 1.9 onward
|
||||||
|
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
import "math/bits"
|
||||||
|
|
||||||
|
func countLeadingZeros(x uint64) int {
|
||||||
|
return bits.LeadingZeros64(x)
|
||||||
|
}
|
||||||
37
vendor/github.com/RoaringBitmap/roaring/clz_compat.go
generated
vendored
Normal file
37
vendor/github.com/RoaringBitmap/roaring/clz_compat.go
generated
vendored
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
//go:build !go1.9
|
||||||
|
// +build !go1.9
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
// LeadingZeroBits returns the number of consecutive most significant zero
|
||||||
|
// bits of x.
|
||||||
|
func countLeadingZeros(i uint64) int {
|
||||||
|
if i == 0 {
|
||||||
|
return 64
|
||||||
|
}
|
||||||
|
n := 1
|
||||||
|
x := uint32(i >> 32)
|
||||||
|
if x == 0 {
|
||||||
|
n += 32
|
||||||
|
x = uint32(i)
|
||||||
|
}
|
||||||
|
if (x >> 16) == 0 {
|
||||||
|
n += 16
|
||||||
|
x <<= 16
|
||||||
|
}
|
||||||
|
if (x >> 24) == 0 {
|
||||||
|
n += 8
|
||||||
|
x <<= 8
|
||||||
|
}
|
||||||
|
if x>>28 == 0 {
|
||||||
|
n += 4
|
||||||
|
x <<= 4
|
||||||
|
}
|
||||||
|
if x>>30 == 0 {
|
||||||
|
n += 2
|
||||||
|
x <<= 2
|
||||||
|
|
||||||
|
}
|
||||||
|
n -= int(x >> 31)
|
||||||
|
return n
|
||||||
|
}
|
||||||
13
vendor/github.com/RoaringBitmap/roaring/ctz.go
generated
vendored
Normal file
13
vendor/github.com/RoaringBitmap/roaring/ctz.go
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
//go:build go1.9
|
||||||
|
// +build go1.9
|
||||||
|
|
||||||
|
// "go1.9", from Go version 1.9 onward
|
||||||
|
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
import "math/bits"
|
||||||
|
|
||||||
|
func countTrailingZeros(x uint64) int {
|
||||||
|
return bits.TrailingZeros64(x)
|
||||||
|
}
|
||||||
72
vendor/github.com/RoaringBitmap/roaring/ctz_compat.go
generated
vendored
Normal file
72
vendor/github.com/RoaringBitmap/roaring/ctz_compat.go
generated
vendored
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
//go:build !go1.9
|
||||||
|
// +build !go1.9
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
// Reuse of portions of go/src/math/big standard lib code
|
||||||
|
// under this license:
|
||||||
|
/*
|
||||||
|
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
const deBruijn32 = 0x077CB531
|
||||||
|
|
||||||
|
var deBruijn32Lookup = []byte{
|
||||||
|
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||||
|
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
|
||||||
|
}
|
||||||
|
|
||||||
|
const deBruijn64 = 0x03f79d71b4ca8b09
|
||||||
|
|
||||||
|
var deBruijn64Lookup = []byte{
|
||||||
|
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
|
||||||
|
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
|
||||||
|
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
|
||||||
|
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
|
||||||
|
}
|
||||||
|
|
||||||
|
// trailingZeroBits returns the number of consecutive least significant zero
|
||||||
|
// bits of x.
|
||||||
|
func countTrailingZeros(x uint64) int {
|
||||||
|
// x & -x leaves only the right-most bit set in the word. Let k be the
|
||||||
|
// index of that bit. Since only a single bit is set, the value is two
|
||||||
|
// to the power of k. Multiplying by a power of two is equivalent to
|
||||||
|
// left shifting, in this case by k bits. The de Bruijn constant is
|
||||||
|
// such that all six bit, consecutive substrings are distinct.
|
||||||
|
// Therefore, if we have a left shifted version of this constant we can
|
||||||
|
// find by how many bits it was shifted by looking at which six bit
|
||||||
|
// substring ended up at the top of the word.
|
||||||
|
// (Knuth, volume 4, section 7.3.1)
|
||||||
|
if x == 0 {
|
||||||
|
// We have to special case 0; the fomula
|
||||||
|
// below doesn't work for 0.
|
||||||
|
return 64
|
||||||
|
}
|
||||||
|
return int(deBruijn64Lookup[((x&-x)*(deBruijn64))>>58])
|
||||||
|
}
|
||||||
313
vendor/github.com/RoaringBitmap/roaring/fastaggregation.go
generated
vendored
Normal file
313
vendor/github.com/RoaringBitmap/roaring/fastaggregation.go
generated
vendored
Normal file
@@ -0,0 +1,313 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"container/heap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Or function that requires repairAfterLazy
|
||||||
|
func lazyOR(x1, x2 *Bitmap) *Bitmap {
|
||||||
|
answer := NewBitmap()
|
||||||
|
pos1 := 0
|
||||||
|
pos2 := 0
|
||||||
|
length1 := x1.highlowcontainer.size()
|
||||||
|
length2 := x2.highlowcontainer.size()
|
||||||
|
main:
|
||||||
|
for (pos1 < length1) && (pos2 < length2) {
|
||||||
|
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||||
|
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||||
|
|
||||||
|
for {
|
||||||
|
if s1 < s2 {
|
||||||
|
answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
|
||||||
|
pos1++
|
||||||
|
if pos1 == length1 {
|
||||||
|
break main
|
||||||
|
}
|
||||||
|
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||||
|
} else if s1 > s2 {
|
||||||
|
answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
|
||||||
|
pos2++
|
||||||
|
if pos2 == length2 {
|
||||||
|
break main
|
||||||
|
}
|
||||||
|
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||||
|
} else {
|
||||||
|
c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
|
||||||
|
answer.highlowcontainer.appendContainer(s1, c1.lazyOR(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
|
||||||
|
pos1++
|
||||||
|
pos2++
|
||||||
|
if (pos1 == length1) || (pos2 == length2) {
|
||||||
|
break main
|
||||||
|
}
|
||||||
|
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||||
|
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if pos1 == length1 {
|
||||||
|
answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
|
||||||
|
} else if pos2 == length2 {
|
||||||
|
answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
|
||||||
|
}
|
||||||
|
return answer
|
||||||
|
}
|
||||||
|
|
||||||
|
// In-place Or function that requires repairAfterLazy
|
||||||
|
func (x1 *Bitmap) lazyOR(x2 *Bitmap) *Bitmap {
|
||||||
|
pos1 := 0
|
||||||
|
pos2 := 0
|
||||||
|
length1 := x1.highlowcontainer.size()
|
||||||
|
length2 := x2.highlowcontainer.size()
|
||||||
|
main:
|
||||||
|
for (pos1 < length1) && (pos2 < length2) {
|
||||||
|
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||||
|
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||||
|
|
||||||
|
for {
|
||||||
|
if s1 < s2 {
|
||||||
|
pos1++
|
||||||
|
if pos1 == length1 {
|
||||||
|
break main
|
||||||
|
}
|
||||||
|
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||||
|
} else if s1 > s2 {
|
||||||
|
x1.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone())
|
||||||
|
pos2++
|
||||||
|
pos1++
|
||||||
|
length1++
|
||||||
|
if pos2 == length2 {
|
||||||
|
break main
|
||||||
|
}
|
||||||
|
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||||
|
} else {
|
||||||
|
c1 := x1.highlowcontainer.getWritableContainerAtIndex(pos1)
|
||||||
|
x1.highlowcontainer.containers[pos1] = c1.lazyIOR(x2.highlowcontainer.getContainerAtIndex(pos2))
|
||||||
|
x1.highlowcontainer.needCopyOnWrite[pos1] = false
|
||||||
|
pos1++
|
||||||
|
pos2++
|
||||||
|
if (pos1 == length1) || (pos2 == length2) {
|
||||||
|
break main
|
||||||
|
}
|
||||||
|
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||||
|
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if pos1 == length1 {
|
||||||
|
x1.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
|
||||||
|
}
|
||||||
|
return x1
|
||||||
|
}
|
||||||
|
|
||||||
|
// to be called after lazy aggregates
|
||||||
|
func (x1 *Bitmap) repairAfterLazy() {
|
||||||
|
for pos := 0; pos < x1.highlowcontainer.size(); pos++ {
|
||||||
|
c := x1.highlowcontainer.getContainerAtIndex(pos)
|
||||||
|
switch c.(type) {
|
||||||
|
case *bitmapContainer:
|
||||||
|
if c.(*bitmapContainer).cardinality == invalidCardinality {
|
||||||
|
c = x1.highlowcontainer.getWritableContainerAtIndex(pos)
|
||||||
|
c.(*bitmapContainer).computeCardinality()
|
||||||
|
if c.(*bitmapContainer).getCardinality() <= arrayDefaultMaxSize {
|
||||||
|
x1.highlowcontainer.setContainerAtIndex(pos, c.(*bitmapContainer).toArrayContainer())
|
||||||
|
} else if c.(*bitmapContainer).isFull() {
|
||||||
|
x1.highlowcontainer.setContainerAtIndex(pos, newRunContainer16Range(0, MaxUint16))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FastAnd computes the intersection between many bitmaps quickly
|
||||||
|
// Compared to the And function, it can take many bitmaps as input, thus saving the trouble
|
||||||
|
// of manually calling "And" many times.
|
||||||
|
//
|
||||||
|
// Performance hints: if you have very large and tiny bitmaps,
|
||||||
|
// it may be beneficial performance-wise to put a tiny bitmap
|
||||||
|
// in first position.
|
||||||
|
func FastAnd(bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
if len(bitmaps) == 0 {
|
||||||
|
return NewBitmap()
|
||||||
|
} else if len(bitmaps) == 1 {
|
||||||
|
return bitmaps[0].Clone()
|
||||||
|
}
|
||||||
|
answer := And(bitmaps[0], bitmaps[1])
|
||||||
|
for _, bm := range bitmaps[2:] {
|
||||||
|
answer.And(bm)
|
||||||
|
}
|
||||||
|
return answer
|
||||||
|
}
|
||||||
|
|
||||||
|
// FastOr computes the union between many bitmaps quickly, as opposed to having to call Or repeatedly.
|
||||||
|
// It might also be faster than calling Or repeatedly.
|
||||||
|
func FastOr(bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
if len(bitmaps) == 0 {
|
||||||
|
return NewBitmap()
|
||||||
|
} else if len(bitmaps) == 1 {
|
||||||
|
return bitmaps[0].Clone()
|
||||||
|
}
|
||||||
|
answer := lazyOR(bitmaps[0], bitmaps[1])
|
||||||
|
for _, bm := range bitmaps[2:] {
|
||||||
|
answer = answer.lazyOR(bm)
|
||||||
|
}
|
||||||
|
// here is where repairAfterLazy is called.
|
||||||
|
answer.repairAfterLazy()
|
||||||
|
return answer
|
||||||
|
}
|
||||||
|
|
||||||
|
// HeapOr computes the union between many bitmaps quickly using a heap.
|
||||||
|
// It might be faster than calling Or repeatedly.
|
||||||
|
func HeapOr(bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
if len(bitmaps) == 0 {
|
||||||
|
return NewBitmap()
|
||||||
|
}
|
||||||
|
// TODO: for better speed, we could do the operation lazily, see Java implementation
|
||||||
|
pq := make(priorityQueue, len(bitmaps))
|
||||||
|
for i, bm := range bitmaps {
|
||||||
|
pq[i] = &item{bm, i}
|
||||||
|
}
|
||||||
|
heap.Init(&pq)
|
||||||
|
|
||||||
|
for pq.Len() > 1 {
|
||||||
|
x1 := heap.Pop(&pq).(*item)
|
||||||
|
x2 := heap.Pop(&pq).(*item)
|
||||||
|
heap.Push(&pq, &item{Or(x1.value, x2.value), 0})
|
||||||
|
}
|
||||||
|
return heap.Pop(&pq).(*item).value
|
||||||
|
}
|
||||||
|
|
||||||
|
// HeapXor computes the symmetric difference between many bitmaps quickly (as opposed to calling Xor repeated).
|
||||||
|
// Internally, this function uses a heap.
|
||||||
|
// It might be faster than calling Xor repeatedly.
|
||||||
|
func HeapXor(bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
if len(bitmaps) == 0 {
|
||||||
|
return NewBitmap()
|
||||||
|
}
|
||||||
|
|
||||||
|
pq := make(priorityQueue, len(bitmaps))
|
||||||
|
for i, bm := range bitmaps {
|
||||||
|
pq[i] = &item{bm, i}
|
||||||
|
}
|
||||||
|
heap.Init(&pq)
|
||||||
|
|
||||||
|
for pq.Len() > 1 {
|
||||||
|
x1 := heap.Pop(&pq).(*item)
|
||||||
|
x2 := heap.Pop(&pq).(*item)
|
||||||
|
heap.Push(&pq, &item{Xor(x1.value, x2.value), 0})
|
||||||
|
}
|
||||||
|
return heap.Pop(&pq).(*item).value
|
||||||
|
}
|
||||||
|
|
||||||
|
// AndAny provides a result equivalent to x1.And(FastOr(bitmaps)).
|
||||||
|
// It's optimized to minimize allocations. It also might be faster than separate calls.
|
||||||
|
func (x1 *Bitmap) AndAny(bitmaps ...*Bitmap) {
|
||||||
|
if len(bitmaps) == 0 {
|
||||||
|
return
|
||||||
|
} else if len(bitmaps) == 1 {
|
||||||
|
x1.And(bitmaps[0])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
type withPos struct {
|
||||||
|
bitmap *roaringArray
|
||||||
|
pos int
|
||||||
|
key uint16
|
||||||
|
}
|
||||||
|
filters := make([]withPos, 0, len(bitmaps))
|
||||||
|
|
||||||
|
for _, b := range bitmaps {
|
||||||
|
if b.highlowcontainer.size() > 0 {
|
||||||
|
filters = append(filters, withPos{
|
||||||
|
bitmap: &b.highlowcontainer,
|
||||||
|
pos: 0,
|
||||||
|
key: b.highlowcontainer.getKeyAtIndex(0),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
basePos := 0
|
||||||
|
intersections := 0
|
||||||
|
keyContainers := make([]container, 0, len(filters))
|
||||||
|
var (
|
||||||
|
tmpArray *arrayContainer
|
||||||
|
tmpBitmap *bitmapContainer
|
||||||
|
minNextKey uint16
|
||||||
|
)
|
||||||
|
|
||||||
|
for basePos < x1.highlowcontainer.size() && len(filters) > 0 {
|
||||||
|
baseKey := x1.highlowcontainer.getKeyAtIndex(basePos)
|
||||||
|
|
||||||
|
// accumulate containers for current key, find next minimal key in filters
|
||||||
|
// and exclude filters that do not have related values anymore
|
||||||
|
i := 0
|
||||||
|
maxPossibleOr := 0
|
||||||
|
minNextKey = MaxUint16
|
||||||
|
for _, f := range filters {
|
||||||
|
if f.key < baseKey {
|
||||||
|
f.pos = f.bitmap.advanceUntil(baseKey, f.pos)
|
||||||
|
if f.pos == f.bitmap.size() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
f.key = f.bitmap.getKeyAtIndex(f.pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
if f.key == baseKey {
|
||||||
|
cont := f.bitmap.getContainerAtIndex(f.pos)
|
||||||
|
keyContainers = append(keyContainers, cont)
|
||||||
|
maxPossibleOr += cont.getCardinality()
|
||||||
|
|
||||||
|
f.pos++
|
||||||
|
if f.pos == f.bitmap.size() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
f.key = f.bitmap.getKeyAtIndex(f.pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
minNextKey = minOfUint16(minNextKey, f.key)
|
||||||
|
filters[i] = f
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
filters = filters[:i]
|
||||||
|
|
||||||
|
if len(keyContainers) == 0 {
|
||||||
|
basePos = x1.highlowcontainer.advanceUntil(minNextKey, basePos)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var ored container
|
||||||
|
|
||||||
|
if len(keyContainers) == 1 {
|
||||||
|
ored = keyContainers[0]
|
||||||
|
} else {
|
||||||
|
//TODO: special case for run containers?
|
||||||
|
if maxPossibleOr > arrayDefaultMaxSize {
|
||||||
|
if tmpBitmap == nil {
|
||||||
|
tmpBitmap = newBitmapContainer()
|
||||||
|
}
|
||||||
|
tmpBitmap.resetTo(keyContainers[0])
|
||||||
|
ored = tmpBitmap
|
||||||
|
} else {
|
||||||
|
if tmpArray == nil {
|
||||||
|
tmpArray = newArrayContainerCapacity(maxPossibleOr)
|
||||||
|
}
|
||||||
|
tmpArray.realloc(maxPossibleOr)
|
||||||
|
tmpArray.resetTo(keyContainers[0])
|
||||||
|
ored = tmpArray
|
||||||
|
}
|
||||||
|
for _, c := range keyContainers[1:] {
|
||||||
|
ored = ored.ior(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result := x1.highlowcontainer.getWritableContainerAtIndex(basePos).iand(ored)
|
||||||
|
if !result.isEmpty() {
|
||||||
|
x1.highlowcontainer.replaceKeyAndContainerAtIndex(intersections, baseKey, result, false)
|
||||||
|
intersections++
|
||||||
|
}
|
||||||
|
|
||||||
|
keyContainers = keyContainers[:0]
|
||||||
|
basePos = x1.highlowcontainer.advanceUntil(minNextKey, basePos)
|
||||||
|
}
|
||||||
|
|
||||||
|
x1.highlowcontainer.resize(intersections)
|
||||||
|
}
|
||||||
215
vendor/github.com/RoaringBitmap/roaring/internal/byte_input.go
generated
vendored
Normal file
215
vendor/github.com/RoaringBitmap/roaring/internal/byte_input.go
generated
vendored
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
package internal
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ByteInput typed interface around io.Reader or raw bytes
|
||||||
|
type ByteInput interface {
|
||||||
|
// Next returns a slice containing the next n bytes from the buffer,
|
||||||
|
// advancing the buffer as if the bytes had been returned by Read.
|
||||||
|
Next(n int) ([]byte, error)
|
||||||
|
// NextReturnsSafeSlice returns true if Next() returns a safe slice as opposed
|
||||||
|
// to a slice that points to an underlying buffer possibly owned by another system.
|
||||||
|
// When NextReturnsSafeSlice returns false, the result from Next() should be copied
|
||||||
|
// before it is modified (i.e., it is immutable).
|
||||||
|
NextReturnsSafeSlice() bool
|
||||||
|
// ReadUInt32 reads uint32 with LittleEndian order
|
||||||
|
ReadUInt32() (uint32, error)
|
||||||
|
// ReadUInt16 reads uint16 with LittleEndian order
|
||||||
|
ReadUInt16() (uint16, error)
|
||||||
|
// GetReadBytes returns read bytes
|
||||||
|
GetReadBytes() int64
|
||||||
|
// SkipBytes skips exactly n bytes
|
||||||
|
SkipBytes(n int) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewByteInputFromReader creates reader wrapper
|
||||||
|
func NewByteInputFromReader(reader io.Reader) ByteInput {
|
||||||
|
return &ByteInputAdapter{
|
||||||
|
r: reader,
|
||||||
|
readBytes: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewByteInput creates raw bytes wrapper
|
||||||
|
func NewByteInput(buf []byte) ByteInput {
|
||||||
|
return &ByteBuffer{
|
||||||
|
buf: buf,
|
||||||
|
off: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ByteBuffer raw bytes wrapper
|
||||||
|
type ByteBuffer struct {
|
||||||
|
buf []byte
|
||||||
|
off int
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewByteBuffer creates a new ByteBuffer.
|
||||||
|
func NewByteBuffer(buf []byte) *ByteBuffer {
|
||||||
|
return &ByteBuffer{
|
||||||
|
buf: buf,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ io.Reader = (*ByteBuffer)(nil)
|
||||||
|
|
||||||
|
// Read implements io.Reader.
|
||||||
|
func (b *ByteBuffer) Read(p []byte) (int, error) {
|
||||||
|
data, err := b.Next(len(p))
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
copy(p, data)
|
||||||
|
return len(data), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next returns a slice containing the next n bytes from the reader
|
||||||
|
// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned
|
||||||
|
func (b *ByteBuffer) Next(n int) ([]byte, error) {
|
||||||
|
m := len(b.buf) - b.off
|
||||||
|
|
||||||
|
if n > m {
|
||||||
|
return nil, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
|
||||||
|
data := b.buf[b.off : b.off+n]
|
||||||
|
b.off += n
|
||||||
|
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NextReturnsSafeSlice returns false since ByteBuffer might hold
|
||||||
|
// an array owned by some other systems.
|
||||||
|
func (b *ByteBuffer) NextReturnsSafeSlice() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadUInt32 reads uint32 with LittleEndian order
|
||||||
|
func (b *ByteBuffer) ReadUInt32() (uint32, error) {
|
||||||
|
if len(b.buf)-b.off < 4 {
|
||||||
|
return 0, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
|
||||||
|
v := binary.LittleEndian.Uint32(b.buf[b.off:])
|
||||||
|
b.off += 4
|
||||||
|
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadUInt16 reads uint16 with LittleEndian order
|
||||||
|
func (b *ByteBuffer) ReadUInt16() (uint16, error) {
|
||||||
|
if len(b.buf)-b.off < 2 {
|
||||||
|
return 0, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
|
||||||
|
v := binary.LittleEndian.Uint16(b.buf[b.off:])
|
||||||
|
b.off += 2
|
||||||
|
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetReadBytes returns read bytes
|
||||||
|
func (b *ByteBuffer) GetReadBytes() int64 {
|
||||||
|
return int64(b.off)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SkipBytes skips exactly n bytes
|
||||||
|
func (b *ByteBuffer) SkipBytes(n int) error {
|
||||||
|
m := len(b.buf) - b.off
|
||||||
|
|
||||||
|
if n > m {
|
||||||
|
return io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
|
||||||
|
b.off += n
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset resets the given buffer with a new byte slice
|
||||||
|
func (b *ByteBuffer) Reset(buf []byte) {
|
||||||
|
b.buf = buf
|
||||||
|
b.off = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// ByteInputAdapter reader wrapper
|
||||||
|
type ByteInputAdapter struct {
|
||||||
|
r io.Reader
|
||||||
|
readBytes int
|
||||||
|
buf [4]byte
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ io.Reader = (*ByteInputAdapter)(nil)
|
||||||
|
|
||||||
|
// Read implements io.Reader.
|
||||||
|
func (b *ByteInputAdapter) Read(buf []byte) (int, error) {
|
||||||
|
m, err := io.ReadAtLeast(b.r, buf, len(buf))
|
||||||
|
b.readBytes += m
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next returns a slice containing the next n bytes from the buffer,
|
||||||
|
// advancing the buffer as if the bytes had been returned by Read.
|
||||||
|
func (b *ByteInputAdapter) Next(n int) ([]byte, error) {
|
||||||
|
buf := make([]byte, n)
|
||||||
|
_, err := b.Read(buf)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return buf, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NextReturnsSafeSlice returns true since ByteInputAdapter always returns a slice
|
||||||
|
// allocated with make([]byte, ...)
|
||||||
|
func (b *ByteInputAdapter) NextReturnsSafeSlice() bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadUInt32 reads uint32 with LittleEndian order
|
||||||
|
func (b *ByteInputAdapter) ReadUInt32() (uint32, error) {
|
||||||
|
buf := b.buf[:4]
|
||||||
|
_, err := b.Read(buf)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return binary.LittleEndian.Uint32(buf), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadUInt16 reads uint16 with LittleEndian order
|
||||||
|
func (b *ByteInputAdapter) ReadUInt16() (uint16, error) {
|
||||||
|
buf := b.buf[:2]
|
||||||
|
_, err := b.Read(buf)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return binary.LittleEndian.Uint16(buf), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetReadBytes returns read bytes
|
||||||
|
func (b *ByteInputAdapter) GetReadBytes() int64 {
|
||||||
|
return int64(b.readBytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SkipBytes skips exactly n bytes
|
||||||
|
func (b *ByteInputAdapter) SkipBytes(n int) error {
|
||||||
|
_, err := b.Next(n)
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset resets the given buffer with a new stream
|
||||||
|
func (b *ByteInputAdapter) Reset(stream io.Reader) {
|
||||||
|
b.r = stream
|
||||||
|
b.readBytes = 0
|
||||||
|
}
|
||||||
21
vendor/github.com/RoaringBitmap/roaring/internal/pools.go
generated
vendored
Normal file
21
vendor/github.com/RoaringBitmap/roaring/internal/pools.go
generated
vendored
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
package internal
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// ByteInputAdapterPool shared pool
|
||||||
|
ByteInputAdapterPool = sync.Pool{
|
||||||
|
New: func() interface{} {
|
||||||
|
return &ByteInputAdapter{}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// ByteBufferPool shared pool
|
||||||
|
ByteBufferPool = sync.Pool{
|
||||||
|
New: func() interface{} {
|
||||||
|
return &ByteBuffer{}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
32
vendor/github.com/RoaringBitmap/roaring/manyiterator.go
generated
vendored
Normal file
32
vendor/github.com/RoaringBitmap/roaring/manyiterator.go
generated
vendored
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
type manyIterable interface {
|
||||||
|
nextMany(hs uint32, buf []uint32) int
|
||||||
|
nextMany64(hs uint64, buf []uint64) int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
|
||||||
|
n := 0
|
||||||
|
l := si.loc
|
||||||
|
s := si.slice
|
||||||
|
for n < len(buf) && l < len(s) {
|
||||||
|
buf[n] = uint32(s[l]) | hs
|
||||||
|
l++
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
si.loc = l
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
func (si *shortIterator) nextMany64(hs uint64, buf []uint64) int {
|
||||||
|
n := 0
|
||||||
|
l := si.loc
|
||||||
|
s := si.slice
|
||||||
|
for n < len(buf) && l < len(s) {
|
||||||
|
buf[n] = uint64(s[l]) | hs
|
||||||
|
l++
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
si.loc = l
|
||||||
|
return n
|
||||||
|
}
|
||||||
612
vendor/github.com/RoaringBitmap/roaring/parallel.go
generated
vendored
Normal file
612
vendor/github.com/RoaringBitmap/roaring/parallel.go
generated
vendored
Normal file
@@ -0,0 +1,612 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"container/heap"
|
||||||
|
"fmt"
|
||||||
|
"runtime"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
var defaultWorkerCount = runtime.NumCPU()
|
||||||
|
|
||||||
|
type bitmapContainerKey struct {
|
||||||
|
key uint16
|
||||||
|
idx int
|
||||||
|
bitmap *Bitmap
|
||||||
|
}
|
||||||
|
|
||||||
|
type multipleContainers struct {
|
||||||
|
key uint16
|
||||||
|
containers []container
|
||||||
|
idx int
|
||||||
|
}
|
||||||
|
|
||||||
|
type keyedContainer struct {
|
||||||
|
key uint16
|
||||||
|
container container
|
||||||
|
idx int
|
||||||
|
}
|
||||||
|
|
||||||
|
type bitmapContainerHeap []bitmapContainerKey
|
||||||
|
|
||||||
|
func (h bitmapContainerHeap) Len() int { return len(h) }
|
||||||
|
func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key }
|
||||||
|
func (h bitmapContainerHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
|
||||||
|
|
||||||
|
func (h *bitmapContainerHeap) Push(x interface{}) {
|
||||||
|
// Push and Pop use pointer receivers because they modify the slice's length,
|
||||||
|
// not just its contents.
|
||||||
|
*h = append(*h, x.(bitmapContainerKey))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *bitmapContainerHeap) Pop() interface{} {
|
||||||
|
old := *h
|
||||||
|
n := len(old)
|
||||||
|
x := old[n-1]
|
||||||
|
*h = old[0 : n-1]
|
||||||
|
return x
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h bitmapContainerHeap) Peek() bitmapContainerKey {
|
||||||
|
return h[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) {
|
||||||
|
k := h.Peek()
|
||||||
|
key = k.key
|
||||||
|
container = k.bitmap.highlowcontainer.containers[k.idx]
|
||||||
|
|
||||||
|
newIdx := k.idx + 1
|
||||||
|
if newIdx < k.bitmap.highlowcontainer.size() {
|
||||||
|
k = bitmapContainerKey{
|
||||||
|
k.bitmap.highlowcontainer.keys[newIdx],
|
||||||
|
newIdx,
|
||||||
|
k.bitmap,
|
||||||
|
}
|
||||||
|
(*h)[0] = k
|
||||||
|
heap.Fix(h, 0)
|
||||||
|
} else {
|
||||||
|
heap.Pop(h)
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *bitmapContainerHeap) Next(containers []container) multipleContainers {
|
||||||
|
if h.Len() == 0 {
|
||||||
|
return multipleContainers{}
|
||||||
|
}
|
||||||
|
|
||||||
|
key, container := h.popIncrementing()
|
||||||
|
containers = append(containers, container)
|
||||||
|
|
||||||
|
for h.Len() > 0 && key == h.Peek().key {
|
||||||
|
_, container = h.popIncrementing()
|
||||||
|
containers = append(containers, container)
|
||||||
|
}
|
||||||
|
|
||||||
|
return multipleContainers{
|
||||||
|
key,
|
||||||
|
containers,
|
||||||
|
-1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap {
|
||||||
|
// Initialize heap
|
||||||
|
var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps))
|
||||||
|
for _, bitmap := range bitmaps {
|
||||||
|
if !bitmap.IsEmpty() {
|
||||||
|
key := bitmapContainerKey{
|
||||||
|
bitmap.highlowcontainer.keys[0],
|
||||||
|
0,
|
||||||
|
bitmap,
|
||||||
|
}
|
||||||
|
h = append(h, key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
heap.Init(&h)
|
||||||
|
|
||||||
|
return h
|
||||||
|
}
|
||||||
|
|
||||||
|
func repairAfterLazy(c container) container {
|
||||||
|
switch t := c.(type) {
|
||||||
|
case *bitmapContainer:
|
||||||
|
if t.cardinality == invalidCardinality {
|
||||||
|
t.computeCardinality()
|
||||||
|
}
|
||||||
|
|
||||||
|
if t.getCardinality() <= arrayDefaultMaxSize {
|
||||||
|
return t.toArrayContainer()
|
||||||
|
} else if c.(*bitmapContainer).isFull() {
|
||||||
|
return newRunContainer16Range(0, MaxUint16)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
func toBitmapContainer(c container) container {
|
||||||
|
switch t := c.(type) {
|
||||||
|
case *arrayContainer:
|
||||||
|
return t.toBitmapContainer()
|
||||||
|
case *runContainer16:
|
||||||
|
if !t.isFull() {
|
||||||
|
return t.toBitmapContainer()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
|
||||||
|
expectedKeys := -1
|
||||||
|
appendedKeys := 0
|
||||||
|
var keys []uint16
|
||||||
|
var containers []container
|
||||||
|
for appendedKeys != expectedKeys {
|
||||||
|
select {
|
||||||
|
case item := <-resultChan:
|
||||||
|
if len(keys) <= item.idx {
|
||||||
|
keys = append(keys, make([]uint16, item.idx-len(keys)+1)...)
|
||||||
|
containers = append(containers, make([]container, item.idx-len(containers)+1)...)
|
||||||
|
}
|
||||||
|
keys[item.idx] = item.key
|
||||||
|
containers[item.idx] = item.container
|
||||||
|
|
||||||
|
appendedKeys++
|
||||||
|
case msg := <-expectedKeysChan:
|
||||||
|
expectedKeys = msg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
answer := &Bitmap{
|
||||||
|
roaringArray{
|
||||||
|
make([]uint16, 0, expectedKeys),
|
||||||
|
make([]container, 0, expectedKeys),
|
||||||
|
make([]bool, 0, expectedKeys),
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for i := range keys {
|
||||||
|
if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
|
||||||
|
answer.highlowcontainer.appendContainer(keys[i], containers[i], false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bitmapChan <- answer
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParHeapOr computes the union (OR) of all provided bitmaps in parallel,
|
||||||
|
// where the parameter "parallelism" determines how many workers are to be used
|
||||||
|
// (if it is set to 0, a default number of workers is chosen)
|
||||||
|
// ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr
|
||||||
|
func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
|
||||||
|
bitmapCount := len(bitmaps)
|
||||||
|
if bitmapCount == 0 {
|
||||||
|
return NewBitmap()
|
||||||
|
} else if bitmapCount == 1 {
|
||||||
|
return bitmaps[0].Clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
if parallelism == 0 {
|
||||||
|
parallelism = defaultWorkerCount
|
||||||
|
}
|
||||||
|
|
||||||
|
h := newBitmapContainerHeap(bitmaps...)
|
||||||
|
|
||||||
|
bitmapChan := make(chan *Bitmap)
|
||||||
|
inputChan := make(chan multipleContainers, 128)
|
||||||
|
resultChan := make(chan keyedContainer, 32)
|
||||||
|
expectedKeysChan := make(chan int)
|
||||||
|
|
||||||
|
pool := sync.Pool{
|
||||||
|
New: func() interface{} {
|
||||||
|
return make([]container, 0, len(bitmaps))
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
orFunc := func() {
|
||||||
|
// Assumes only structs with >=2 containers are passed
|
||||||
|
for input := range inputChan {
|
||||||
|
c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1])
|
||||||
|
for _, next := range input.containers[2:] {
|
||||||
|
c = c.lazyIOR(next)
|
||||||
|
}
|
||||||
|
c = repairAfterLazy(c)
|
||||||
|
kx := keyedContainer{
|
||||||
|
input.key,
|
||||||
|
c,
|
||||||
|
input.idx,
|
||||||
|
}
|
||||||
|
resultChan <- kx
|
||||||
|
pool.Put(input.containers[:0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
|
||||||
|
|
||||||
|
for i := 0; i < parallelism; i++ {
|
||||||
|
go orFunc()
|
||||||
|
}
|
||||||
|
|
||||||
|
idx := 0
|
||||||
|
for h.Len() > 0 {
|
||||||
|
ck := h.Next(pool.Get().([]container))
|
||||||
|
if len(ck.containers) == 1 {
|
||||||
|
resultChan <- keyedContainer{
|
||||||
|
ck.key,
|
||||||
|
ck.containers[0],
|
||||||
|
idx,
|
||||||
|
}
|
||||||
|
pool.Put(ck.containers[:0])
|
||||||
|
} else {
|
||||||
|
ck.idx = idx
|
||||||
|
inputChan <- ck
|
||||||
|
}
|
||||||
|
idx++
|
||||||
|
}
|
||||||
|
expectedKeysChan <- idx
|
||||||
|
|
||||||
|
bitmap := <-bitmapChan
|
||||||
|
|
||||||
|
close(inputChan)
|
||||||
|
close(resultChan)
|
||||||
|
close(expectedKeysChan)
|
||||||
|
|
||||||
|
return bitmap
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParAnd computes the intersection (AND) of all provided bitmaps in parallel,
|
||||||
|
// where the parameter "parallelism" determines how many workers are to be used
|
||||||
|
// (if it is set to 0, a default number of workers is chosen)
|
||||||
|
func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
bitmapCount := len(bitmaps)
|
||||||
|
if bitmapCount == 0 {
|
||||||
|
return NewBitmap()
|
||||||
|
} else if bitmapCount == 1 {
|
||||||
|
return bitmaps[0].Clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
if parallelism == 0 {
|
||||||
|
parallelism = defaultWorkerCount
|
||||||
|
}
|
||||||
|
|
||||||
|
h := newBitmapContainerHeap(bitmaps...)
|
||||||
|
|
||||||
|
bitmapChan := make(chan *Bitmap)
|
||||||
|
inputChan := make(chan multipleContainers, 128)
|
||||||
|
resultChan := make(chan keyedContainer, 32)
|
||||||
|
expectedKeysChan := make(chan int)
|
||||||
|
|
||||||
|
andFunc := func() {
|
||||||
|
// Assumes only structs with >=2 containers are passed
|
||||||
|
for input := range inputChan {
|
||||||
|
c := input.containers[0].and(input.containers[1])
|
||||||
|
for _, next := range input.containers[2:] {
|
||||||
|
if c.isEmpty() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
c = c.iand(next)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send a nil explicitly if the result of the intersection is an empty container
|
||||||
|
if c.isEmpty() {
|
||||||
|
c = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
kx := keyedContainer{
|
||||||
|
input.key,
|
||||||
|
c,
|
||||||
|
input.idx,
|
||||||
|
}
|
||||||
|
resultChan <- kx
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
|
||||||
|
|
||||||
|
for i := 0; i < parallelism; i++ {
|
||||||
|
go andFunc()
|
||||||
|
}
|
||||||
|
|
||||||
|
idx := 0
|
||||||
|
for h.Len() > 0 {
|
||||||
|
ck := h.Next(make([]container, 0, 4))
|
||||||
|
if len(ck.containers) == bitmapCount {
|
||||||
|
ck.idx = idx
|
||||||
|
inputChan <- ck
|
||||||
|
idx++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
expectedKeysChan <- idx
|
||||||
|
|
||||||
|
bitmap := <-bitmapChan
|
||||||
|
|
||||||
|
close(inputChan)
|
||||||
|
close(resultChan)
|
||||||
|
close(expectedKeysChan)
|
||||||
|
|
||||||
|
return bitmap
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParOr computes the union (OR) of all provided bitmaps in parallel,
|
||||||
|
// where the parameter "parallelism" determines how many workers are to be used
|
||||||
|
// (if it is set to 0, a default number of workers is chosen)
|
||||||
|
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
var lKey uint16 = MaxUint16
|
||||||
|
var hKey uint16
|
||||||
|
|
||||||
|
bitmapsFiltered := bitmaps[:0]
|
||||||
|
for _, b := range bitmaps {
|
||||||
|
if !b.IsEmpty() {
|
||||||
|
bitmapsFiltered = append(bitmapsFiltered, b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bitmaps = bitmapsFiltered
|
||||||
|
|
||||||
|
for _, b := range bitmaps {
|
||||||
|
lKey = minOfUint16(lKey, b.highlowcontainer.keys[0])
|
||||||
|
hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
|
||||||
|
}
|
||||||
|
|
||||||
|
if lKey == MaxUint16 && hKey == 0 {
|
||||||
|
return New()
|
||||||
|
} else if len(bitmaps) == 1 {
|
||||||
|
return bitmaps[0].Clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
keyRange := int(hKey) - int(lKey) + 1
|
||||||
|
if keyRange == 1 {
|
||||||
|
// revert to FastOr. Since the key range is 0
|
||||||
|
// no container-level aggregation parallelism is achievable
|
||||||
|
return FastOr(bitmaps...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if parallelism == 0 {
|
||||||
|
parallelism = defaultWorkerCount
|
||||||
|
}
|
||||||
|
|
||||||
|
var chunkSize int
|
||||||
|
var chunkCount int
|
||||||
|
if parallelism*4 > int(keyRange) {
|
||||||
|
chunkSize = 1
|
||||||
|
chunkCount = int(keyRange)
|
||||||
|
} else {
|
||||||
|
chunkCount = parallelism * 4
|
||||||
|
chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
|
||||||
|
}
|
||||||
|
|
||||||
|
if chunkCount*chunkSize < int(keyRange) {
|
||||||
|
// it's fine to panic to indicate an implementation error
|
||||||
|
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
|
||||||
|
}
|
||||||
|
|
||||||
|
chunks := make([]*roaringArray, chunkCount)
|
||||||
|
|
||||||
|
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
|
||||||
|
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
|
||||||
|
|
||||||
|
orFunc := func() {
|
||||||
|
for spec := range chunkSpecChan {
|
||||||
|
ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
|
||||||
|
for _, b := range bitmaps[2:] {
|
||||||
|
ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, c := range ra.containers {
|
||||||
|
ra.containers[i] = repairAfterLazy(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
chunkChan <- parChunk{ra, spec.idx}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < parallelism; i++ {
|
||||||
|
go orFunc()
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for i := 0; i < chunkCount; i++ {
|
||||||
|
spec := parChunkSpec{
|
||||||
|
start: uint16(int(lKey) + i*chunkSize),
|
||||||
|
end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
|
||||||
|
idx: int(i),
|
||||||
|
}
|
||||||
|
chunkSpecChan <- spec
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
chunksRemaining := chunkCount
|
||||||
|
for chunk := range chunkChan {
|
||||||
|
chunks[chunk.idx] = chunk.ra
|
||||||
|
chunksRemaining--
|
||||||
|
if chunksRemaining == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(chunkChan)
|
||||||
|
close(chunkSpecChan)
|
||||||
|
|
||||||
|
containerCount := 0
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
containerCount += chunk.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
result := Bitmap{
|
||||||
|
roaringArray{
|
||||||
|
containers: make([]container, containerCount),
|
||||||
|
keys: make([]uint16, containerCount),
|
||||||
|
needCopyOnWrite: make([]bool, containerCount),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
resultOffset := 0
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
|
||||||
|
copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
|
||||||
|
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
|
||||||
|
resultOffset += chunk.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return &result
|
||||||
|
}
|
||||||
|
|
||||||
|
type parChunkSpec struct {
|
||||||
|
start uint16
|
||||||
|
end uint16
|
||||||
|
idx int
|
||||||
|
}
|
||||||
|
|
||||||
|
type parChunk struct {
|
||||||
|
ra *roaringArray
|
||||||
|
idx int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c parChunk) size() int {
|
||||||
|
return c.ra.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int {
|
||||||
|
for idx, key := range ra.keys {
|
||||||
|
if key >= start && key <= last {
|
||||||
|
return idx
|
||||||
|
} else if key > last {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ra.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
|
||||||
|
answer := newRoaringArray()
|
||||||
|
length1 := ra1.size()
|
||||||
|
length2 := ra2.size()
|
||||||
|
|
||||||
|
idx1 := parNaiveStartAt(ra1, start, last)
|
||||||
|
idx2 := parNaiveStartAt(ra2, start, last)
|
||||||
|
|
||||||
|
var key1 uint16
|
||||||
|
var key2 uint16
|
||||||
|
if idx1 < length1 && idx2 < length2 {
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
|
||||||
|
for key1 <= last && key2 <= last {
|
||||||
|
|
||||||
|
if key1 < key2 {
|
||||||
|
answer.appendCopy(*ra1, idx1)
|
||||||
|
idx1++
|
||||||
|
if idx1 == length1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
} else if key1 > key2 {
|
||||||
|
answer.appendCopy(*ra2, idx2)
|
||||||
|
idx2++
|
||||||
|
if idx2 == length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
} else {
|
||||||
|
c1 := ra1.getFastContainerAtIndex(idx1, false)
|
||||||
|
|
||||||
|
answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
|
||||||
|
idx1++
|
||||||
|
idx2++
|
||||||
|
if idx1 == length1 || idx2 == length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx2 < length2 {
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
for key2 <= last {
|
||||||
|
answer.appendCopy(*ra2, idx2)
|
||||||
|
idx2++
|
||||||
|
if idx2 == length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx1 < length1 {
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
for key1 <= last {
|
||||||
|
answer.appendCopy(*ra1, idx1)
|
||||||
|
idx1++
|
||||||
|
if idx1 == length1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return answer
|
||||||
|
}
|
||||||
|
|
||||||
|
func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
|
||||||
|
length1 := ra1.size()
|
||||||
|
length2 := ra2.size()
|
||||||
|
|
||||||
|
idx1 := 0
|
||||||
|
idx2 := parNaiveStartAt(ra2, start, last)
|
||||||
|
|
||||||
|
var key1 uint16
|
||||||
|
var key2 uint16
|
||||||
|
if idx1 < length1 && idx2 < length2 {
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
|
||||||
|
for key1 <= last && key2 <= last {
|
||||||
|
if key1 < key2 {
|
||||||
|
idx1++
|
||||||
|
if idx1 >= length1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
} else if key1 > key2 {
|
||||||
|
ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
|
||||||
|
ra1.needCopyOnWrite[idx1] = true
|
||||||
|
idx2++
|
||||||
|
idx1++
|
||||||
|
length1++
|
||||||
|
if idx2 >= length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
} else {
|
||||||
|
c1 := ra1.getFastContainerAtIndex(idx1, true)
|
||||||
|
|
||||||
|
ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
|
||||||
|
ra1.needCopyOnWrite[idx1] = false
|
||||||
|
idx1++
|
||||||
|
idx2++
|
||||||
|
if idx1 >= length1 || idx2 >= length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if idx2 < length2 {
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
for key2 <= last {
|
||||||
|
ra1.appendCopy(*ra2, idx2)
|
||||||
|
idx2++
|
||||||
|
if idx2 >= length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ra1
|
||||||
|
}
|
||||||
13
vendor/github.com/RoaringBitmap/roaring/popcnt.go
generated
vendored
Normal file
13
vendor/github.com/RoaringBitmap/roaring/popcnt.go
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
//go:build go1.9
|
||||||
|
// +build go1.9
|
||||||
|
|
||||||
|
// "go1.9", from Go version 1.9 onward
|
||||||
|
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
import "math/bits"
|
||||||
|
|
||||||
|
func popcount(x uint64) uint64 {
|
||||||
|
return uint64(bits.OnesCount64(x))
|
||||||
|
}
|
||||||
103
vendor/github.com/RoaringBitmap/roaring/popcnt_amd64.s
generated
vendored
Normal file
103
vendor/github.com/RoaringBitmap/roaring/popcnt_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
// +build amd64,!appengine,!go1.9
|
||||||
|
|
||||||
|
TEXT ·hasAsm(SB),4,$0-1
|
||||||
|
MOVQ $1, AX
|
||||||
|
CPUID
|
||||||
|
SHRQ $23, CX
|
||||||
|
ANDQ $1, CX
|
||||||
|
MOVB CX, ret+0(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
|
||||||
|
|
||||||
|
TEXT ·popcntSliceAsm(SB),4,$0-32
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntSliceEnd
|
||||||
|
popcntSliceLoop:
|
||||||
|
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
LOOP popcntSliceLoop
|
||||||
|
popcntSliceEnd:
|
||||||
|
MOVQ AX, ret+24(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntMaskSliceEnd
|
||||||
|
MOVQ m+24(FP), DI
|
||||||
|
popcntMaskSliceLoop:
|
||||||
|
MOVQ (DI), DX
|
||||||
|
NOTQ DX
|
||||||
|
ANDQ (SI), DX
|
||||||
|
POPCNTQ_DX_DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
ADDQ $8, DI
|
||||||
|
LOOP popcntMaskSliceLoop
|
||||||
|
popcntMaskSliceEnd:
|
||||||
|
MOVQ AX, ret+48(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·popcntAndSliceAsm(SB),4,$0-56
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntAndSliceEnd
|
||||||
|
MOVQ m+24(FP), DI
|
||||||
|
popcntAndSliceLoop:
|
||||||
|
MOVQ (DI), DX
|
||||||
|
ANDQ (SI), DX
|
||||||
|
POPCNTQ_DX_DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
ADDQ $8, DI
|
||||||
|
LOOP popcntAndSliceLoop
|
||||||
|
popcntAndSliceEnd:
|
||||||
|
MOVQ AX, ret+48(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·popcntOrSliceAsm(SB),4,$0-56
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntOrSliceEnd
|
||||||
|
MOVQ m+24(FP), DI
|
||||||
|
popcntOrSliceLoop:
|
||||||
|
MOVQ (DI), DX
|
||||||
|
ORQ (SI), DX
|
||||||
|
POPCNTQ_DX_DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
ADDQ $8, DI
|
||||||
|
LOOP popcntOrSliceLoop
|
||||||
|
popcntOrSliceEnd:
|
||||||
|
MOVQ AX, ret+48(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·popcntXorSliceAsm(SB),4,$0-56
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntXorSliceEnd
|
||||||
|
MOVQ m+24(FP), DI
|
||||||
|
popcntXorSliceLoop:
|
||||||
|
MOVQ (DI), DX
|
||||||
|
XORQ (SI), DX
|
||||||
|
POPCNTQ_DX_DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
ADDQ $8, DI
|
||||||
|
LOOP popcntXorSliceLoop
|
||||||
|
popcntXorSliceEnd:
|
||||||
|
MOVQ AX, ret+48(FP)
|
||||||
|
RET
|
||||||
68
vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go
generated
vendored
Normal file
68
vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go
generated
vendored
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
//go:build amd64 && !appengine && !go1.9
|
||||||
|
// +build amd64,!appengine,!go1.9
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
// *** the following functions are defined in popcnt_amd64.s
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func hasAsm() bool
|
||||||
|
|
||||||
|
// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
|
||||||
|
var useAsm = hasAsm()
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntSliceAsm(s []uint64) uint64
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntMaskSliceAsm(s, m []uint64) uint64
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntAndSliceAsm(s, m []uint64) uint64
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntOrSliceAsm(s, m []uint64) uint64
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntXorSliceAsm(s, m []uint64) uint64
|
||||||
|
|
||||||
|
func popcntSlice(s []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntSliceAsm(s)
|
||||||
|
}
|
||||||
|
return popcntSliceGo(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntMaskSliceAsm(s, m)
|
||||||
|
}
|
||||||
|
return popcntMaskSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntAndSlice(s, m []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntAndSliceAsm(s, m)
|
||||||
|
}
|
||||||
|
return popcntAndSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntOrSlice(s, m []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntOrSliceAsm(s, m)
|
||||||
|
}
|
||||||
|
return popcntOrSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntXorSlice(s, m []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntXorSliceAsm(s, m)
|
||||||
|
}
|
||||||
|
return popcntXorSliceGo(s, m)
|
||||||
|
}
|
||||||
18
vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go
generated
vendored
Normal file
18
vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go
generated
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
//go:build !go1.9
|
||||||
|
// +build !go1.9
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
// bit population count, take from
|
||||||
|
// https://code.google.com/p/go/issues/detail?id=4988#c11
|
||||||
|
// credit: https://code.google.com/u/arnehormann/
|
||||||
|
// credit: https://play.golang.org/p/U7SogJ7psJ
|
||||||
|
// credit: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||||
|
func popcount(x uint64) uint64 {
|
||||||
|
x -= (x >> 1) & 0x5555555555555555
|
||||||
|
x = (x>>2)&0x3333333333333333 + x&0x3333333333333333
|
||||||
|
x += x >> 4
|
||||||
|
x &= 0x0f0f0f0f0f0f0f0f
|
||||||
|
x *= 0x0101010101010101
|
||||||
|
return x >> 56
|
||||||
|
}
|
||||||
24
vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go
generated
vendored
Normal file
24
vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go
generated
vendored
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
//go:build !amd64 || appengine || go1.9
|
||||||
|
// +build !amd64 appengine go1.9
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
func popcntSlice(s []uint64) uint64 {
|
||||||
|
return popcntSliceGo(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||||
|
return popcntMaskSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntAndSlice(s, m []uint64) uint64 {
|
||||||
|
return popcntAndSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntOrSlice(s, m []uint64) uint64 {
|
||||||
|
return popcntOrSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntXorSlice(s, m []uint64) uint64 {
|
||||||
|
return popcntXorSliceGo(s, m)
|
||||||
|
}
|
||||||
41
vendor/github.com/RoaringBitmap/roaring/popcnt_slices.go
generated
vendored
Normal file
41
vendor/github.com/RoaringBitmap/roaring/popcnt_slices.go
generated
vendored
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
func popcntSliceGo(s []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for _, x := range s {
|
||||||
|
cnt += popcount(x)
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntMaskSliceGo(s, m []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for i := range s {
|
||||||
|
cnt += popcount(s[i] &^ m[i])
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntAndSliceGo(s, m []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for i := range s {
|
||||||
|
cnt += popcount(s[i] & m[i])
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntOrSliceGo(s, m []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for i := range s {
|
||||||
|
cnt += popcount(s[i] | m[i])
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntXorSliceGo(s, m []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for i := range s {
|
||||||
|
cnt += popcount(s[i] ^ m[i])
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
101
vendor/github.com/RoaringBitmap/roaring/priorityqueue.go
generated
vendored
Normal file
101
vendor/github.com/RoaringBitmap/roaring/priorityqueue.go
generated
vendored
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
import "container/heap"
|
||||||
|
|
||||||
|
/////////////
|
||||||
|
// The priorityQueue is used to keep Bitmaps sorted.
|
||||||
|
////////////
|
||||||
|
|
||||||
|
type item struct {
|
||||||
|
value *Bitmap
|
||||||
|
index int
|
||||||
|
}
|
||||||
|
|
||||||
|
type priorityQueue []*item
|
||||||
|
|
||||||
|
func (pq priorityQueue) Len() int { return len(pq) }
|
||||||
|
|
||||||
|
func (pq priorityQueue) Less(i, j int) bool {
|
||||||
|
return pq[i].value.GetSizeInBytes() < pq[j].value.GetSizeInBytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq priorityQueue) Swap(i, j int) {
|
||||||
|
pq[i], pq[j] = pq[j], pq[i]
|
||||||
|
pq[i].index = i
|
||||||
|
pq[j].index = j
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq *priorityQueue) Push(x interface{}) {
|
||||||
|
n := len(*pq)
|
||||||
|
item := x.(*item)
|
||||||
|
item.index = n
|
||||||
|
*pq = append(*pq, item)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq *priorityQueue) Pop() interface{} {
|
||||||
|
old := *pq
|
||||||
|
n := len(old)
|
||||||
|
item := old[n-1]
|
||||||
|
item.index = -1 // for safety
|
||||||
|
*pq = old[0 : n-1]
|
||||||
|
return item
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq *priorityQueue) update(item *item, value *Bitmap) {
|
||||||
|
item.value = value
|
||||||
|
heap.Fix(pq, item.index)
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////
|
||||||
|
// The containerPriorityQueue is used to keep the containers of various Bitmaps sorted.
|
||||||
|
////////////
|
||||||
|
|
||||||
|
type containeritem struct {
|
||||||
|
value *Bitmap
|
||||||
|
keyindex int
|
||||||
|
index int
|
||||||
|
}
|
||||||
|
|
||||||
|
type containerPriorityQueue []*containeritem
|
||||||
|
|
||||||
|
func (pq containerPriorityQueue) Len() int { return len(pq) }
|
||||||
|
|
||||||
|
func (pq containerPriorityQueue) Less(i, j int) bool {
|
||||||
|
k1 := pq[i].value.highlowcontainer.getKeyAtIndex(pq[i].keyindex)
|
||||||
|
k2 := pq[j].value.highlowcontainer.getKeyAtIndex(pq[j].keyindex)
|
||||||
|
if k1 != k2 {
|
||||||
|
return k1 < k2
|
||||||
|
}
|
||||||
|
c1 := pq[i].value.highlowcontainer.getContainerAtIndex(pq[i].keyindex)
|
||||||
|
c2 := pq[j].value.highlowcontainer.getContainerAtIndex(pq[j].keyindex)
|
||||||
|
|
||||||
|
return c1.getCardinality() > c2.getCardinality()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq containerPriorityQueue) Swap(i, j int) {
|
||||||
|
pq[i], pq[j] = pq[j], pq[i]
|
||||||
|
pq[i].index = i
|
||||||
|
pq[j].index = j
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq *containerPriorityQueue) Push(x interface{}) {
|
||||||
|
n := len(*pq)
|
||||||
|
item := x.(*containeritem)
|
||||||
|
item.index = n
|
||||||
|
*pq = append(*pq, item)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq *containerPriorityQueue) Pop() interface{} {
|
||||||
|
old := *pq
|
||||||
|
n := len(old)
|
||||||
|
item := old[n-1]
|
||||||
|
item.index = -1 // for safety
|
||||||
|
*pq = old[0 : n-1]
|
||||||
|
return item
|
||||||
|
}
|
||||||
|
|
||||||
|
//func (pq *containerPriorityQueue) update(item *containeritem, value *Bitmap, keyindex int) {
|
||||||
|
// item.value = value
|
||||||
|
// item.keyindex = keyindex
|
||||||
|
// heap.Fix(pq, item.index)
|
||||||
|
//}
|
||||||
1918
vendor/github.com/RoaringBitmap/roaring/roaring.go
generated
vendored
Normal file
1918
vendor/github.com/RoaringBitmap/roaring/roaring.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
106
vendor/github.com/RoaringBitmap/roaring/roaring64/Makefile
generated
vendored
Normal file
106
vendor/github.com/RoaringBitmap/roaring/roaring64/Makefile
generated
vendored
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Display general help about this command
|
||||||
|
help:
|
||||||
|
@echo ""
|
||||||
|
@echo "The following commands are available:"
|
||||||
|
@echo ""
|
||||||
|
@echo " make qa : Run all the tests"
|
||||||
|
@echo " make test : Run the unit tests"
|
||||||
|
@echo ""
|
||||||
|
@echo " make format : Format the source code"
|
||||||
|
@echo " make fmtcheck : Check if the source code has been formatted"
|
||||||
|
@echo " make vet : Check for suspicious constructs"
|
||||||
|
@echo " make lint : Check for style errors"
|
||||||
|
@echo ""
|
||||||
|
@echo " make deps : Get the dependencies"
|
||||||
|
@echo " make clean : Remove any build artifact"
|
||||||
|
@echo " make nuke : Deletes any intermediate file"
|
||||||
|
@echo ""
|
||||||
|
@echo " make fuzz-smat : Fuzzy testing with smat"
|
||||||
|
@echo " make fuzz-stream : Fuzzy testing with stream deserialization"
|
||||||
|
@echo " make fuzz-buffer : Fuzzy testing with buffer deserialization"
|
||||||
|
@echo ""
|
||||||
|
|
||||||
|
# Alias for help target
|
||||||
|
all: help
|
||||||
|
test:
|
||||||
|
go test
|
||||||
|
# Format the source code
|
||||||
|
format:
|
||||||
|
@find ./ -type f -name "*.go" -exec gofmt -w {} \;
|
||||||
|
|
||||||
|
# Check if the source code has been formatted
|
||||||
|
fmtcheck:
|
||||||
|
@mkdir -p target
|
||||||
|
@find ./ -type f -name "*.go" -exec gofmt -d {} \; | tee target/format.diff
|
||||||
|
@test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }
|
||||||
|
|
||||||
|
# Check for syntax errors
|
||||||
|
vet:
|
||||||
|
GOPATH=$(GOPATH) go vet ./...
|
||||||
|
|
||||||
|
# Check for style errors
|
||||||
|
lint:
|
||||||
|
GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint ./...
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Alias to run all quality-assurance checks
|
||||||
|
qa: fmtcheck test vet lint
|
||||||
|
|
||||||
|
# --- INSTALL ---
|
||||||
|
|
||||||
|
# Get the dependencies
|
||||||
|
deps:
|
||||||
|
GOPATH=$(GOPATH) go get github.com/stretchr/testify
|
||||||
|
GOPATH=$(GOPATH) go get github.com/bits-and-blooms/bitset
|
||||||
|
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
|
||||||
|
GOPATH=$(GOPATH) go get github.com/mschoch/smat
|
||||||
|
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||||
|
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||||
|
GOPATH=$(GOPATH) go get github.com/glycerine/go-unsnap-stream
|
||||||
|
GOPATH=$(GOPATH) go get github.com/philhofer/fwd
|
||||||
|
GOPATH=$(GOPATH) go get github.com/jtolds/gls
|
||||||
|
|
||||||
|
fuzz-smat:
|
||||||
|
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||||
|
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
|
||||||
|
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||||
|
|
||||||
|
|
||||||
|
fuzz-stream:
|
||||||
|
go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring
|
||||||
|
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||||
|
|
||||||
|
fuzz-buffer:
|
||||||
|
go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring
|
||||||
|
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||||
|
|
||||||
|
# Remove any build artifact
|
||||||
|
clean:
|
||||||
|
GOPATH=$(GOPATH) go clean ./...
|
||||||
|
|
||||||
|
# Deletes any intermediate file
|
||||||
|
nuke:
|
||||||
|
rm -rf ./target
|
||||||
|
GOPATH=$(GOPATH) go clean -i ./...
|
||||||
|
|
||||||
|
|
||||||
|
cover:
|
||||||
|
go test -coverprofile=coverage.out
|
||||||
|
go tool cover -html=coverage.out
|
||||||
|
|
||||||
|
fetch-real-roaring-datasets:
|
||||||
|
# pull github.com/RoaringBitmap/real-roaring-datasets -> testdata/real-roaring-datasets
|
||||||
|
git submodule init
|
||||||
|
git submodule update
|
||||||
916
vendor/github.com/RoaringBitmap/roaring/roaring64/bsi64.go
generated
vendored
Normal file
916
vendor/github.com/RoaringBitmap/roaring/roaring64/bsi64.go
generated
vendored
Normal file
@@ -0,0 +1,916 @@
|
|||||||
|
package roaring64
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"math/bits"
|
||||||
|
"runtime"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Min64BitSigned - Minimum 64 bit value
|
||||||
|
Min64BitSigned = -9223372036854775808
|
||||||
|
// Max64BitSigned - Maximum 64 bit value
|
||||||
|
Max64BitSigned = 9223372036854775807
|
||||||
|
)
|
||||||
|
|
||||||
|
// BSI is at its simplest is an array of bitmaps that represent an encoded
|
||||||
|
// binary value. The advantage of a BSI is that comparisons can be made
|
||||||
|
// across ranges of values whereas a bitmap can only represent the existence
|
||||||
|
// of a single value for a given column ID. Another usage scenario involves
|
||||||
|
// storage of high cardinality values.
|
||||||
|
//
|
||||||
|
// It depends upon the bitmap libraries. It is not thread safe, so
|
||||||
|
// upstream concurrency guards must be provided.
|
||||||
|
type BSI struct {
|
||||||
|
bA []Bitmap
|
||||||
|
eBM Bitmap // Existence BitMap
|
||||||
|
MaxValue int64
|
||||||
|
MinValue int64
|
||||||
|
runOptimized bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewBSI constructs a new BSI. Note that it is your responsibility to ensure that
|
||||||
|
// the min/max values are set correctly. Queries CompareValue, MinMax, etc. will not
|
||||||
|
// work correctly if the min/max values are not set correctly.
|
||||||
|
func NewBSI(maxValue int64, minValue int64) *BSI {
|
||||||
|
|
||||||
|
bitsz := bits.Len64(uint64(minValue))
|
||||||
|
if bits.Len64(uint64(maxValue)) > bitsz {
|
||||||
|
bitsz = bits.Len64(uint64(maxValue))
|
||||||
|
}
|
||||||
|
ba := make([]Bitmap, bitsz)
|
||||||
|
return &BSI{bA: ba, MaxValue: maxValue, MinValue: minValue}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewDefaultBSI constructs an auto-sized BSI
|
||||||
|
func NewDefaultBSI() *BSI {
|
||||||
|
return NewBSI(int64(0), int64(0))
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
|
||||||
|
func (b *BSI) RunOptimize() {
|
||||||
|
b.eBM.RunOptimize()
|
||||||
|
for i := 0; i < len(b.bA); i++ {
|
||||||
|
b.bA[i].RunOptimize()
|
||||||
|
}
|
||||||
|
b.runOptimized = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasRunCompression returns true if the bitmap benefits from run compression
|
||||||
|
func (b *BSI) HasRunCompression() bool {
|
||||||
|
return b.runOptimized
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetExistenceBitmap returns a pointer to the underlying existence bitmap of the BSI
|
||||||
|
func (b *BSI) GetExistenceBitmap() *Bitmap {
|
||||||
|
return &b.eBM
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValueExists tests whether the value exists.
|
||||||
|
func (b *BSI) ValueExists(columnID uint64) bool {
|
||||||
|
|
||||||
|
return b.eBM.Contains(uint64(columnID))
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCardinality returns a count of unique column IDs for which a value has been set.
|
||||||
|
func (b *BSI) GetCardinality() uint64 {
|
||||||
|
return b.eBM.GetCardinality()
|
||||||
|
}
|
||||||
|
|
||||||
|
// BitCount returns the number of bits needed to represent values.
|
||||||
|
func (b *BSI) BitCount() int {
|
||||||
|
return len(b.bA)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetValue sets a value for a given columnID.
|
||||||
|
func (b *BSI) SetValue(columnID uint64, value int64) {
|
||||||
|
// If max/min values are set to zero then automatically determine bit array size
|
||||||
|
if b.MaxValue == 0 && b.MinValue == 0 {
|
||||||
|
minBits := bits.Len64(uint64(value))
|
||||||
|
for len(b.bA) < minBits {
|
||||||
|
b.bA = append(b.bA, Bitmap{})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < b.BitCount(); i++ {
|
||||||
|
if uint64(value)&(1<<uint64(i)) > 0 {
|
||||||
|
b.bA[i].Add(columnID)
|
||||||
|
} else {
|
||||||
|
b.bA[i].Remove(columnID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.eBM.Add(columnID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetValue gets the value at the column ID. Second param will be false for non-existent values.
|
||||||
|
func (b *BSI) GetValue(columnID uint64) (value int64, exists bool) {
|
||||||
|
exists = b.eBM.Contains(columnID)
|
||||||
|
if !exists {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for i := 0; i < b.BitCount(); i++ {
|
||||||
|
if b.bA[i].Contains(columnID) {
|
||||||
|
value |= 1 << i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
type action func(t *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup)
|
||||||
|
|
||||||
|
func parallelExecutor(parallelism int, t *task, e action, foundSet *Bitmap) *Bitmap {
|
||||||
|
|
||||||
|
var n int = parallelism
|
||||||
|
if n == 0 {
|
||||||
|
n = runtime.NumCPU()
|
||||||
|
}
|
||||||
|
|
||||||
|
resultsChan := make(chan *Bitmap, n)
|
||||||
|
|
||||||
|
card := foundSet.GetCardinality()
|
||||||
|
x := card / uint64(n)
|
||||||
|
|
||||||
|
remainder := card - (x * uint64(n))
|
||||||
|
var batch []uint64
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
iter := foundSet.ManyIterator()
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
if i == n-1 {
|
||||||
|
batch = make([]uint64, x+remainder)
|
||||||
|
} else {
|
||||||
|
batch = make([]uint64, x)
|
||||||
|
}
|
||||||
|
iter.NextMany(batch)
|
||||||
|
wg.Add(1)
|
||||||
|
go e(t, batch, resultsChan, &wg)
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
close(resultsChan)
|
||||||
|
|
||||||
|
ba := make([]*Bitmap, 0)
|
||||||
|
for bm := range resultsChan {
|
||||||
|
ba = append(ba, bm)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ParOr(0, ba...)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
type bsiAction func(input *BSI, filterSet *Bitmap, batch []uint64, resultsChan chan *BSI, wg *sync.WaitGroup)
|
||||||
|
|
||||||
|
func parallelExecutorBSIResults(parallelism int, input *BSI, e bsiAction, foundSet, filterSet *Bitmap, sumResults bool) *BSI {
|
||||||
|
|
||||||
|
var n int = parallelism
|
||||||
|
if n == 0 {
|
||||||
|
n = runtime.NumCPU()
|
||||||
|
}
|
||||||
|
|
||||||
|
resultsChan := make(chan *BSI, n)
|
||||||
|
|
||||||
|
card := foundSet.GetCardinality()
|
||||||
|
x := card / uint64(n)
|
||||||
|
|
||||||
|
remainder := card - (x * uint64(n))
|
||||||
|
var batch []uint64
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
iter := foundSet.ManyIterator()
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
if i == n-1 {
|
||||||
|
batch = make([]uint64, x+remainder)
|
||||||
|
} else {
|
||||||
|
batch = make([]uint64, x)
|
||||||
|
}
|
||||||
|
iter.NextMany(batch)
|
||||||
|
wg.Add(1)
|
||||||
|
go e(input, filterSet, batch, resultsChan, &wg)
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
close(resultsChan)
|
||||||
|
|
||||||
|
ba := make([]*BSI, 0)
|
||||||
|
for bm := range resultsChan {
|
||||||
|
ba = append(ba, bm)
|
||||||
|
}
|
||||||
|
|
||||||
|
results := NewDefaultBSI()
|
||||||
|
if sumResults {
|
||||||
|
for _, v := range ba {
|
||||||
|
results.Add(v)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
results.ParOr(0, ba...)
|
||||||
|
}
|
||||||
|
return results
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Operation identifier
|
||||||
|
type Operation int
|
||||||
|
|
||||||
|
const (
|
||||||
|
// LT less than
|
||||||
|
LT Operation = 1 + iota
|
||||||
|
// LE less than or equal
|
||||||
|
LE
|
||||||
|
// EQ equal
|
||||||
|
EQ
|
||||||
|
// GE greater than or equal
|
||||||
|
GE
|
||||||
|
// GT greater than
|
||||||
|
GT
|
||||||
|
// RANGE range
|
||||||
|
RANGE
|
||||||
|
// MIN find minimum
|
||||||
|
MIN
|
||||||
|
// MAX find maximum
|
||||||
|
MAX
|
||||||
|
)
|
||||||
|
|
||||||
|
type task struct {
|
||||||
|
bsi *BSI
|
||||||
|
op Operation
|
||||||
|
valueOrStart int64
|
||||||
|
end int64
|
||||||
|
values map[int64]struct{}
|
||||||
|
bits *Bitmap
|
||||||
|
}
|
||||||
|
|
||||||
|
// CompareValue compares value.
|
||||||
|
// Values should be in the range of the BSI (max, min). If the value is outside the range, the result
|
||||||
|
// might erroneous. The operation parameter indicates the type of comparison to be made.
|
||||||
|
// For all operations with the exception of RANGE, the value to be compared is specified by valueOrStart.
|
||||||
|
// For the RANGE parameter the comparison criteria is >= valueOrStart and <= end.
|
||||||
|
// The parallelism parameter indicates the number of CPU threads to be applied for processing. A value
|
||||||
|
// of zero indicates that all available CPU resources will be potentially utilized.
|
||||||
|
func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int64,
|
||||||
|
foundSet *Bitmap) *Bitmap {
|
||||||
|
|
||||||
|
comp := &task{bsi: b, op: op, valueOrStart: valueOrStart, end: end}
|
||||||
|
if foundSet == nil {
|
||||||
|
return parallelExecutor(parallelism, comp, compareValue, &b.eBM)
|
||||||
|
}
|
||||||
|
return parallelExecutor(parallelism, comp, compareValue, foundSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup) {
|
||||||
|
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
results := NewBitmap()
|
||||||
|
if e.bsi.runOptimized {
|
||||||
|
results.RunOptimize()
|
||||||
|
}
|
||||||
|
|
||||||
|
x := e.bsi.BitCount()
|
||||||
|
startIsNegative := x == 64 && uint64(e.valueOrStart)&(1<<uint64(x-1)) > 0
|
||||||
|
endIsNegative := x == 64 && uint64(e.end)&(1<<uint64(x-1)) > 0
|
||||||
|
|
||||||
|
for i := 0; i < len(batch); i++ {
|
||||||
|
cID := batch[i]
|
||||||
|
eq1, eq2 := true, true
|
||||||
|
lt1, lt2, gt1 := false, false, false
|
||||||
|
j := e.bsi.BitCount() - 1
|
||||||
|
isNegative := false
|
||||||
|
if x == 64 {
|
||||||
|
isNegative = e.bsi.bA[j].Contains(cID)
|
||||||
|
j--
|
||||||
|
}
|
||||||
|
compStartValue := e.valueOrStart
|
||||||
|
compEndValue := e.end
|
||||||
|
if isNegative != startIsNegative {
|
||||||
|
compStartValue = ^e.valueOrStart + 1
|
||||||
|
}
|
||||||
|
if isNegative != endIsNegative {
|
||||||
|
compEndValue = ^e.end + 1
|
||||||
|
}
|
||||||
|
for ; j >= 0; j-- {
|
||||||
|
sliceContainsBit := e.bsi.bA[j].Contains(cID)
|
||||||
|
|
||||||
|
if uint64(compStartValue)&(1<<uint64(j)) > 0 {
|
||||||
|
// BIT in value is SET
|
||||||
|
if !sliceContainsBit {
|
||||||
|
if eq1 {
|
||||||
|
if (e.op == GT || e.op == GE || e.op == RANGE) && startIsNegative && !isNegative {
|
||||||
|
gt1 = true
|
||||||
|
}
|
||||||
|
if e.op == LT || e.op == LE {
|
||||||
|
if !startIsNegative || (startIsNegative == isNegative) {
|
||||||
|
lt1 = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eq1 = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// BIT in value is CLEAR
|
||||||
|
if sliceContainsBit {
|
||||||
|
if eq1 {
|
||||||
|
if (e.op == LT || e.op == LE) && isNegative && !startIsNegative {
|
||||||
|
lt1 = true
|
||||||
|
}
|
||||||
|
if e.op == GT || e.op == GE || e.op == RANGE {
|
||||||
|
if startIsNegative || (startIsNegative == isNegative) {
|
||||||
|
gt1 = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eq1 = false
|
||||||
|
if e.op != RANGE {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if e.op == RANGE && uint64(compEndValue)&(1<<uint64(j)) > 0 {
|
||||||
|
// BIT in value is SET
|
||||||
|
if !sliceContainsBit {
|
||||||
|
if eq2 {
|
||||||
|
if !endIsNegative || (endIsNegative == isNegative) {
|
||||||
|
lt2 = true
|
||||||
|
}
|
||||||
|
eq2 = false
|
||||||
|
if startIsNegative && !endIsNegative {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if e.op == RANGE {
|
||||||
|
// BIT in value is CLEAR
|
||||||
|
if sliceContainsBit {
|
||||||
|
if eq2 {
|
||||||
|
if isNegative && !endIsNegative {
|
||||||
|
lt2 = true
|
||||||
|
}
|
||||||
|
eq2 = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
switch e.op {
|
||||||
|
case LT:
|
||||||
|
if lt1 {
|
||||||
|
results.Add(cID)
|
||||||
|
}
|
||||||
|
case LE:
|
||||||
|
if lt1 || (eq1 && (!startIsNegative || (startIsNegative && isNegative))) {
|
||||||
|
results.Add(cID)
|
||||||
|
}
|
||||||
|
case EQ:
|
||||||
|
if eq1 {
|
||||||
|
results.Add(cID)
|
||||||
|
}
|
||||||
|
case GE:
|
||||||
|
if gt1 || (eq1 && (startIsNegative || (!startIsNegative && !isNegative))) {
|
||||||
|
results.Add(cID)
|
||||||
|
}
|
||||||
|
case GT:
|
||||||
|
if gt1 {
|
||||||
|
results.Add(cID)
|
||||||
|
}
|
||||||
|
case RANGE:
|
||||||
|
if (eq1 || gt1) && (eq2 || lt2) {
|
||||||
|
results.Add(cID)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
panic(fmt.Sprintf("Operation [%v] not supported here", e.op))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resultsChan <- results
|
||||||
|
}
|
||||||
|
|
||||||
|
// MinMax - Find minimum or maximum value.
|
||||||
|
func (b *BSI) MinMax(parallelism int, op Operation, foundSet *Bitmap) int64 {
|
||||||
|
|
||||||
|
var n int = parallelism
|
||||||
|
if n == 0 {
|
||||||
|
n = runtime.NumCPU()
|
||||||
|
}
|
||||||
|
|
||||||
|
resultsChan := make(chan int64, n)
|
||||||
|
|
||||||
|
card := foundSet.GetCardinality()
|
||||||
|
x := card / uint64(n)
|
||||||
|
|
||||||
|
remainder := card - (x * uint64(n))
|
||||||
|
var batch []uint64
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
iter := foundSet.ManyIterator()
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
if i == n-1 {
|
||||||
|
batch = make([]uint64, x+remainder)
|
||||||
|
} else {
|
||||||
|
batch = make([]uint64, x)
|
||||||
|
}
|
||||||
|
iter.NextMany(batch)
|
||||||
|
wg.Add(1)
|
||||||
|
go b.minOrMax(op, batch, resultsChan, &wg)
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
close(resultsChan)
|
||||||
|
var minMax int64
|
||||||
|
if op == MAX {
|
||||||
|
minMax = Min64BitSigned
|
||||||
|
} else {
|
||||||
|
minMax = Max64BitSigned
|
||||||
|
}
|
||||||
|
|
||||||
|
for val := range resultsChan {
|
||||||
|
if (op == MAX && val > minMax) || (op == MIN && val <= minMax) {
|
||||||
|
minMax = val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return minMax
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan int64, wg *sync.WaitGroup) {
|
||||||
|
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
x := b.BitCount()
|
||||||
|
var value int64 = Max64BitSigned
|
||||||
|
if op == MAX {
|
||||||
|
value = Min64BitSigned
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < len(batch); i++ {
|
||||||
|
cID := batch[i]
|
||||||
|
eq := true
|
||||||
|
lt, gt := false, false
|
||||||
|
j := b.BitCount() - 1
|
||||||
|
var cVal int64
|
||||||
|
valueIsNegative := uint64(value)&(1<<uint64(x-1)) > 0 && bits.Len64(uint64(value)) == 64
|
||||||
|
isNegative := false
|
||||||
|
if x == 64 {
|
||||||
|
isNegative = b.bA[j].Contains(cID)
|
||||||
|
if isNegative {
|
||||||
|
cVal |= 1 << uint64(j)
|
||||||
|
}
|
||||||
|
j--
|
||||||
|
}
|
||||||
|
compValue := value
|
||||||
|
if isNegative != valueIsNegative {
|
||||||
|
compValue = ^value + 1
|
||||||
|
}
|
||||||
|
for ; j >= 0; j-- {
|
||||||
|
sliceContainsBit := b.bA[j].Contains(cID)
|
||||||
|
if sliceContainsBit {
|
||||||
|
cVal |= 1 << uint64(j)
|
||||||
|
}
|
||||||
|
if uint64(compValue)&(1<<uint64(j)) > 0 {
|
||||||
|
// BIT in value is SET
|
||||||
|
if !sliceContainsBit {
|
||||||
|
if eq {
|
||||||
|
eq = false
|
||||||
|
if op == MAX && valueIsNegative && !isNegative {
|
||||||
|
gt = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if op == MIN && (!valueIsNegative || (valueIsNegative == isNegative)) {
|
||||||
|
lt = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// BIT in value is CLEAR
|
||||||
|
if sliceContainsBit {
|
||||||
|
if eq {
|
||||||
|
eq = false
|
||||||
|
if op == MIN && isNegative && !valueIsNegative {
|
||||||
|
lt = true
|
||||||
|
}
|
||||||
|
if op == MAX && (valueIsNegative || (valueIsNegative == isNegative)) {
|
||||||
|
gt = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if lt || gt {
|
||||||
|
value = cVal
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resultsChan <- value
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet
|
||||||
|
// is also returned (for calculating the average).
|
||||||
|
func (b *BSI) Sum(foundSet *Bitmap) (sum int64, count uint64) {
|
||||||
|
|
||||||
|
count = foundSet.GetCardinality()
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i := 0; i < b.BitCount(); i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(j int) {
|
||||||
|
defer wg.Done()
|
||||||
|
atomic.AddInt64(&sum, int64(foundSet.AndCardinality(&b.bA[j])<<uint(j)))
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transpose calls b.IntersectAndTranspose(0, b.eBM)
|
||||||
|
func (b *BSI) Transpose() *Bitmap {
|
||||||
|
return b.IntersectAndTranspose(0, &b.eBM)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IntersectAndTranspose is a matrix transpose function. Return a bitmap such that the values are represented as column IDs
|
||||||
|
// in the returned bitmap. This is accomplished by iterating over the foundSet and only including
|
||||||
|
// the column IDs in the source (foundSet) as compared with this BSI. This can be useful for
|
||||||
|
// vectoring one set of integers to another.
|
||||||
|
//
|
||||||
|
// TODO: This implementation is functional but not performant, needs to be re-written perhaps using SIMD SSE2 instructions.
|
||||||
|
func (b *BSI) IntersectAndTranspose(parallelism int, foundSet *Bitmap) *Bitmap {
|
||||||
|
|
||||||
|
trans := &task{bsi: b}
|
||||||
|
return parallelExecutor(parallelism, trans, transpose, foundSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
func transpose(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup) {
|
||||||
|
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
results := NewBitmap()
|
||||||
|
if e.bsi.runOptimized {
|
||||||
|
results.RunOptimize()
|
||||||
|
}
|
||||||
|
for _, cID := range batch {
|
||||||
|
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
|
||||||
|
results.Add(uint64(value))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resultsChan <- results
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParOr is intended primarily to be a concatenation function to be used during bulk load operations.
|
||||||
|
// Care should be taken to make sure that columnIDs do not overlap (unless overlapping values are
|
||||||
|
// identical).
|
||||||
|
func (b *BSI) ParOr(parallelism int, bsis ...*BSI) {
|
||||||
|
|
||||||
|
// Consolidate sets
|
||||||
|
bits := len(b.bA)
|
||||||
|
for i := 0; i < len(bsis); i++ {
|
||||||
|
if len(bsis[i].bA) > bits {
|
||||||
|
bits = bsis[i].BitCount()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure we have enough bit slices
|
||||||
|
for bits > b.BitCount() {
|
||||||
|
bm := Bitmap{}
|
||||||
|
bm.RunOptimize()
|
||||||
|
b.bA = append(b.bA, bm)
|
||||||
|
}
|
||||||
|
|
||||||
|
a := make([][]*Bitmap, bits)
|
||||||
|
for i := range a {
|
||||||
|
a[i] = make([]*Bitmap, 0)
|
||||||
|
for _, x := range bsis {
|
||||||
|
if len(x.bA) > i {
|
||||||
|
a[i] = append(a[i], &x.bA[i])
|
||||||
|
} else {
|
||||||
|
if b.runOptimized {
|
||||||
|
a[i][0].RunOptimize()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consolidate existence bit maps
|
||||||
|
ebms := make([]*Bitmap, len(bsis))
|
||||||
|
for i := range ebms {
|
||||||
|
ebms[i] = &bsis[i].eBM
|
||||||
|
}
|
||||||
|
|
||||||
|
// First merge all the bit slices from all bsi maps that exist in target
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i := 0; i < bits; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(j int) {
|
||||||
|
defer wg.Done()
|
||||||
|
x := []*Bitmap{&b.bA[j]}
|
||||||
|
x = append(x, a[j]...)
|
||||||
|
b.bA[j] = *ParOr(parallelism, x...)
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
// merge all the EBM maps
|
||||||
|
x := []*Bitmap{&b.eBM}
|
||||||
|
x = append(x, ebms...)
|
||||||
|
b.eBM = *ParOr(parallelism, x...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnmarshalBinary de-serialize a BSI. The value at bitData[0] is the EBM. Other indices are in least to most
|
||||||
|
// significance order starting at bitData[1] (bit position 0).
|
||||||
|
func (b *BSI) UnmarshalBinary(bitData [][]byte) error {
|
||||||
|
|
||||||
|
for i := 1; i < len(bitData); i++ {
|
||||||
|
if bitData == nil || len(bitData[i]) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if b.BitCount() < i {
|
||||||
|
newBm := Bitmap{}
|
||||||
|
if b.runOptimized {
|
||||||
|
newBm.RunOptimize()
|
||||||
|
}
|
||||||
|
b.bA = append(b.bA, newBm)
|
||||||
|
}
|
||||||
|
if err := b.bA[i-1].UnmarshalBinary(bitData[i]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if b.runOptimized {
|
||||||
|
b.bA[i-1].RunOptimize()
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
// First element of bitData is the EBM
|
||||||
|
if bitData[0] == nil {
|
||||||
|
b.eBM = Bitmap{}
|
||||||
|
if b.runOptimized {
|
||||||
|
b.eBM.RunOptimize()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := b.eBM.UnmarshalBinary(bitData[0]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if b.runOptimized {
|
||||||
|
b.eBM.RunOptimize()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadFrom reads a serialized version of this BSI from stream.
|
||||||
|
func (b *BSI) ReadFrom(stream io.Reader) (p int64, err error) {
|
||||||
|
bm, n, err := readBSIContainerFromStream(stream)
|
||||||
|
p += n
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("reading existence bitmap: %w", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
b.eBM = bm
|
||||||
|
b.bA = b.bA[:0]
|
||||||
|
for {
|
||||||
|
// This forces a new memory location to be allocated and if we're lucky it only escapes if
|
||||||
|
// there's no error.
|
||||||
|
var bm Bitmap
|
||||||
|
bm, n, err = readBSIContainerFromStream(stream)
|
||||||
|
p += n
|
||||||
|
if err == io.EOF {
|
||||||
|
err = nil
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("reading bit slice index %v: %w", len(b.bA), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
b.bA = append(b.bA, bm)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func readBSIContainerFromStream(r io.Reader) (bm Bitmap, p int64, err error) {
|
||||||
|
p, err = bm.ReadFrom(r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// MarshalBinary serializes a BSI
|
||||||
|
func (b *BSI) MarshalBinary() ([][]byte, error) {
|
||||||
|
|
||||||
|
var err error
|
||||||
|
data := make([][]byte, b.BitCount()+1)
|
||||||
|
// Add extra element for EBM (BitCount() + 1)
|
||||||
|
for i := 1; i < b.BitCount()+1; i++ {
|
||||||
|
data[i], err = b.bA[i-1].MarshalBinary()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Marshal EBM
|
||||||
|
data[0], err = b.eBM.MarshalBinary()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteTo writes a serialized version of this BSI to stream.
|
||||||
|
func (b *BSI) WriteTo(w io.Writer) (n int64, err error) {
|
||||||
|
n1, err := b.eBM.WriteTo(w)
|
||||||
|
n += n1
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, bm := range b.bA {
|
||||||
|
n1, err = bm.WriteTo(w)
|
||||||
|
n += n1
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// BatchEqual returns a bitmap containing the column IDs where the values are contained within the list of values provided.
|
||||||
|
func (b *BSI) BatchEqual(parallelism int, values []int64) *Bitmap {
|
||||||
|
|
||||||
|
valMap := make(map[int64]struct{}, len(values))
|
||||||
|
for i := 0; i < len(values); i++ {
|
||||||
|
valMap[values[i]] = struct{}{}
|
||||||
|
}
|
||||||
|
comp := &task{bsi: b, values: valMap}
|
||||||
|
return parallelExecutor(parallelism, comp, batchEqual, &b.eBM)
|
||||||
|
}
|
||||||
|
|
||||||
|
func batchEqual(e *task, batch []uint64, resultsChan chan *Bitmap,
|
||||||
|
wg *sync.WaitGroup) {
|
||||||
|
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
results := NewBitmap()
|
||||||
|
if e.bsi.runOptimized {
|
||||||
|
results.RunOptimize()
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < len(batch); i++ {
|
||||||
|
cID := batch[i]
|
||||||
|
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
|
||||||
|
if _, yes := e.values[int64(value)]; yes {
|
||||||
|
results.Add(cID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resultsChan <- results
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearBits cleared the bits that exist in the target if they are also in the found set.
|
||||||
|
func ClearBits(foundSet, target *Bitmap) {
|
||||||
|
iter := foundSet.Iterator()
|
||||||
|
for iter.HasNext() {
|
||||||
|
cID := iter.Next()
|
||||||
|
target.Remove(cID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearValues removes the values found in foundSet
|
||||||
|
func (b *BSI) ClearValues(foundSet *Bitmap) {
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
ClearBits(foundSet, &b.eBM)
|
||||||
|
}()
|
||||||
|
for i := 0; i < b.BitCount(); i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(j int) {
|
||||||
|
defer wg.Done()
|
||||||
|
ClearBits(foundSet, &b.bA[j])
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewBSIRetainSet - Construct a new BSI from a clone of existing BSI, retain only values contained in foundSet
|
||||||
|
func (b *BSI) NewBSIRetainSet(foundSet *Bitmap) *BSI {
|
||||||
|
|
||||||
|
newBSI := NewBSI(b.MaxValue, b.MinValue)
|
||||||
|
newBSI.bA = make([]Bitmap, b.BitCount())
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
newBSI.eBM = *b.eBM.Clone()
|
||||||
|
newBSI.eBM.And(foundSet)
|
||||||
|
}()
|
||||||
|
for i := 0; i < b.BitCount(); i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(j int) {
|
||||||
|
defer wg.Done()
|
||||||
|
newBSI.bA[j] = *b.bA[j].Clone()
|
||||||
|
newBSI.bA[j].And(foundSet)
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
return newBSI
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clone performs a deep copy of BSI contents.
|
||||||
|
func (b *BSI) Clone() *BSI {
|
||||||
|
return b.NewBSIRetainSet(&b.eBM)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add - In-place sum the contents of another BSI with this BSI, column wise.
|
||||||
|
func (b *BSI) Add(other *BSI) {
|
||||||
|
|
||||||
|
b.eBM.Or(&other.eBM)
|
||||||
|
for i := 0; i < len(other.bA); i++ {
|
||||||
|
b.addDigit(&other.bA[i], i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *BSI) addDigit(foundSet *Bitmap, i int) {
|
||||||
|
|
||||||
|
if i >= len(b.bA) {
|
||||||
|
b.bA = append(b.bA, Bitmap{})
|
||||||
|
}
|
||||||
|
carry := And(&b.bA[i], foundSet)
|
||||||
|
b.bA[i].Xor(foundSet)
|
||||||
|
if !carry.IsEmpty() {
|
||||||
|
if i+1 >= len(b.bA) {
|
||||||
|
b.bA = append(b.bA, Bitmap{})
|
||||||
|
}
|
||||||
|
b.addDigit(carry, i+1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TransposeWithCounts is a matrix transpose function that returns a BSI that has a columnID system defined by the values
|
||||||
|
// contained within the input BSI. Given that for BSIs, different columnIDs can have the same value. TransposeWithCounts
|
||||||
|
// is useful for situations where there is a one-to-many relationship between the vectored integer sets. The resulting BSI
|
||||||
|
// contains the number of times a particular value appeared in the input BSI.
|
||||||
|
func (b *BSI) TransposeWithCounts(parallelism int, foundSet, filterSet *Bitmap) *BSI {
|
||||||
|
|
||||||
|
return parallelExecutorBSIResults(parallelism, b, transposeWithCounts, foundSet, filterSet, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func transposeWithCounts(input *BSI, filterSet *Bitmap, batch []uint64, resultsChan chan *BSI, wg *sync.WaitGroup) {
|
||||||
|
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
results := NewDefaultBSI()
|
||||||
|
if input.runOptimized {
|
||||||
|
results.RunOptimize()
|
||||||
|
}
|
||||||
|
for _, cID := range batch {
|
||||||
|
if value, ok := input.GetValue(uint64(cID)); ok {
|
||||||
|
if !filterSet.Contains(uint64(value)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if val, ok2 := results.GetValue(uint64(value)); !ok2 {
|
||||||
|
results.SetValue(uint64(value), 1)
|
||||||
|
} else {
|
||||||
|
val++
|
||||||
|
results.SetValue(uint64(value), val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resultsChan <- results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increment - In-place increment of values in a BSI. Found set select columns for incrementing.
|
||||||
|
func (b *BSI) Increment(foundSet *Bitmap) {
|
||||||
|
b.addDigit(foundSet, 0)
|
||||||
|
b.eBM.Or(foundSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IncrementAll - In-place increment of all values in a BSI.
|
||||||
|
func (b *BSI) IncrementAll() {
|
||||||
|
b.Increment(b.GetExistenceBitmap())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Equals - Check for semantic equality of two BSIs.
|
||||||
|
func (b *BSI) Equals(other *BSI) bool {
|
||||||
|
if !b.eBM.Equals(&other.eBM) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i := 0; i < len(b.bA) || i < len(other.bA); i++ {
|
||||||
|
if i >= len(b.bA) {
|
||||||
|
if !other.bA[i].IsEmpty() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
} else if i >= len(other.bA) {
|
||||||
|
if !b.bA[i].IsEmpty() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !b.bA[i].Equals(&other.bA[i]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetSizeInBytes - the size in bytes of the data structure
|
||||||
|
func (b *BSI) GetSizeInBytes() int {
|
||||||
|
size := b.eBM.GetSizeInBytes()
|
||||||
|
for _, bm := range b.bA {
|
||||||
|
size += bm.GetSizeInBytes()
|
||||||
|
}
|
||||||
|
return int(size)
|
||||||
|
}
|
||||||
31
vendor/github.com/RoaringBitmap/roaring/roaring64/fastaggregation64.go
generated
vendored
Normal file
31
vendor/github.com/RoaringBitmap/roaring/roaring64/fastaggregation64.go
generated
vendored
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
package roaring64
|
||||||
|
|
||||||
|
// FastAnd computes the intersection between many bitmaps quickly
|
||||||
|
// Compared to the And function, it can take many bitmaps as input, thus saving the trouble
|
||||||
|
// of manually calling "And" many times.
|
||||||
|
func FastAnd(bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
if len(bitmaps) == 0 {
|
||||||
|
return NewBitmap()
|
||||||
|
} else if len(bitmaps) == 1 {
|
||||||
|
return bitmaps[0].Clone()
|
||||||
|
}
|
||||||
|
answer := And(bitmaps[0], bitmaps[1])
|
||||||
|
for _, bm := range bitmaps[2:] {
|
||||||
|
answer.And(bm)
|
||||||
|
}
|
||||||
|
return answer
|
||||||
|
}
|
||||||
|
|
||||||
|
// FastOr computes the union between many bitmaps quickly, as opposed to having to call Or repeatedly.
|
||||||
|
func FastOr(bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
if len(bitmaps) == 0 {
|
||||||
|
return NewBitmap()
|
||||||
|
} else if len(bitmaps) == 1 {
|
||||||
|
return bitmaps[0].Clone()
|
||||||
|
}
|
||||||
|
answer := Or(bitmaps[0], bitmaps[1])
|
||||||
|
for _, bm := range bitmaps[2:] {
|
||||||
|
answer.Or(bm)
|
||||||
|
}
|
||||||
|
return answer
|
||||||
|
}
|
||||||
169
vendor/github.com/RoaringBitmap/roaring/roaring64/iterables64.go
generated
vendored
Normal file
169
vendor/github.com/RoaringBitmap/roaring/roaring64/iterables64.go
generated
vendored
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
package roaring64
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/RoaringBitmap/roaring"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IntIterable64 allows you to iterate over the values in a Bitmap
|
||||||
|
type IntIterable64 interface {
|
||||||
|
HasNext() bool
|
||||||
|
Next() uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
// IntPeekable64 allows you to look at the next value without advancing and
|
||||||
|
// advance as long as the next value is smaller than minval
|
||||||
|
type IntPeekable64 interface {
|
||||||
|
IntIterable64
|
||||||
|
// PeekNext peeks the next value without advancing the iterator
|
||||||
|
PeekNext() uint64
|
||||||
|
// AdvanceIfNeeded advances as long as the next value is smaller than minval
|
||||||
|
AdvanceIfNeeded(minval uint64)
|
||||||
|
}
|
||||||
|
|
||||||
|
type intIterator struct {
|
||||||
|
pos int
|
||||||
|
hs uint64
|
||||||
|
iter roaring.IntPeekable
|
||||||
|
highlowcontainer *roaringArray64
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasNext returns true if there are more integers to iterate over
|
||||||
|
func (ii *intIterator) HasNext() bool {
|
||||||
|
return ii.pos < ii.highlowcontainer.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ii *intIterator) init() {
|
||||||
|
if ii.highlowcontainer.size() > ii.pos {
|
||||||
|
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).Iterator()
|
||||||
|
ii.hs = uint64(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next returns the next integer
|
||||||
|
func (ii *intIterator) Next() uint64 {
|
||||||
|
lowbits := ii.iter.Next()
|
||||||
|
x := uint64(lowbits) | ii.hs
|
||||||
|
if !ii.iter.HasNext() {
|
||||||
|
ii.pos = ii.pos + 1
|
||||||
|
ii.init()
|
||||||
|
}
|
||||||
|
return x
|
||||||
|
}
|
||||||
|
|
||||||
|
// PeekNext peeks the next value without advancing the iterator
|
||||||
|
func (ii *intIterator) PeekNext() uint64 {
|
||||||
|
return uint64(ii.iter.PeekNext()&maxLowBit) | ii.hs
|
||||||
|
}
|
||||||
|
|
||||||
|
// AdvanceIfNeeded advances as long as the next value is smaller than minval
|
||||||
|
func (ii *intIterator) AdvanceIfNeeded(minval uint64) {
|
||||||
|
to := minval >> 32
|
||||||
|
|
||||||
|
for ii.HasNext() && (ii.hs>>32) < to {
|
||||||
|
ii.pos++
|
||||||
|
ii.init()
|
||||||
|
}
|
||||||
|
|
||||||
|
if ii.HasNext() && (ii.hs>>32) == to {
|
||||||
|
ii.iter.AdvanceIfNeeded(lowbits(minval))
|
||||||
|
|
||||||
|
if !ii.iter.HasNext() {
|
||||||
|
ii.pos++
|
||||||
|
ii.init()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newIntIterator(a *Bitmap) *intIterator {
|
||||||
|
p := new(intIterator)
|
||||||
|
p.pos = 0
|
||||||
|
p.highlowcontainer = &a.highlowcontainer
|
||||||
|
p.init()
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
type intReverseIterator struct {
|
||||||
|
pos int
|
||||||
|
hs uint64
|
||||||
|
iter roaring.IntIterable
|
||||||
|
highlowcontainer *roaringArray64
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasNext returns true if there are more integers to iterate over
|
||||||
|
func (ii *intReverseIterator) HasNext() bool {
|
||||||
|
return ii.pos >= 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ii *intReverseIterator) init() {
|
||||||
|
if ii.pos >= 0 {
|
||||||
|
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).ReverseIterator()
|
||||||
|
ii.hs = uint64(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 32
|
||||||
|
} else {
|
||||||
|
ii.iter = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next returns the next integer
|
||||||
|
func (ii *intReverseIterator) Next() uint64 {
|
||||||
|
x := uint64(ii.iter.Next()) | ii.hs
|
||||||
|
if !ii.iter.HasNext() {
|
||||||
|
ii.pos = ii.pos - 1
|
||||||
|
ii.init()
|
||||||
|
}
|
||||||
|
return x
|
||||||
|
}
|
||||||
|
|
||||||
|
func newIntReverseIterator(a *Bitmap) *intReverseIterator {
|
||||||
|
p := new(intReverseIterator)
|
||||||
|
p.highlowcontainer = &a.highlowcontainer
|
||||||
|
p.pos = a.highlowcontainer.size() - 1
|
||||||
|
p.init()
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// ManyIntIterable64 allows you to iterate over the values in a Bitmap
|
||||||
|
type ManyIntIterable64 interface {
|
||||||
|
// pass in a buffer to fill up with values, returns how many values were returned
|
||||||
|
NextMany([]uint64) int
|
||||||
|
}
|
||||||
|
|
||||||
|
type manyIntIterator struct {
|
||||||
|
pos int
|
||||||
|
hs uint64
|
||||||
|
iter roaring.ManyIntIterable
|
||||||
|
highlowcontainer *roaringArray64
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ii *manyIntIterator) init() {
|
||||||
|
if ii.highlowcontainer.size() > ii.pos {
|
||||||
|
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).ManyIterator()
|
||||||
|
ii.hs = uint64(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 32
|
||||||
|
} else {
|
||||||
|
ii.iter = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ii *manyIntIterator) NextMany(buf []uint64) int {
|
||||||
|
n := 0
|
||||||
|
for n < len(buf) {
|
||||||
|
if ii.iter == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
moreN := ii.iter.NextMany64(ii.hs, buf[n:])
|
||||||
|
n += moreN
|
||||||
|
if moreN == 0 {
|
||||||
|
ii.pos = ii.pos + 1
|
||||||
|
ii.init()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
func newManyIntIterator(a *Bitmap) *manyIntIterator {
|
||||||
|
p := new(manyIntIterator)
|
||||||
|
p.pos = 0
|
||||||
|
p.highlowcontainer = &a.highlowcontainer
|
||||||
|
p.init()
|
||||||
|
return p
|
||||||
|
}
|
||||||
292
vendor/github.com/RoaringBitmap/roaring/roaring64/parallel64.go
generated
vendored
Normal file
292
vendor/github.com/RoaringBitmap/roaring/roaring64/parallel64.go
generated
vendored
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
package roaring64
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
|
"github.com/RoaringBitmap/roaring"
|
||||||
|
)
|
||||||
|
|
||||||
|
var defaultWorkerCount = runtime.NumCPU()
|
||||||
|
|
||||||
|
// ParOr computes the union (OR) of all provided bitmaps in parallel,
|
||||||
|
// where the parameter "parallelism" determines how many workers are to be used
|
||||||
|
// (if it is set to 0, a default number of workers is chosen)
|
||||||
|
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||||
|
var lKey uint32 = maxUint32
|
||||||
|
var hKey uint32
|
||||||
|
|
||||||
|
bitmapsFiltered := bitmaps[:0]
|
||||||
|
for _, b := range bitmaps {
|
||||||
|
if !b.IsEmpty() {
|
||||||
|
bitmapsFiltered = append(bitmapsFiltered, b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bitmaps = bitmapsFiltered
|
||||||
|
|
||||||
|
for _, b := range bitmaps {
|
||||||
|
lKey = minOfUint32(lKey, b.highlowcontainer.keys[0])
|
||||||
|
hKey = maxOfUint32(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
|
||||||
|
}
|
||||||
|
|
||||||
|
if lKey == maxUint32 && hKey == 0 {
|
||||||
|
return New()
|
||||||
|
} else if len(bitmaps) == 1 {
|
||||||
|
return bitmaps[0]
|
||||||
|
}
|
||||||
|
// The following might overflow and we do not want that!
|
||||||
|
// as it might lead to a channel of size 0 later which,
|
||||||
|
// on some systems, would block indefinitely.
|
||||||
|
keyRange := uint64(hKey) - uint64(lKey) + 1
|
||||||
|
if keyRange == 1 {
|
||||||
|
// revert to FastOr. Since the key range is 0
|
||||||
|
// no container-level aggregation parallelism is achievable
|
||||||
|
return FastOr(bitmaps...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if parallelism == 0 {
|
||||||
|
parallelism = defaultWorkerCount
|
||||||
|
}
|
||||||
|
// We cannot use int since int is 32-bit on 32-bit systems.
|
||||||
|
var chunkSize int64
|
||||||
|
var chunkCount int64
|
||||||
|
if int64(parallelism)*4 > int64(keyRange) {
|
||||||
|
chunkSize = 1
|
||||||
|
chunkCount = int64(keyRange)
|
||||||
|
} else {
|
||||||
|
chunkCount = int64(parallelism) * 4
|
||||||
|
chunkSize = (int64(keyRange) + chunkCount - 1) / chunkCount
|
||||||
|
}
|
||||||
|
|
||||||
|
if chunkCount*chunkSize < int64(keyRange) {
|
||||||
|
// it's fine to panic to indicate an implementation error
|
||||||
|
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
|
||||||
|
}
|
||||||
|
|
||||||
|
chunks := make([]*roaringArray64, chunkCount)
|
||||||
|
|
||||||
|
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
|
||||||
|
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
|
||||||
|
|
||||||
|
orFunc := func() {
|
||||||
|
for spec := range chunkSpecChan {
|
||||||
|
ra := orOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
|
||||||
|
for _, b := range bitmaps[2:] {
|
||||||
|
ra = iorOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
|
||||||
|
}
|
||||||
|
|
||||||
|
chunkChan <- parChunk{ra, spec.idx}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < parallelism; i++ {
|
||||||
|
go orFunc()
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for i := int64(0); i < chunkCount; i++ {
|
||||||
|
spec := parChunkSpec{
|
||||||
|
start: uint32(int64(lKey) + i*chunkSize),
|
||||||
|
end: uint32(minOfInt64(int64(lKey)+(i+1)*chunkSize-1, int64(hKey))),
|
||||||
|
idx: int(i),
|
||||||
|
}
|
||||||
|
chunkSpecChan <- spec
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
chunksRemaining := chunkCount
|
||||||
|
for chunk := range chunkChan {
|
||||||
|
chunks[chunk.idx] = chunk.ra
|
||||||
|
chunksRemaining--
|
||||||
|
if chunksRemaining == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(chunkChan)
|
||||||
|
close(chunkSpecChan)
|
||||||
|
|
||||||
|
containerCount := 0
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
containerCount += chunk.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
result := Bitmap{
|
||||||
|
roaringArray64{
|
||||||
|
containers: make([]*roaring.Bitmap, containerCount),
|
||||||
|
keys: make([]uint32, containerCount),
|
||||||
|
needCopyOnWrite: make([]bool, containerCount),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
resultOffset := 0
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
|
||||||
|
copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
|
||||||
|
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
|
||||||
|
resultOffset += chunk.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return &result
|
||||||
|
}
|
||||||
|
|
||||||
|
type parChunkSpec struct {
|
||||||
|
start uint32
|
||||||
|
end uint32
|
||||||
|
idx int
|
||||||
|
}
|
||||||
|
|
||||||
|
type parChunk struct {
|
||||||
|
ra *roaringArray64
|
||||||
|
idx int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c parChunk) size() int {
|
||||||
|
return c.ra.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
func parNaiveStartAt(ra *roaringArray64, start uint32, last uint32) int {
|
||||||
|
for idx, key := range ra.keys {
|
||||||
|
if key >= start && key <= last {
|
||||||
|
return idx
|
||||||
|
} else if key > last {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ra.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
func orOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
|
||||||
|
answer := &roaringArray64{}
|
||||||
|
length1 := ra1.size()
|
||||||
|
length2 := ra2.size()
|
||||||
|
|
||||||
|
idx1 := parNaiveStartAt(ra1, start, last)
|
||||||
|
idx2 := parNaiveStartAt(ra2, start, last)
|
||||||
|
|
||||||
|
var key1 uint32
|
||||||
|
var key2 uint32
|
||||||
|
if idx1 < length1 && idx2 < length2 {
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
|
||||||
|
for key1 <= last && key2 <= last {
|
||||||
|
|
||||||
|
if key1 < key2 {
|
||||||
|
answer.appendCopy(*ra1, idx1)
|
||||||
|
idx1++
|
||||||
|
if idx1 == length1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
} else if key1 > key2 {
|
||||||
|
answer.appendCopy(*ra2, idx2)
|
||||||
|
idx2++
|
||||||
|
if idx2 == length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
} else {
|
||||||
|
c1 := ra1.getContainerAtIndex(idx1)
|
||||||
|
|
||||||
|
//answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
|
||||||
|
answer.appendContainer(key1, roaring.Or(c1, ra2.getContainerAtIndex(idx2)), false)
|
||||||
|
idx1++
|
||||||
|
idx2++
|
||||||
|
if idx1 == length1 || idx2 == length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx2 < length2 {
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
for key2 <= last {
|
||||||
|
answer.appendCopy(*ra2, idx2)
|
||||||
|
idx2++
|
||||||
|
if idx2 == length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx1 < length1 {
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
for key1 <= last {
|
||||||
|
answer.appendCopy(*ra1, idx1)
|
||||||
|
idx1++
|
||||||
|
if idx1 == length1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return answer
|
||||||
|
}
|
||||||
|
|
||||||
|
func iorOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
|
||||||
|
length1 := ra1.size()
|
||||||
|
length2 := ra2.size()
|
||||||
|
|
||||||
|
idx1 := 0
|
||||||
|
idx2 := parNaiveStartAt(ra2, start, last)
|
||||||
|
|
||||||
|
var key1 uint32
|
||||||
|
var key2 uint32
|
||||||
|
if idx1 < length1 && idx2 < length2 {
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
|
||||||
|
for key1 <= last && key2 <= last {
|
||||||
|
if key1 < key2 {
|
||||||
|
idx1++
|
||||||
|
if idx1 >= length1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
} else if key1 > key2 {
|
||||||
|
ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
|
||||||
|
ra1.needCopyOnWrite[idx1] = true
|
||||||
|
idx2++
|
||||||
|
idx1++
|
||||||
|
length1++
|
||||||
|
if idx2 >= length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
} else {
|
||||||
|
c1 := ra1.getWritableContainerAtIndex(idx1)
|
||||||
|
|
||||||
|
//ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
|
||||||
|
c1.Or(ra2.getContainerAtIndex(idx2))
|
||||||
|
ra1.setContainerAtIndex(idx1, c1)
|
||||||
|
|
||||||
|
ra1.needCopyOnWrite[idx1] = false
|
||||||
|
idx1++
|
||||||
|
idx2++
|
||||||
|
if idx1 >= length1 || idx2 >= length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if idx2 < length2 {
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
for key2 <= last {
|
||||||
|
ra1.appendCopy(*ra2, idx2)
|
||||||
|
idx2++
|
||||||
|
if idx2 >= length2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ra1
|
||||||
|
}
|
||||||
1263
vendor/github.com/RoaringBitmap/roaring/roaring64/roaring64.go
generated
vendored
Normal file
1263
vendor/github.com/RoaringBitmap/roaring/roaring64/roaring64.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
403
vendor/github.com/RoaringBitmap/roaring/roaring64/roaringarray64.go
generated
vendored
Normal file
403
vendor/github.com/RoaringBitmap/roaring/roaring64/roaringarray64.go
generated
vendored
Normal file
@@ -0,0 +1,403 @@
|
|||||||
|
package roaring64
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/RoaringBitmap/roaring"
|
||||||
|
)
|
||||||
|
|
||||||
|
type roaringArray64 struct {
|
||||||
|
keys []uint32
|
||||||
|
containers []*roaring.Bitmap
|
||||||
|
needCopyOnWrite []bool
|
||||||
|
copyOnWrite bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// runOptimize compresses the element containers to minimize space consumed.
|
||||||
|
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
|
||||||
|
// A: since we aren't changing the logical content, just the representation,
|
||||||
|
//
|
||||||
|
// we don't bother to check the needCopyOnWrite bits. We replace
|
||||||
|
// (possibly all) elements of ra.containers in-place with space
|
||||||
|
// optimized versions.
|
||||||
|
func (ra *roaringArray64) runOptimize() {
|
||||||
|
for i := range ra.containers {
|
||||||
|
ra.containers[i].RunOptimize()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) appendContainer(key uint32, value *roaring.Bitmap, mustCopyOnWrite bool) {
|
||||||
|
ra.keys = append(ra.keys, key)
|
||||||
|
ra.containers = append(ra.containers, value)
|
||||||
|
ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) appendWithoutCopy(sa roaringArray64, startingindex int) {
|
||||||
|
mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
|
||||||
|
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) appendCopy(sa roaringArray64, startingindex int) {
|
||||||
|
// cow only if the two request it, or if we already have a lightweight copy
|
||||||
|
copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
|
||||||
|
if !copyonwrite {
|
||||||
|
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||||
|
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].Clone(), copyonwrite)
|
||||||
|
} else {
|
||||||
|
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].Clone(), copyonwrite)
|
||||||
|
if !sa.needsCopyOnWrite(startingindex) {
|
||||||
|
sa.setNeedsCopyOnWrite(startingindex)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) appendWithoutCopyMany(sa roaringArray64, startingindex, end int) {
|
||||||
|
for i := startingindex; i < end; i++ {
|
||||||
|
ra.appendWithoutCopy(sa, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) appendCopyMany(sa roaringArray64, startingindex, end int) {
|
||||||
|
for i := startingindex; i < end; i++ {
|
||||||
|
ra.appendCopy(sa, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) appendCopiesUntil(sa roaringArray64, stoppingKey uint32) {
|
||||||
|
// cow only if the two request it, or if we already have a lightweight copy
|
||||||
|
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||||
|
|
||||||
|
for i := 0; i < sa.size(); i++ {
|
||||||
|
if sa.keys[i] >= stoppingKey {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||||
|
if thiscopyonewrite {
|
||||||
|
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||||
|
if !sa.needsCopyOnWrite(i) {
|
||||||
|
sa.setNeedsCopyOnWrite(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||||
|
ra.appendContainer(sa.keys[i], sa.containers[i].Clone(), thiscopyonewrite)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) appendCopiesAfter(sa roaringArray64, beforeStart uint32) {
|
||||||
|
// cow only if the two request it, or if we already have a lightweight copy
|
||||||
|
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||||
|
|
||||||
|
startLocation := sa.getIndex(beforeStart)
|
||||||
|
if startLocation >= 0 {
|
||||||
|
startLocation++
|
||||||
|
} else {
|
||||||
|
startLocation = -startLocation - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := startLocation; i < sa.size(); i++ {
|
||||||
|
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||||
|
if thiscopyonewrite {
|
||||||
|
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||||
|
if !sa.needsCopyOnWrite(i) {
|
||||||
|
sa.setNeedsCopyOnWrite(i)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||||
|
ra.appendContainer(sa.keys[i], sa.containers[i].Clone(), thiscopyonewrite)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) removeIndexRange(begin, end int) {
|
||||||
|
if end <= begin {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
r := end - begin
|
||||||
|
|
||||||
|
copy(ra.keys[begin:], ra.keys[end:])
|
||||||
|
copy(ra.containers[begin:], ra.containers[end:])
|
||||||
|
copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
|
||||||
|
|
||||||
|
ra.resize(len(ra.keys) - r)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) resize(newsize int) {
|
||||||
|
for k := newsize; k < len(ra.containers); k++ {
|
||||||
|
ra.keys[k] = 0
|
||||||
|
ra.needCopyOnWrite[k] = false
|
||||||
|
ra.containers[k] = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ra.keys = ra.keys[:newsize]
|
||||||
|
ra.containers = ra.containers[:newsize]
|
||||||
|
ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) clear() {
|
||||||
|
ra.resize(0)
|
||||||
|
ra.copyOnWrite = false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) clone() *roaringArray64 {
|
||||||
|
|
||||||
|
sa := roaringArray64{}
|
||||||
|
sa.copyOnWrite = ra.copyOnWrite
|
||||||
|
|
||||||
|
// this is where copyOnWrite is used.
|
||||||
|
if ra.copyOnWrite {
|
||||||
|
sa.keys = make([]uint32, len(ra.keys))
|
||||||
|
copy(sa.keys, ra.keys)
|
||||||
|
sa.containers = make([]*roaring.Bitmap, len(ra.containers))
|
||||||
|
copy(sa.containers, ra.containers)
|
||||||
|
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||||
|
|
||||||
|
ra.markAllAsNeedingCopyOnWrite()
|
||||||
|
sa.markAllAsNeedingCopyOnWrite()
|
||||||
|
|
||||||
|
// sa.needCopyOnWrite is shared
|
||||||
|
} else {
|
||||||
|
// make a full copy
|
||||||
|
|
||||||
|
sa.keys = make([]uint32, len(ra.keys))
|
||||||
|
copy(sa.keys, ra.keys)
|
||||||
|
|
||||||
|
sa.containers = make([]*roaring.Bitmap, len(ra.containers))
|
||||||
|
for i := range sa.containers {
|
||||||
|
sa.containers[i] = ra.containers[i].Clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||||
|
}
|
||||||
|
return &sa
|
||||||
|
}
|
||||||
|
|
||||||
|
// clone all containers which have needCopyOnWrite set to true
|
||||||
|
// This can be used to make sure it is safe to munmap a []byte
|
||||||
|
// that the roaring array may still have a reference to.
|
||||||
|
func (ra *roaringArray64) cloneCopyOnWriteContainers() {
|
||||||
|
for i, needCopyOnWrite := range ra.needCopyOnWrite {
|
||||||
|
if needCopyOnWrite {
|
||||||
|
ra.containers[i] = ra.containers[i].Clone()
|
||||||
|
ra.needCopyOnWrite[i] = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// unused function:
|
||||||
|
// func (ra *roaringArray64) containsKey(x uint32) bool {
|
||||||
|
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
|
||||||
|
// }
|
||||||
|
|
||||||
|
func (ra *roaringArray64) getContainer(x uint32) *roaring.Bitmap {
|
||||||
|
i := ra.binarySearch(0, int64(len(ra.keys)), x)
|
||||||
|
if i < 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return ra.containers[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) getContainerAtIndex(i int) *roaring.Bitmap {
|
||||||
|
return ra.containers[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) getWritableContainerAtIndex(i int) *roaring.Bitmap {
|
||||||
|
if ra.needCopyOnWrite[i] {
|
||||||
|
ra.containers[i] = ra.containers[i].Clone()
|
||||||
|
ra.needCopyOnWrite[i] = false
|
||||||
|
}
|
||||||
|
return ra.containers[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) getIndex(x uint32) int {
|
||||||
|
// before the binary search, we optimize for frequent cases
|
||||||
|
size := len(ra.keys)
|
||||||
|
if (size == 0) || (ra.keys[size-1] == x) {
|
||||||
|
return size - 1
|
||||||
|
}
|
||||||
|
return ra.binarySearch(0, int64(size), x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) getKeyAtIndex(i int) uint32 {
|
||||||
|
return ra.keys[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) insertNewKeyValueAt(i int, key uint32, value *roaring.Bitmap) {
|
||||||
|
ra.keys = append(ra.keys, 0)
|
||||||
|
ra.containers = append(ra.containers, nil)
|
||||||
|
|
||||||
|
copy(ra.keys[i+1:], ra.keys[i:])
|
||||||
|
copy(ra.containers[i+1:], ra.containers[i:])
|
||||||
|
|
||||||
|
ra.keys[i] = key
|
||||||
|
ra.containers[i] = value
|
||||||
|
|
||||||
|
ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
|
||||||
|
copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
|
||||||
|
ra.needCopyOnWrite[i] = false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) remove(key uint32) bool {
|
||||||
|
i := ra.binarySearch(0, int64(len(ra.keys)), key)
|
||||||
|
if i >= 0 { // if a new key
|
||||||
|
ra.removeAtIndex(i)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) removeAtIndex(i int) {
|
||||||
|
copy(ra.keys[i:], ra.keys[i+1:])
|
||||||
|
copy(ra.containers[i:], ra.containers[i+1:])
|
||||||
|
|
||||||
|
copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
|
||||||
|
|
||||||
|
ra.resize(len(ra.keys) - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) setContainerAtIndex(i int, c *roaring.Bitmap) {
|
||||||
|
ra.containers[i] = c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) replaceKeyAndContainerAtIndex(i int, key uint32, c *roaring.Bitmap, mustCopyOnWrite bool) {
|
||||||
|
ra.keys[i] = key
|
||||||
|
ra.containers[i] = c
|
||||||
|
ra.needCopyOnWrite[i] = mustCopyOnWrite
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) size() int {
|
||||||
|
return len(ra.keys)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) binarySearch(begin, end int64, ikey uint32) int {
|
||||||
|
low := begin
|
||||||
|
high := end - 1
|
||||||
|
for low+16 <= high {
|
||||||
|
middleIndex := low + (high-low)/2 // avoid overflow
|
||||||
|
middleValue := ra.keys[middleIndex]
|
||||||
|
|
||||||
|
if middleValue < ikey {
|
||||||
|
low = middleIndex + 1
|
||||||
|
} else if middleValue > ikey {
|
||||||
|
high = middleIndex - 1
|
||||||
|
} else {
|
||||||
|
return int(middleIndex)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for ; low <= high; low++ {
|
||||||
|
val := ra.keys[low]
|
||||||
|
if val >= ikey {
|
||||||
|
if val == ikey {
|
||||||
|
return int(low)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -int(low + 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) equals(o interface{}) bool {
|
||||||
|
srb, ok := o.(roaringArray64)
|
||||||
|
if ok {
|
||||||
|
|
||||||
|
if srb.size() != ra.size() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i, k := range ra.keys {
|
||||||
|
if k != srb.keys[i] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, c := range ra.containers {
|
||||||
|
if !c.Equals(srb.containers[i]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) hasRunCompression() bool {
|
||||||
|
for _, c := range ra.containers {
|
||||||
|
if c.HasRunCompression() {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) advanceUntil(min uint32, pos int) int {
|
||||||
|
lower := pos + 1
|
||||||
|
|
||||||
|
if lower >= len(ra.keys) || ra.keys[lower] >= min {
|
||||||
|
return lower
|
||||||
|
}
|
||||||
|
|
||||||
|
spansize := 1
|
||||||
|
|
||||||
|
for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
|
||||||
|
spansize *= 2
|
||||||
|
}
|
||||||
|
var upper int
|
||||||
|
if lower+spansize < len(ra.keys) {
|
||||||
|
upper = lower + spansize
|
||||||
|
} else {
|
||||||
|
upper = len(ra.keys) - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if ra.keys[upper] == min {
|
||||||
|
return upper
|
||||||
|
}
|
||||||
|
|
||||||
|
if ra.keys[upper] < min {
|
||||||
|
// means
|
||||||
|
// array
|
||||||
|
// has no
|
||||||
|
// item
|
||||||
|
// >= min
|
||||||
|
// pos = array.length;
|
||||||
|
return len(ra.keys)
|
||||||
|
}
|
||||||
|
|
||||||
|
// we know that the next-smallest span was too small
|
||||||
|
lower += (spansize >> 1)
|
||||||
|
|
||||||
|
mid := 0
|
||||||
|
for lower+1 != upper {
|
||||||
|
mid = (lower + upper) >> 1
|
||||||
|
if ra.keys[mid] == min {
|
||||||
|
return mid
|
||||||
|
} else if ra.keys[mid] < min {
|
||||||
|
lower = mid
|
||||||
|
} else {
|
||||||
|
upper = mid
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return upper
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) markAllAsNeedingCopyOnWrite() {
|
||||||
|
for i := range ra.needCopyOnWrite {
|
||||||
|
ra.needCopyOnWrite[i] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) needsCopyOnWrite(i int) bool {
|
||||||
|
return ra.needCopyOnWrite[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray64) setNeedsCopyOnWrite(i int) {
|
||||||
|
ra.needCopyOnWrite[i] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// should be dirt cheap
|
||||||
|
func (ra *roaringArray64) serializedSizeInBytes() uint64 {
|
||||||
|
answer := uint64(8)
|
||||||
|
for _, c := range ra.containers {
|
||||||
|
answer += 4
|
||||||
|
answer += c.GetSerializedSizeInBytes()
|
||||||
|
}
|
||||||
|
return answer
|
||||||
|
}
|
||||||
49
vendor/github.com/RoaringBitmap/roaring/roaring64/util.go
generated
vendored
Normal file
49
vendor/github.com/RoaringBitmap/roaring/roaring64/util.go
generated
vendored
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
package roaring64
|
||||||
|
|
||||||
|
import "github.com/RoaringBitmap/roaring"
|
||||||
|
|
||||||
|
func highbits(x uint64) uint32 {
|
||||||
|
return uint32(x >> 32)
|
||||||
|
}
|
||||||
|
|
||||||
|
func lowbits(x uint64) uint32 {
|
||||||
|
return uint32(x & maxLowBit)
|
||||||
|
}
|
||||||
|
|
||||||
|
const maxLowBit = roaring.MaxUint32
|
||||||
|
const maxUint32 = roaring.MaxUint32
|
||||||
|
|
||||||
|
func minOfInt64(a, b int64) int64 {
|
||||||
|
if a < b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func minOfInt(a, b int) int {
|
||||||
|
if a < b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func maxOfInt(a, b int) int {
|
||||||
|
if a > b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func maxOfUint32(a, b uint32) uint32 {
|
||||||
|
if a > b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func minOfUint32(a, b uint32) uint32 {
|
||||||
|
if a < b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
761
vendor/github.com/RoaringBitmap/roaring/roaringarray.go
generated
vendored
Normal file
761
vendor/github.com/RoaringBitmap/roaring/roaringarray.go
generated
vendored
Normal file
@@ -0,0 +1,761 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/RoaringBitmap/roaring/internal"
|
||||||
|
)
|
||||||
|
|
||||||
|
type container interface {
|
||||||
|
// addOffset returns the (low, high) parts of the shifted container.
|
||||||
|
// Whenever one of them would be empty, nil will be returned instead to
|
||||||
|
// avoid unnecessary allocations.
|
||||||
|
addOffset(uint16) (container, container)
|
||||||
|
|
||||||
|
clone() container
|
||||||
|
and(container) container
|
||||||
|
andCardinality(container) int
|
||||||
|
iand(container) container // i stands for inplace
|
||||||
|
andNot(container) container
|
||||||
|
iandNot(container) container // i stands for inplace
|
||||||
|
isEmpty() bool
|
||||||
|
getCardinality() int
|
||||||
|
// rank returns the number of integers that are
|
||||||
|
// smaller or equal to x. rank(infinity) would be getCardinality().
|
||||||
|
rank(uint16) int
|
||||||
|
|
||||||
|
iadd(x uint16) bool // inplace, returns true if x was new.
|
||||||
|
iaddReturnMinimized(uint16) container // may change return type to minimize storage.
|
||||||
|
|
||||||
|
//addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
|
||||||
|
iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
||||||
|
|
||||||
|
iremove(x uint16) bool // inplace, returns true if x was present.
|
||||||
|
iremoveReturnMinimized(uint16) container // may change return type to minimize storage.
|
||||||
|
|
||||||
|
not(start, final int) container // range is [firstOfRange,lastOfRange)
|
||||||
|
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
||||||
|
xor(r container) container
|
||||||
|
getShortIterator() shortPeekable
|
||||||
|
iterate(cb func(x uint16) bool) bool
|
||||||
|
getReverseIterator() shortIterable
|
||||||
|
getManyIterator() manyIterable
|
||||||
|
contains(i uint16) bool
|
||||||
|
maximum() uint16
|
||||||
|
minimum() uint16
|
||||||
|
|
||||||
|
// equals is now logical equals; it does not require the
|
||||||
|
// same underlying container types, but compares across
|
||||||
|
// any of the implementations.
|
||||||
|
equals(r container) bool
|
||||||
|
|
||||||
|
fillLeastSignificant16bits(array []uint32, i int, mask uint32) int
|
||||||
|
or(r container) container
|
||||||
|
orCardinality(r container) int
|
||||||
|
isFull() bool
|
||||||
|
ior(r container) container // i stands for inplace
|
||||||
|
intersects(r container) bool // whether the two containers intersect
|
||||||
|
lazyOR(r container) container
|
||||||
|
lazyIOR(r container) container
|
||||||
|
getSizeInBytes() int
|
||||||
|
//removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
|
||||||
|
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
|
||||||
|
selectInt(x uint16) int // selectInt returns the xth integer in the container
|
||||||
|
serializedSizeInBytes() int
|
||||||
|
writeTo(io.Writer) (int, error)
|
||||||
|
|
||||||
|
numberOfRuns() int
|
||||||
|
toEfficientContainer() container
|
||||||
|
String() string
|
||||||
|
containerType() contype
|
||||||
|
}
|
||||||
|
|
||||||
|
type contype uint8
|
||||||
|
|
||||||
|
const (
|
||||||
|
bitmapContype contype = iota
|
||||||
|
arrayContype
|
||||||
|
run16Contype
|
||||||
|
run32Contype
|
||||||
|
)
|
||||||
|
|
||||||
|
// careful: range is [firstOfRange,lastOfRange]
|
||||||
|
func rangeOfOnes(start, last int) container {
|
||||||
|
if start > MaxUint16 {
|
||||||
|
panic("rangeOfOnes called with start > MaxUint16")
|
||||||
|
}
|
||||||
|
if last > MaxUint16 {
|
||||||
|
panic("rangeOfOnes called with last > MaxUint16")
|
||||||
|
}
|
||||||
|
if start < 0 {
|
||||||
|
panic("rangeOfOnes called with start < 0")
|
||||||
|
}
|
||||||
|
if last < 0 {
|
||||||
|
panic("rangeOfOnes called with last < 0")
|
||||||
|
}
|
||||||
|
return newRunContainer16Range(uint16(start), uint16(last))
|
||||||
|
}
|
||||||
|
|
||||||
|
type roaringArray struct {
|
||||||
|
keys []uint16
|
||||||
|
containers []container `msg:"-"` // don't try to serialize directly.
|
||||||
|
needCopyOnWrite []bool
|
||||||
|
copyOnWrite bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func newRoaringArray() *roaringArray {
|
||||||
|
return &roaringArray{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// runOptimize compresses the element containers to minimize space consumed.
|
||||||
|
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
|
||||||
|
// A: since we aren't changing the logical content, just the representation,
|
||||||
|
//
|
||||||
|
// we don't bother to check the needCopyOnWrite bits. We replace
|
||||||
|
// (possibly all) elements of ra.containers in-place with space
|
||||||
|
// optimized versions.
|
||||||
|
func (ra *roaringArray) runOptimize() {
|
||||||
|
for i := range ra.containers {
|
||||||
|
ra.containers[i] = ra.containers[i].toEfficientContainer()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
|
||||||
|
ra.keys = append(ra.keys, key)
|
||||||
|
ra.containers = append(ra.containers, value)
|
||||||
|
ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
|
||||||
|
mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
|
||||||
|
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
|
||||||
|
// cow only if the two request it, or if we already have a lightweight copy
|
||||||
|
copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
|
||||||
|
if !copyonwrite {
|
||||||
|
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||||
|
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
|
||||||
|
} else {
|
||||||
|
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
|
||||||
|
if !sa.needsCopyOnWrite(startingindex) {
|
||||||
|
sa.setNeedsCopyOnWrite(startingindex)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
|
||||||
|
for i := startingindex; i < end; i++ {
|
||||||
|
ra.appendWithoutCopy(sa, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
|
||||||
|
for i := startingindex; i < end; i++ {
|
||||||
|
ra.appendCopy(sa, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
|
||||||
|
// cow only if the two request it, or if we already have a lightweight copy
|
||||||
|
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||||
|
|
||||||
|
for i := 0; i < sa.size(); i++ {
|
||||||
|
if sa.keys[i] >= stoppingKey {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||||
|
if thiscopyonewrite {
|
||||||
|
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||||
|
if !sa.needsCopyOnWrite(i) {
|
||||||
|
sa.setNeedsCopyOnWrite(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||||
|
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
|
||||||
|
// cow only if the two request it, or if we already have a lightweight copy
|
||||||
|
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||||
|
|
||||||
|
startLocation := sa.getIndex(beforeStart)
|
||||||
|
if startLocation >= 0 {
|
||||||
|
startLocation++
|
||||||
|
} else {
|
||||||
|
startLocation = -startLocation - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := startLocation; i < sa.size(); i++ {
|
||||||
|
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||||
|
if thiscopyonewrite {
|
||||||
|
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||||
|
if !sa.needsCopyOnWrite(i) {
|
||||||
|
sa.setNeedsCopyOnWrite(i)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||||
|
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) removeIndexRange(begin, end int) {
|
||||||
|
if end <= begin {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
r := end - begin
|
||||||
|
|
||||||
|
copy(ra.keys[begin:], ra.keys[end:])
|
||||||
|
copy(ra.containers[begin:], ra.containers[end:])
|
||||||
|
copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
|
||||||
|
|
||||||
|
ra.resize(len(ra.keys) - r)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) resize(newsize int) {
|
||||||
|
for k := newsize; k < len(ra.containers); k++ {
|
||||||
|
ra.containers[k] = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ra.keys = ra.keys[:newsize]
|
||||||
|
ra.containers = ra.containers[:newsize]
|
||||||
|
ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) clear() {
|
||||||
|
ra.resize(0)
|
||||||
|
ra.copyOnWrite = false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) clone() *roaringArray {
|
||||||
|
|
||||||
|
sa := roaringArray{}
|
||||||
|
sa.copyOnWrite = ra.copyOnWrite
|
||||||
|
|
||||||
|
// this is where copyOnWrite is used.
|
||||||
|
if ra.copyOnWrite {
|
||||||
|
sa.keys = make([]uint16, len(ra.keys))
|
||||||
|
copy(sa.keys, ra.keys)
|
||||||
|
sa.containers = make([]container, len(ra.containers))
|
||||||
|
copy(sa.containers, ra.containers)
|
||||||
|
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||||
|
|
||||||
|
ra.markAllAsNeedingCopyOnWrite()
|
||||||
|
sa.markAllAsNeedingCopyOnWrite()
|
||||||
|
|
||||||
|
// sa.needCopyOnWrite is shared
|
||||||
|
} else {
|
||||||
|
// make a full copy
|
||||||
|
|
||||||
|
sa.keys = make([]uint16, len(ra.keys))
|
||||||
|
copy(sa.keys, ra.keys)
|
||||||
|
|
||||||
|
sa.containers = make([]container, len(ra.containers))
|
||||||
|
for i := range sa.containers {
|
||||||
|
sa.containers[i] = ra.containers[i].clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||||
|
}
|
||||||
|
return &sa
|
||||||
|
}
|
||||||
|
|
||||||
|
// clone all containers which have needCopyOnWrite set to true
|
||||||
|
// This can be used to make sure it is safe to munmap a []byte
|
||||||
|
// that the roaring array may still have a reference to.
|
||||||
|
func (ra *roaringArray) cloneCopyOnWriteContainers() {
|
||||||
|
for i, needCopyOnWrite := range ra.needCopyOnWrite {
|
||||||
|
if needCopyOnWrite {
|
||||||
|
ra.containers[i] = ra.containers[i].clone()
|
||||||
|
ra.needCopyOnWrite[i] = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// unused function:
|
||||||
|
//func (ra *roaringArray) containsKey(x uint16) bool {
|
||||||
|
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
|
||||||
|
//}
|
||||||
|
|
||||||
|
func (ra *roaringArray) getContainer(x uint16) container {
|
||||||
|
i := ra.binarySearch(0, int64(len(ra.keys)), x)
|
||||||
|
if i < 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return ra.containers[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) getContainerAtIndex(i int) container {
|
||||||
|
return ra.containers[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
|
||||||
|
c := ra.getContainerAtIndex(i)
|
||||||
|
switch t := c.(type) {
|
||||||
|
case *arrayContainer:
|
||||||
|
c = t.toBitmapContainer()
|
||||||
|
case *runContainer16:
|
||||||
|
if !t.isFull() {
|
||||||
|
c = t.toBitmapContainer()
|
||||||
|
}
|
||||||
|
case *bitmapContainer:
|
||||||
|
if needsWriteable && ra.needCopyOnWrite[i] {
|
||||||
|
c = ra.containers[i].clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
// getUnionedWritableContainer switches behavior for in-place Or
|
||||||
|
// depending on whether the container requires a copy on write.
|
||||||
|
// If it does using the non-inplace or() method leads to fewer allocations.
|
||||||
|
func (ra *roaringArray) getUnionedWritableContainer(pos int, other container) container {
|
||||||
|
if ra.needCopyOnWrite[pos] {
|
||||||
|
return ra.getContainerAtIndex(pos).or(other)
|
||||||
|
}
|
||||||
|
return ra.getContainerAtIndex(pos).ior(other)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
|
||||||
|
if ra.needCopyOnWrite[i] {
|
||||||
|
ra.containers[i] = ra.containers[i].clone()
|
||||||
|
ra.needCopyOnWrite[i] = false
|
||||||
|
}
|
||||||
|
return ra.containers[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) getIndex(x uint16) int {
|
||||||
|
// before the binary search, we optimize for frequent cases
|
||||||
|
size := len(ra.keys)
|
||||||
|
if (size == 0) || (ra.keys[size-1] == x) {
|
||||||
|
return size - 1
|
||||||
|
}
|
||||||
|
return ra.binarySearch(0, int64(size), x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
|
||||||
|
return ra.keys[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
|
||||||
|
ra.keys = append(ra.keys, 0)
|
||||||
|
ra.containers = append(ra.containers, nil)
|
||||||
|
|
||||||
|
copy(ra.keys[i+1:], ra.keys[i:])
|
||||||
|
copy(ra.containers[i+1:], ra.containers[i:])
|
||||||
|
|
||||||
|
ra.keys[i] = key
|
||||||
|
ra.containers[i] = value
|
||||||
|
|
||||||
|
ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
|
||||||
|
copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
|
||||||
|
ra.needCopyOnWrite[i] = false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) remove(key uint16) bool {
|
||||||
|
i := ra.binarySearch(0, int64(len(ra.keys)), key)
|
||||||
|
if i >= 0 { // if a new key
|
||||||
|
ra.removeAtIndex(i)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) removeAtIndex(i int) {
|
||||||
|
copy(ra.keys[i:], ra.keys[i+1:])
|
||||||
|
copy(ra.containers[i:], ra.containers[i+1:])
|
||||||
|
|
||||||
|
copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
|
||||||
|
|
||||||
|
ra.resize(len(ra.keys) - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) setContainerAtIndex(i int, c container) {
|
||||||
|
ra.containers[i] = c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
|
||||||
|
ra.keys[i] = key
|
||||||
|
ra.containers[i] = c
|
||||||
|
ra.needCopyOnWrite[i] = mustCopyOnWrite
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) size() int {
|
||||||
|
return len(ra.keys)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
|
||||||
|
low := begin
|
||||||
|
high := end - 1
|
||||||
|
for low+16 <= high {
|
||||||
|
middleIndex := low + (high-low)/2 // avoid overflow
|
||||||
|
middleValue := ra.keys[middleIndex]
|
||||||
|
|
||||||
|
if middleValue < ikey {
|
||||||
|
low = middleIndex + 1
|
||||||
|
} else if middleValue > ikey {
|
||||||
|
high = middleIndex - 1
|
||||||
|
} else {
|
||||||
|
return int(middleIndex)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for ; low <= high; low++ {
|
||||||
|
val := ra.keys[low]
|
||||||
|
if val >= ikey {
|
||||||
|
if val == ikey {
|
||||||
|
return int(low)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -int(low + 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) equals(o interface{}) bool {
|
||||||
|
srb, ok := o.(roaringArray)
|
||||||
|
if ok {
|
||||||
|
|
||||||
|
if srb.size() != ra.size() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i, k := range ra.keys {
|
||||||
|
if k != srb.keys[i] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, c := range ra.containers {
|
||||||
|
if !c.equals(srb.containers[i]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) headerSize() uint64 {
|
||||||
|
size := uint64(len(ra.keys))
|
||||||
|
if ra.hasRunCompression() {
|
||||||
|
if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
|
||||||
|
return 4 + (size+7)/8 + 4*size
|
||||||
|
}
|
||||||
|
return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
|
||||||
|
}
|
||||||
|
return 4 + 4 + 8*size
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// should be dirt cheap
|
||||||
|
func (ra *roaringArray) serializedSizeInBytes() uint64 {
|
||||||
|
answer := ra.headerSize()
|
||||||
|
for _, c := range ra.containers {
|
||||||
|
answer += uint64(c.serializedSizeInBytes())
|
||||||
|
}
|
||||||
|
return answer
|
||||||
|
}
|
||||||
|
|
||||||
|
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||||
|
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
|
||||||
|
hasRun := ra.hasRunCompression()
|
||||||
|
isRunSizeInBytes := 0
|
||||||
|
cookieSize := 8
|
||||||
|
if hasRun {
|
||||||
|
cookieSize = 4
|
||||||
|
isRunSizeInBytes = (len(ra.keys) + 7) / 8
|
||||||
|
}
|
||||||
|
descriptiveHeaderSize := 4 * len(ra.keys)
|
||||||
|
preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize
|
||||||
|
|
||||||
|
buf := make([]byte, preambleSize+4*len(ra.keys))
|
||||||
|
|
||||||
|
nw := 0
|
||||||
|
|
||||||
|
if hasRun {
|
||||||
|
binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
|
||||||
|
nw += 2
|
||||||
|
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
|
||||||
|
nw += 2
|
||||||
|
// compute isRun bitmap without temporary allocation
|
||||||
|
var runbitmapslice = buf[nw : nw+isRunSizeInBytes]
|
||||||
|
for i, c := range ra.containers {
|
||||||
|
switch c.(type) {
|
||||||
|
case *runContainer16:
|
||||||
|
runbitmapslice[i/8] |= 1 << (uint(i) % 8)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nw += isRunSizeInBytes
|
||||||
|
} else {
|
||||||
|
binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
|
||||||
|
nw += 4
|
||||||
|
binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
|
||||||
|
nw += 4
|
||||||
|
}
|
||||||
|
|
||||||
|
// descriptive header
|
||||||
|
for i, key := range ra.keys {
|
||||||
|
binary.LittleEndian.PutUint16(buf[nw:], key)
|
||||||
|
nw += 2
|
||||||
|
c := ra.containers[i]
|
||||||
|
binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
|
||||||
|
nw += 2
|
||||||
|
}
|
||||||
|
|
||||||
|
startOffset := int64(preambleSize + 4*len(ra.keys))
|
||||||
|
if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
|
||||||
|
// offset header
|
||||||
|
for _, c := range ra.containers {
|
||||||
|
binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
|
||||||
|
nw += 4
|
||||||
|
switch rc := c.(type) {
|
||||||
|
case *runContainer16:
|
||||||
|
startOffset += 2 + int64(len(rc.iv))*4
|
||||||
|
default:
|
||||||
|
startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
written, err := w.Write(buf[:nw])
|
||||||
|
if err != nil {
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
n += int64(written)
|
||||||
|
|
||||||
|
for _, c := range ra.containers {
|
||||||
|
written, err := c.writeTo(w)
|
||||||
|
if err != nil {
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
n += int64(written)
|
||||||
|
}
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||||
|
func (ra *roaringArray) toBytes() ([]byte, error) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
_, err := ra.writeTo(&buf)
|
||||||
|
return buf.Bytes(), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads a serialized roaringArray from a byte slice.
|
||||||
|
func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte) (int64, error) {
|
||||||
|
var cookie uint32
|
||||||
|
var err error
|
||||||
|
if len(cookieHeader) > 0 && len(cookieHeader) != 4 {
|
||||||
|
return int64(len(cookieHeader)), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: incorrect size of cookie header")
|
||||||
|
}
|
||||||
|
if len(cookieHeader) == 4 {
|
||||||
|
cookie = binary.LittleEndian.Uint32(cookieHeader)
|
||||||
|
} else {
|
||||||
|
cookie, err = stream.ReadUInt32()
|
||||||
|
if err != nil {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If NextReturnsSafeSlice is false, then willNeedCopyOnWrite should be true
|
||||||
|
willNeedCopyOnWrite := !stream.NextReturnsSafeSlice()
|
||||||
|
|
||||||
|
var size uint32
|
||||||
|
var isRunBitmap []byte
|
||||||
|
|
||||||
|
if cookie&0x0000FFFF == serialCookie {
|
||||||
|
size = uint32(cookie>>16 + 1)
|
||||||
|
// create is-run-container bitmap
|
||||||
|
isRunBitmapSize := (int(size) + 7) / 8
|
||||||
|
isRunBitmap, err = stream.Next(isRunBitmapSize)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
|
||||||
|
}
|
||||||
|
} else if cookie == serialCookieNoRunContainer {
|
||||||
|
size, err = stream.ReadUInt32()
|
||||||
|
if err != nil {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
|
||||||
|
}
|
||||||
|
|
||||||
|
if size > (1 << 16) {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
|
||||||
|
}
|
||||||
|
|
||||||
|
// descriptive header
|
||||||
|
buf, err := stream.Next(2 * 2 * int(size))
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
keycard := byteSliceAsUint16Slice(buf)
|
||||||
|
|
||||||
|
if isRunBitmap == nil || size >= noOffsetThreshold {
|
||||||
|
if err := stream.SkipBytes(int(size) * 4); err != nil {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate slices upfront as number of containers is known
|
||||||
|
if cap(ra.containers) >= int(size) {
|
||||||
|
ra.containers = ra.containers[:size]
|
||||||
|
} else {
|
||||||
|
ra.containers = make([]container, size)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cap(ra.keys) >= int(size) {
|
||||||
|
ra.keys = ra.keys[:size]
|
||||||
|
} else {
|
||||||
|
ra.keys = make([]uint16, size)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cap(ra.needCopyOnWrite) >= int(size) {
|
||||||
|
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
|
||||||
|
} else {
|
||||||
|
ra.needCopyOnWrite = make([]bool, size)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := uint32(0); i < size; i++ {
|
||||||
|
key := keycard[2*i]
|
||||||
|
card := int(keycard[2*i+1]) + 1
|
||||||
|
ra.keys[i] = key
|
||||||
|
ra.needCopyOnWrite[i] = willNeedCopyOnWrite
|
||||||
|
|
||||||
|
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
|
||||||
|
// run container
|
||||||
|
nr, err := stream.ReadUInt16()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
buf, err := stream.Next(int(nr) * 4)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
nb := runContainer16{
|
||||||
|
iv: byteSliceAsInterval16Slice(buf),
|
||||||
|
}
|
||||||
|
|
||||||
|
ra.containers[i] = &nb
|
||||||
|
} else if card > arrayDefaultMaxSize {
|
||||||
|
// bitmap container
|
||||||
|
buf, err := stream.Next(arrayDefaultMaxSize * 2)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
nb := bitmapContainer{
|
||||||
|
cardinality: card,
|
||||||
|
bitmap: byteSliceAsUint64Slice(buf),
|
||||||
|
}
|
||||||
|
|
||||||
|
ra.containers[i] = &nb
|
||||||
|
} else {
|
||||||
|
// array container
|
||||||
|
buf, err := stream.Next(card * 2)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return stream.GetReadBytes(), fmt.Errorf("failed to read array container: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
nb := arrayContainer{
|
||||||
|
byteSliceAsUint16Slice(buf),
|
||||||
|
}
|
||||||
|
|
||||||
|
ra.containers[i] = &nb
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stream.GetReadBytes(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) hasRunCompression() bool {
|
||||||
|
for _, c := range ra.containers {
|
||||||
|
switch c.(type) {
|
||||||
|
case *runContainer16:
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
|
||||||
|
lower := pos + 1
|
||||||
|
|
||||||
|
if lower >= len(ra.keys) || ra.keys[lower] >= min {
|
||||||
|
return lower
|
||||||
|
}
|
||||||
|
|
||||||
|
spansize := 1
|
||||||
|
|
||||||
|
for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
|
||||||
|
spansize *= 2
|
||||||
|
}
|
||||||
|
var upper int
|
||||||
|
if lower+spansize < len(ra.keys) {
|
||||||
|
upper = lower + spansize
|
||||||
|
} else {
|
||||||
|
upper = len(ra.keys) - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if ra.keys[upper] == min {
|
||||||
|
return upper
|
||||||
|
}
|
||||||
|
|
||||||
|
if ra.keys[upper] < min {
|
||||||
|
// means
|
||||||
|
// array
|
||||||
|
// has no
|
||||||
|
// item
|
||||||
|
// >= min
|
||||||
|
// pos = array.length;
|
||||||
|
return len(ra.keys)
|
||||||
|
}
|
||||||
|
|
||||||
|
// we know that the next-smallest span was too small
|
||||||
|
lower += (spansize >> 1)
|
||||||
|
|
||||||
|
mid := 0
|
||||||
|
for lower+1 != upper {
|
||||||
|
mid = (lower + upper) >> 1
|
||||||
|
if ra.keys[mid] == min {
|
||||||
|
return mid
|
||||||
|
} else if ra.keys[mid] < min {
|
||||||
|
lower = mid
|
||||||
|
} else {
|
||||||
|
upper = mid
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return upper
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) markAllAsNeedingCopyOnWrite() {
|
||||||
|
for i := range ra.needCopyOnWrite {
|
||||||
|
ra.needCopyOnWrite[i] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) needsCopyOnWrite(i int) bool {
|
||||||
|
return ra.needCopyOnWrite[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
|
||||||
|
ra.needCopyOnWrite[i] = true
|
||||||
|
}
|
||||||
2624
vendor/github.com/RoaringBitmap/roaring/runcontainer.go
generated
vendored
Normal file
2624
vendor/github.com/RoaringBitmap/roaring/runcontainer.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18
vendor/github.com/RoaringBitmap/roaring/serialization.go
generated
vendored
Normal file
18
vendor/github.com/RoaringBitmap/roaring/serialization.go
generated
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
|
// writeTo for runContainer16 follows this
|
||||||
|
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||||
|
func (b *runContainer16) writeTo(stream io.Writer) (int, error) {
|
||||||
|
buf := make([]byte, 2+4*len(b.iv))
|
||||||
|
binary.LittleEndian.PutUint16(buf[0:], uint16(len(b.iv)))
|
||||||
|
for i, v := range b.iv {
|
||||||
|
binary.LittleEndian.PutUint16(buf[2+i*4:], v.start)
|
||||||
|
binary.LittleEndian.PutUint16(buf[2+2+i*4:], v.length)
|
||||||
|
}
|
||||||
|
return stream.Write(buf)
|
||||||
|
}
|
||||||
145
vendor/github.com/RoaringBitmap/roaring/serialization_generic.go
generated
vendored
Normal file
145
vendor/github.com/RoaringBitmap/roaring/serialization_generic.go
generated
vendored
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
//go:build (!amd64 && !386 && !arm && !arm64 && !ppc64le && !mipsle && !mips64le && !mips64p32le && !wasm) || appengine
|
||||||
|
// +build !amd64,!386,!arm,!arm64,!ppc64le,!mipsle,!mips64le,!mips64p32le,!wasm appengine
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (b *arrayContainer) writeTo(stream io.Writer) (int, error) {
|
||||||
|
buf := make([]byte, 2*len(b.content))
|
||||||
|
for i, v := range b.content {
|
||||||
|
base := i * 2
|
||||||
|
buf[base] = byte(v)
|
||||||
|
buf[base+1] = byte(v >> 8)
|
||||||
|
}
|
||||||
|
return stream.Write(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *arrayContainer) readFrom(stream io.Reader) (int, error) {
|
||||||
|
err := binary.Read(stream, binary.LittleEndian, b.content)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return 2 * len(b.content), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) {
|
||||||
|
if b.cardinality <= arrayDefaultMaxSize {
|
||||||
|
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write set
|
||||||
|
buf := make([]byte, 8*len(b.bitmap))
|
||||||
|
for i, v := range b.bitmap {
|
||||||
|
base := i * 8
|
||||||
|
buf[base] = byte(v)
|
||||||
|
buf[base+1] = byte(v >> 8)
|
||||||
|
buf[base+2] = byte(v >> 16)
|
||||||
|
buf[base+3] = byte(v >> 24)
|
||||||
|
buf[base+4] = byte(v >> 32)
|
||||||
|
buf[base+5] = byte(v >> 40)
|
||||||
|
buf[base+6] = byte(v >> 48)
|
||||||
|
buf[base+7] = byte(v >> 56)
|
||||||
|
}
|
||||||
|
return stream.Write(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *bitmapContainer) readFrom(stream io.Reader) (int, error) {
|
||||||
|
err := binary.Read(stream, binary.LittleEndian, b.bitmap)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
b.computeCardinality()
|
||||||
|
return 8 * len(b.bitmap), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
||||||
|
by := make([]byte, len(bc.bitmap)*8)
|
||||||
|
for i := range bc.bitmap {
|
||||||
|
binary.LittleEndian.PutUint64(by[i*8:], bc.bitmap[i])
|
||||||
|
}
|
||||||
|
return by
|
||||||
|
}
|
||||||
|
|
||||||
|
func uint64SliceAsByteSlice(slice []uint64) []byte {
|
||||||
|
by := make([]byte, len(slice)*8)
|
||||||
|
|
||||||
|
for i, v := range slice {
|
||||||
|
binary.LittleEndian.PutUint64(by[i*8:], v)
|
||||||
|
}
|
||||||
|
|
||||||
|
return by
|
||||||
|
}
|
||||||
|
|
||||||
|
func uint16SliceAsByteSlice(slice []uint16) []byte {
|
||||||
|
by := make([]byte, len(slice)*2)
|
||||||
|
|
||||||
|
for i, v := range slice {
|
||||||
|
binary.LittleEndian.PutUint16(by[i*2:], v)
|
||||||
|
}
|
||||||
|
|
||||||
|
return by
|
||||||
|
}
|
||||||
|
|
||||||
|
func interval16SliceAsByteSlice(slice []interval16) []byte {
|
||||||
|
by := make([]byte, len(slice)*4)
|
||||||
|
|
||||||
|
for i, v := range slice {
|
||||||
|
binary.LittleEndian.PutUint16(by[i*2:], v.start)
|
||||||
|
binary.LittleEndian.PutUint16(by[i*2+2:], v.length)
|
||||||
|
}
|
||||||
|
|
||||||
|
return by
|
||||||
|
}
|
||||||
|
|
||||||
|
func byteSliceAsUint16Slice(slice []byte) []uint16 {
|
||||||
|
if len(slice)%2 != 0 {
|
||||||
|
panic("Slice size should be divisible by 2")
|
||||||
|
}
|
||||||
|
|
||||||
|
b := make([]uint16, len(slice)/2)
|
||||||
|
|
||||||
|
for i := range b {
|
||||||
|
b[i] = binary.LittleEndian.Uint16(slice[2*i:])
|
||||||
|
}
|
||||||
|
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func byteSliceAsUint64Slice(slice []byte) []uint64 {
|
||||||
|
if len(slice)%8 != 0 {
|
||||||
|
panic("Slice size should be divisible by 8")
|
||||||
|
}
|
||||||
|
|
||||||
|
b := make([]uint64, len(slice)/8)
|
||||||
|
|
||||||
|
for i := range b {
|
||||||
|
b[i] = binary.LittleEndian.Uint64(slice[8*i:])
|
||||||
|
}
|
||||||
|
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts a byte slice to a interval16 slice.
|
||||||
|
// The function assumes that the slice byte buffer is run container data
|
||||||
|
// encoded according to Roaring Format Spec
|
||||||
|
func byteSliceAsInterval16Slice(byteSlice []byte) []interval16 {
|
||||||
|
if len(byteSlice)%4 != 0 {
|
||||||
|
panic("Slice size should be divisible by 4")
|
||||||
|
}
|
||||||
|
|
||||||
|
intervalSlice := make([]interval16, len(byteSlice)/4)
|
||||||
|
|
||||||
|
for i := range intervalSlice {
|
||||||
|
intervalSlice[i] = interval16{
|
||||||
|
start: binary.LittleEndian.Uint16(byteSlice[i*4:]),
|
||||||
|
length: binary.LittleEndian.Uint16(byteSlice[i*4+2:]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return intervalSlice
|
||||||
|
}
|
||||||
662
vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go
generated
vendored
Normal file
662
vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go
generated
vendored
Normal file
@@ -0,0 +1,662 @@
|
|||||||
|
//go:build (386 && !appengine) || (amd64 && !appengine) || (arm && !appengine) || (arm64 && !appengine) || (ppc64le && !appengine) || (mipsle && !appengine) || (mips64le && !appengine) || (mips64p32le && !appengine) || (wasm && !appengine)
|
||||||
|
// +build 386,!appengine amd64,!appengine arm,!appengine arm64,!appengine ppc64le,!appengine mipsle,!appengine mips64le,!appengine mips64p32le,!appengine wasm,!appengine
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"reflect"
|
||||||
|
"runtime"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) {
|
||||||
|
buf := uint16SliceAsByteSlice(ac.content)
|
||||||
|
return stream.Write(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
|
||||||
|
if bc.cardinality <= arrayDefaultMaxSize {
|
||||||
|
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
|
||||||
|
}
|
||||||
|
buf := uint64SliceAsByteSlice(bc.bitmap)
|
||||||
|
return stream.Write(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
func uint64SliceAsByteSlice(slice []uint64) []byte {
|
||||||
|
// make a new slice header
|
||||||
|
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
|
||||||
|
// update its capacity and length
|
||||||
|
header.Len *= 8
|
||||||
|
header.Cap *= 8
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||||
|
runtime.KeepAlive(&slice)
|
||||||
|
|
||||||
|
// return it
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func uint16SliceAsByteSlice(slice []uint16) []byte {
|
||||||
|
// make a new slice header
|
||||||
|
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
|
||||||
|
// update its capacity and length
|
||||||
|
header.Len *= 2
|
||||||
|
header.Cap *= 2
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||||
|
runtime.KeepAlive(&slice)
|
||||||
|
|
||||||
|
// return it
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func interval16SliceAsByteSlice(slice []interval16) []byte {
|
||||||
|
// make a new slice header
|
||||||
|
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
|
||||||
|
// update its capacity and length
|
||||||
|
header.Len *= 4
|
||||||
|
header.Cap *= 4
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||||
|
runtime.KeepAlive(&slice)
|
||||||
|
|
||||||
|
// return it
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
||||||
|
return uint64SliceAsByteSlice(bc.bitmap)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deserialization code follows
|
||||||
|
|
||||||
|
// //
|
||||||
|
// These methods (byteSliceAsUint16Slice,...) do not make copies,
|
||||||
|
// they are pointer-based (unsafe). The caller is responsible to
|
||||||
|
// ensure that the input slice does not get garbage collected, deleted
|
||||||
|
// or modified while you hold the returned slince.
|
||||||
|
// //
|
||||||
|
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
|
||||||
|
if len(slice)%2 != 0 {
|
||||||
|
panic("Slice size should be divisible by 2")
|
||||||
|
}
|
||||||
|
// reference: https://go101.org/article/unsafe.html
|
||||||
|
|
||||||
|
// make a new slice header
|
||||||
|
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||||
|
|
||||||
|
// transfer the data from the given slice to a new variable (our result)
|
||||||
|
rHeader.Data = bHeader.Data
|
||||||
|
rHeader.Len = bHeader.Len / 2
|
||||||
|
rHeader.Cap = bHeader.Cap / 2
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
|
||||||
|
if len(slice)%8 != 0 {
|
||||||
|
panic("Slice size should be divisible by 8")
|
||||||
|
}
|
||||||
|
// reference: https://go101.org/article/unsafe.html
|
||||||
|
|
||||||
|
// make a new slice header
|
||||||
|
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||||
|
|
||||||
|
// transfer the data from the given slice to a new variable (our result)
|
||||||
|
rHeader.Data = bHeader.Data
|
||||||
|
rHeader.Len = bHeader.Len / 8
|
||||||
|
rHeader.Cap = bHeader.Cap / 8
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
|
||||||
|
if len(slice)%4 != 0 {
|
||||||
|
panic("Slice size should be divisible by 4")
|
||||||
|
}
|
||||||
|
// reference: https://go101.org/article/unsafe.html
|
||||||
|
|
||||||
|
// make a new slice header
|
||||||
|
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||||
|
|
||||||
|
// transfer the data from the given slice to a new variable (our result)
|
||||||
|
rHeader.Data = bHeader.Data
|
||||||
|
rHeader.Len = bHeader.Len / 4
|
||||||
|
rHeader.Cap = bHeader.Cap / 4
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func byteSliceAsContainerSlice(slice []byte) (result []container) {
|
||||||
|
var c container
|
||||||
|
containerSize := int(unsafe.Sizeof(c))
|
||||||
|
|
||||||
|
if len(slice)%containerSize != 0 {
|
||||||
|
panic("Slice size should be divisible by unsafe.Sizeof(container)")
|
||||||
|
}
|
||||||
|
// reference: https://go101.org/article/unsafe.html
|
||||||
|
|
||||||
|
// make a new slice header
|
||||||
|
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||||
|
|
||||||
|
// transfer the data from the given slice to a new variable (our result)
|
||||||
|
rHeader.Data = bHeader.Data
|
||||||
|
rHeader.Len = bHeader.Len / containerSize
|
||||||
|
rHeader.Cap = bHeader.Cap / containerSize
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func byteSliceAsBitsetSlice(slice []byte) (result []bitmapContainer) {
|
||||||
|
bitsetSize := int(unsafe.Sizeof(bitmapContainer{}))
|
||||||
|
if len(slice)%bitsetSize != 0 {
|
||||||
|
panic("Slice size should be divisible by unsafe.Sizeof(bitmapContainer)")
|
||||||
|
}
|
||||||
|
// reference: https://go101.org/article/unsafe.html
|
||||||
|
|
||||||
|
// make a new slice header
|
||||||
|
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||||
|
|
||||||
|
// transfer the data from the given slice to a new variable (our result)
|
||||||
|
rHeader.Data = bHeader.Data
|
||||||
|
rHeader.Len = bHeader.Len / bitsetSize
|
||||||
|
rHeader.Cap = bHeader.Cap / bitsetSize
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func byteSliceAsArraySlice(slice []byte) (result []arrayContainer) {
|
||||||
|
arraySize := int(unsafe.Sizeof(arrayContainer{}))
|
||||||
|
if len(slice)%arraySize != 0 {
|
||||||
|
panic("Slice size should be divisible by unsafe.Sizeof(arrayContainer)")
|
||||||
|
}
|
||||||
|
// reference: https://go101.org/article/unsafe.html
|
||||||
|
|
||||||
|
// make a new slice header
|
||||||
|
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||||
|
|
||||||
|
// transfer the data from the given slice to a new variable (our result)
|
||||||
|
rHeader.Data = bHeader.Data
|
||||||
|
rHeader.Len = bHeader.Len / arraySize
|
||||||
|
rHeader.Cap = bHeader.Cap / arraySize
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func byteSliceAsRun16Slice(slice []byte) (result []runContainer16) {
|
||||||
|
run16Size := int(unsafe.Sizeof(runContainer16{}))
|
||||||
|
if len(slice)%run16Size != 0 {
|
||||||
|
panic("Slice size should be divisible by unsafe.Sizeof(runContainer16)")
|
||||||
|
}
|
||||||
|
// reference: https://go101.org/article/unsafe.html
|
||||||
|
|
||||||
|
// make a new slice header
|
||||||
|
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||||
|
|
||||||
|
// transfer the data from the given slice to a new variable (our result)
|
||||||
|
rHeader.Data = bHeader.Data
|
||||||
|
rHeader.Len = bHeader.Len / run16Size
|
||||||
|
rHeader.Cap = bHeader.Cap / run16Size
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func byteSliceAsBoolSlice(slice []byte) (result []bool) {
|
||||||
|
boolSize := int(unsafe.Sizeof(true))
|
||||||
|
if len(slice)%boolSize != 0 {
|
||||||
|
panic("Slice size should be divisible by unsafe.Sizeof(bool)")
|
||||||
|
}
|
||||||
|
// reference: https://go101.org/article/unsafe.html
|
||||||
|
|
||||||
|
// make a new slice header
|
||||||
|
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||||
|
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||||
|
|
||||||
|
// transfer the data from the given slice to a new variable (our result)
|
||||||
|
rHeader.Data = bHeader.Data
|
||||||
|
rHeader.Len = bHeader.Len / boolSize
|
||||||
|
rHeader.Cap = bHeader.Cap / boolSize
|
||||||
|
|
||||||
|
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||||
|
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// FrozenView creates a static view of a serialized bitmap stored in buf.
|
||||||
|
// It uses CRoaring's frozen bitmap format.
|
||||||
|
//
|
||||||
|
// The format specification is available here:
|
||||||
|
// https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/src/roaring.c#L2756-L2783
|
||||||
|
//
|
||||||
|
// The provided byte array (buf) is expected to be a constant.
|
||||||
|
// The function makes the best effort attempt not to copy data.
|
||||||
|
// Only little endian is supported. The function will err if it detects a big
|
||||||
|
// endian serialized file.
|
||||||
|
// You should take care not to modify buff as it will likely result in
|
||||||
|
// unexpected program behavior.
|
||||||
|
// If said buffer comes from a memory map, it's advisable to give it read
|
||||||
|
// only permissions, either at creation or by calling Mprotect from the
|
||||||
|
// golang.org/x/sys/unix package.
|
||||||
|
//
|
||||||
|
// Resulting bitmaps are effectively immutable in the following sense:
|
||||||
|
// a copy-on-write marker is used so that when you modify the resulting
|
||||||
|
// bitmap, copies of selected data (containers) are made.
|
||||||
|
// You should *not* change the copy-on-write status of the resulting
|
||||||
|
// bitmaps (SetCopyOnWrite).
|
||||||
|
//
|
||||||
|
// If buf becomes unavailable, then a bitmap created with
|
||||||
|
// FromBuffer would be effectively broken. Furthermore, any
|
||||||
|
// bitmap derived from this bitmap (e.g., via Or, And) might
|
||||||
|
// also be broken. Thus, before making buf unavailable, you should
|
||||||
|
// call CloneCopyOnWriteContainers on all such bitmaps.
|
||||||
|
func (rb *Bitmap) FrozenView(buf []byte) error {
|
||||||
|
return rb.highlowcontainer.frozenView(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verbatim specification from CRoaring.
|
||||||
|
*
|
||||||
|
* FROZEN SERIALIZATION FORMAT DESCRIPTION
|
||||||
|
*
|
||||||
|
* -- (beginning must be aligned by 32 bytes) --
|
||||||
|
* <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
|
||||||
|
* <run_data> rle16_t[total number of rle elements in all run containers]
|
||||||
|
* <array_data> uint16_t[total number of array elements in all array containers]
|
||||||
|
* <keys> uint16_t[num_containers]
|
||||||
|
* <counts> uint16_t[num_containers]
|
||||||
|
* <typecodes> uint8_t[num_containers]
|
||||||
|
* <header> uint32_t
|
||||||
|
*
|
||||||
|
* <header> is a 4-byte value which is a bit union of frozenCookie (15 bits)
|
||||||
|
* and the number of containers (17 bits).
|
||||||
|
*
|
||||||
|
* <counts> stores number of elements for every container.
|
||||||
|
* Its meaning depends on container type.
|
||||||
|
* For array and bitset containers, this value is the container cardinality minus one.
|
||||||
|
* For run container, it is the number of rle_t elements (n_runs).
|
||||||
|
*
|
||||||
|
* <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
|
||||||
|
* all containers of respective type.
|
||||||
|
*
|
||||||
|
* <*_data> and <keys> are kept close together because they are not accessed
|
||||||
|
* during deserilization. This may reduce IO in case of large mmaped bitmaps.
|
||||||
|
* All members have their native alignments during deserilization except <header>,
|
||||||
|
* which is not guaranteed to be aligned by 4 bytes.
|
||||||
|
*/
|
||||||
|
const frozenCookie = 13766
|
||||||
|
|
||||||
|
var (
|
||||||
|
// ErrFrozenBitmapInvalidCookie is returned when the header does not contain the frozenCookie.
|
||||||
|
ErrFrozenBitmapInvalidCookie = errors.New("header does not contain the frozenCookie")
|
||||||
|
// ErrFrozenBitmapBigEndian is returned when the header is big endian.
|
||||||
|
ErrFrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported")
|
||||||
|
// ErrFrozenBitmapIncomplete is returned when the buffer is too small to contain a frozen bitmap.
|
||||||
|
ErrFrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap")
|
||||||
|
// ErrFrozenBitmapOverpopulated is returned when the number of containers is too large.
|
||||||
|
ErrFrozenBitmapOverpopulated = errors.New("too many containers")
|
||||||
|
// ErrFrozenBitmapUnexpectedData is returned when the buffer contains unexpected data.
|
||||||
|
ErrFrozenBitmapUnexpectedData = errors.New("spurious data in input")
|
||||||
|
// ErrFrozenBitmapInvalidTypecode is returned when the typecode is invalid.
|
||||||
|
ErrFrozenBitmapInvalidTypecode = errors.New("unrecognized typecode")
|
||||||
|
// ErrFrozenBitmapBufferTooSmall is returned when the buffer is too small.
|
||||||
|
ErrFrozenBitmapBufferTooSmall = errors.New("buffer too small")
|
||||||
|
)
|
||||||
|
|
||||||
|
func (ra *roaringArray) frozenView(buf []byte) error {
|
||||||
|
if len(buf) < 4 {
|
||||||
|
return ErrFrozenBitmapIncomplete
|
||||||
|
}
|
||||||
|
|
||||||
|
headerBE := binary.BigEndian.Uint32(buf[len(buf)-4:])
|
||||||
|
if headerBE&0x7fff == frozenCookie {
|
||||||
|
return ErrFrozenBitmapBigEndian
|
||||||
|
}
|
||||||
|
|
||||||
|
header := binary.LittleEndian.Uint32(buf[len(buf)-4:])
|
||||||
|
buf = buf[:len(buf)-4]
|
||||||
|
|
||||||
|
if header&0x7fff != frozenCookie {
|
||||||
|
return ErrFrozenBitmapInvalidCookie
|
||||||
|
}
|
||||||
|
|
||||||
|
nCont := int(header >> 15)
|
||||||
|
if nCont > (1 << 16) {
|
||||||
|
return ErrFrozenBitmapOverpopulated
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1 byte per type, 2 bytes per key, 2 bytes per count.
|
||||||
|
if len(buf) < 5*nCont {
|
||||||
|
return ErrFrozenBitmapIncomplete
|
||||||
|
}
|
||||||
|
|
||||||
|
types := buf[len(buf)-nCont:]
|
||||||
|
buf = buf[:len(buf)-nCont]
|
||||||
|
|
||||||
|
counts := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
|
||||||
|
buf = buf[:len(buf)-2*nCont]
|
||||||
|
|
||||||
|
keys := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
|
||||||
|
buf = buf[:len(buf)-2*nCont]
|
||||||
|
|
||||||
|
nBitmap, nArray, nRun := 0, 0, 0
|
||||||
|
nArrayEl, nRunEl := 0, 0
|
||||||
|
for i, t := range types {
|
||||||
|
switch t {
|
||||||
|
case 1:
|
||||||
|
nBitmap++
|
||||||
|
case 2:
|
||||||
|
nArray++
|
||||||
|
nArrayEl += int(counts[i]) + 1
|
||||||
|
case 3:
|
||||||
|
nRun++
|
||||||
|
nRunEl += int(counts[i])
|
||||||
|
default:
|
||||||
|
return ErrFrozenBitmapInvalidTypecode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(buf) < (1<<13)*nBitmap+4*nRunEl+2*nArrayEl {
|
||||||
|
return ErrFrozenBitmapIncomplete
|
||||||
|
}
|
||||||
|
|
||||||
|
bitsetsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBitmap])
|
||||||
|
buf = buf[(1<<13)*nBitmap:]
|
||||||
|
|
||||||
|
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
|
||||||
|
buf = buf[4*nRunEl:]
|
||||||
|
|
||||||
|
arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
|
||||||
|
buf = buf[2*nArrayEl:]
|
||||||
|
|
||||||
|
if len(buf) != 0 {
|
||||||
|
return ErrFrozenBitmapUnexpectedData
|
||||||
|
}
|
||||||
|
|
||||||
|
var c container
|
||||||
|
containersSz := int(unsafe.Sizeof(c)) * nCont
|
||||||
|
bitsetsSz := int(unsafe.Sizeof(bitmapContainer{})) * nBitmap
|
||||||
|
arraysSz := int(unsafe.Sizeof(arrayContainer{})) * nArray
|
||||||
|
runsSz := int(unsafe.Sizeof(runContainer16{})) * nRun
|
||||||
|
needCOWSz := int(unsafe.Sizeof(true)) * nCont
|
||||||
|
|
||||||
|
bitmapArenaSz := containersSz + bitsetsSz + arraysSz + runsSz + needCOWSz
|
||||||
|
bitmapArena := make([]byte, bitmapArenaSz)
|
||||||
|
|
||||||
|
containers := byteSliceAsContainerSlice(bitmapArena[:containersSz])
|
||||||
|
bitmapArena = bitmapArena[containersSz:]
|
||||||
|
|
||||||
|
bitsets := byteSliceAsBitsetSlice(bitmapArena[:bitsetsSz])
|
||||||
|
bitmapArena = bitmapArena[bitsetsSz:]
|
||||||
|
|
||||||
|
arrays := byteSliceAsArraySlice(bitmapArena[:arraysSz])
|
||||||
|
bitmapArena = bitmapArena[arraysSz:]
|
||||||
|
|
||||||
|
runs := byteSliceAsRun16Slice(bitmapArena[:runsSz])
|
||||||
|
bitmapArena = bitmapArena[runsSz:]
|
||||||
|
|
||||||
|
needCOW := byteSliceAsBoolSlice(bitmapArena)
|
||||||
|
|
||||||
|
iBitset, iArray, iRun := 0, 0, 0
|
||||||
|
for i, t := range types {
|
||||||
|
needCOW[i] = true
|
||||||
|
|
||||||
|
switch t {
|
||||||
|
case 1:
|
||||||
|
containers[i] = &bitsets[iBitset]
|
||||||
|
bitsets[iBitset].cardinality = int(counts[i]) + 1
|
||||||
|
bitsets[iBitset].bitmap = bitsetsArena[:1024]
|
||||||
|
bitsetsArena = bitsetsArena[1024:]
|
||||||
|
iBitset++
|
||||||
|
case 2:
|
||||||
|
containers[i] = &arrays[iArray]
|
||||||
|
sz := int(counts[i]) + 1
|
||||||
|
arrays[iArray].content = arraysArena[:sz]
|
||||||
|
arraysArena = arraysArena[sz:]
|
||||||
|
iArray++
|
||||||
|
case 3:
|
||||||
|
containers[i] = &runs[iRun]
|
||||||
|
runs[iRun].iv = runsArena[:counts[i]]
|
||||||
|
runsArena = runsArena[counts[i]:]
|
||||||
|
iRun++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not consuming the full input is a bug.
|
||||||
|
if iBitset != nBitmap || len(bitsetsArena) != 0 ||
|
||||||
|
iArray != nArray || len(arraysArena) != 0 ||
|
||||||
|
iRun != nRun || len(runsArena) != 0 {
|
||||||
|
panic("we missed something")
|
||||||
|
}
|
||||||
|
|
||||||
|
ra.keys = keys
|
||||||
|
ra.containers = containers
|
||||||
|
ra.needCopyOnWrite = needCOW
|
||||||
|
ra.copyOnWrite = true
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetFrozenSizeInBytes returns the size in bytes of the frozen bitmap.
|
||||||
|
func (rb *Bitmap) GetFrozenSizeInBytes() uint64 {
|
||||||
|
nBits, nArrayEl, nRunEl := uint64(0), uint64(0), uint64(0)
|
||||||
|
for _, c := range rb.highlowcontainer.containers {
|
||||||
|
switch v := c.(type) {
|
||||||
|
case *bitmapContainer:
|
||||||
|
nBits++
|
||||||
|
case *arrayContainer:
|
||||||
|
nArrayEl += uint64(len(v.content))
|
||||||
|
case *runContainer16:
|
||||||
|
nRunEl += uint64(len(v.iv))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 4 + 5*uint64(len(rb.highlowcontainer.containers)) +
|
||||||
|
(nBits << 13) + 2*nArrayEl + 4*nRunEl
|
||||||
|
}
|
||||||
|
|
||||||
|
// Freeze serializes the bitmap in the CRoaring's frozen format.
|
||||||
|
func (rb *Bitmap) Freeze() ([]byte, error) {
|
||||||
|
sz := rb.GetFrozenSizeInBytes()
|
||||||
|
buf := make([]byte, sz)
|
||||||
|
_, err := rb.FreezeTo(buf)
|
||||||
|
return buf, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// FreezeTo serializes the bitmap in the CRoaring's frozen format.
|
||||||
|
func (rb *Bitmap) FreezeTo(buf []byte) (int, error) {
|
||||||
|
containers := rb.highlowcontainer.containers
|
||||||
|
nCont := len(containers)
|
||||||
|
|
||||||
|
nBits, nArrayEl, nRunEl := 0, 0, 0
|
||||||
|
for _, c := range containers {
|
||||||
|
switch v := c.(type) {
|
||||||
|
case *bitmapContainer:
|
||||||
|
nBits++
|
||||||
|
case *arrayContainer:
|
||||||
|
nArrayEl += len(v.content)
|
||||||
|
case *runContainer16:
|
||||||
|
nRunEl += len(v.iv)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
serialSize := 4 + 5*nCont + (1<<13)*nBits + 4*nRunEl + 2*nArrayEl
|
||||||
|
if len(buf) < serialSize {
|
||||||
|
return 0, ErrFrozenBitmapBufferTooSmall
|
||||||
|
}
|
||||||
|
|
||||||
|
bitsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBits])
|
||||||
|
buf = buf[(1<<13)*nBits:]
|
||||||
|
|
||||||
|
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
|
||||||
|
buf = buf[4*nRunEl:]
|
||||||
|
|
||||||
|
arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
|
||||||
|
buf = buf[2*nArrayEl:]
|
||||||
|
|
||||||
|
keys := byteSliceAsUint16Slice(buf[:2*nCont])
|
||||||
|
buf = buf[2*nCont:]
|
||||||
|
|
||||||
|
counts := byteSliceAsUint16Slice(buf[:2*nCont])
|
||||||
|
buf = buf[2*nCont:]
|
||||||
|
|
||||||
|
types := buf[:nCont]
|
||||||
|
buf = buf[nCont:]
|
||||||
|
|
||||||
|
header := uint32(frozenCookie | (nCont << 15))
|
||||||
|
binary.LittleEndian.PutUint32(buf[:4], header)
|
||||||
|
|
||||||
|
copy(keys, rb.highlowcontainer.keys[:])
|
||||||
|
|
||||||
|
for i, c := range containers {
|
||||||
|
switch v := c.(type) {
|
||||||
|
case *bitmapContainer:
|
||||||
|
copy(bitsArena, v.bitmap)
|
||||||
|
bitsArena = bitsArena[1024:]
|
||||||
|
counts[i] = uint16(v.cardinality - 1)
|
||||||
|
types[i] = 1
|
||||||
|
case *arrayContainer:
|
||||||
|
copy(arraysArena, v.content)
|
||||||
|
arraysArena = arraysArena[len(v.content):]
|
||||||
|
elems := len(v.content)
|
||||||
|
counts[i] = uint16(elems - 1)
|
||||||
|
types[i] = 2
|
||||||
|
case *runContainer16:
|
||||||
|
copy(runsArena, v.iv)
|
||||||
|
runs := len(v.iv)
|
||||||
|
runsArena = runsArena[runs:]
|
||||||
|
counts[i] = uint16(runs)
|
||||||
|
types[i] = 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return serialSize, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteFrozenTo serializes the bitmap in the CRoaring's frozen format.
|
||||||
|
func (rb *Bitmap) WriteFrozenTo(wr io.Writer) (int, error) {
|
||||||
|
// FIXME: this is a naive version that iterates 4 times through the
|
||||||
|
// containers and allocates 3*len(containers) bytes; it's quite likely
|
||||||
|
// it can be done more efficiently.
|
||||||
|
containers := rb.highlowcontainer.containers
|
||||||
|
written := 0
|
||||||
|
|
||||||
|
for _, c := range containers {
|
||||||
|
c, ok := c.(*bitmapContainer)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
n, err := wr.Write(uint64SliceAsByteSlice(c.bitmap))
|
||||||
|
written += n
|
||||||
|
if err != nil {
|
||||||
|
return written, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range containers {
|
||||||
|
c, ok := c.(*runContainer16)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
n, err := wr.Write(interval16SliceAsByteSlice(c.iv))
|
||||||
|
written += n
|
||||||
|
if err != nil {
|
||||||
|
return written, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range containers {
|
||||||
|
c, ok := c.(*arrayContainer)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
n, err := wr.Write(uint16SliceAsByteSlice(c.content))
|
||||||
|
written += n
|
||||||
|
if err != nil {
|
||||||
|
return written, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
n, err := wr.Write(uint16SliceAsByteSlice(rb.highlowcontainer.keys))
|
||||||
|
written += n
|
||||||
|
if err != nil {
|
||||||
|
return written, err
|
||||||
|
}
|
||||||
|
|
||||||
|
countTypeBuf := make([]byte, 3*len(containers))
|
||||||
|
counts := byteSliceAsUint16Slice(countTypeBuf[:2*len(containers)])
|
||||||
|
types := countTypeBuf[2*len(containers):]
|
||||||
|
|
||||||
|
for i, c := range containers {
|
||||||
|
switch c := c.(type) {
|
||||||
|
case *bitmapContainer:
|
||||||
|
counts[i] = uint16(c.cardinality - 1)
|
||||||
|
types[i] = 1
|
||||||
|
case *arrayContainer:
|
||||||
|
elems := len(c.content)
|
||||||
|
counts[i] = uint16(elems - 1)
|
||||||
|
types[i] = 2
|
||||||
|
case *runContainer16:
|
||||||
|
runs := len(c.iv)
|
||||||
|
counts[i] = uint16(runs)
|
||||||
|
types[i] = 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
n, err = wr.Write(countTypeBuf)
|
||||||
|
written += n
|
||||||
|
if err != nil {
|
||||||
|
return written, err
|
||||||
|
}
|
||||||
|
|
||||||
|
header := uint32(frozenCookie | (len(containers) << 15))
|
||||||
|
if err := binary.Write(wr, binary.LittleEndian, header); err != nil {
|
||||||
|
return written, err
|
||||||
|
}
|
||||||
|
written += 4
|
||||||
|
|
||||||
|
return written, nil
|
||||||
|
}
|
||||||
22
vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go
generated
vendored
Normal file
22
vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go
generated
vendored
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
//go:build gofuzz
|
||||||
|
// +build gofuzz
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
import "bytes"
|
||||||
|
|
||||||
|
func FuzzSerializationStream(data []byte) int {
|
||||||
|
newrb := NewBitmap()
|
||||||
|
if _, err := newrb.ReadFrom(bytes.NewReader(data)); err != nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func FuzzSerializationBuffer(data []byte) int {
|
||||||
|
newrb := NewBitmap()
|
||||||
|
if _, err := newrb.FromBuffer(data); err != nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return 1
|
||||||
|
}
|
||||||
550
vendor/github.com/RoaringBitmap/roaring/setutil.go
generated
vendored
Normal file
550
vendor/github.com/RoaringBitmap/roaring/setutil.go
generated
vendored
Normal file
@@ -0,0 +1,550 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
func equal(a, b []uint16) bool {
|
||||||
|
if len(a) != len(b) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i := range a {
|
||||||
|
if a[i] != b[i] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||||
|
if 0 == len(set2) {
|
||||||
|
buffer = buffer[:len(set1)]
|
||||||
|
for k := 0; k < len(set1); k++ {
|
||||||
|
buffer[k] = set1[k]
|
||||||
|
}
|
||||||
|
return len(set1)
|
||||||
|
}
|
||||||
|
if 0 == len(set1) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
pos := 0
|
||||||
|
k1 := 0
|
||||||
|
k2 := 0
|
||||||
|
buffer = buffer[:cap(buffer)]
|
||||||
|
s1 := set1[k1]
|
||||||
|
s2 := set2[k2]
|
||||||
|
for {
|
||||||
|
if s1 < s2 {
|
||||||
|
buffer[pos] = s1
|
||||||
|
pos++
|
||||||
|
k1++
|
||||||
|
if k1 >= len(set1) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
} else if s1 == s2 {
|
||||||
|
k1++
|
||||||
|
k2++
|
||||||
|
if k1 >= len(set1) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
if k2 >= len(set2) {
|
||||||
|
for ; k1 < len(set1); k1++ {
|
||||||
|
buffer[pos] = set1[k1]
|
||||||
|
pos++
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
} else { // if (val1>val2)
|
||||||
|
k2++
|
||||||
|
if k2 >= len(set2) {
|
||||||
|
for ; k1 < len(set1); k1++ {
|
||||||
|
buffer[pos] = set1[k1]
|
||||||
|
pos++
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pos
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||||
|
if 0 == len(set2) {
|
||||||
|
buffer = buffer[:len(set1)]
|
||||||
|
copy(buffer, set1[:])
|
||||||
|
return len(set1)
|
||||||
|
}
|
||||||
|
if 0 == len(set1) {
|
||||||
|
buffer = buffer[:len(set2)]
|
||||||
|
copy(buffer, set2[:])
|
||||||
|
return len(set2)
|
||||||
|
}
|
||||||
|
pos := 0
|
||||||
|
k1 := 0
|
||||||
|
k2 := 0
|
||||||
|
s1 := set1[k1]
|
||||||
|
s2 := set2[k2]
|
||||||
|
buffer = buffer[:cap(buffer)]
|
||||||
|
for {
|
||||||
|
if s1 < s2 {
|
||||||
|
buffer[pos] = s1
|
||||||
|
pos++
|
||||||
|
k1++
|
||||||
|
if k1 >= len(set1) {
|
||||||
|
for ; k2 < len(set2); k2++ {
|
||||||
|
buffer[pos] = set2[k2]
|
||||||
|
pos++
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
} else if s1 == s2 {
|
||||||
|
k1++
|
||||||
|
k2++
|
||||||
|
if k1 >= len(set1) {
|
||||||
|
for ; k2 < len(set2); k2++ {
|
||||||
|
buffer[pos] = set2[k2]
|
||||||
|
pos++
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if k2 >= len(set2) {
|
||||||
|
for ; k1 < len(set1); k1++ {
|
||||||
|
buffer[pos] = set1[k1]
|
||||||
|
pos++
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
s2 = set2[k2]
|
||||||
|
} else { // if (val1>val2)
|
||||||
|
buffer[pos] = s2
|
||||||
|
pos++
|
||||||
|
k2++
|
||||||
|
if k2 >= len(set2) {
|
||||||
|
for ; k1 < len(set1); k1++ {
|
||||||
|
buffer[pos] = set1[k1]
|
||||||
|
pos++
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pos
|
||||||
|
}
|
||||||
|
|
||||||
|
func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
|
||||||
|
pos := 0
|
||||||
|
k1 := 0
|
||||||
|
k2 := 0
|
||||||
|
if 0 == len(set2) {
|
||||||
|
return len(set1)
|
||||||
|
}
|
||||||
|
if 0 == len(set1) {
|
||||||
|
return len(set2)
|
||||||
|
}
|
||||||
|
s1 := set1[k1]
|
||||||
|
s2 := set2[k2]
|
||||||
|
for {
|
||||||
|
if s1 < s2 {
|
||||||
|
pos++
|
||||||
|
k1++
|
||||||
|
if k1 >= len(set1) {
|
||||||
|
pos += len(set2) - k2
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
} else if s1 == s2 {
|
||||||
|
pos++
|
||||||
|
k1++
|
||||||
|
k2++
|
||||||
|
if k1 >= len(set1) {
|
||||||
|
pos += len(set2) - k2
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if k2 >= len(set2) {
|
||||||
|
pos += len(set1) - k1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
s2 = set2[k2]
|
||||||
|
} else { // if (set1[k1]>set2[k2])
|
||||||
|
pos++
|
||||||
|
k2++
|
||||||
|
if k2 >= len(set2) {
|
||||||
|
pos += len(set1) - k1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pos
|
||||||
|
}
|
||||||
|
|
||||||
|
func intersection2by2(
|
||||||
|
set1 []uint16,
|
||||||
|
set2 []uint16,
|
||||||
|
buffer []uint16) int {
|
||||||
|
|
||||||
|
if len(set1)*64 < len(set2) {
|
||||||
|
return onesidedgallopingintersect2by2(set1, set2, buffer)
|
||||||
|
} else if len(set2)*64 < len(set1) {
|
||||||
|
return onesidedgallopingintersect2by2(set2, set1, buffer)
|
||||||
|
} else {
|
||||||
|
return localintersect2by2(set1, set2, buffer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func intersection2by2Cardinality(
|
||||||
|
set1 []uint16,
|
||||||
|
set2 []uint16) int {
|
||||||
|
|
||||||
|
if len(set1)*64 < len(set2) {
|
||||||
|
return onesidedgallopingintersect2by2Cardinality(set1, set2)
|
||||||
|
} else if len(set2)*64 < len(set1) {
|
||||||
|
return onesidedgallopingintersect2by2Cardinality(set2, set1)
|
||||||
|
} else {
|
||||||
|
return localintersect2by2Cardinality(set1, set2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func intersects2by2(
|
||||||
|
set1 []uint16,
|
||||||
|
set2 []uint16) bool {
|
||||||
|
// could be optimized if one set is much larger than the other one
|
||||||
|
if (0 == len(set1)) || (0 == len(set2)) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
k1 := 0
|
||||||
|
k2 := 0
|
||||||
|
s1 := set1[k1]
|
||||||
|
s2 := set2[k2]
|
||||||
|
mainwhile:
|
||||||
|
for {
|
||||||
|
|
||||||
|
if s2 < s1 {
|
||||||
|
for {
|
||||||
|
k2++
|
||||||
|
if k2 == len(set2) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
if s2 >= s1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if s1 < s2 {
|
||||||
|
for {
|
||||||
|
k1++
|
||||||
|
if k1 == len(set1) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
if s1 >= s2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// (set2[k2] == set1[k1])
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func localintersect2by2(
|
||||||
|
set1 []uint16,
|
||||||
|
set2 []uint16,
|
||||||
|
buffer []uint16) int {
|
||||||
|
|
||||||
|
if (0 == len(set1)) || (0 == len(set2)) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
k1 := 0
|
||||||
|
k2 := 0
|
||||||
|
pos := 0
|
||||||
|
buffer = buffer[:cap(buffer)]
|
||||||
|
s1 := set1[k1]
|
||||||
|
s2 := set2[k2]
|
||||||
|
mainwhile:
|
||||||
|
for {
|
||||||
|
if s2 < s1 {
|
||||||
|
for {
|
||||||
|
k2++
|
||||||
|
if k2 == len(set2) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
if s2 >= s1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if s1 < s2 {
|
||||||
|
for {
|
||||||
|
k1++
|
||||||
|
if k1 == len(set1) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
if s1 >= s2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// (set2[k2] == set1[k1])
|
||||||
|
buffer[pos] = s1
|
||||||
|
pos++
|
||||||
|
k1++
|
||||||
|
if k1 == len(set1) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
k2++
|
||||||
|
if k2 == len(set2) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pos
|
||||||
|
}
|
||||||
|
|
||||||
|
func localintersect2by2Cardinality(
|
||||||
|
set1 []uint16,
|
||||||
|
set2 []uint16) int {
|
||||||
|
|
||||||
|
if (0 == len(set1)) || (0 == len(set2)) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
k1 := 0
|
||||||
|
k2 := 0
|
||||||
|
pos := 0
|
||||||
|
s1 := set1[k1]
|
||||||
|
s2 := set2[k2]
|
||||||
|
mainwhile:
|
||||||
|
for {
|
||||||
|
if s2 < s1 {
|
||||||
|
for {
|
||||||
|
k2++
|
||||||
|
if k2 == len(set2) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
if s2 >= s1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if s1 < s2 {
|
||||||
|
for {
|
||||||
|
k1++
|
||||||
|
if k1 == len(set1) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
if s1 >= s2 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// (set2[k2] == set1[k1])
|
||||||
|
pos++
|
||||||
|
k1++
|
||||||
|
if k1 == len(set1) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
k2++
|
||||||
|
if k2 == len(set2) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pos
|
||||||
|
}
|
||||||
|
|
||||||
|
func advanceUntil(
|
||||||
|
array []uint16,
|
||||||
|
pos int,
|
||||||
|
length int,
|
||||||
|
min uint16) int {
|
||||||
|
lower := pos + 1
|
||||||
|
|
||||||
|
if lower >= length || array[lower] >= min {
|
||||||
|
return lower
|
||||||
|
}
|
||||||
|
|
||||||
|
spansize := 1
|
||||||
|
|
||||||
|
for lower+spansize < length && array[lower+spansize] < min {
|
||||||
|
spansize *= 2
|
||||||
|
}
|
||||||
|
var upper int
|
||||||
|
if lower+spansize < length {
|
||||||
|
upper = lower + spansize
|
||||||
|
} else {
|
||||||
|
upper = length - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if array[upper] == min {
|
||||||
|
return upper
|
||||||
|
}
|
||||||
|
|
||||||
|
if array[upper] < min {
|
||||||
|
// means
|
||||||
|
// array
|
||||||
|
// has no
|
||||||
|
// item
|
||||||
|
// >= min
|
||||||
|
// pos = array.length;
|
||||||
|
return length
|
||||||
|
}
|
||||||
|
|
||||||
|
// we know that the next-smallest span was too small
|
||||||
|
lower += (spansize >> 1)
|
||||||
|
|
||||||
|
mid := 0
|
||||||
|
for lower+1 != upper {
|
||||||
|
mid = (lower + upper) >> 1
|
||||||
|
if array[mid] == min {
|
||||||
|
return mid
|
||||||
|
} else if array[mid] < min {
|
||||||
|
lower = mid
|
||||||
|
} else {
|
||||||
|
upper = mid
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return upper
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func onesidedgallopingintersect2by2(
|
||||||
|
smallset []uint16,
|
||||||
|
largeset []uint16,
|
||||||
|
buffer []uint16) int {
|
||||||
|
|
||||||
|
if 0 == len(smallset) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
buffer = buffer[:cap(buffer)]
|
||||||
|
k1 := 0
|
||||||
|
k2 := 0
|
||||||
|
pos := 0
|
||||||
|
s1 := largeset[k1]
|
||||||
|
s2 := smallset[k2]
|
||||||
|
mainwhile:
|
||||||
|
|
||||||
|
for {
|
||||||
|
if s1 < s2 {
|
||||||
|
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||||
|
if k1 == len(largeset) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s1 = largeset[k1]
|
||||||
|
}
|
||||||
|
if s2 < s1 {
|
||||||
|
k2++
|
||||||
|
if k2 == len(smallset) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s2 = smallset[k2]
|
||||||
|
} else {
|
||||||
|
|
||||||
|
buffer[pos] = s2
|
||||||
|
pos++
|
||||||
|
k2++
|
||||||
|
if k2 == len(smallset) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s2 = smallset[k2]
|
||||||
|
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||||
|
if k1 == len(largeset) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s1 = largeset[k1]
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return pos
|
||||||
|
}
|
||||||
|
|
||||||
|
func onesidedgallopingintersect2by2Cardinality(
|
||||||
|
smallset []uint16,
|
||||||
|
largeset []uint16) int {
|
||||||
|
|
||||||
|
if 0 == len(smallset) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
k1 := 0
|
||||||
|
k2 := 0
|
||||||
|
pos := 0
|
||||||
|
s1 := largeset[k1]
|
||||||
|
s2 := smallset[k2]
|
||||||
|
mainwhile:
|
||||||
|
|
||||||
|
for {
|
||||||
|
if s1 < s2 {
|
||||||
|
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||||
|
if k1 == len(largeset) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s1 = largeset[k1]
|
||||||
|
}
|
||||||
|
if s2 < s1 {
|
||||||
|
k2++
|
||||||
|
if k2 == len(smallset) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s2 = smallset[k2]
|
||||||
|
} else {
|
||||||
|
|
||||||
|
pos++
|
||||||
|
k2++
|
||||||
|
if k2 == len(smallset) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s2 = smallset[k2]
|
||||||
|
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||||
|
if k1 == len(largeset) {
|
||||||
|
break mainwhile
|
||||||
|
}
|
||||||
|
s1 = largeset[k1]
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return pos
|
||||||
|
}
|
||||||
|
|
||||||
|
func binarySearch(array []uint16, ikey uint16) int {
|
||||||
|
low := 0
|
||||||
|
high := len(array) - 1
|
||||||
|
for low+16 <= high {
|
||||||
|
middleIndex := int(uint32(low+high) >> 1)
|
||||||
|
middleValue := array[middleIndex]
|
||||||
|
if middleValue < ikey {
|
||||||
|
low = middleIndex + 1
|
||||||
|
} else if middleValue > ikey {
|
||||||
|
high = middleIndex - 1
|
||||||
|
} else {
|
||||||
|
return middleIndex
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for ; low <= high; low++ {
|
||||||
|
val := array[low]
|
||||||
|
if val >= ikey {
|
||||||
|
if val == ikey {
|
||||||
|
return low
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -(low + 1)
|
||||||
|
}
|
||||||
7
vendor/github.com/RoaringBitmap/roaring/setutil_arm64.go
generated
vendored
Normal file
7
vendor/github.com/RoaringBitmap/roaring/setutil_arm64.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
//go:build arm64 && !gccgo && !appengine
|
||||||
|
// +build arm64,!gccgo,!appengine
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) (size int)
|
||||||
132
vendor/github.com/RoaringBitmap/roaring/setutil_arm64.s
generated
vendored
Normal file
132
vendor/github.com/RoaringBitmap/roaring/setutil_arm64.s
generated
vendored
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
// +build arm64,!gccgo,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
||||||
|
// This implements union2by2 using golang's version of arm64 assembly
|
||||||
|
// The algorithm is very similar to the generic one,
|
||||||
|
// but makes better use of arm64 features so is notably faster.
|
||||||
|
// The basic algorithm structure is as follows:
|
||||||
|
// 1. If either set is empty, copy the other set into the buffer and return the length
|
||||||
|
// 2. Otherwise, load the first element of each set into a variable (s1 and s2).
|
||||||
|
// 3. a. Compare the values of s1 and s2.
|
||||||
|
// b. add the smaller one to the buffer.
|
||||||
|
// c. perform a bounds check before incrementing.
|
||||||
|
// If one set is finished, copy the rest of the other set over.
|
||||||
|
// d. update s1 and or s2 to the next value, continue loop.
|
||||||
|
//
|
||||||
|
// Past the fact of the algorithm, this code makes use of several arm64 features
|
||||||
|
// Condition Codes:
|
||||||
|
// arm64's CMP operation sets 4 bits that can be used for branching,
|
||||||
|
// rather than just true or false.
|
||||||
|
// As a consequence, a single comparison gives enough information to distinguish the three cases
|
||||||
|
//
|
||||||
|
// Post-increment pointers after load/store:
|
||||||
|
// Instructions like `MOVHU.P 2(R0), R6`
|
||||||
|
// increment the register by a specified amount, in this example 2.
|
||||||
|
// Because uint16's are exactly 2 bytes and the length of the slices
|
||||||
|
// is part of the slice header,
|
||||||
|
// there is no need to separately track the index into the slice.
|
||||||
|
// Instead, the code can calculate the final read value and compare against that,
|
||||||
|
// using the post-increment reads to move the pointers along.
|
||||||
|
//
|
||||||
|
// TODO: CALL out to memmove once the list is exhausted.
|
||||||
|
// Right now it moves the necessary shorts so that the remaining count
|
||||||
|
// is a multiple of 4 and then copies 64 bits at a time.
|
||||||
|
|
||||||
|
TEXT ·union2by2(SB), NOSPLIT, $0-80
|
||||||
|
// R0, R1, and R2 for the pointers to the three slices
|
||||||
|
MOVD set1+0(FP), R0
|
||||||
|
MOVD set2+24(FP), R1
|
||||||
|
MOVD buffer+48(FP), R2
|
||||||
|
|
||||||
|
//R3 and R4 will be the values at which we will have finished reading set1 and set2.
|
||||||
|
// R3 should be R0 + 2 * set1_len+8(FP)
|
||||||
|
MOVD set1_len+8(FP), R3
|
||||||
|
MOVD set2_len+32(FP), R4
|
||||||
|
|
||||||
|
ADD R3<<1, R0, R3
|
||||||
|
ADD R4<<1, R1, R4
|
||||||
|
|
||||||
|
|
||||||
|
//Rather than counting the number of elements added separately
|
||||||
|
//Save the starting register of buffer.
|
||||||
|
MOVD buffer+48(FP), R5
|
||||||
|
|
||||||
|
// set1 is empty, just flush set2
|
||||||
|
CMP R0, R3
|
||||||
|
BEQ flush_right
|
||||||
|
|
||||||
|
// set2 is empty, just flush set1
|
||||||
|
CMP R1, R4
|
||||||
|
BEQ flush_left
|
||||||
|
|
||||||
|
// R6, R7 are the working space for s1 and s2
|
||||||
|
MOVD ZR, R6
|
||||||
|
MOVD ZR, R7
|
||||||
|
|
||||||
|
MOVHU.P 2(R0), R6
|
||||||
|
MOVHU.P 2(R1), R7
|
||||||
|
loop:
|
||||||
|
|
||||||
|
CMP R6, R7
|
||||||
|
BEQ pop_both // R6 == R7
|
||||||
|
BLS pop_right // R6 > R7
|
||||||
|
//pop_left: // R6 < R7
|
||||||
|
MOVHU.P R6, 2(R2)
|
||||||
|
CMP R0, R3
|
||||||
|
BEQ pop_then_flush_right
|
||||||
|
MOVHU.P 2(R0), R6
|
||||||
|
JMP loop
|
||||||
|
pop_both:
|
||||||
|
MOVHU.P R6, 2(R2) //could also use R7, since they are equal
|
||||||
|
CMP R0, R3
|
||||||
|
BEQ flush_right
|
||||||
|
CMP R1, R4
|
||||||
|
BEQ flush_left
|
||||||
|
MOVHU.P 2(R0), R6
|
||||||
|
MOVHU.P 2(R1), R7
|
||||||
|
JMP loop
|
||||||
|
pop_right:
|
||||||
|
MOVHU.P R7, 2(R2)
|
||||||
|
CMP R1, R4
|
||||||
|
BEQ pop_then_flush_left
|
||||||
|
MOVHU.P 2(R1), R7
|
||||||
|
JMP loop
|
||||||
|
|
||||||
|
pop_then_flush_right:
|
||||||
|
MOVHU.P R7, 2(R2)
|
||||||
|
flush_right:
|
||||||
|
MOVD R1, R0
|
||||||
|
MOVD R4, R3
|
||||||
|
JMP flush_left
|
||||||
|
pop_then_flush_left:
|
||||||
|
MOVHU.P R6, 2(R2)
|
||||||
|
flush_left:
|
||||||
|
CMP R0, R3
|
||||||
|
BEQ return
|
||||||
|
//figure out how many bytes to slough off. Must be a multiple of two
|
||||||
|
SUB R0, R3, R4
|
||||||
|
ANDS $6, R4
|
||||||
|
BEQ long_flush //handles the 0 mod 8 case
|
||||||
|
SUBS $4, R4, R4 // since possible values are 2, 4, 6, this splits evenly
|
||||||
|
BLT pop_single // exactly the 2 case
|
||||||
|
MOVW.P 4(R0), R6
|
||||||
|
MOVW.P R6, 4(R2)
|
||||||
|
BEQ long_flush // we're now aligned by 64 bits, as R4==4, otherwise 2 more
|
||||||
|
pop_single:
|
||||||
|
MOVHU.P 2(R0), R6
|
||||||
|
MOVHU.P R6, 2(R2)
|
||||||
|
long_flush:
|
||||||
|
// at this point we know R3 - R0 is a multiple of 8.
|
||||||
|
CMP R0, R3
|
||||||
|
BEQ return
|
||||||
|
MOVD.P 8(R0), R6
|
||||||
|
MOVD.P R6, 8(R2)
|
||||||
|
JMP long_flush
|
||||||
|
return:
|
||||||
|
// number of shorts written is (R5 - R2) >> 1
|
||||||
|
SUB R5, R2
|
||||||
|
LSR $1, R2, R2
|
||||||
|
MOVD R2, size+72(FP)
|
||||||
|
RET
|
||||||
64
vendor/github.com/RoaringBitmap/roaring/setutil_generic.go
generated
vendored
Normal file
64
vendor/github.com/RoaringBitmap/roaring/setutil_generic.go
generated
vendored
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
//go:build !arm64 || gccgo || appengine
|
||||||
|
// +build !arm64 gccgo appengine
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||||
|
pos := 0
|
||||||
|
k1 := 0
|
||||||
|
k2 := 0
|
||||||
|
if 0 == len(set2) {
|
||||||
|
buffer = buffer[:len(set1)]
|
||||||
|
copy(buffer, set1[:])
|
||||||
|
return len(set1)
|
||||||
|
}
|
||||||
|
if 0 == len(set1) {
|
||||||
|
buffer = buffer[:len(set2)]
|
||||||
|
copy(buffer, set2[:])
|
||||||
|
return len(set2)
|
||||||
|
}
|
||||||
|
s1 := set1[k1]
|
||||||
|
s2 := set2[k2]
|
||||||
|
buffer = buffer[:cap(buffer)]
|
||||||
|
for {
|
||||||
|
if s1 < s2 {
|
||||||
|
buffer[pos] = s1
|
||||||
|
pos++
|
||||||
|
k1++
|
||||||
|
if k1 >= len(set1) {
|
||||||
|
copy(buffer[pos:], set2[k2:])
|
||||||
|
pos += len(set2) - k2
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
} else if s1 == s2 {
|
||||||
|
buffer[pos] = s1
|
||||||
|
pos++
|
||||||
|
k1++
|
||||||
|
k2++
|
||||||
|
if k1 >= len(set1) {
|
||||||
|
copy(buffer[pos:], set2[k2:])
|
||||||
|
pos += len(set2) - k2
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if k2 >= len(set2) {
|
||||||
|
copy(buffer[pos:], set1[k1:])
|
||||||
|
pos += len(set1) - k1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s1 = set1[k1]
|
||||||
|
s2 = set2[k2]
|
||||||
|
} else { // if (set1[k1]>set2[k2])
|
||||||
|
buffer[pos] = s2
|
||||||
|
pos++
|
||||||
|
k2++
|
||||||
|
if k2 >= len(set2) {
|
||||||
|
copy(buffer[pos:], set1[k1:])
|
||||||
|
pos += len(set1) - k1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s2 = set2[k2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pos
|
||||||
|
}
|
||||||
52
vendor/github.com/RoaringBitmap/roaring/shortiterator.go
generated
vendored
Normal file
52
vendor/github.com/RoaringBitmap/roaring/shortiterator.go
generated
vendored
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
type shortIterable interface {
|
||||||
|
hasNext() bool
|
||||||
|
next() uint16
|
||||||
|
}
|
||||||
|
|
||||||
|
type shortPeekable interface {
|
||||||
|
shortIterable
|
||||||
|
peekNext() uint16
|
||||||
|
advanceIfNeeded(minval uint16)
|
||||||
|
}
|
||||||
|
|
||||||
|
type shortIterator struct {
|
||||||
|
slice []uint16
|
||||||
|
loc int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (si *shortIterator) hasNext() bool {
|
||||||
|
return si.loc < len(si.slice)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (si *shortIterator) next() uint16 {
|
||||||
|
a := si.slice[si.loc]
|
||||||
|
si.loc++
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
|
||||||
|
func (si *shortIterator) peekNext() uint16 {
|
||||||
|
return si.slice[si.loc]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (si *shortIterator) advanceIfNeeded(minval uint16) {
|
||||||
|
if si.hasNext() && si.peekNext() < minval {
|
||||||
|
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type reverseIterator struct {
|
||||||
|
slice []uint16
|
||||||
|
loc int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (si *reverseIterator) hasNext() bool {
|
||||||
|
return si.loc >= 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (si *reverseIterator) next() uint16 {
|
||||||
|
a := si.slice[si.loc]
|
||||||
|
si.loc--
|
||||||
|
return a
|
||||||
|
}
|
||||||
384
vendor/github.com/RoaringBitmap/roaring/smat.go
generated
vendored
Normal file
384
vendor/github.com/RoaringBitmap/roaring/smat.go
generated
vendored
Normal file
@@ -0,0 +1,384 @@
|
|||||||
|
//go:build gofuzz
|
||||||
|
// +build gofuzz
|
||||||
|
|
||||||
|
/*
|
||||||
|
# Instructions for smat testing for roaring
|
||||||
|
|
||||||
|
[smat](https://github.com/mschoch/smat) is a framework that provides
|
||||||
|
state machine assisted fuzz testing.
|
||||||
|
|
||||||
|
To run the smat tests for roaring...
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
$ go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||||
|
$ go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||||
|
|
||||||
|
## Steps
|
||||||
|
|
||||||
|
1. Generate initial smat corpus:
|
||||||
|
```
|
||||||
|
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Build go-fuzz test program with instrumentation:
|
||||||
|
```
|
||||||
|
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Run go-fuzz:
|
||||||
|
```
|
||||||
|
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||||
|
```
|
||||||
|
|
||||||
|
You should see output like...
|
||||||
|
```
|
||||||
|
2016/09/16 13:58:35 slaves: 8, corpus: 1 (3s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 3s
|
||||||
|
2016/09/16 13:58:38 slaves: 8, corpus: 1 (6s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 6s
|
||||||
|
2016/09/16 13:58:41 slaves: 8, corpus: 1 (9s ago), crashers: 0, restarts: 1/44, execs: 44 (5/sec), cover: 0, uptime: 9s
|
||||||
|
2016/09/16 13:58:44 slaves: 8, corpus: 1 (12s ago), crashers: 0, restarts: 1/45, execs: 45 (4/sec), cover: 0, uptime: 12s
|
||||||
|
2016/09/16 13:58:47 slaves: 8, corpus: 1 (15s ago), crashers: 0, restarts: 1/46, execs: 46 (3/sec), cover: 0, uptime: 15s
|
||||||
|
2016/09/16 13:58:50 slaves: 8, corpus: 1 (18s ago), crashers: 0, restarts: 1/47, execs: 47 (3/sec), cover: 0, uptime: 18s
|
||||||
|
2016/09/16 13:58:53 slaves: 8, corpus: 1 (21s ago), crashers: 0, restarts: 1/63, execs: 63 (3/sec), cover: 0, uptime: 21s
|
||||||
|
2016/09/16 13:58:56 slaves: 8, corpus: 1 (24s ago), crashers: 0, restarts: 1/65, execs: 65 (3/sec), cover: 0, uptime: 24s
|
||||||
|
2016/09/16 13:58:59 slaves: 8, corpus: 1 (27s ago), crashers: 0, restarts: 1/66, execs: 66 (2/sec), cover: 0, uptime: 27s
|
||||||
|
2016/09/16 13:59:02 slaves: 8, corpus: 1 (30s ago), crashers: 0, restarts: 1/67, execs: 67 (2/sec), cover: 0, uptime: 30s
|
||||||
|
2016/09/16 13:59:05 slaves: 8, corpus: 1 (33s ago), crashers: 0, restarts: 1/83, execs: 83 (3/sec), cover: 0, uptime: 33s
|
||||||
|
2016/09/16 13:59:08 slaves: 8, corpus: 1 (36s ago), crashers: 0, restarts: 1/84, execs: 84 (2/sec), cover: 0, uptime: 36s
|
||||||
|
2016/09/16 13:59:11 slaves: 8, corpus: 2 (0s ago), crashers: 0, restarts: 1/85, execs: 85 (2/sec), cover: 0, uptime: 39s
|
||||||
|
2016/09/16 13:59:14 slaves: 8, corpus: 17 (2s ago), crashers: 0, restarts: 1/86, execs: 86 (2/sec), cover: 480, uptime: 42s
|
||||||
|
2016/09/16 13:59:17 slaves: 8, corpus: 17 (5s ago), crashers: 0, restarts: 1/66, execs: 132 (3/sec), cover: 487, uptime: 45s
|
||||||
|
2016/09/16 13:59:20 slaves: 8, corpus: 17 (8s ago), crashers: 0, restarts: 1/440, execs: 2645 (55/sec), cover: 487, uptime: 48s
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Let it run, and if the # of crashers is > 0, check out the reports in
|
||||||
|
the workdir where you should be able to find the panic goroutine stack
|
||||||
|
traces.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package roaring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"github.com/bits-and-blooms/bitset"
|
||||||
|
"github.com/mschoch/smat"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fuzz test using state machine driven by byte stream.
|
||||||
|
func FuzzSmat(data []byte) int {
|
||||||
|
return smat.Fuzz(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'),
|
||||||
|
smatActionMap, data)
|
||||||
|
}
|
||||||
|
|
||||||
|
var smatDebug = false
|
||||||
|
|
||||||
|
func smatLog(prefix, format string, args ...interface{}) {
|
||||||
|
if smatDebug {
|
||||||
|
fmt.Print(prefix)
|
||||||
|
fmt.Printf(format, args...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type smatContext struct {
|
||||||
|
pairs []*smatPair
|
||||||
|
|
||||||
|
// Two registers, x & y.
|
||||||
|
x int
|
||||||
|
y int
|
||||||
|
|
||||||
|
actions int
|
||||||
|
}
|
||||||
|
|
||||||
|
type smatPair struct {
|
||||||
|
bm *Bitmap
|
||||||
|
bs *bitset.BitSet
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
|
||||||
|
var smatActionMap = smat.ActionMap{
|
||||||
|
smat.ActionID('X'): smatAction("x++", smatWrap(func(c *smatContext) { c.x++ })),
|
||||||
|
smat.ActionID('x'): smatAction("x--", smatWrap(func(c *smatContext) { c.x-- })),
|
||||||
|
smat.ActionID('Y'): smatAction("y++", smatWrap(func(c *smatContext) { c.y++ })),
|
||||||
|
smat.ActionID('y'): smatAction("y--", smatWrap(func(c *smatContext) { c.y-- })),
|
||||||
|
smat.ActionID('*'): smatAction("x*y", smatWrap(func(c *smatContext) { c.x = c.x * c.y })),
|
||||||
|
smat.ActionID('<'): smatAction("x<<", smatWrap(func(c *smatContext) { c.x = c.x << 1 })),
|
||||||
|
|
||||||
|
smat.ActionID('^'): smatAction("swap", smatWrap(func(c *smatContext) { c.x, c.y = c.y, c.x })),
|
||||||
|
|
||||||
|
smat.ActionID('['): smatAction(" pushPair", smatWrap(smatPushPair)),
|
||||||
|
smat.ActionID(']'): smatAction(" popPair", smatWrap(smatPopPair)),
|
||||||
|
|
||||||
|
smat.ActionID('B'): smatAction(" setBit", smatWrap(smatSetBit)),
|
||||||
|
smat.ActionID('b'): smatAction(" removeBit", smatWrap(smatRemoveBit)),
|
||||||
|
|
||||||
|
smat.ActionID('o'): smatAction(" or", smatWrap(smatOr)),
|
||||||
|
smat.ActionID('a'): smatAction(" and", smatWrap(smatAnd)),
|
||||||
|
|
||||||
|
smat.ActionID('#'): smatAction(" cardinality", smatWrap(smatCardinality)),
|
||||||
|
|
||||||
|
smat.ActionID('O'): smatAction(" orCardinality", smatWrap(smatOrCardinality)),
|
||||||
|
smat.ActionID('A'): smatAction(" andCardinality", smatWrap(smatAndCardinality)),
|
||||||
|
|
||||||
|
smat.ActionID('c'): smatAction(" clear", smatWrap(smatClear)),
|
||||||
|
smat.ActionID('r'): smatAction(" runOptimize", smatWrap(smatRunOptimize)),
|
||||||
|
|
||||||
|
smat.ActionID('e'): smatAction(" isEmpty", smatWrap(smatIsEmpty)),
|
||||||
|
|
||||||
|
smat.ActionID('i'): smatAction(" intersects", smatWrap(smatIntersects)),
|
||||||
|
|
||||||
|
smat.ActionID('f'): smatAction(" flip", smatWrap(smatFlip)),
|
||||||
|
|
||||||
|
smat.ActionID('-'): smatAction(" difference", smatWrap(smatDifference)),
|
||||||
|
}
|
||||||
|
|
||||||
|
var smatRunningPercentActions []smat.PercentAction
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var ids []int
|
||||||
|
for actionId := range smatActionMap {
|
||||||
|
ids = append(ids, int(actionId))
|
||||||
|
}
|
||||||
|
sort.Ints(ids)
|
||||||
|
|
||||||
|
pct := 100 / len(smatActionMap)
|
||||||
|
for _, actionId := range ids {
|
||||||
|
smatRunningPercentActions = append(smatRunningPercentActions,
|
||||||
|
smat.PercentAction{pct, smat.ActionID(actionId)})
|
||||||
|
}
|
||||||
|
|
||||||
|
smatActionMap[smat.ActionID('S')] = smatAction("SETUP", smatSetupFunc)
|
||||||
|
smatActionMap[smat.ActionID('T')] = smatAction("TEARDOWN", smatTeardownFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We only have one smat state: running.
|
||||||
|
func smatRunning(next byte) smat.ActionID {
|
||||||
|
return smat.PercentExecute(next, smatRunningPercentActions...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatAction(name string, f func(ctx smat.Context) (smat.State, error)) func(smat.Context) (smat.State, error) {
|
||||||
|
return func(ctx smat.Context) (smat.State, error) {
|
||||||
|
c := ctx.(*smatContext)
|
||||||
|
c.actions++
|
||||||
|
|
||||||
|
smatLog(" ", "%s\n", name)
|
||||||
|
|
||||||
|
return f(ctx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates an smat action func based on a simple callback.
|
||||||
|
func smatWrap(cb func(c *smatContext)) func(smat.Context) (next smat.State, err error) {
|
||||||
|
return func(ctx smat.Context) (next smat.State, err error) {
|
||||||
|
c := ctx.(*smatContext)
|
||||||
|
cb(c)
|
||||||
|
return smatRunning, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Invokes a callback function with the input v bounded to len(c.pairs).
|
||||||
|
func (c *smatContext) withPair(v int, cb func(*smatPair)) {
|
||||||
|
if len(c.pairs) > 0 {
|
||||||
|
if v < 0 {
|
||||||
|
v = -v
|
||||||
|
}
|
||||||
|
v = v % len(c.pairs)
|
||||||
|
cb(c.pairs[v])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
|
||||||
|
func smatSetupFunc(ctx smat.Context) (next smat.State, err error) {
|
||||||
|
return smatRunning, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatTeardownFunc(ctx smat.Context) (next smat.State, err error) {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
|
||||||
|
func smatPushPair(c *smatContext) {
|
||||||
|
c.pairs = append(c.pairs, &smatPair{
|
||||||
|
bm: NewBitmap(),
|
||||||
|
bs: bitset.New(100),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatPopPair(c *smatContext) {
|
||||||
|
if len(c.pairs) > 0 {
|
||||||
|
c.pairs = c.pairs[0 : len(c.pairs)-1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatSetBit(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(p *smatPair) {
|
||||||
|
y := uint32(c.y)
|
||||||
|
p.bm.AddInt(int(y))
|
||||||
|
p.bs.Set(uint(y))
|
||||||
|
p.checkEquals()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatRemoveBit(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(p *smatPair) {
|
||||||
|
y := uint32(c.y)
|
||||||
|
p.bm.Remove(y)
|
||||||
|
p.bs.Clear(uint(y))
|
||||||
|
p.checkEquals()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatAnd(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
c.withPair(c.y, func(py *smatPair) {
|
||||||
|
px.bm.And(py.bm)
|
||||||
|
px.bs = px.bs.Intersection(py.bs)
|
||||||
|
px.checkEquals()
|
||||||
|
py.checkEquals()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatOr(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
c.withPair(c.y, func(py *smatPair) {
|
||||||
|
px.bm.Or(py.bm)
|
||||||
|
px.bs = px.bs.Union(py.bs)
|
||||||
|
px.checkEquals()
|
||||||
|
py.checkEquals()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatAndCardinality(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
c.withPair(c.y, func(py *smatPair) {
|
||||||
|
c0 := px.bm.AndCardinality(py.bm)
|
||||||
|
c1 := px.bs.IntersectionCardinality(py.bs)
|
||||||
|
if c0 != uint64(c1) {
|
||||||
|
panic("expected same add cardinality")
|
||||||
|
}
|
||||||
|
px.checkEquals()
|
||||||
|
py.checkEquals()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatOrCardinality(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
c.withPair(c.y, func(py *smatPair) {
|
||||||
|
c0 := px.bm.OrCardinality(py.bm)
|
||||||
|
c1 := px.bs.UnionCardinality(py.bs)
|
||||||
|
if c0 != uint64(c1) {
|
||||||
|
panic("expected same or cardinality")
|
||||||
|
}
|
||||||
|
px.checkEquals()
|
||||||
|
py.checkEquals()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatRunOptimize(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
px.bm.RunOptimize()
|
||||||
|
px.checkEquals()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatClear(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
px.bm.Clear()
|
||||||
|
px.bs = px.bs.ClearAll()
|
||||||
|
px.checkEquals()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatCardinality(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
c0 := px.bm.GetCardinality()
|
||||||
|
c1 := px.bs.Count()
|
||||||
|
if c0 != uint64(c1) {
|
||||||
|
panic("expected same cardinality")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatIsEmpty(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
c0 := px.bm.IsEmpty()
|
||||||
|
c1 := px.bs.None()
|
||||||
|
if c0 != c1 {
|
||||||
|
panic("expected same is empty")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatIntersects(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
c.withPair(c.y, func(py *smatPair) {
|
||||||
|
v0 := px.bm.Intersects(py.bm)
|
||||||
|
v1 := px.bs.IntersectionCardinality(py.bs) > 0
|
||||||
|
if v0 != v1 {
|
||||||
|
panic("intersects not equal")
|
||||||
|
}
|
||||||
|
|
||||||
|
px.checkEquals()
|
||||||
|
py.checkEquals()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatFlip(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(p *smatPair) {
|
||||||
|
y := uint32(c.y)
|
||||||
|
p.bm.Flip(uint64(y), uint64(y)+1)
|
||||||
|
p.bs = p.bs.Flip(uint(y))
|
||||||
|
p.checkEquals()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func smatDifference(c *smatContext) {
|
||||||
|
c.withPair(c.x, func(px *smatPair) {
|
||||||
|
c.withPair(c.y, func(py *smatPair) {
|
||||||
|
px.bm.AndNot(py.bm)
|
||||||
|
px.bs = px.bs.Difference(py.bs)
|
||||||
|
px.checkEquals()
|
||||||
|
py.checkEquals()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *smatPair) checkEquals() {
|
||||||
|
if !p.equalsBitSet(p.bs, p.bm) {
|
||||||
|
panic("bitset mismatch")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *smatPair) equalsBitSet(a *bitset.BitSet, b *Bitmap) bool {
|
||||||
|
for i, e := a.NextSet(0); e; i, e = a.NextSet(i + 1) {
|
||||||
|
if !b.ContainsInt(int(i)) {
|
||||||
|
fmt.Printf("in a bitset, not b bitmap, i: %d\n", i)
|
||||||
|
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
|
||||||
|
a.String(), b.String())
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
i := b.Iterator()
|
||||||
|
for i.HasNext() {
|
||||||
|
v := i.Next()
|
||||||
|
if !a.Test(uint(v)) {
|
||||||
|
fmt.Printf("in b bitmap, not a bitset, v: %d\n", v)
|
||||||
|
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
|
||||||
|
a.String(), b.String())
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
305
vendor/github.com/RoaringBitmap/roaring/util.go
generated
vendored
Normal file
305
vendor/github.com/RoaringBitmap/roaring/util.go
generated
vendored
Normal file
@@ -0,0 +1,305 @@
|
|||||||
|
package roaring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
"math/rand"
|
||||||
|
"sort"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
arrayDefaultMaxSize = 4096 // containers with 4096 or fewer integers should be array containers.
|
||||||
|
arrayLazyLowerBound = 1024
|
||||||
|
maxCapacity = 1 << 16
|
||||||
|
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
|
||||||
|
invalidCardinality = -1
|
||||||
|
serialCookie = 12347 // runs, arrays, and bitmaps
|
||||||
|
noOffsetThreshold = 4
|
||||||
|
|
||||||
|
// MaxUint32 is the largest uint32 value.
|
||||||
|
MaxUint32 = math.MaxUint32
|
||||||
|
|
||||||
|
// MaxRange is One more than the maximum allowed bitmap bit index. For use as an upper
|
||||||
|
// bound for ranges.
|
||||||
|
MaxRange uint64 = MaxUint32 + 1
|
||||||
|
|
||||||
|
// MaxUint16 is the largest 16 bit unsigned int.
|
||||||
|
// This is the largest value an interval16 can store.
|
||||||
|
MaxUint16 = math.MaxUint16
|
||||||
|
|
||||||
|
// Compute wordSizeInBytes, the size of a word in bytes.
|
||||||
|
_m = ^uint64(0)
|
||||||
|
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
|
||||||
|
wordSizeInBytes = 1 << _logS
|
||||||
|
|
||||||
|
// other constants used in ctz_generic.go
|
||||||
|
wordSizeInBits = wordSizeInBytes << 3 // word size in bits
|
||||||
|
)
|
||||||
|
|
||||||
|
const maxWord = 1<<wordSizeInBits - 1
|
||||||
|
|
||||||
|
// doesn't apply to runContainers
|
||||||
|
func getSizeInBytesFromCardinality(card int) int {
|
||||||
|
if card > arrayDefaultMaxSize {
|
||||||
|
// bitmapContainer
|
||||||
|
return maxCapacity / 8
|
||||||
|
}
|
||||||
|
// arrayContainer
|
||||||
|
return 2 * card
|
||||||
|
}
|
||||||
|
|
||||||
|
func fill(arr []uint64, val uint64) {
|
||||||
|
for i := range arr {
|
||||||
|
arr[i] = val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func fillRange(arr []uint64, start, end int, val uint64) {
|
||||||
|
for i := start; i < end; i++ {
|
||||||
|
arr[i] = val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func fillArrayAND(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||||
|
if len(bitmap1) != len(bitmap2) {
|
||||||
|
panic("array lengths don't match")
|
||||||
|
}
|
||||||
|
// TODO: rewrite in assembly
|
||||||
|
pos := 0
|
||||||
|
for k := range bitmap1 {
|
||||||
|
bitset := bitmap1[k] & bitmap2[k]
|
||||||
|
for bitset != 0 {
|
||||||
|
t := bitset & -bitset
|
||||||
|
container[pos] = uint16((k*64 + int(popcount(t-1))))
|
||||||
|
pos = pos + 1
|
||||||
|
bitset ^= t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func fillArrayANDNOT(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||||
|
if len(bitmap1) != len(bitmap2) {
|
||||||
|
panic("array lengths don't match")
|
||||||
|
}
|
||||||
|
// TODO: rewrite in assembly
|
||||||
|
pos := 0
|
||||||
|
for k := range bitmap1 {
|
||||||
|
bitset := bitmap1[k] &^ bitmap2[k]
|
||||||
|
for bitset != 0 {
|
||||||
|
t := bitset & -bitset
|
||||||
|
container[pos] = uint16((k*64 + int(popcount(t-1))))
|
||||||
|
pos = pos + 1
|
||||||
|
bitset ^= t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func fillArrayXOR(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||||
|
if len(bitmap1) != len(bitmap2) {
|
||||||
|
panic("array lengths don't match")
|
||||||
|
}
|
||||||
|
// TODO: rewrite in assembly
|
||||||
|
pos := 0
|
||||||
|
for k := 0; k < len(bitmap1); k++ {
|
||||||
|
bitset := bitmap1[k] ^ bitmap2[k]
|
||||||
|
for bitset != 0 {
|
||||||
|
t := bitset & -bitset
|
||||||
|
container[pos] = uint16((k*64 + int(popcount(t-1))))
|
||||||
|
pos = pos + 1
|
||||||
|
bitset ^= t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func highbits(x uint32) uint16 {
|
||||||
|
return uint16(x >> 16)
|
||||||
|
}
|
||||||
|
func lowbits(x uint32) uint16 {
|
||||||
|
return uint16(x & maxLowBit)
|
||||||
|
}
|
||||||
|
|
||||||
|
const maxLowBit = 0xFFFF
|
||||||
|
|
||||||
|
func flipBitmapRange(bitmap []uint64, start int, end int) {
|
||||||
|
if start >= end {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
firstword := start / 64
|
||||||
|
endword := (end - 1) / 64
|
||||||
|
bitmap[firstword] ^= ^(^uint64(0) << uint(start%64))
|
||||||
|
for i := firstword; i < endword; i++ {
|
||||||
|
bitmap[i] = ^bitmap[i]
|
||||||
|
}
|
||||||
|
bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
func resetBitmapRange(bitmap []uint64, start int, end int) {
|
||||||
|
if start >= end {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
firstword := start / 64
|
||||||
|
endword := (end - 1) / 64
|
||||||
|
if firstword == endword {
|
||||||
|
bitmap[firstword] &= ^((^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64)))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
bitmap[firstword] &= ^(^uint64(0) << uint(start%64))
|
||||||
|
for i := firstword + 1; i < endword; i++ {
|
||||||
|
bitmap[i] = 0
|
||||||
|
}
|
||||||
|
bitmap[endword] &= ^(^uint64(0) >> (uint(-end) % 64))
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func setBitmapRange(bitmap []uint64, start int, end int) {
|
||||||
|
if start >= end {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
firstword := start / 64
|
||||||
|
endword := (end - 1) / 64
|
||||||
|
if firstword == endword {
|
||||||
|
bitmap[firstword] |= (^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
bitmap[firstword] |= ^uint64(0) << uint(start%64)
|
||||||
|
for i := firstword + 1; i < endword; i++ {
|
||||||
|
bitmap[i] = ^uint64(0)
|
||||||
|
}
|
||||||
|
bitmap[endword] |= ^uint64(0) >> (uint(-end) % 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
func flipBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
|
||||||
|
before := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||||
|
flipBitmapRange(bitmap, start, end)
|
||||||
|
after := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||||
|
return int(after - before)
|
||||||
|
}
|
||||||
|
|
||||||
|
func resetBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
|
||||||
|
before := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||||
|
resetBitmapRange(bitmap, start, end)
|
||||||
|
after := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||||
|
return int(after - before)
|
||||||
|
}
|
||||||
|
|
||||||
|
func setBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
|
||||||
|
before := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||||
|
setBitmapRange(bitmap, start, end)
|
||||||
|
after := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||||
|
return int(after - before)
|
||||||
|
}
|
||||||
|
|
||||||
|
func wordCardinalityForBitmapRange(bitmap []uint64, start int, end int) uint64 {
|
||||||
|
answer := uint64(0)
|
||||||
|
if start >= end {
|
||||||
|
return answer
|
||||||
|
}
|
||||||
|
firstword := start / 64
|
||||||
|
endword := (end - 1) / 64
|
||||||
|
for i := firstword; i <= endword; i++ {
|
||||||
|
answer += popcount(bitmap[i])
|
||||||
|
}
|
||||||
|
return answer
|
||||||
|
}
|
||||||
|
|
||||||
|
func selectBitPosition(w uint64, j int) int {
|
||||||
|
seen := 0
|
||||||
|
|
||||||
|
// Divide 64bit
|
||||||
|
part := w & 0xFFFFFFFF
|
||||||
|
n := popcount(part)
|
||||||
|
if n <= uint64(j) {
|
||||||
|
part = w >> 32
|
||||||
|
seen += 32
|
||||||
|
j -= int(n)
|
||||||
|
}
|
||||||
|
w = part
|
||||||
|
|
||||||
|
// Divide 32bit
|
||||||
|
part = w & 0xFFFF
|
||||||
|
n = popcount(part)
|
||||||
|
if n <= uint64(j) {
|
||||||
|
part = w >> 16
|
||||||
|
seen += 16
|
||||||
|
j -= int(n)
|
||||||
|
}
|
||||||
|
w = part
|
||||||
|
|
||||||
|
// Divide 16bit
|
||||||
|
part = w & 0xFF
|
||||||
|
n = popcount(part)
|
||||||
|
if n <= uint64(j) {
|
||||||
|
part = w >> 8
|
||||||
|
seen += 8
|
||||||
|
j -= int(n)
|
||||||
|
}
|
||||||
|
w = part
|
||||||
|
|
||||||
|
// Lookup in final byte
|
||||||
|
var counter uint
|
||||||
|
for counter = 0; counter < 8; counter++ {
|
||||||
|
j -= int((w >> counter) & 1)
|
||||||
|
if j < 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return seen + int(counter)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func panicOn(err error) {
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type ph struct {
|
||||||
|
orig int
|
||||||
|
rand int
|
||||||
|
}
|
||||||
|
|
||||||
|
type pha []ph
|
||||||
|
|
||||||
|
func (p pha) Len() int { return len(p) }
|
||||||
|
func (p pha) Less(i, j int) bool { return p[i].rand < p[j].rand }
|
||||||
|
func (p pha) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|
||||||
|
|
||||||
|
func getRandomPermutation(n int) []int {
|
||||||
|
r := make([]ph, n)
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
r[i].orig = i
|
||||||
|
r[i].rand = rand.Intn(1 << 29)
|
||||||
|
}
|
||||||
|
sort.Sort(pha(r))
|
||||||
|
m := make([]int, n)
|
||||||
|
for i := range m {
|
||||||
|
m[i] = r[i].orig
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
func minOfInt(a, b int) int {
|
||||||
|
if a < b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func maxOfInt(a, b int) int {
|
||||||
|
if a > b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func maxOfUint16(a, b uint16) uint16 {
|
||||||
|
if a > b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func minOfUint16(a, b uint16) uint16 {
|
||||||
|
if a < b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
26
vendor/github.com/bits-and-blooms/bitset/.gitignore
generated
vendored
Normal file
26
vendor/github.com/bits-and-blooms/bitset/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||||
|
*.o
|
||||||
|
*.a
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Folders
|
||||||
|
_obj
|
||||||
|
_test
|
||||||
|
|
||||||
|
# Architecture specific extensions/prefixes
|
||||||
|
*.[568vq]
|
||||||
|
[568vq].out
|
||||||
|
|
||||||
|
*.cgo1.go
|
||||||
|
*.cgo2.c
|
||||||
|
_cgo_defun.c
|
||||||
|
_cgo_gotypes.go
|
||||||
|
_cgo_export.*
|
||||||
|
|
||||||
|
_testmain.go
|
||||||
|
|
||||||
|
*.exe
|
||||||
|
*.test
|
||||||
|
*.prof
|
||||||
|
|
||||||
|
target
|
||||||
37
vendor/github.com/bits-and-blooms/bitset/.travis.yml
generated
vendored
Normal file
37
vendor/github.com/bits-and-blooms/bitset/.travis.yml
generated
vendored
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
language: go
|
||||||
|
|
||||||
|
sudo: false
|
||||||
|
|
||||||
|
branches:
|
||||||
|
except:
|
||||||
|
- release
|
||||||
|
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- master
|
||||||
|
- travis
|
||||||
|
|
||||||
|
go:
|
||||||
|
- "1.11.x"
|
||||||
|
- tip
|
||||||
|
|
||||||
|
matrix:
|
||||||
|
allow_failures:
|
||||||
|
- go: tip
|
||||||
|
|
||||||
|
before_install:
|
||||||
|
- if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi;
|
||||||
|
- if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi;
|
||||||
|
- go get github.com/mattn/goveralls
|
||||||
|
|
||||||
|
before_script:
|
||||||
|
- make deps
|
||||||
|
|
||||||
|
script:
|
||||||
|
- make qa
|
||||||
|
|
||||||
|
after_failure:
|
||||||
|
- cat ./target/test/report.xml
|
||||||
|
|
||||||
|
after_success:
|
||||||
|
- if [ "$TRAVIS_GO_VERSION" = "1.11.1" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi;
|
||||||
27
vendor/github.com/bits-and-blooms/bitset/LICENSE
generated
vendored
Normal file
27
vendor/github.com/bits-and-blooms/bitset/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
Copyright (c) 2014 Will Fitzgerald. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
152
vendor/github.com/bits-and-blooms/bitset/README.md
generated
vendored
Normal file
152
vendor/github.com/bits-and-blooms/bitset/README.md
generated
vendored
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
# bitset
|
||||||
|
|
||||||
|
*Go language library to map between non-negative integers and boolean values*
|
||||||
|
|
||||||
|
[](https://github.com/willf/bitset/actions?query=workflow%3ATest)
|
||||||
|
[](https://goreportcard.com/report/github.com/willf/bitset)
|
||||||
|
[](https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc)
|
||||||
|
|
||||||
|
|
||||||
|
This library is part of the [awesome go collection](https://github.com/avelino/awesome-go). It is used in production by several important systems:
|
||||||
|
|
||||||
|
* [beego](https://github.com/beego/beego)
|
||||||
|
* [CubeFS](https://github.com/cubefs/cubefs)
|
||||||
|
* [Amazon EKS Distro](https://github.com/aws/eks-distro)
|
||||||
|
* [sourcegraph](https://github.com/sourcegraph/sourcegraph)
|
||||||
|
* [torrent](https://github.com/anacrolix/torrent)
|
||||||
|
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Package bitset implements bitsets, a mapping between non-negative integers and boolean values.
|
||||||
|
It should be more efficient than map[uint] bool.
|
||||||
|
|
||||||
|
It provides methods for setting, clearing, flipping, and testing individual integers.
|
||||||
|
|
||||||
|
But it also provides set intersection, union, difference, complement, and symmetric operations, as well as tests to check whether any, all, or no bits are set, and querying a bitset's current length and number of positive bits.
|
||||||
|
|
||||||
|
BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk. On creation, a hint can be given for the number of bits that will be used.
|
||||||
|
|
||||||
|
Many of the methods, including Set, Clear, and Flip, return a BitSet pointer, which allows for chaining.
|
||||||
|
|
||||||
|
### Example use:
|
||||||
|
|
||||||
|
```go
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
|
|
||||||
|
"github.com/bits-and-blooms/bitset"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
fmt.Printf("Hello from BitSet!\n")
|
||||||
|
var b bitset.BitSet
|
||||||
|
// play some Go Fish
|
||||||
|
for i := 0; i < 100; i++ {
|
||||||
|
card1 := uint(rand.Intn(52))
|
||||||
|
card2 := uint(rand.Intn(52))
|
||||||
|
b.Set(card1)
|
||||||
|
if b.Test(card2) {
|
||||||
|
fmt.Println("Go Fish!")
|
||||||
|
}
|
||||||
|
b.Clear(card1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Chaining
|
||||||
|
b.Set(10).Set(11)
|
||||||
|
|
||||||
|
for i, e := b.NextSet(0); e; i, e = b.NextSet(i + 1) {
|
||||||
|
fmt.Println("The following bit is set:", i)
|
||||||
|
}
|
||||||
|
if b.Intersection(bitset.New(100).Set(10)).Count() == 1 {
|
||||||
|
fmt.Println("Intersection works.")
|
||||||
|
} else {
|
||||||
|
fmt.Println("Intersection doesn't work???")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc
|
||||||
|
|
||||||
|
## Serialization
|
||||||
|
|
||||||
|
|
||||||
|
You may serialize a bitset safely and portably to a stream
|
||||||
|
of bytes as follows:
|
||||||
|
```Go
|
||||||
|
const length = 9585
|
||||||
|
const oneEvery = 97
|
||||||
|
bs := bitset.New(length)
|
||||||
|
// Add some bits
|
||||||
|
for i := uint(0); i < length; i += oneEvery {
|
||||||
|
bs = bs.Set(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
n, err := bs.WriteTo(&buf)
|
||||||
|
if err != nil {
|
||||||
|
// failure
|
||||||
|
}
|
||||||
|
// Here n == buf.Len()
|
||||||
|
```
|
||||||
|
You can later deserialize the result as follows:
|
||||||
|
|
||||||
|
```Go
|
||||||
|
// Read back from buf
|
||||||
|
bs = bitset.New()
|
||||||
|
n, err = bs.ReadFrom(&buf)
|
||||||
|
if err != nil {
|
||||||
|
// error
|
||||||
|
}
|
||||||
|
// n is the number of bytes read
|
||||||
|
```
|
||||||
|
|
||||||
|
The `ReadFrom` function attempts to read the data into the existing
|
||||||
|
BitSet instance, to minimize memory allocations.
|
||||||
|
|
||||||
|
|
||||||
|
*Performance tip*:
|
||||||
|
When reading and writing to a file or a network connection, you may get better performance by
|
||||||
|
wrapping your streams with `bufio` instances.
|
||||||
|
|
||||||
|
E.g.,
|
||||||
|
```Go
|
||||||
|
f, err := os.Create("myfile")
|
||||||
|
w := bufio.NewWriter(f)
|
||||||
|
```
|
||||||
|
```Go
|
||||||
|
f, err := os.Open("myfile")
|
||||||
|
r := bufio.NewReader(f)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Memory Usage
|
||||||
|
|
||||||
|
The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring).
|
||||||
|
|
||||||
|
## Implementation Note
|
||||||
|
|
||||||
|
Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed.
|
||||||
|
|
||||||
|
It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `uint64`). If so, the version will be bumped.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
go get github.com/bits-and-blooms/bitset
|
||||||
|
```
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)")
|
||||||
|
|
||||||
|
## Running all tests
|
||||||
|
|
||||||
|
Before committing the code, please check if it passes tests, has adequate coverage, etc.
|
||||||
|
```bash
|
||||||
|
go test
|
||||||
|
go test -cover
|
||||||
|
```
|
||||||
5
vendor/github.com/bits-and-blooms/bitset/SECURITY.md
generated
vendored
Normal file
5
vendor/github.com/bits-and-blooms/bitset/SECURITY.md
generated
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Security Policy
|
||||||
|
|
||||||
|
## Reporting a Vulnerability
|
||||||
|
|
||||||
|
You can report privately a vulnerability by email at daniel@lemire.me (current maintainer).
|
||||||
39
vendor/github.com/bits-and-blooms/bitset/azure-pipelines.yml
generated
vendored
Normal file
39
vendor/github.com/bits-and-blooms/bitset/azure-pipelines.yml
generated
vendored
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# Go
|
||||||
|
# Build your Go project.
|
||||||
|
# Add steps that test, save build artifacts, deploy, and more:
|
||||||
|
# https://docs.microsoft.com/azure/devops/pipelines/languages/go
|
||||||
|
|
||||||
|
trigger:
|
||||||
|
- master
|
||||||
|
|
||||||
|
pool:
|
||||||
|
vmImage: 'Ubuntu-16.04'
|
||||||
|
|
||||||
|
variables:
|
||||||
|
GOBIN: '$(GOPATH)/bin' # Go binaries path
|
||||||
|
GOROOT: '/usr/local/go1.11' # Go installation path
|
||||||
|
GOPATH: '$(system.defaultWorkingDirectory)/gopath' # Go workspace path
|
||||||
|
modulePath: '$(GOPATH)/src/github.com/$(build.repository.name)' # Path to the module's code
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- script: |
|
||||||
|
mkdir -p '$(GOBIN)'
|
||||||
|
mkdir -p '$(GOPATH)/pkg'
|
||||||
|
mkdir -p '$(modulePath)'
|
||||||
|
shopt -s extglob
|
||||||
|
shopt -s dotglob
|
||||||
|
mv !(gopath) '$(modulePath)'
|
||||||
|
echo '##vso[task.prependpath]$(GOBIN)'
|
||||||
|
echo '##vso[task.prependpath]$(GOROOT)/bin'
|
||||||
|
displayName: 'Set up the Go workspace'
|
||||||
|
|
||||||
|
- script: |
|
||||||
|
go version
|
||||||
|
go get -v -t -d ./...
|
||||||
|
if [ -f Gopkg.toml ]; then
|
||||||
|
curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
|
||||||
|
dep ensure
|
||||||
|
fi
|
||||||
|
go build -v .
|
||||||
|
workingDirectory: '$(modulePath)'
|
||||||
|
displayName: 'Get dependencies, then build'
|
||||||
1137
vendor/github.com/bits-and-blooms/bitset/bitset.go
generated
vendored
Normal file
1137
vendor/github.com/bits-and-blooms/bitset/bitset.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
53
vendor/github.com/bits-and-blooms/bitset/popcnt.go
generated
vendored
Normal file
53
vendor/github.com/bits-and-blooms/bitset/popcnt.go
generated
vendored
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
package bitset
|
||||||
|
|
||||||
|
// bit population count, take from
|
||||||
|
// https://code.google.com/p/go/issues/detail?id=4988#c11
|
||||||
|
// credit: https://code.google.com/u/arnehormann/
|
||||||
|
func popcount(x uint64) (n uint64) {
|
||||||
|
x -= (x >> 1) & 0x5555555555555555
|
||||||
|
x = (x>>2)&0x3333333333333333 + x&0x3333333333333333
|
||||||
|
x += x >> 4
|
||||||
|
x &= 0x0f0f0f0f0f0f0f0f
|
||||||
|
x *= 0x0101010101010101
|
||||||
|
return x >> 56
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntSliceGo(s []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for _, x := range s {
|
||||||
|
cnt += popcount(x)
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntMaskSliceGo(s, m []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for i := range s {
|
||||||
|
cnt += popcount(s[i] &^ m[i])
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntAndSliceGo(s, m []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for i := range s {
|
||||||
|
cnt += popcount(s[i] & m[i])
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntOrSliceGo(s, m []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for i := range s {
|
||||||
|
cnt += popcount(s[i] | m[i])
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntXorSliceGo(s, m []uint64) uint64 {
|
||||||
|
cnt := uint64(0)
|
||||||
|
for i := range s {
|
||||||
|
cnt += popcount(s[i] ^ m[i])
|
||||||
|
}
|
||||||
|
return cnt
|
||||||
|
}
|
||||||
62
vendor/github.com/bits-and-blooms/bitset/popcnt_19.go
generated
vendored
Normal file
62
vendor/github.com/bits-and-blooms/bitset/popcnt_19.go
generated
vendored
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
//go:build go1.9
|
||||||
|
// +build go1.9
|
||||||
|
|
||||||
|
package bitset
|
||||||
|
|
||||||
|
import "math/bits"
|
||||||
|
|
||||||
|
func popcntSlice(s []uint64) uint64 {
|
||||||
|
var cnt int
|
||||||
|
for _, x := range s {
|
||||||
|
cnt += bits.OnesCount64(x)
|
||||||
|
}
|
||||||
|
return uint64(cnt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||||
|
var cnt int
|
||||||
|
// this explicit check eliminates a bounds check in the loop
|
||||||
|
if len(m) < len(s) {
|
||||||
|
panic("mask slice is too short")
|
||||||
|
}
|
||||||
|
for i := range s {
|
||||||
|
cnt += bits.OnesCount64(s[i] &^ m[i])
|
||||||
|
}
|
||||||
|
return uint64(cnt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntAndSlice(s, m []uint64) uint64 {
|
||||||
|
var cnt int
|
||||||
|
// this explicit check eliminates a bounds check in the loop
|
||||||
|
if len(m) < len(s) {
|
||||||
|
panic("mask slice is too short")
|
||||||
|
}
|
||||||
|
for i := range s {
|
||||||
|
cnt += bits.OnesCount64(s[i] & m[i])
|
||||||
|
}
|
||||||
|
return uint64(cnt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntOrSlice(s, m []uint64) uint64 {
|
||||||
|
var cnt int
|
||||||
|
// this explicit check eliminates a bounds check in the loop
|
||||||
|
if len(m) < len(s) {
|
||||||
|
panic("mask slice is too short")
|
||||||
|
}
|
||||||
|
for i := range s {
|
||||||
|
cnt += bits.OnesCount64(s[i] | m[i])
|
||||||
|
}
|
||||||
|
return uint64(cnt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntXorSlice(s, m []uint64) uint64 {
|
||||||
|
var cnt int
|
||||||
|
// this explicit check eliminates a bounds check in the loop
|
||||||
|
if len(m) < len(s) {
|
||||||
|
panic("mask slice is too short")
|
||||||
|
}
|
||||||
|
for i := range s {
|
||||||
|
cnt += bits.OnesCount64(s[i] ^ m[i])
|
||||||
|
}
|
||||||
|
return uint64(cnt)
|
||||||
|
}
|
||||||
68
vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go
generated
vendored
Normal file
68
vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go
generated
vendored
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
//go:build !go1.9 && amd64 && !appengine
|
||||||
|
// +build !go1.9,amd64,!appengine
|
||||||
|
|
||||||
|
package bitset
|
||||||
|
|
||||||
|
// *** the following functions are defined in popcnt_amd64.s
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func hasAsm() bool
|
||||||
|
|
||||||
|
// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
|
||||||
|
var useAsm = hasAsm()
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntSliceAsm(s []uint64) uint64
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntMaskSliceAsm(s, m []uint64) uint64
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntAndSliceAsm(s, m []uint64) uint64
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntOrSliceAsm(s, m []uint64) uint64
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
func popcntXorSliceAsm(s, m []uint64) uint64
|
||||||
|
|
||||||
|
func popcntSlice(s []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntSliceAsm(s)
|
||||||
|
}
|
||||||
|
return popcntSliceGo(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntMaskSliceAsm(s, m)
|
||||||
|
}
|
||||||
|
return popcntMaskSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntAndSlice(s, m []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntAndSliceAsm(s, m)
|
||||||
|
}
|
||||||
|
return popcntAndSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntOrSlice(s, m []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntOrSliceAsm(s, m)
|
||||||
|
}
|
||||||
|
return popcntOrSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntXorSlice(s, m []uint64) uint64 {
|
||||||
|
if useAsm {
|
||||||
|
return popcntXorSliceAsm(s, m)
|
||||||
|
}
|
||||||
|
return popcntXorSliceGo(s, m)
|
||||||
|
}
|
||||||
104
vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.s
generated
vendored
Normal file
104
vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
// +build !go1.9
|
||||||
|
// +build amd64,!appengine
|
||||||
|
|
||||||
|
TEXT ·hasAsm(SB),4,$0-1
|
||||||
|
MOVQ $1, AX
|
||||||
|
CPUID
|
||||||
|
SHRQ $23, CX
|
||||||
|
ANDQ $1, CX
|
||||||
|
MOVB CX, ret+0(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
|
||||||
|
|
||||||
|
TEXT ·popcntSliceAsm(SB),4,$0-32
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntSliceEnd
|
||||||
|
popcntSliceLoop:
|
||||||
|
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
LOOP popcntSliceLoop
|
||||||
|
popcntSliceEnd:
|
||||||
|
MOVQ AX, ret+24(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntMaskSliceEnd
|
||||||
|
MOVQ m+24(FP), DI
|
||||||
|
popcntMaskSliceLoop:
|
||||||
|
MOVQ (DI), DX
|
||||||
|
NOTQ DX
|
||||||
|
ANDQ (SI), DX
|
||||||
|
POPCNTQ_DX_DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
ADDQ $8, DI
|
||||||
|
LOOP popcntMaskSliceLoop
|
||||||
|
popcntMaskSliceEnd:
|
||||||
|
MOVQ AX, ret+48(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·popcntAndSliceAsm(SB),4,$0-56
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntAndSliceEnd
|
||||||
|
MOVQ m+24(FP), DI
|
||||||
|
popcntAndSliceLoop:
|
||||||
|
MOVQ (DI), DX
|
||||||
|
ANDQ (SI), DX
|
||||||
|
POPCNTQ_DX_DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
ADDQ $8, DI
|
||||||
|
LOOP popcntAndSliceLoop
|
||||||
|
popcntAndSliceEnd:
|
||||||
|
MOVQ AX, ret+48(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·popcntOrSliceAsm(SB),4,$0-56
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntOrSliceEnd
|
||||||
|
MOVQ m+24(FP), DI
|
||||||
|
popcntOrSliceLoop:
|
||||||
|
MOVQ (DI), DX
|
||||||
|
ORQ (SI), DX
|
||||||
|
POPCNTQ_DX_DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
ADDQ $8, DI
|
||||||
|
LOOP popcntOrSliceLoop
|
||||||
|
popcntOrSliceEnd:
|
||||||
|
MOVQ AX, ret+48(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·popcntXorSliceAsm(SB),4,$0-56
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), CX
|
||||||
|
TESTQ CX, CX
|
||||||
|
JZ popcntXorSliceEnd
|
||||||
|
MOVQ m+24(FP), DI
|
||||||
|
popcntXorSliceLoop:
|
||||||
|
MOVQ (DI), DX
|
||||||
|
XORQ (SI), DX
|
||||||
|
POPCNTQ_DX_DX
|
||||||
|
ADDQ DX, AX
|
||||||
|
ADDQ $8, SI
|
||||||
|
ADDQ $8, DI
|
||||||
|
LOOP popcntXorSliceLoop
|
||||||
|
popcntXorSliceEnd:
|
||||||
|
MOVQ AX, ret+48(FP)
|
||||||
|
RET
|
||||||
25
vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go
generated
vendored
Normal file
25
vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go
generated
vendored
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
//go:build !go1.9 && (!amd64 || appengine)
|
||||||
|
// +build !go1.9
|
||||||
|
// +build !amd64 appengine
|
||||||
|
|
||||||
|
package bitset
|
||||||
|
|
||||||
|
func popcntSlice(s []uint64) uint64 {
|
||||||
|
return popcntSliceGo(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||||
|
return popcntMaskSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntAndSlice(s, m []uint64) uint64 {
|
||||||
|
return popcntAndSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntOrSlice(s, m []uint64) uint64 {
|
||||||
|
return popcntOrSliceGo(s, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func popcntXorSlice(s, m []uint64) uint64 {
|
||||||
|
return popcntXorSliceGo(s, m)
|
||||||
|
}
|
||||||
15
vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go
generated
vendored
Normal file
15
vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go
generated
vendored
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
//go:build !go1.9
|
||||||
|
// +build !go1.9
|
||||||
|
|
||||||
|
package bitset
|
||||||
|
|
||||||
|
var deBruijn = [...]byte{
|
||||||
|
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
|
||||||
|
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
|
||||||
|
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
|
||||||
|
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
|
||||||
|
}
|
||||||
|
|
||||||
|
func trailingZeroes64(v uint64) uint {
|
||||||
|
return uint(deBruijn[((v&-v)*0x03f79d71b4ca8b09)>>58])
|
||||||
|
}
|
||||||
10
vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go
generated
vendored
Normal file
10
vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go
generated
vendored
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
//go:build go1.9
|
||||||
|
// +build go1.9
|
||||||
|
|
||||||
|
package bitset
|
||||||
|
|
||||||
|
import "math/bits"
|
||||||
|
|
||||||
|
func trailingZeroes64(v uint64) uint {
|
||||||
|
return uint(bits.TrailingZeros64(v))
|
||||||
|
}
|
||||||
2
vendor/github.com/klauspost/compress/.gitattributes
generated
vendored
Normal file
2
vendor/github.com/klauspost/compress/.gitattributes
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
* -text
|
||||||
|
*.bin -text -diff
|
||||||
32
vendor/github.com/klauspost/compress/.gitignore
generated
vendored
Normal file
32
vendor/github.com/klauspost/compress/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||||
|
*.o
|
||||||
|
*.a
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Folders
|
||||||
|
_obj
|
||||||
|
_test
|
||||||
|
|
||||||
|
# Architecture specific extensions/prefixes
|
||||||
|
*.[568vq]
|
||||||
|
[568vq].out
|
||||||
|
|
||||||
|
*.cgo1.go
|
||||||
|
*.cgo2.c
|
||||||
|
_cgo_defun.c
|
||||||
|
_cgo_gotypes.go
|
||||||
|
_cgo_export.*
|
||||||
|
|
||||||
|
_testmain.go
|
||||||
|
|
||||||
|
*.exe
|
||||||
|
*.test
|
||||||
|
*.prof
|
||||||
|
/s2/cmd/_s2sx/sfx-exe
|
||||||
|
|
||||||
|
# Linux perf files
|
||||||
|
perf.data
|
||||||
|
perf.data.old
|
||||||
|
|
||||||
|
# gdb history
|
||||||
|
.gdb_history
|
||||||
123
vendor/github.com/klauspost/compress/.goreleaser.yml
generated
vendored
Normal file
123
vendor/github.com/klauspost/compress/.goreleaser.yml
generated
vendored
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
before:
|
||||||
|
hooks:
|
||||||
|
- ./gen.sh
|
||||||
|
|
||||||
|
builds:
|
||||||
|
-
|
||||||
|
id: "s2c"
|
||||||
|
binary: s2c
|
||||||
|
main: ./s2/cmd/s2c/main.go
|
||||||
|
flags:
|
||||||
|
- -trimpath
|
||||||
|
env:
|
||||||
|
- CGO_ENABLED=0
|
||||||
|
goos:
|
||||||
|
- aix
|
||||||
|
- linux
|
||||||
|
- freebsd
|
||||||
|
- netbsd
|
||||||
|
- windows
|
||||||
|
- darwin
|
||||||
|
goarch:
|
||||||
|
- 386
|
||||||
|
- amd64
|
||||||
|
- arm
|
||||||
|
- arm64
|
||||||
|
- ppc64
|
||||||
|
- ppc64le
|
||||||
|
- mips64
|
||||||
|
- mips64le
|
||||||
|
goarm:
|
||||||
|
- 7
|
||||||
|
-
|
||||||
|
id: "s2d"
|
||||||
|
binary: s2d
|
||||||
|
main: ./s2/cmd/s2d/main.go
|
||||||
|
flags:
|
||||||
|
- -trimpath
|
||||||
|
env:
|
||||||
|
- CGO_ENABLED=0
|
||||||
|
goos:
|
||||||
|
- aix
|
||||||
|
- linux
|
||||||
|
- freebsd
|
||||||
|
- netbsd
|
||||||
|
- windows
|
||||||
|
- darwin
|
||||||
|
goarch:
|
||||||
|
- 386
|
||||||
|
- amd64
|
||||||
|
- arm
|
||||||
|
- arm64
|
||||||
|
- ppc64
|
||||||
|
- ppc64le
|
||||||
|
- mips64
|
||||||
|
- mips64le
|
||||||
|
goarm:
|
||||||
|
- 7
|
||||||
|
-
|
||||||
|
id: "s2sx"
|
||||||
|
binary: s2sx
|
||||||
|
main: ./s2/cmd/_s2sx/main.go
|
||||||
|
flags:
|
||||||
|
- -modfile=s2sx.mod
|
||||||
|
- -trimpath
|
||||||
|
env:
|
||||||
|
- CGO_ENABLED=0
|
||||||
|
goos:
|
||||||
|
- aix
|
||||||
|
- linux
|
||||||
|
- freebsd
|
||||||
|
- netbsd
|
||||||
|
- windows
|
||||||
|
- darwin
|
||||||
|
goarch:
|
||||||
|
- 386
|
||||||
|
- amd64
|
||||||
|
- arm
|
||||||
|
- arm64
|
||||||
|
- ppc64
|
||||||
|
- ppc64le
|
||||||
|
- mips64
|
||||||
|
- mips64le
|
||||||
|
goarm:
|
||||||
|
- 7
|
||||||
|
|
||||||
|
archives:
|
||||||
|
-
|
||||||
|
id: s2-binaries
|
||||||
|
name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
|
||||||
|
format_overrides:
|
||||||
|
- goos: windows
|
||||||
|
format: zip
|
||||||
|
files:
|
||||||
|
- unpack/*
|
||||||
|
- s2/LICENSE
|
||||||
|
- s2/README.md
|
||||||
|
checksum:
|
||||||
|
name_template: 'checksums.txt'
|
||||||
|
snapshot:
|
||||||
|
version_template: "{{ .Tag }}-next"
|
||||||
|
changelog:
|
||||||
|
sort: asc
|
||||||
|
filters:
|
||||||
|
exclude:
|
||||||
|
- '^doc:'
|
||||||
|
- '^docs:'
|
||||||
|
- '^test:'
|
||||||
|
- '^tests:'
|
||||||
|
- '^Update\sREADME.md'
|
||||||
|
|
||||||
|
nfpms:
|
||||||
|
-
|
||||||
|
file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
|
||||||
|
vendor: Klaus Post
|
||||||
|
homepage: https://github.com/klauspost/compress
|
||||||
|
maintainer: Klaus Post <klauspost@gmail.com>
|
||||||
|
description: S2 Compression Tool
|
||||||
|
license: BSD 3-Clause
|
||||||
|
formats:
|
||||||
|
- deb
|
||||||
|
- rpm
|
||||||
304
vendor/github.com/klauspost/compress/LICENSE
generated
vendored
Normal file
304
vendor/github.com/klauspost/compress/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,304 @@
|
|||||||
|
Copyright (c) 2012 The Go Authors. All rights reserved.
|
||||||
|
Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Files: gzhttp/*
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright 2016-2017 The New York Times Company
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Files: s2/cmd/internal/readahead/*
|
||||||
|
|
||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2015 Klaus Post
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
|
||||||
|
---------------------
|
||||||
|
Files: snappy/*
|
||||||
|
Files: internal/snapref/*
|
||||||
|
|
||||||
|
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
Files: s2/cmd/internal/filepathx/*
|
||||||
|
|
||||||
|
Copyright 2016 The filepathx Authors
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
671
vendor/github.com/klauspost/compress/README.md
generated
vendored
Normal file
671
vendor/github.com/klauspost/compress/README.md
generated
vendored
Normal file
@@ -0,0 +1,671 @@
|
|||||||
|
# compress
|
||||||
|
|
||||||
|
This package provides various compression algorithms.
|
||||||
|
|
||||||
|
* [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression in pure Go.
|
||||||
|
* [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) is a high performance replacement for Snappy.
|
||||||
|
* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib).
|
||||||
|
* [snappy](https://github.com/klauspost/compress/tree/master/snappy) is a drop-in replacement for `github.com/golang/snappy` offering better compression and concurrent streams.
|
||||||
|
* [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding.
|
||||||
|
* [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently.
|
||||||
|
* [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation.
|
||||||
|
|
||||||
|
[](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories)
|
||||||
|
[](https://github.com/klauspost/compress/actions/workflows/go.yml)
|
||||||
|
[](https://sourcegraph.com/github.com/klauspost/compress?badge)
|
||||||
|
|
||||||
|
# package usage
|
||||||
|
|
||||||
|
Use `go get github.com/klauspost/compress@latest` to add it to your project.
|
||||||
|
|
||||||
|
This package will support the current Go version and 2 versions back.
|
||||||
|
|
||||||
|
* Use the `nounsafe` tag to disable all use of the "unsafe" package.
|
||||||
|
* Use the `noasm` tag to disable all assembly across packages.
|
||||||
|
|
||||||
|
Use the links above for more information on each.
|
||||||
|
|
||||||
|
# changelog
|
||||||
|
|
||||||
|
* Feb 19th, 2025 - [1.18.0](https://github.com/klauspost/compress/releases/tag/v1.18.0)
|
||||||
|
* Add unsafe little endian loaders https://github.com/klauspost/compress/pull/1036
|
||||||
|
* fix: check `r.err != nil` but return a nil value error `err` by @alingse in https://github.com/klauspost/compress/pull/1028
|
||||||
|
* flate: Simplify L4-6 loading https://github.com/klauspost/compress/pull/1043
|
||||||
|
* flate: Simplify matchlen (remove asm) https://github.com/klauspost/compress/pull/1045
|
||||||
|
* s2: Improve small block compression speed w/o asm https://github.com/klauspost/compress/pull/1048
|
||||||
|
* flate: Fix matchlen L5+L6 https://github.com/klauspost/compress/pull/1049
|
||||||
|
* flate: Cleanup & reduce casts https://github.com/klauspost/compress/pull/1050
|
||||||
|
|
||||||
|
* Oct 11th, 2024 - [1.17.11](https://github.com/klauspost/compress/releases/tag/v1.17.11)
|
||||||
|
* zstd: Fix extra CRC written with multiple Close calls https://github.com/klauspost/compress/pull/1017
|
||||||
|
* s2: Don't use stack for index tables https://github.com/klauspost/compress/pull/1014
|
||||||
|
* gzhttp: No content-type on no body response code by @juliens in https://github.com/klauspost/compress/pull/1011
|
||||||
|
* gzhttp: Do not set the content-type when response has no body by @kevinpollet in https://github.com/klauspost/compress/pull/1013
|
||||||
|
|
||||||
|
* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10)
|
||||||
|
* gzhttp: Add TransportAlwaysDecompress option. https://github.com/klauspost/compress/pull/978
|
||||||
|
* gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002
|
||||||
|
* s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982
|
||||||
|
* zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007
|
||||||
|
* flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996
|
||||||
|
|
||||||
|
* Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9)
|
||||||
|
* s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949
|
||||||
|
* flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963
|
||||||
|
* Upgrade zip/zlib to 1.22.4 upstream https://github.com/klauspost/compress/pull/970 https://github.com/klauspost/compress/pull/971
|
||||||
|
* zstd: BuildDict fails with RLE table https://github.com/klauspost/compress/pull/951
|
||||||
|
|
||||||
|
* Apr 9th, 2024 - [1.17.8](https://github.com/klauspost/compress/releases/tag/v1.17.8)
|
||||||
|
* zstd: Reject blocks where reserved values are not 0 https://github.com/klauspost/compress/pull/885
|
||||||
|
* zstd: Add RLE detection+encoding https://github.com/klauspost/compress/pull/938
|
||||||
|
|
||||||
|
* Feb 21st, 2024 - [1.17.7](https://github.com/klauspost/compress/releases/tag/v1.17.7)
|
||||||
|
* s2: Add AsyncFlush method: Complete the block without flushing by @Jille in https://github.com/klauspost/compress/pull/927
|
||||||
|
* s2: Fix literal+repeat exceeds dst crash https://github.com/klauspost/compress/pull/930
|
||||||
|
|
||||||
|
* Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6)
|
||||||
|
* zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923
|
||||||
|
* s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925
|
||||||
|
|
||||||
|
* Jan 26th, 2024 - [v1.17.5](https://github.com/klauspost/compress/releases/tag/v1.17.5)
|
||||||
|
* flate: Fix reset with dictionary on custom window encodes https://github.com/klauspost/compress/pull/912
|
||||||
|
* zstd: Add Frame header encoding and stripping https://github.com/klauspost/compress/pull/908
|
||||||
|
* zstd: Limit better/best default window to 8MB https://github.com/klauspost/compress/pull/913
|
||||||
|
* zstd: Speed improvements by @greatroar in https://github.com/klauspost/compress/pull/896 https://github.com/klauspost/compress/pull/910
|
||||||
|
* s2: Fix callbacks for skippable blocks and disallow 0xfe (Padding) by @Jille in https://github.com/klauspost/compress/pull/916 https://github.com/klauspost/compress/pull/917
|
||||||
|
https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/compress/pull/918
|
||||||
|
|
||||||
|
* Dec 1st, 2023 - [v1.17.4](https://github.com/klauspost/compress/releases/tag/v1.17.4)
|
||||||
|
* huff0: Speed up symbol counting by @greatroar in https://github.com/klauspost/compress/pull/887
|
||||||
|
* huff0: Remove byteReader by @greatroar in https://github.com/klauspost/compress/pull/886
|
||||||
|
* gzhttp: Allow overriding decompression on transport https://github.com/klauspost/compress/pull/892
|
||||||
|
* gzhttp: Clamp compression level https://github.com/klauspost/compress/pull/890
|
||||||
|
* gzip: Error out if reserved bits are set https://github.com/klauspost/compress/pull/891
|
||||||
|
|
||||||
|
* Nov 15th, 2023 - [v1.17.3](https://github.com/klauspost/compress/releases/tag/v1.17.3)
|
||||||
|
* fse: Fix max header size https://github.com/klauspost/compress/pull/881
|
||||||
|
* zstd: Improve better/best compression https://github.com/klauspost/compress/pull/877
|
||||||
|
* gzhttp: Fix missing content type on Close https://github.com/klauspost/compress/pull/883
|
||||||
|
|
||||||
|
* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2)
|
||||||
|
* zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876
|
||||||
|
|
||||||
|
* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1)
|
||||||
|
* s2: Fix S2 "best" dictionary wrong encoding https://github.com/klauspost/compress/pull/871
|
||||||
|
* flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869
|
||||||
|
* s2: Fix EstimateBlockSize on 6&7 length input https://github.com/klauspost/compress/pull/867
|
||||||
|
|
||||||
|
* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
|
||||||
|
* Add experimental dictionary builder https://github.com/klauspost/compress/pull/853
|
||||||
|
* Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838
|
||||||
|
* flate: Add limited window compression https://github.com/klauspost/compress/pull/843
|
||||||
|
* s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839
|
||||||
|
* flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837
|
||||||
|
* gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes to v1.16.x</summary>
|
||||||
|
|
||||||
|
|
||||||
|
* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
|
||||||
|
* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
|
||||||
|
* s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
|
||||||
|
|
||||||
|
* June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6)
|
||||||
|
* zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806
|
||||||
|
* zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824
|
||||||
|
* gzhttp: Handle informational headers by @rtribotte in https://github.com/klauspost/compress/pull/815
|
||||||
|
* s2: Improve Better compression slightly https://github.com/klauspost/compress/pull/663
|
||||||
|
|
||||||
|
* Apr 16, 2023 - [v1.16.5](https://github.com/klauspost/compress/releases/tag/v1.16.5)
|
||||||
|
* zstd: readByte needs to use io.ReadFull by @jnoxon in https://github.com/klauspost/compress/pull/802
|
||||||
|
* gzip: Fix WriterTo after initial read https://github.com/klauspost/compress/pull/804
|
||||||
|
|
||||||
|
* Apr 5, 2023 - [v1.16.4](https://github.com/klauspost/compress/releases/tag/v1.16.4)
|
||||||
|
* zstd: Improve zstd best efficiency by @greatroar and @klauspost in https://github.com/klauspost/compress/pull/784
|
||||||
|
* zstd: Respect WithAllLitEntropyCompression https://github.com/klauspost/compress/pull/792
|
||||||
|
* zstd: Fix amd64 not always detecting corrupt data https://github.com/klauspost/compress/pull/785
|
||||||
|
* zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795
|
||||||
|
* s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779
|
||||||
|
* s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780
|
||||||
|
* gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799
|
||||||
|
|
||||||
|
* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1)
|
||||||
|
* zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776
|
||||||
|
* gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767
|
||||||
|
* s2: Add Intel LZ4s converter https://github.com/klauspost/compress/pull/766
|
||||||
|
* zstd: Minor bug fixes https://github.com/klauspost/compress/pull/771 https://github.com/klauspost/compress/pull/772 https://github.com/klauspost/compress/pull/773
|
||||||
|
* huff0: Speed up compress1xDo by @greatroar in https://github.com/klauspost/compress/pull/774
|
||||||
|
|
||||||
|
* Feb 26, 2023 - [v1.16.0](https://github.com/klauspost/compress/releases/tag/v1.16.0)
|
||||||
|
* s2: Add [Dictionary](https://github.com/klauspost/compress/tree/master/s2#dictionaries) support. https://github.com/klauspost/compress/pull/685
|
||||||
|
* s2: Add Compression Size Estimate. https://github.com/klauspost/compress/pull/752
|
||||||
|
* s2: Add support for custom stream encoder. https://github.com/klauspost/compress/pull/755
|
||||||
|
* s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748
|
||||||
|
* s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
|
||||||
|
* s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes to v1.15.x</summary>
|
||||||
|
|
||||||
|
* Jan 21st, 2023 (v1.15.15)
|
||||||
|
* deflate: Improve level 7-9 https://github.com/klauspost/compress/pull/739
|
||||||
|
* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
|
||||||
|
* zstd: Various speed improvements by @greatroar https://github.com/klauspost/compress/pull/741 https://github.com/klauspost/compress/pull/734 https://github.com/klauspost/compress/pull/736 https://github.com/klauspost/compress/pull/744 https://github.com/klauspost/compress/pull/743 https://github.com/klauspost/compress/pull/745
|
||||||
|
* gzhttp: Add SuffixETag() and DropETag() options to prevent ETag collisions on compressed responses by @willbicks in https://github.com/klauspost/compress/pull/740
|
||||||
|
|
||||||
|
* Jan 3rd, 2023 (v1.15.14)
|
||||||
|
|
||||||
|
* flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718
|
||||||
|
* zstd: Minor speed tweaks by @greatroar in https://github.com/klauspost/compress/pull/716 https://github.com/klauspost/compress/pull/720
|
||||||
|
* export NoGzipResponseWriter for custom ResponseWriter wrappers by @harshavardhana in https://github.com/klauspost/compress/pull/722
|
||||||
|
* s2: Add example for indexing and existing stream https://github.com/klauspost/compress/pull/723
|
||||||
|
|
||||||
|
* Dec 11, 2022 (v1.15.13)
|
||||||
|
* zstd: Add [MaxEncodedSize](https://pkg.go.dev/github.com/klauspost/compress@v1.15.13/zstd#Encoder.MaxEncodedSize) to encoder https://github.com/klauspost/compress/pull/691
|
||||||
|
* zstd: Various tweaks and improvements https://github.com/klauspost/compress/pull/693 https://github.com/klauspost/compress/pull/695 https://github.com/klauspost/compress/pull/696 https://github.com/klauspost/compress/pull/701 https://github.com/klauspost/compress/pull/702 https://github.com/klauspost/compress/pull/703 https://github.com/klauspost/compress/pull/704 https://github.com/klauspost/compress/pull/705 https://github.com/klauspost/compress/pull/706 https://github.com/klauspost/compress/pull/707 https://github.com/klauspost/compress/pull/708
|
||||||
|
|
||||||
|
* Oct 26, 2022 (v1.15.12)
|
||||||
|
|
||||||
|
* zstd: Tweak decoder allocs. https://github.com/klauspost/compress/pull/680
|
||||||
|
* gzhttp: Always delete `HeaderNoCompression` https://github.com/klauspost/compress/pull/683
|
||||||
|
|
||||||
|
* Sept 26, 2022 (v1.15.11)
|
||||||
|
|
||||||
|
* flate: Improve level 1-3 compression https://github.com/klauspost/compress/pull/678
|
||||||
|
* zstd: Improve "best" compression by @nightwolfz in https://github.com/klauspost/compress/pull/677
|
||||||
|
* zstd: Fix+reduce decompression allocations https://github.com/klauspost/compress/pull/668
|
||||||
|
* zstd: Fix non-effective noescape tag https://github.com/klauspost/compress/pull/667
|
||||||
|
|
||||||
|
* Sept 16, 2022 (v1.15.10)
|
||||||
|
|
||||||
|
* zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649
|
||||||
|
* Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651
|
||||||
|
* flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656
|
||||||
|
* zstd: Improve "better" compression https://github.com/klauspost/compress/pull/657
|
||||||
|
* s2: Improve "best" compression https://github.com/klauspost/compress/pull/658
|
||||||
|
* s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635
|
||||||
|
* s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646
|
||||||
|
* Use arrays for constant size copies https://github.com/klauspost/compress/pull/659
|
||||||
|
|
||||||
|
* July 21, 2022 (v1.15.9)
|
||||||
|
|
||||||
|
* zstd: Fix decoder crash on amd64 (no BMI) on invalid input https://github.com/klauspost/compress/pull/645
|
||||||
|
* zstd: Disable decoder extended memory copies (amd64) due to possible crashes https://github.com/klauspost/compress/pull/644
|
||||||
|
* zstd: Allow single segments up to "max decoded size" https://github.com/klauspost/compress/pull/643
|
||||||
|
|
||||||
|
* July 13, 2022 (v1.15.8)
|
||||||
|
|
||||||
|
* gzip: fix stack exhaustion bug in Reader.Read https://github.com/klauspost/compress/pull/641
|
||||||
|
* s2: Add Index header trim/restore https://github.com/klauspost/compress/pull/638
|
||||||
|
* zstd: Optimize seqdeq amd64 asm by @greatroar in https://github.com/klauspost/compress/pull/636
|
||||||
|
* zstd: Improve decoder memcopy https://github.com/klauspost/compress/pull/637
|
||||||
|
* huff0: Pass a single bitReader pointer to asm by @greatroar in https://github.com/klauspost/compress/pull/634
|
||||||
|
* zstd: Branchless getBits for amd64 w/o BMI2 by @greatroar in https://github.com/klauspost/compress/pull/640
|
||||||
|
* gzhttp: Remove header before writing https://github.com/klauspost/compress/pull/639
|
||||||
|
|
||||||
|
* June 29, 2022 (v1.15.7)
|
||||||
|
|
||||||
|
* s2: Fix absolute forward seeks https://github.com/klauspost/compress/pull/633
|
||||||
|
* zip: Merge upstream https://github.com/klauspost/compress/pull/631
|
||||||
|
* zip: Re-add zip64 fix https://github.com/klauspost/compress/pull/624
|
||||||
|
* zstd: translate fseDecoder.buildDtable into asm by @WojciechMula in https://github.com/klauspost/compress/pull/598
|
||||||
|
* flate: Faster histograms https://github.com/klauspost/compress/pull/620
|
||||||
|
* deflate: Use compound hcode https://github.com/klauspost/compress/pull/622
|
||||||
|
|
||||||
|
* June 3, 2022 (v1.15.6)
|
||||||
|
* s2: Improve coding for long, close matches https://github.com/klauspost/compress/pull/613
|
||||||
|
* s2c: Add Snappy/S2 stream recompression https://github.com/klauspost/compress/pull/611
|
||||||
|
* zstd: Always use configured block size https://github.com/klauspost/compress/pull/605
|
||||||
|
* zstd: Fix incorrect hash table placement for dict encoding in default https://github.com/klauspost/compress/pull/606
|
||||||
|
* zstd: Apply default config to ZipDecompressor without options https://github.com/klauspost/compress/pull/608
|
||||||
|
* gzhttp: Exclude more common archive formats https://github.com/klauspost/compress/pull/612
|
||||||
|
* s2: Add ReaderIgnoreCRC https://github.com/klauspost/compress/pull/609
|
||||||
|
* s2: Remove sanity load on index creation https://github.com/klauspost/compress/pull/607
|
||||||
|
* snappy: Use dedicated function for scoring https://github.com/klauspost/compress/pull/614
|
||||||
|
* s2c+s2d: Use official snappy framed extension https://github.com/klauspost/compress/pull/610
|
||||||
|
|
||||||
|
* May 25, 2022 (v1.15.5)
|
||||||
|
* s2: Add concurrent stream decompression https://github.com/klauspost/compress/pull/602
|
||||||
|
* s2: Fix final emit oob read crash on amd64 https://github.com/klauspost/compress/pull/601
|
||||||
|
* huff0: asm implementation of Decompress1X by @WojciechMula https://github.com/klauspost/compress/pull/596
|
||||||
|
* zstd: Use 1 less goroutine for stream decoding https://github.com/klauspost/compress/pull/588
|
||||||
|
* zstd: Copy literal in 16 byte blocks when possible https://github.com/klauspost/compress/pull/592
|
||||||
|
* zstd: Speed up when WithDecoderLowmem(false) https://github.com/klauspost/compress/pull/599
|
||||||
|
* zstd: faster next state update in BMI2 version of decode by @WojciechMula in https://github.com/klauspost/compress/pull/593
|
||||||
|
* huff0: Do not check max size when reading table. https://github.com/klauspost/compress/pull/586
|
||||||
|
* flate: Inplace hashing for level 7-9 https://github.com/klauspost/compress/pull/590
|
||||||
|
|
||||||
|
|
||||||
|
* May 11, 2022 (v1.15.4)
|
||||||
|
* huff0: decompress directly into output by @WojciechMula in [#577](https://github.com/klauspost/compress/pull/577)
|
||||||
|
* inflate: Keep dict on stack [#581](https://github.com/klauspost/compress/pull/581)
|
||||||
|
* zstd: Faster decoding memcopy in asm [#583](https://github.com/klauspost/compress/pull/583)
|
||||||
|
* zstd: Fix ignored crc [#580](https://github.com/klauspost/compress/pull/580)
|
||||||
|
|
||||||
|
* May 5, 2022 (v1.15.3)
|
||||||
|
* zstd: Allow to ignore checksum checking by @WojciechMula [#572](https://github.com/klauspost/compress/pull/572)
|
||||||
|
* s2: Fix incorrect seek for io.SeekEnd in [#575](https://github.com/klauspost/compress/pull/575)
|
||||||
|
|
||||||
|
* Apr 26, 2022 (v1.15.2)
|
||||||
|
* zstd: Add x86-64 assembly for decompression on streams and blocks. Contributed by [@WojciechMula](https://github.com/WojciechMula). Typically 2x faster. [#528](https://github.com/klauspost/compress/pull/528) [#531](https://github.com/klauspost/compress/pull/531) [#545](https://github.com/klauspost/compress/pull/545) [#537](https://github.com/klauspost/compress/pull/537)
|
||||||
|
* zstd: Add options to ZipDecompressor and fixes [#539](https://github.com/klauspost/compress/pull/539)
|
||||||
|
* s2: Use sorted search for index [#555](https://github.com/klauspost/compress/pull/555)
|
||||||
|
* Minimum version is Go 1.16, added CI test on 1.18.
|
||||||
|
|
||||||
|
* Mar 11, 2022 (v1.15.1)
|
||||||
|
* huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512)
|
||||||
|
* zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514)
|
||||||
|
* zstd: Detect extra block data and report as corrupted in [#520](https://github.com/klauspost/compress/pull/520)
|
||||||
|
* zstd: Handle zero sized frame content size stricter in [#521](https://github.com/klauspost/compress/pull/521)
|
||||||
|
* zstd: Add stricter block size checks in [#523](https://github.com/klauspost/compress/pull/523)
|
||||||
|
|
||||||
|
* Mar 3, 2022 (v1.15.0)
|
||||||
|
* zstd: Refactor decoder [#498](https://github.com/klauspost/compress/pull/498)
|
||||||
|
* zstd: Add stream encoding without goroutines [#505](https://github.com/klauspost/compress/pull/505)
|
||||||
|
* huff0: Prevent single blocks exceeding 16 bits by @klauspost in[#507](https://github.com/klauspost/compress/pull/507)
|
||||||
|
* flate: Inline literal emission [#509](https://github.com/klauspost/compress/pull/509)
|
||||||
|
* gzhttp: Add zstd to transport [#400](https://github.com/klauspost/compress/pull/400)
|
||||||
|
* gzhttp: Make content-type optional [#510](https://github.com/klauspost/compress/pull/510)
|
||||||
|
|
||||||
|
Both compression and decompression now supports "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines.
|
||||||
|
|
||||||
|
Stream decompression is now faster on asynchronous, since the goroutine allocation much more effectively splits the workload. On typical streams this will typically use 2 cores fully for decompression. When a stream has finished decoding no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected.
|
||||||
|
|
||||||
|
While the release has been extensively tested, it is recommended to testing when upgrading.
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes to v1.14.x</summary>
|
||||||
|
|
||||||
|
* Feb 22, 2022 (v1.14.4)
|
||||||
|
* flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503)
|
||||||
|
* zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502)
|
||||||
|
* zip: don't read data descriptor early by @saracen in [#501](https://github.com/klauspost/compress/pull/501) #501
|
||||||
|
* huff0: Use static decompression buffer up to 30% faster [#499](https://github.com/klauspost/compress/pull/499) [#500](https://github.com/klauspost/compress/pull/500)
|
||||||
|
|
||||||
|
* Feb 17, 2022 (v1.14.3)
|
||||||
|
* flate: Improve fastest levels compression speed ~10% more throughput. [#482](https://github.com/klauspost/compress/pull/482) [#489](https://github.com/klauspost/compress/pull/489) [#490](https://github.com/klauspost/compress/pull/490) [#491](https://github.com/klauspost/compress/pull/491) [#494](https://github.com/klauspost/compress/pull/494) [#478](https://github.com/klauspost/compress/pull/478)
|
||||||
|
* flate: Faster decompression speed, ~5-10%. [#483](https://github.com/klauspost/compress/pull/483)
|
||||||
|
* s2: Faster compression with Go v1.18 and amd64 microarch level 3+. [#484](https://github.com/klauspost/compress/pull/484) [#486](https://github.com/klauspost/compress/pull/486)
|
||||||
|
|
||||||
|
* Jan 25, 2022 (v1.14.2)
|
||||||
|
* zstd: improve header decoder by @dsnet [#476](https://github.com/klauspost/compress/pull/476)
|
||||||
|
* zstd: Add bigger default blocks [#469](https://github.com/klauspost/compress/pull/469)
|
||||||
|
* zstd: Remove unused decompression buffer [#470](https://github.com/klauspost/compress/pull/470)
|
||||||
|
* zstd: Fix logically dead code by @ningmingxiao [#472](https://github.com/klauspost/compress/pull/472)
|
||||||
|
* flate: Improve level 7-9 [#471](https://github.com/klauspost/compress/pull/471) [#473](https://github.com/klauspost/compress/pull/473)
|
||||||
|
* zstd: Add noasm tag for xxhash [#475](https://github.com/klauspost/compress/pull/475)
|
||||||
|
|
||||||
|
* Jan 11, 2022 (v1.14.1)
|
||||||
|
* s2: Add stream index in [#462](https://github.com/klauspost/compress/pull/462)
|
||||||
|
* flate: Speed and efficiency improvements in [#439](https://github.com/klauspost/compress/pull/439) [#461](https://github.com/klauspost/compress/pull/461) [#455](https://github.com/klauspost/compress/pull/455) [#452](https://github.com/klauspost/compress/pull/452) [#458](https://github.com/klauspost/compress/pull/458)
|
||||||
|
* zstd: Performance improvement in [#420]( https://github.com/klauspost/compress/pull/420) [#456](https://github.com/klauspost/compress/pull/456) [#437](https://github.com/klauspost/compress/pull/437) [#467](https://github.com/klauspost/compress/pull/467) [#468](https://github.com/klauspost/compress/pull/468)
|
||||||
|
* zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464)
|
||||||
|
* Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445)
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes to v1.13.x</summary>
|
||||||
|
|
||||||
|
* Aug 30, 2021 (v1.13.5)
|
||||||
|
* gz/zlib/flate: Alias stdlib errors [#425](https://github.com/klauspost/compress/pull/425)
|
||||||
|
* s2: Add block support to commandline tools [#413](https://github.com/klauspost/compress/pull/413)
|
||||||
|
* zstd: pooledZipWriter should return Writers to the same pool [#426](https://github.com/klauspost/compress/pull/426)
|
||||||
|
* Removed golang/snappy as external dependency for tests [#421](https://github.com/klauspost/compress/pull/421)
|
||||||
|
|
||||||
|
* Aug 12, 2021 (v1.13.4)
|
||||||
|
* Add [snappy replacement package](https://github.com/klauspost/compress/tree/master/snappy).
|
||||||
|
* zstd: Fix incorrect encoding in "best" mode [#415](https://github.com/klauspost/compress/pull/415)
|
||||||
|
|
||||||
|
* Aug 3, 2021 (v1.13.3)
|
||||||
|
* zstd: Improve Best compression [#404](https://github.com/klauspost/compress/pull/404)
|
||||||
|
* zstd: Fix WriteTo error forwarding [#411](https://github.com/klauspost/compress/pull/411)
|
||||||
|
* gzhttp: Return http.HandlerFunc instead of http.Handler. Unlikely breaking change. [#406](https://github.com/klauspost/compress/pull/406)
|
||||||
|
* s2sx: Fix max size error [#399](https://github.com/klauspost/compress/pull/399)
|
||||||
|
* zstd: Add optional stream content size on reset [#401](https://github.com/klauspost/compress/pull/401)
|
||||||
|
* zstd: use SpeedBestCompression for level >= 10 [#410](https://github.com/klauspost/compress/pull/410)
|
||||||
|
|
||||||
|
* Jun 14, 2021 (v1.13.1)
|
||||||
|
* s2: Add full Snappy output support [#396](https://github.com/klauspost/compress/pull/396)
|
||||||
|
* zstd: Add configurable [Decoder window](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithDecoderMaxWindow) size [#394](https://github.com/klauspost/compress/pull/394)
|
||||||
|
* gzhttp: Add header to skip compression [#389](https://github.com/klauspost/compress/pull/389)
|
||||||
|
* s2: Improve speed with bigger output margin [#395](https://github.com/klauspost/compress/pull/395)
|
||||||
|
|
||||||
|
* Jun 3, 2021 (v1.13.0)
|
||||||
|
* Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors.
|
||||||
|
* zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382)
|
||||||
|
* zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380)
|
||||||
|
</details>
|
||||||
|
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes to v1.12.x</summary>
|
||||||
|
|
||||||
|
* May 25, 2021 (v1.12.3)
|
||||||
|
* deflate: Better/faster Huffman encoding [#374](https://github.com/klauspost/compress/pull/374)
|
||||||
|
* deflate: Allocate less for history. [#375](https://github.com/klauspost/compress/pull/375)
|
||||||
|
* zstd: Forward read errors [#373](https://github.com/klauspost/compress/pull/373)
|
||||||
|
|
||||||
|
* Apr 27, 2021 (v1.12.2)
|
||||||
|
* zstd: Improve better/best compression [#360](https://github.com/klauspost/compress/pull/360) [#364](https://github.com/klauspost/compress/pull/364) [#365](https://github.com/klauspost/compress/pull/365)
|
||||||
|
* zstd: Add helpers to compress/decompress zstd inside zip files [#363](https://github.com/klauspost/compress/pull/363)
|
||||||
|
* deflate: Improve level 5+6 compression [#367](https://github.com/klauspost/compress/pull/367)
|
||||||
|
* s2: Improve better/best compression [#358](https://github.com/klauspost/compress/pull/358) [#359](https://github.com/klauspost/compress/pull/358)
|
||||||
|
* s2: Load after checking src limit on amd64. [#362](https://github.com/klauspost/compress/pull/362)
|
||||||
|
* s2sx: Limit max executable size [#368](https://github.com/klauspost/compress/pull/368)
|
||||||
|
|
||||||
|
* Apr 14, 2021 (v1.12.1)
|
||||||
|
* snappy package removed. Upstream added as dependency.
|
||||||
|
* s2: Better compression in "best" mode [#353](https://github.com/klauspost/compress/pull/353)
|
||||||
|
* s2sx: Add stdin input and detect pre-compressed from signature [#352](https://github.com/klauspost/compress/pull/352)
|
||||||
|
* s2c/s2d: Add http as possible input [#348](https://github.com/klauspost/compress/pull/348)
|
||||||
|
* s2c/s2d/s2sx: Always truncate when writing files [#352](https://github.com/klauspost/compress/pull/352)
|
||||||
|
* zstd: Reduce memory usage further when using [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) [#346](https://github.com/klauspost/compress/pull/346)
|
||||||
|
* s2: Fix potential problem with amd64 assembly and profilers [#349](https://github.com/klauspost/compress/pull/349)
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes to v1.11.x</summary>
|
||||||
|
|
||||||
|
* Mar 26, 2021 (v1.11.13)
|
||||||
|
* zstd: Big speedup on small dictionary encodes [#344](https://github.com/klauspost/compress/pull/344) [#345](https://github.com/klauspost/compress/pull/345)
|
||||||
|
* zstd: Add [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) encoder option [#336](https://github.com/klauspost/compress/pull/336)
|
||||||
|
* deflate: Improve entropy compression [#338](https://github.com/klauspost/compress/pull/338)
|
||||||
|
* s2: Clean up and minor performance improvement in best [#341](https://github.com/klauspost/compress/pull/341)
|
||||||
|
|
||||||
|
* Mar 5, 2021 (v1.11.12)
|
||||||
|
* s2: Add `s2sx` binary that creates [self extracting archives](https://github.com/klauspost/compress/tree/master/s2#s2sx-self-extracting-archives).
|
||||||
|
* s2: Speed up decompression on non-assembly platforms [#328](https://github.com/klauspost/compress/pull/328)
|
||||||
|
|
||||||
|
* Mar 1, 2021 (v1.11.9)
|
||||||
|
* s2: Add ARM64 decompression assembly. Around 2x output speed. [#324](https://github.com/klauspost/compress/pull/324)
|
||||||
|
* s2: Improve "better" speed and efficiency. [#325](https://github.com/klauspost/compress/pull/325)
|
||||||
|
* s2: Fix binaries.
|
||||||
|
|
||||||
|
* Feb 25, 2021 (v1.11.8)
|
||||||
|
* s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended.
|
||||||
|
* s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315)
|
||||||
|
* s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322)
|
||||||
|
* zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314)
|
||||||
|
* zip: Fix zip64 headers. [#313](https://github.com/klauspost/compress/pull/313)
|
||||||
|
|
||||||
|
* Jan 14, 2021 (v1.11.7)
|
||||||
|
* Use Bytes() interface to get bytes across packages. [#309](https://github.com/klauspost/compress/pull/309)
|
||||||
|
* s2: Add 'best' compression option. [#310](https://github.com/klauspost/compress/pull/310)
|
||||||
|
* s2: Add ReaderMaxBlockSize, changes `s2.NewReader` signature to include varargs. [#311](https://github.com/klauspost/compress/pull/311)
|
||||||
|
* s2: Fix crash on small better buffers. [#308](https://github.com/klauspost/compress/pull/308)
|
||||||
|
* s2: Clean up decoder. [#312](https://github.com/klauspost/compress/pull/312)
|
||||||
|
|
||||||
|
* Jan 7, 2021 (v1.11.6)
|
||||||
|
* zstd: Make decoder allocations smaller [#306](https://github.com/klauspost/compress/pull/306)
|
||||||
|
* zstd: Free Decoder resources when Reset is called with a nil io.Reader [#305](https://github.com/klauspost/compress/pull/305)
|
||||||
|
|
||||||
|
* Dec 20, 2020 (v1.11.4)
|
||||||
|
* zstd: Add Best compression mode [#304](https://github.com/klauspost/compress/pull/304)
|
||||||
|
* Add header decoder [#299](https://github.com/klauspost/compress/pull/299)
|
||||||
|
* s2: Add uncompressed stream option [#297](https://github.com/klauspost/compress/pull/297)
|
||||||
|
* Simplify/speed up small blocks with known max size. [#300](https://github.com/klauspost/compress/pull/300)
|
||||||
|
* zstd: Always reset literal dict encoder [#303](https://github.com/klauspost/compress/pull/303)
|
||||||
|
|
||||||
|
* Nov 15, 2020 (v1.11.3)
|
||||||
|
* inflate: 10-15% faster decompression [#293](https://github.com/klauspost/compress/pull/293)
|
||||||
|
* zstd: Tweak DecodeAll default allocation [#295](https://github.com/klauspost/compress/pull/295)
|
||||||
|
|
||||||
|
* Oct 11, 2020 (v1.11.2)
|
||||||
|
* s2: Fix out of bounds read in "better" block compression [#291](https://github.com/klauspost/compress/pull/291)
|
||||||
|
|
||||||
|
* Oct 1, 2020 (v1.11.1)
|
||||||
|
* zstd: Set allLitEntropy true in default configuration [#286](https://github.com/klauspost/compress/pull/286)
|
||||||
|
|
||||||
|
* Sept 8, 2020 (v1.11.0)
|
||||||
|
* zstd: Add experimental compression [dictionaries](https://github.com/klauspost/compress/tree/master/zstd#dictionaries) [#281](https://github.com/klauspost/compress/pull/281)
|
||||||
|
* zstd: Fix mixed Write and ReadFrom calls [#282](https://github.com/klauspost/compress/pull/282)
|
||||||
|
* inflate/gz: Limit variable shifts, ~5% faster decompression [#274](https://github.com/klauspost/compress/pull/274)
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes to v1.10.x</summary>
|
||||||
|
|
||||||
|
* July 8, 2020 (v1.10.11)
|
||||||
|
* zstd: Fix extra block when compressing with ReadFrom. [#278](https://github.com/klauspost/compress/pull/278)
|
||||||
|
* huff0: Also populate compression table when reading decoding table. [#275](https://github.com/klauspost/compress/pull/275)
|
||||||
|
|
||||||
|
* June 23, 2020 (v1.10.10)
|
||||||
|
* zstd: Skip entropy compression in fastest mode when no matches. [#270](https://github.com/klauspost/compress/pull/270)
|
||||||
|
|
||||||
|
* June 16, 2020 (v1.10.9):
|
||||||
|
* zstd: API change for specifying dictionaries. See [#268](https://github.com/klauspost/compress/pull/268)
|
||||||
|
* zip: update CreateHeaderRaw to handle zip64 fields. [#266](https://github.com/klauspost/compress/pull/266)
|
||||||
|
* Fuzzit tests removed. The service has been purchased and is no longer available.
|
||||||
|
|
||||||
|
* June 5, 2020 (v1.10.8):
|
||||||
|
* 1.15x faster zstd block decompression. [#265](https://github.com/klauspost/compress/pull/265)
|
||||||
|
|
||||||
|
* June 1, 2020 (v1.10.7):
|
||||||
|
* Added zstd decompression [dictionary support](https://github.com/klauspost/compress/tree/master/zstd#dictionaries)
|
||||||
|
* Increase zstd decompression speed up to 1.19x. [#259](https://github.com/klauspost/compress/pull/259)
|
||||||
|
* Remove internal reset call in zstd compression and reduce allocations. [#263](https://github.com/klauspost/compress/pull/263)
|
||||||
|
|
||||||
|
* May 21, 2020: (v1.10.6)
|
||||||
|
* zstd: Reduce allocations while decoding. [#258](https://github.com/klauspost/compress/pull/258), [#252](https://github.com/klauspost/compress/pull/252)
|
||||||
|
* zstd: Stricter decompression checks.
|
||||||
|
|
||||||
|
* April 12, 2020: (v1.10.5)
|
||||||
|
* s2-commands: Flush output when receiving SIGINT. [#239](https://github.com/klauspost/compress/pull/239)
|
||||||
|
|
||||||
|
* Apr 8, 2020: (v1.10.4)
|
||||||
|
* zstd: Minor/special case optimizations. [#251](https://github.com/klauspost/compress/pull/251), [#250](https://github.com/klauspost/compress/pull/250), [#249](https://github.com/klauspost/compress/pull/249), [#247](https://github.com/klauspost/compress/pull/247)
|
||||||
|
* Mar 11, 2020: (v1.10.3)
|
||||||
|
* s2: Use S2 encoder in pure Go mode for Snappy output as well. [#245](https://github.com/klauspost/compress/pull/245)
|
||||||
|
* s2: Fix pure Go block encoder. [#244](https://github.com/klauspost/compress/pull/244)
|
||||||
|
* zstd: Added "better compression" mode. [#240](https://github.com/klauspost/compress/pull/240)
|
||||||
|
* zstd: Improve speed of fastest compression mode by 5-10% [#241](https://github.com/klauspost/compress/pull/241)
|
||||||
|
* zstd: Skip creating encoders when not needed. [#238](https://github.com/klauspost/compress/pull/238)
|
||||||
|
|
||||||
|
* Feb 27, 2020: (v1.10.2)
|
||||||
|
* Close to 50% speedup in inflate (gzip/zip decompression). [#236](https://github.com/klauspost/compress/pull/236) [#234](https://github.com/klauspost/compress/pull/234) [#232](https://github.com/klauspost/compress/pull/232)
|
||||||
|
* Reduce deflate level 1-6 memory usage up to 59%. [#227](https://github.com/klauspost/compress/pull/227)
|
||||||
|
|
||||||
|
* Feb 18, 2020: (v1.10.1)
|
||||||
|
* Fix zstd crash when resetting multiple times without sending data. [#226](https://github.com/klauspost/compress/pull/226)
|
||||||
|
* deflate: Fix dictionary use on level 1-6. [#224](https://github.com/klauspost/compress/pull/224)
|
||||||
|
* Remove deflate writer reference when closing. [#224](https://github.com/klauspost/compress/pull/224)
|
||||||
|
|
||||||
|
* Feb 4, 2020: (v1.10.0)
|
||||||
|
* Add optional dictionary to [stateless deflate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc#StatelessDeflate). Breaking change, send `nil` for previous behaviour. [#216](https://github.com/klauspost/compress/pull/216)
|
||||||
|
* Fix buffer overflow on repeated small block deflate. [#218](https://github.com/klauspost/compress/pull/218)
|
||||||
|
* Allow copying content from an existing ZIP file without decompressing+compressing. [#214](https://github.com/klauspost/compress/pull/214)
|
||||||
|
* Added [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) AMD64 assembler and various optimizations. Stream speed >10GB/s. [#186](https://github.com/klauspost/compress/pull/186)
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes prior to v1.10.0</summary>
|
||||||
|
|
||||||
|
* Jan 20,2020 (v1.9.8) Optimize gzip/deflate with better size estimates and faster table generation. [#207](https://github.com/klauspost/compress/pull/207) by [luyu6056](https://github.com/luyu6056), [#206](https://github.com/klauspost/compress/pull/206).
|
||||||
|
* Jan 11, 2020: S2 Encode/Decode will use provided buffer if capacity is big enough. [#204](https://github.com/klauspost/compress/pull/204)
|
||||||
|
* Jan 5, 2020: (v1.9.7) Fix another zstd regression in v1.9.5 - v1.9.6 removed.
|
||||||
|
* Jan 4, 2020: (v1.9.6) Regression in v1.9.5 fixed causing corrupt zstd encodes in rare cases.
|
||||||
|
* Jan 4, 2020: Faster IO in [s2c + s2d commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) compression/decompression. [#192](https://github.com/klauspost/compress/pull/192)
|
||||||
|
* Dec 29, 2019: Removed v1.9.5 since fuzz tests showed a compatibility problem with the reference zstandard decoder.
|
||||||
|
* Dec 29, 2019: (v1.9.5) zstd: 10-20% faster block compression. [#199](https://github.com/klauspost/compress/pull/199)
|
||||||
|
* Dec 29, 2019: [zip](https://godoc.org/github.com/klauspost/compress/zip) package updated with latest Go features
|
||||||
|
* Dec 29, 2019: zstd: Single segment flag condintions tweaked. [#197](https://github.com/klauspost/compress/pull/197)
|
||||||
|
* Dec 18, 2019: s2: Faster compression when ReadFrom is used. [#198](https://github.com/klauspost/compress/pull/198)
|
||||||
|
* Dec 10, 2019: s2: Fix repeat length output when just above at 16MB limit.
|
||||||
|
* Dec 10, 2019: zstd: Add function to get decoder as io.ReadCloser. [#191](https://github.com/klauspost/compress/pull/191)
|
||||||
|
* Dec 3, 2019: (v1.9.4) S2: limit max repeat length. [#188](https://github.com/klauspost/compress/pull/188)
|
||||||
|
* Dec 3, 2019: Add [WithNoEntropyCompression](https://godoc.org/github.com/klauspost/compress/zstd#WithNoEntropyCompression) to zstd [#187](https://github.com/klauspost/compress/pull/187)
|
||||||
|
* Dec 3, 2019: Reduce memory use for tests. Check for leaked goroutines.
|
||||||
|
* Nov 28, 2019 (v1.9.3) Less allocations in stateless deflate.
|
||||||
|
* Nov 28, 2019: 5-20% Faster huff0 decode. Impacts zstd as well. [#184](https://github.com/klauspost/compress/pull/184)
|
||||||
|
* Nov 12, 2019 (v1.9.2) Added [Stateless Compression](#stateless-compression) for gzip/deflate.
|
||||||
|
* Nov 12, 2019: Fixed zstd decompression of large single blocks. [#180](https://github.com/klauspost/compress/pull/180)
|
||||||
|
* Nov 11, 2019: Set default [s2c](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) block size to 4MB.
|
||||||
|
* Nov 11, 2019: Reduce inflate memory use by 1KB.
|
||||||
|
* Nov 10, 2019: Less allocations in deflate bit writer.
|
||||||
|
* Nov 10, 2019: Fix inconsistent error returned by zstd decoder.
|
||||||
|
* Oct 28, 2019 (v1.9.1) ztsd: Fix crash when compressing blocks. [#174](https://github.com/klauspost/compress/pull/174)
|
||||||
|
* Oct 24, 2019 (v1.9.0) zstd: Fix rare data corruption [#173](https://github.com/klauspost/compress/pull/173)
|
||||||
|
* Oct 24, 2019 zstd: Fix huff0 out of buffer write [#171](https://github.com/klauspost/compress/pull/171) and always return errors [#172](https://github.com/klauspost/compress/pull/172)
|
||||||
|
* Oct 10, 2019: Big deflate rewrite, 30-40% faster with better compression [#105](https://github.com/klauspost/compress/pull/105)
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes prior to v1.9.0</summary>
|
||||||
|
|
||||||
|
* Oct 10, 2019: (v1.8.6) zstd: Allow partial reads to get flushed data. [#169](https://github.com/klauspost/compress/pull/169)
|
||||||
|
* Oct 3, 2019: Fix inconsistent results on broken zstd streams.
|
||||||
|
* Sep 25, 2019: Added `-rm` (remove source files) and `-q` (no output except errors) to `s2c` and `s2d` [commands](https://github.com/klauspost/compress/tree/master/s2#commandline-tools)
|
||||||
|
* Sep 16, 2019: (v1.8.4) Add `s2c` and `s2d` [commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools).
|
||||||
|
* Sep 10, 2019: (v1.8.3) Fix s2 decoder [Skip](https://godoc.org/github.com/klauspost/compress/s2#Reader.Skip).
|
||||||
|
* Sep 7, 2019: zstd: Added [WithWindowSize](https://godoc.org/github.com/klauspost/compress/zstd#WithWindowSize), contributed by [ianwilkes](https://github.com/ianwilkes).
|
||||||
|
* Sep 5, 2019: (v1.8.2) Add [WithZeroFrames](https://godoc.org/github.com/klauspost/compress/zstd#WithZeroFrames) which adds full zero payload block encoding option.
|
||||||
|
* Sep 5, 2019: Lazy initialization of zstandard predefined en/decoder tables.
|
||||||
|
* Aug 26, 2019: (v1.8.1) S2: 1-2% compression increase in "better" compression mode.
|
||||||
|
* Aug 26, 2019: zstd: Check maximum size of Huffman 1X compressed literals while decoding.
|
||||||
|
* Aug 24, 2019: (v1.8.0) Added [S2 compression](https://github.com/klauspost/compress/tree/master/s2#s2-compression), a high performance replacement for Snappy.
|
||||||
|
* Aug 21, 2019: (v1.7.6) Fixed minor issues found by fuzzer. One could lead to zstd not decompressing.
|
||||||
|
* Aug 18, 2019: Add [fuzzit](https://fuzzit.dev/) continuous fuzzing.
|
||||||
|
* Aug 14, 2019: zstd: Skip incompressible data 2x faster. [#147](https://github.com/klauspost/compress/pull/147)
|
||||||
|
* Aug 4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146)
|
||||||
|
* Aug 4, 2019: Faster zstd compression. [#143](https://github.com/klauspost/compress/pull/143) [#144](https://github.com/klauspost/compress/pull/144)
|
||||||
|
* Aug 4, 2019: Faster zstd decompression. [#145](https://github.com/klauspost/compress/pull/145) [#143](https://github.com/klauspost/compress/pull/143) [#142](https://github.com/klauspost/compress/pull/142)
|
||||||
|
* July 15, 2019 (v1.7.4): Fix double EOF block in rare cases on zstd encoder.
|
||||||
|
* July 15, 2019 (v1.7.3): Minor speedup/compression increase in default zstd encoder.
|
||||||
|
* July 14, 2019: zstd decoder: Fix decompression error on multiple uses with mixed content.
|
||||||
|
* July 7, 2019 (v1.7.2): Snappy update, zstd decoder potential race fix.
|
||||||
|
* June 17, 2019: zstd decompression bugfix.
|
||||||
|
* June 17, 2019: fix 32 bit builds.
|
||||||
|
* June 17, 2019: Easier use in modules (less dependencies).
|
||||||
|
* June 9, 2019: New stronger "default" [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression mode. Matches zstd default compression ratio.
|
||||||
|
* June 5, 2019: 20-40% throughput in [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and better compression.
|
||||||
|
* June 5, 2019: deflate/gzip compression: Reduce memory usage of lower compression levels.
|
||||||
|
* June 2, 2019: Added [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression!
|
||||||
|
* May 25, 2019: deflate/gzip: 10% faster bit writer, mostly visible in lower levels.
|
||||||
|
* Apr 22, 2019: [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) decompression added.
|
||||||
|
* Aug 1, 2018: Added [huff0 README](https://github.com/klauspost/compress/tree/master/huff0#huff0-entropy-compression).
|
||||||
|
* Jul 8, 2018: Added [Performance Update 2018](#performance-update-2018) below.
|
||||||
|
* Jun 23, 2018: Merged [Go 1.11 inflate optimizations](https://go-review.googlesource.com/c/go/+/102235). Go 1.9 is now required. Backwards compatible version tagged with [v1.3.0](https://github.com/klauspost/compress/releases/tag/v1.3.0).
|
||||||
|
* Apr 2, 2018: Added [huff0](https://godoc.org/github.com/klauspost/compress/huff0) en/decoder. Experimental for now, API may change.
|
||||||
|
* Mar 4, 2018: Added [FSE Entropy](https://godoc.org/github.com/klauspost/compress/fse) en/decoder. Experimental for now, API may change.
|
||||||
|
* Nov 3, 2017: Add compression [Estimate](https://godoc.org/github.com/klauspost/compress#Estimate) function.
|
||||||
|
* May 28, 2017: Reduce allocations when resetting decoder.
|
||||||
|
* Apr 02, 2017: Change back to official crc32, since changes were merged in Go 1.7.
|
||||||
|
* Jan 14, 2017: Reduce stack pressure due to array copies. See [Issue #18625](https://github.com/golang/go/issues/18625).
|
||||||
|
* Oct 25, 2016: Level 2-4 have been rewritten and now offers significantly better performance than before.
|
||||||
|
* Oct 20, 2016: Port zlib changes from Go 1.7 to fix zlib writer issue. Please update.
|
||||||
|
* Oct 16, 2016: Go 1.7 changes merged. Apples to apples this package is a few percent faster, but has a significantly better balance between speed and compression per level.
|
||||||
|
* Mar 24, 2016: Always attempt Huffman encoding on level 4-7. This improves base 64 encoded data compression.
|
||||||
|
* Mar 24, 2016: Small speedup for level 1-3.
|
||||||
|
* Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster.
|
||||||
|
* Feb 19, 2016: Handle small payloads faster in level 1-3.
|
||||||
|
* Feb 19, 2016: Added faster level 2 + 3 compression modes.
|
||||||
|
* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5.
|
||||||
|
* Feb 14, 2016: Snappy: Merge upstream changes.
|
||||||
|
* Feb 14, 2016: Snappy: Fix aggressive skipping.
|
||||||
|
* Feb 14, 2016: Snappy: Update benchmark.
|
||||||
|
* Feb 13, 2016: Deflate: Fixed assembler problem that could lead to sub-optimal compression.
|
||||||
|
* Feb 12, 2016: Snappy: Added AMD64 SSE 4.2 optimizations to matching, which makes easy to compress material run faster. Typical speedup is around 25%.
|
||||||
|
* Feb 9, 2016: Added Snappy package fork. This version is 5-7% faster, much more on hard to compress content.
|
||||||
|
* Jan 30, 2016: Optimize level 1 to 3 by not considering static dictionary or storing uncompressed. ~4-5% speedup.
|
||||||
|
* Jan 16, 2016: Optimization on deflate level 1,2,3 compression.
|
||||||
|
* Jan 8 2016: Merge [CL 18317](https://go-review.googlesource.com/#/c/18317): fix reading, writing of zip64 archives.
|
||||||
|
* Dec 8 2015: Make level 1 and -2 deterministic even if write size differs.
|
||||||
|
* Dec 8 2015: Split encoding functions, so hashing and matching can potentially be inlined. 1-3% faster on AMD64. 5% faster on other platforms.
|
||||||
|
* Dec 8 2015: Fixed rare [one byte out-of bounds read](https://github.com/klauspost/compress/issues/20). Please update!
|
||||||
|
* Nov 23 2015: Optimization on token writer. ~2-4% faster. Contributed by [@dsnet](https://github.com/dsnet).
|
||||||
|
* Nov 20 2015: Small optimization to bit writer on 64 bit systems.
|
||||||
|
* Nov 17 2015: Fixed out-of-bound errors if the underlying Writer returned an error. See [#15](https://github.com/klauspost/compress/issues/15).
|
||||||
|
* Nov 12 2015: Added [io.WriterTo](https://golang.org/pkg/io/#WriterTo) support to gzip/inflate.
|
||||||
|
* Nov 11 2015: Merged [CL 16669](https://go-review.googlesource.com/#/c/16669/4): archive/zip: enable overriding (de)compressors per file
|
||||||
|
* Oct 15 2015: Added skipping on uncompressible data. Random data speed up >5x.
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
# deflate usage
|
||||||
|
|
||||||
|
The packages are drop-in replacements for standard libraries. Simply replace the import path to use them:
|
||||||
|
|
||||||
|
Typical speed is about 2x of the standard library packages.
|
||||||
|
|
||||||
|
| old import | new import | Documentation |
|
||||||
|
|------------------|---------------------------------------|-------------------------------------------------------------------------|
|
||||||
|
| `compress/gzip` | `github.com/klauspost/compress/gzip` | [gzip](https://pkg.go.dev/github.com/klauspost/compress/gzip?tab=doc) |
|
||||||
|
| `compress/zlib` | `github.com/klauspost/compress/zlib` | [zlib](https://pkg.go.dev/github.com/klauspost/compress/zlib?tab=doc) |
|
||||||
|
| `archive/zip` | `github.com/klauspost/compress/zip` | [zip](https://pkg.go.dev/github.com/klauspost/compress/zip?tab=doc) |
|
||||||
|
| `compress/flate` | `github.com/klauspost/compress/flate` | [flate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc) |
|
||||||
|
|
||||||
|
* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib).
|
||||||
|
|
||||||
|
You may also be interested in [pgzip](https://github.com/klauspost/pgzip), which is a drop in replacement for gzip, which support multithreaded compression on big files and the optimized [crc32](https://github.com/klauspost/crc32) package used by these packages.
|
||||||
|
|
||||||
|
The packages contains the same as the standard library, so you can use the godoc for that: [gzip](http://golang.org/pkg/compress/gzip/), [zip](http://golang.org/pkg/archive/zip/), [zlib](http://golang.org/pkg/compress/zlib/), [flate](http://golang.org/pkg/compress/flate/).
|
||||||
|
|
||||||
|
Currently there is only minor speedup on decompression (mostly CRC32 calculation).
|
||||||
|
|
||||||
|
Memory usage is typically 1MB for a Writer. stdlib is in the same range.
|
||||||
|
If you expect to have a lot of concurrently allocated Writers consider using
|
||||||
|
the stateless compress described below.
|
||||||
|
|
||||||
|
For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing).
|
||||||
|
|
||||||
|
To disable all assembly add `-tags=noasm`. This works across all packages.
|
||||||
|
|
||||||
|
# Stateless compression
|
||||||
|
|
||||||
|
This package offers stateless compression as a special option for gzip/deflate.
|
||||||
|
It will do compression but without maintaining any state between Write calls.
|
||||||
|
|
||||||
|
This means there will be no memory kept between Write calls, but compression and speed will be suboptimal.
|
||||||
|
|
||||||
|
This is only relevant in cases where you expect to run many thousands of compressors concurrently,
|
||||||
|
but with very little activity. This is *not* intended for regular web servers serving individual requests.
|
||||||
|
|
||||||
|
Because of this, the size of actual Write calls will affect output size.
|
||||||
|
|
||||||
|
In gzip, specify level `-3` / `gzip.StatelessCompression` to enable.
|
||||||
|
|
||||||
|
For direct deflate use, NewStatelessWriter and StatelessDeflate are available. See [documentation](https://godoc.org/github.com/klauspost/compress/flate#NewStatelessWriter)
|
||||||
|
|
||||||
|
A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer:
|
||||||
|
|
||||||
|
```go
|
||||||
|
// replace 'ioutil.Discard' with your output.
|
||||||
|
gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer gzw.Close()
|
||||||
|
|
||||||
|
w := bufio.NewWriterSize(gzw, 4096)
|
||||||
|
defer w.Flush()
|
||||||
|
|
||||||
|
// Write to 'w'
|
||||||
|
```
|
||||||
|
|
||||||
|
This will only use up to 4KB in memory when the writer is idle.
|
||||||
|
|
||||||
|
Compression is almost always worse than the fastest compression level
|
||||||
|
and each write will allocate (a little) memory.
|
||||||
|
|
||||||
|
|
||||||
|
# Other packages
|
||||||
|
|
||||||
|
Here are other packages of good quality and pure Go (no cgo wrappers or autoconverted code):
|
||||||
|
|
||||||
|
* [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression.
|
||||||
|
* [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression.
|
||||||
|
* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer.
|
||||||
|
* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression.
|
||||||
|
* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression.
|
||||||
|
* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index.
|
||||||
|
* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor.
|
||||||
|
|
||||||
|
# license
|
||||||
|
|
||||||
|
This code is licensed under the same conditions as the original Go code. See LICENSE file.
|
||||||
25
vendor/github.com/klauspost/compress/SECURITY.md
generated
vendored
Normal file
25
vendor/github.com/klauspost/compress/SECURITY.md
generated
vendored
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Security Policy
|
||||||
|
|
||||||
|
## Supported Versions
|
||||||
|
|
||||||
|
Security updates are applied only to the latest release.
|
||||||
|
|
||||||
|
## Vulnerability Definition
|
||||||
|
|
||||||
|
A security vulnerability is a bug that with certain input triggers a crash or an infinite loop. Most calls will have varying execution time and only in rare cases will slow operation be considered a security vulnerability.
|
||||||
|
|
||||||
|
Corrupted output generally is not considered a security vulnerability, unless independent operations are able to affect each other. Note that not all functionality is re-entrant and safe to use concurrently.
|
||||||
|
|
||||||
|
Out-of-memory crashes only applies if the en/decoder uses an abnormal amount of memory, with appropriate options applied, to limit maximum window size, concurrency, etc. However, if you are in doubt you are welcome to file a security issue.
|
||||||
|
|
||||||
|
It is assumed that all callers are trusted, meaning internal data exposed through reflection or inspection of returned data structures is not considered a vulnerability.
|
||||||
|
|
||||||
|
Vulnerabilities resulting from compiler/assembler errors should be reported upstream. Depending on the severity this package may or may not implement a workaround.
|
||||||
|
|
||||||
|
## Reporting a Vulnerability
|
||||||
|
|
||||||
|
If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.
|
||||||
|
|
||||||
|
Please disclose it at [security advisory](https://github.com/klauspost/compress/security/advisories/new). If possible please provide a minimal reproducer. If the issue only applies to a single platform, it would be helpful to provide access to that.
|
||||||
|
|
||||||
|
This project is maintained by a team of volunteers on a reasonable-effort basis. As such, vulnerabilities will be disclosed in a best effort base.
|
||||||
85
vendor/github.com/klauspost/compress/compressible.go
generated
vendored
Normal file
85
vendor/github.com/klauspost/compress/compressible.go
generated
vendored
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
package compress
|
||||||
|
|
||||||
|
import "math"
|
||||||
|
|
||||||
|
// Estimate returns a normalized compressibility estimate of block b.
|
||||||
|
// Values close to zero are likely uncompressible.
|
||||||
|
// Values above 0.1 are likely to be compressible.
|
||||||
|
// Values above 0.5 are very compressible.
|
||||||
|
// Very small lengths will return 0.
|
||||||
|
func Estimate(b []byte) float64 {
|
||||||
|
if len(b) < 16 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Correctly predicted order 1
|
||||||
|
hits := 0
|
||||||
|
lastMatch := false
|
||||||
|
var o1 [256]byte
|
||||||
|
var hist [256]int
|
||||||
|
c1 := byte(0)
|
||||||
|
for _, c := range b {
|
||||||
|
if c == o1[c1] {
|
||||||
|
// We only count a hit if there was two correct predictions in a row.
|
||||||
|
if lastMatch {
|
||||||
|
hits++
|
||||||
|
}
|
||||||
|
lastMatch = true
|
||||||
|
} else {
|
||||||
|
lastMatch = false
|
||||||
|
}
|
||||||
|
o1[c1] = c
|
||||||
|
c1 = c
|
||||||
|
hist[c]++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use x^0.6 to give better spread
|
||||||
|
prediction := math.Pow(float64(hits)/float64(len(b)), 0.6)
|
||||||
|
|
||||||
|
// Calculate histogram distribution
|
||||||
|
variance := float64(0)
|
||||||
|
avg := float64(len(b)) / 256
|
||||||
|
|
||||||
|
for _, v := range hist {
|
||||||
|
Δ := float64(v) - avg
|
||||||
|
variance += Δ * Δ
|
||||||
|
}
|
||||||
|
|
||||||
|
stddev := math.Sqrt(float64(variance)) / float64(len(b))
|
||||||
|
exp := math.Sqrt(1 / float64(len(b)))
|
||||||
|
|
||||||
|
// Subtract expected stddev
|
||||||
|
stddev -= exp
|
||||||
|
if stddev < 0 {
|
||||||
|
stddev = 0
|
||||||
|
}
|
||||||
|
stddev *= 1 + exp
|
||||||
|
|
||||||
|
// Use x^0.4 to give better spread
|
||||||
|
entropy := math.Pow(stddev, 0.4)
|
||||||
|
|
||||||
|
// 50/50 weight between prediction and histogram distribution
|
||||||
|
return math.Pow((prediction+entropy)/2, 0.9)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ShannonEntropyBits returns the number of bits minimum required to represent
|
||||||
|
// an entropy encoding of the input bytes.
|
||||||
|
// https://en.wiktionary.org/wiki/Shannon_entropy
|
||||||
|
func ShannonEntropyBits(b []byte) int {
|
||||||
|
if len(b) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
var hist [256]int
|
||||||
|
for _, c := range b {
|
||||||
|
hist[c]++
|
||||||
|
}
|
||||||
|
shannon := float64(0)
|
||||||
|
invTotal := 1.0 / float64(len(b))
|
||||||
|
for _, v := range hist[:] {
|
||||||
|
if v > 0 {
|
||||||
|
n := float64(v)
|
||||||
|
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return int(math.Ceil(shannon))
|
||||||
|
}
|
||||||
79
vendor/github.com/klauspost/compress/fse/README.md
generated
vendored
Normal file
79
vendor/github.com/klauspost/compress/fse/README.md
generated
vendored
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
# Finite State Entropy
|
||||||
|
|
||||||
|
This package provides Finite State Entropy encoding and decoding.
|
||||||
|
|
||||||
|
Finite State Entropy (also referenced as [tANS](https://en.wikipedia.org/wiki/Asymmetric_numeral_systems#tANS))
|
||||||
|
encoding provides a fast near-optimal symbol encoding/decoding
|
||||||
|
for byte blocks as implemented in [zstandard](https://github.com/facebook/zstd).
|
||||||
|
|
||||||
|
This can be used for compressing input with a lot of similar input values to the smallest number of bytes.
|
||||||
|
This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders,
|
||||||
|
but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding.
|
||||||
|
|
||||||
|
* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/fse)
|
||||||
|
|
||||||
|
## News
|
||||||
|
|
||||||
|
* Feb 2018: First implementation released. Consider this beta software for now.
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
This package provides a low level interface that allows to compress single independent blocks.
|
||||||
|
|
||||||
|
Each block is separate, and there is no built in integrity checks.
|
||||||
|
This means that the caller should keep track of block sizes and also do checksums if needed.
|
||||||
|
|
||||||
|
Compressing a block is done via the [`Compress`](https://godoc.org/github.com/klauspost/compress/fse#Compress) function.
|
||||||
|
You must provide input and will receive the output and maybe an error.
|
||||||
|
|
||||||
|
These error values can be returned:
|
||||||
|
|
||||||
|
| Error | Description |
|
||||||
|
|---------------------|-----------------------------------------------------------------------------|
|
||||||
|
| `<nil>` | Everything ok, output is returned |
|
||||||
|
| `ErrIncompressible` | Returned when input is judged to be too hard to compress |
|
||||||
|
| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated |
|
||||||
|
| `(error)` | An internal error occurred. |
|
||||||
|
|
||||||
|
As can be seen above there are errors that will be returned even under normal operation so it is important to handle these.
|
||||||
|
|
||||||
|
To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/fse#Scratch) object
|
||||||
|
that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same
|
||||||
|
object can be used for both.
|
||||||
|
|
||||||
|
Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this
|
||||||
|
you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output.
|
||||||
|
|
||||||
|
Decompressing is done by calling the [`Decompress`](https://godoc.org/github.com/klauspost/compress/fse#Decompress) function.
|
||||||
|
You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
|
||||||
|
your input was likely corrupted.
|
||||||
|
|
||||||
|
It is important to note that a successful decoding does *not* mean your output matches your original input.
|
||||||
|
There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid.
|
||||||
|
|
||||||
|
For more detailed usage, see examples in the [godoc documentation](https://godoc.org/github.com/klauspost/compress/fse#pkg-examples).
|
||||||
|
|
||||||
|
# Performance
|
||||||
|
|
||||||
|
A lot of factors are affecting speed. Block sizes and compressibility of the material are primary factors.
|
||||||
|
All compression functions are currently only running on the calling goroutine so only one core will be used per block.
|
||||||
|
|
||||||
|
The compressor is significantly faster if symbols are kept as small as possible. The highest byte value of the input
|
||||||
|
is used to reduce some of the processing, so if all your input is above byte value 64 for instance, it may be
|
||||||
|
beneficial to transpose all your input values down by 64.
|
||||||
|
|
||||||
|
With moderate block sizes around 64k speed are typically 200MB/s per core for compression and
|
||||||
|
around 300MB/s decompression speed.
|
||||||
|
|
||||||
|
The same hardware typically does Huffman (deflate) encoding at 125MB/s and decompression at 100MB/s.
|
||||||
|
|
||||||
|
# Plans
|
||||||
|
|
||||||
|
At one point, more internals will be exposed to facilitate more "expert" usage of the components.
|
||||||
|
|
||||||
|
A streaming interface is also likely to be implemented. Likely compatible with [FSE stream format](https://github.com/Cyan4973/FiniteStateEntropy/blob/dev/programs/fileio.c#L261).
|
||||||
|
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
Contributions are always welcome. Be aware that adding public functions will require good justification and breaking
|
||||||
|
changes will likely not be accepted. If in doubt open an issue before writing the PR.
|
||||||
122
vendor/github.com/klauspost/compress/fse/bitreader.go
generated
vendored
Normal file
122
vendor/github.com/klauspost/compress/fse/bitreader.go
generated
vendored
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
// Copyright 2018 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
|
||||||
|
|
||||||
|
package fse
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
|
// bitReader reads a bitstream in reverse.
|
||||||
|
// The last set bit indicates the start of the stream and is used
|
||||||
|
// for aligning the input.
|
||||||
|
type bitReader struct {
|
||||||
|
in []byte
|
||||||
|
off uint // next byte to read is at in[off - 1]
|
||||||
|
value uint64
|
||||||
|
bitsRead uint8
|
||||||
|
}
|
||||||
|
|
||||||
|
// init initializes and resets the bit reader.
|
||||||
|
func (b *bitReader) init(in []byte) error {
|
||||||
|
if len(in) < 1 {
|
||||||
|
return errors.New("corrupt stream: too short")
|
||||||
|
}
|
||||||
|
b.in = in
|
||||||
|
b.off = uint(len(in))
|
||||||
|
// The highest bit of the last byte indicates where to start
|
||||||
|
v := in[len(in)-1]
|
||||||
|
if v == 0 {
|
||||||
|
return errors.New("corrupt stream, did not find end of stream")
|
||||||
|
}
|
||||||
|
b.bitsRead = 64
|
||||||
|
b.value = 0
|
||||||
|
if len(in) >= 8 {
|
||||||
|
b.fillFastStart()
|
||||||
|
} else {
|
||||||
|
b.fill()
|
||||||
|
b.fill()
|
||||||
|
}
|
||||||
|
b.bitsRead += 8 - uint8(highBits(uint32(v)))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getBits will return n bits. n can be 0.
|
||||||
|
func (b *bitReader) getBits(n uint8) uint16 {
|
||||||
|
if n == 0 || b.bitsRead >= 64 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return b.getBitsFast(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getBitsFast requires that at least one bit is requested every time.
|
||||||
|
// There are no checks if the buffer is filled.
|
||||||
|
func (b *bitReader) getBitsFast(n uint8) uint16 {
|
||||||
|
const regMask = 64 - 1
|
||||||
|
v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
|
||||||
|
b.bitsRead += n
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
|
||||||
|
// fillFast() will make sure at least 32 bits are available.
|
||||||
|
// There must be at least 4 bytes available.
|
||||||
|
func (b *bitReader) fillFast() {
|
||||||
|
if b.bitsRead < 32 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// 2 bounds checks.
|
||||||
|
v := b.in[b.off-4:]
|
||||||
|
v = v[:4]
|
||||||
|
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
b.value = (b.value << 32) | uint64(low)
|
||||||
|
b.bitsRead -= 32
|
||||||
|
b.off -= 4
|
||||||
|
}
|
||||||
|
|
||||||
|
// fill() will make sure at least 32 bits are available.
|
||||||
|
func (b *bitReader) fill() {
|
||||||
|
if b.bitsRead < 32 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if b.off > 4 {
|
||||||
|
v := b.in[b.off-4:]
|
||||||
|
v = v[:4]
|
||||||
|
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
b.value = (b.value << 32) | uint64(low)
|
||||||
|
b.bitsRead -= 32
|
||||||
|
b.off -= 4
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for b.off > 0 {
|
||||||
|
b.value = (b.value << 8) | uint64(b.in[b.off-1])
|
||||||
|
b.bitsRead -= 8
|
||||||
|
b.off--
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
|
||||||
|
func (b *bitReader) fillFastStart() {
|
||||||
|
// Do single re-slice to avoid bounds checks.
|
||||||
|
b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
|
||||||
|
b.bitsRead = 0
|
||||||
|
b.off -= 8
|
||||||
|
}
|
||||||
|
|
||||||
|
// finished returns true if all bits have been read from the bit stream.
|
||||||
|
func (b *bitReader) finished() bool {
|
||||||
|
return b.bitsRead >= 64 && b.off == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// close the bitstream and returns an error if out-of-buffer reads occurred.
|
||||||
|
func (b *bitReader) close() error {
|
||||||
|
// Release reference.
|
||||||
|
b.in = nil
|
||||||
|
if b.bitsRead > 64 {
|
||||||
|
return io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
167
vendor/github.com/klauspost/compress/fse/bitwriter.go
generated
vendored
Normal file
167
vendor/github.com/klauspost/compress/fse/bitwriter.go
generated
vendored
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
// Copyright 2018 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
|
||||||
|
|
||||||
|
package fse
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// bitWriter will write bits.
|
||||||
|
// First bit will be LSB of the first byte of output.
|
||||||
|
type bitWriter struct {
|
||||||
|
bitContainer uint64
|
||||||
|
nBits uint8
|
||||||
|
out []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
// bitMask16 is bitmasks. Has extra to avoid bounds check.
|
||||||
|
var bitMask16 = [32]uint16{
|
||||||
|
0, 1, 3, 7, 0xF, 0x1F,
|
||||||
|
0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
|
||||||
|
0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
|
||||||
|
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
|
||||||
|
0xFFFF, 0xFFFF} /* up to 16 bits */
|
||||||
|
|
||||||
|
// addBits16NC will add up to 16 bits.
|
||||||
|
// It will not check if there is space for them,
|
||||||
|
// so the caller must ensure that it has flushed recently.
|
||||||
|
func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
|
||||||
|
b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
|
||||||
|
b.nBits += bits
|
||||||
|
}
|
||||||
|
|
||||||
|
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
|
||||||
|
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
|
||||||
|
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
|
||||||
|
b.bitContainer |= uint64(value) << (b.nBits & 63)
|
||||||
|
b.nBits += bits
|
||||||
|
}
|
||||||
|
|
||||||
|
// addBits16ZeroNC will add up to 16 bits.
|
||||||
|
// It will not check if there is space for them,
|
||||||
|
// so the caller must ensure that it has flushed recently.
|
||||||
|
// This is fastest if bits can be zero.
|
||||||
|
func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) {
|
||||||
|
if bits == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
value <<= (16 - bits) & 15
|
||||||
|
value >>= (16 - bits) & 15
|
||||||
|
b.bitContainer |= uint64(value) << (b.nBits & 63)
|
||||||
|
b.nBits += bits
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush will flush all pending full bytes.
|
||||||
|
// There will be at least 56 bits available for writing when this has been called.
|
||||||
|
// Using flush32 is faster, but leaves less space for writing.
|
||||||
|
func (b *bitWriter) flush() {
|
||||||
|
v := b.nBits >> 3
|
||||||
|
switch v {
|
||||||
|
case 0:
|
||||||
|
case 1:
|
||||||
|
b.out = append(b.out,
|
||||||
|
byte(b.bitContainer),
|
||||||
|
)
|
||||||
|
case 2:
|
||||||
|
b.out = append(b.out,
|
||||||
|
byte(b.bitContainer),
|
||||||
|
byte(b.bitContainer>>8),
|
||||||
|
)
|
||||||
|
case 3:
|
||||||
|
b.out = append(b.out,
|
||||||
|
byte(b.bitContainer),
|
||||||
|
byte(b.bitContainer>>8),
|
||||||
|
byte(b.bitContainer>>16),
|
||||||
|
)
|
||||||
|
case 4:
|
||||||
|
b.out = append(b.out,
|
||||||
|
byte(b.bitContainer),
|
||||||
|
byte(b.bitContainer>>8),
|
||||||
|
byte(b.bitContainer>>16),
|
||||||
|
byte(b.bitContainer>>24),
|
||||||
|
)
|
||||||
|
case 5:
|
||||||
|
b.out = append(b.out,
|
||||||
|
byte(b.bitContainer),
|
||||||
|
byte(b.bitContainer>>8),
|
||||||
|
byte(b.bitContainer>>16),
|
||||||
|
byte(b.bitContainer>>24),
|
||||||
|
byte(b.bitContainer>>32),
|
||||||
|
)
|
||||||
|
case 6:
|
||||||
|
b.out = append(b.out,
|
||||||
|
byte(b.bitContainer),
|
||||||
|
byte(b.bitContainer>>8),
|
||||||
|
byte(b.bitContainer>>16),
|
||||||
|
byte(b.bitContainer>>24),
|
||||||
|
byte(b.bitContainer>>32),
|
||||||
|
byte(b.bitContainer>>40),
|
||||||
|
)
|
||||||
|
case 7:
|
||||||
|
b.out = append(b.out,
|
||||||
|
byte(b.bitContainer),
|
||||||
|
byte(b.bitContainer>>8),
|
||||||
|
byte(b.bitContainer>>16),
|
||||||
|
byte(b.bitContainer>>24),
|
||||||
|
byte(b.bitContainer>>32),
|
||||||
|
byte(b.bitContainer>>40),
|
||||||
|
byte(b.bitContainer>>48),
|
||||||
|
)
|
||||||
|
case 8:
|
||||||
|
b.out = append(b.out,
|
||||||
|
byte(b.bitContainer),
|
||||||
|
byte(b.bitContainer>>8),
|
||||||
|
byte(b.bitContainer>>16),
|
||||||
|
byte(b.bitContainer>>24),
|
||||||
|
byte(b.bitContainer>>32),
|
||||||
|
byte(b.bitContainer>>40),
|
||||||
|
byte(b.bitContainer>>48),
|
||||||
|
byte(b.bitContainer>>56),
|
||||||
|
)
|
||||||
|
default:
|
||||||
|
panic(fmt.Errorf("bits (%d) > 64", b.nBits))
|
||||||
|
}
|
||||||
|
b.bitContainer >>= v << 3
|
||||||
|
b.nBits &= 7
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush32 will flush out, so there are at least 32 bits available for writing.
|
||||||
|
func (b *bitWriter) flush32() {
|
||||||
|
if b.nBits < 32 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
b.out = append(b.out,
|
||||||
|
byte(b.bitContainer),
|
||||||
|
byte(b.bitContainer>>8),
|
||||||
|
byte(b.bitContainer>>16),
|
||||||
|
byte(b.bitContainer>>24))
|
||||||
|
b.nBits -= 32
|
||||||
|
b.bitContainer >>= 32
|
||||||
|
}
|
||||||
|
|
||||||
|
// flushAlign will flush remaining full bytes and align to next byte boundary.
|
||||||
|
func (b *bitWriter) flushAlign() {
|
||||||
|
nbBytes := (b.nBits + 7) >> 3
|
||||||
|
for i := range nbBytes {
|
||||||
|
b.out = append(b.out, byte(b.bitContainer>>(i*8)))
|
||||||
|
}
|
||||||
|
b.nBits = 0
|
||||||
|
b.bitContainer = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// close will write the alignment bit and write the final byte(s)
|
||||||
|
// to the output.
|
||||||
|
func (b *bitWriter) close() {
|
||||||
|
// End mark
|
||||||
|
b.addBits16Clean(1, 1)
|
||||||
|
// flush until next byte.
|
||||||
|
b.flushAlign()
|
||||||
|
}
|
||||||
|
|
||||||
|
// reset and continue writing by appending to out.
|
||||||
|
func (b *bitWriter) reset(out []byte) {
|
||||||
|
b.bitContainer = 0
|
||||||
|
b.nBits = 0
|
||||||
|
b.out = out
|
||||||
|
}
|
||||||
47
vendor/github.com/klauspost/compress/fse/bytereader.go
generated
vendored
Normal file
47
vendor/github.com/klauspost/compress/fse/bytereader.go
generated
vendored
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
// Copyright 2018 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
|
||||||
|
|
||||||
|
package fse
|
||||||
|
|
||||||
|
// byteReader provides a byte reader that reads
|
||||||
|
// little endian values from a byte stream.
|
||||||
|
// The input stream is manually advanced.
|
||||||
|
// The reader performs no bounds checks.
|
||||||
|
type byteReader struct {
|
||||||
|
b []byte
|
||||||
|
off int
|
||||||
|
}
|
||||||
|
|
||||||
|
// init will initialize the reader and set the input.
|
||||||
|
func (b *byteReader) init(in []byte) {
|
||||||
|
b.b = in
|
||||||
|
b.off = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// advance the stream b n bytes.
|
||||||
|
func (b *byteReader) advance(n uint) {
|
||||||
|
b.off += int(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Uint32 returns a little endian uint32 starting at current offset.
|
||||||
|
func (b byteReader) Uint32() uint32 {
|
||||||
|
b2 := b.b[b.off:]
|
||||||
|
b2 = b2[:4]
|
||||||
|
v3 := uint32(b2[3])
|
||||||
|
v2 := uint32(b2[2])
|
||||||
|
v1 := uint32(b2[1])
|
||||||
|
v0 := uint32(b2[0])
|
||||||
|
return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
|
||||||
|
}
|
||||||
|
|
||||||
|
// unread returns the unread portion of the input.
|
||||||
|
func (b byteReader) unread() []byte {
|
||||||
|
return b.b[b.off:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// remain will return the number of bytes remaining.
|
||||||
|
func (b byteReader) remain() int {
|
||||||
|
return len(b.b) - b.off
|
||||||
|
}
|
||||||
683
vendor/github.com/klauspost/compress/fse/compress.go
generated
vendored
Normal file
683
vendor/github.com/klauspost/compress/fse/compress.go
generated
vendored
Normal file
@@ -0,0 +1,683 @@
|
|||||||
|
// Copyright 2018 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
|
||||||
|
|
||||||
|
package fse
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Compress the input bytes. Input must be < 2GB.
|
||||||
|
// Provide a Scratch buffer to avoid memory allocations.
|
||||||
|
// Note that the output is also kept in the scratch buffer.
|
||||||
|
// If input is too hard to compress, ErrIncompressible is returned.
|
||||||
|
// If input is a single byte value repeated ErrUseRLE is returned.
|
||||||
|
func Compress(in []byte, s *Scratch) ([]byte, error) {
|
||||||
|
if len(in) <= 1 {
|
||||||
|
return nil, ErrIncompressible
|
||||||
|
}
|
||||||
|
if len(in) > (2<<30)-1 {
|
||||||
|
return nil, errors.New("input too big, must be < 2GB")
|
||||||
|
}
|
||||||
|
s, err := s.prepare(in)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create histogram, if none was provided.
|
||||||
|
maxCount := s.maxCount
|
||||||
|
if maxCount == 0 {
|
||||||
|
maxCount = s.countSimple(in)
|
||||||
|
}
|
||||||
|
// Reset for next run.
|
||||||
|
s.clearCount = true
|
||||||
|
s.maxCount = 0
|
||||||
|
if maxCount == len(in) {
|
||||||
|
// One symbol, use RLE
|
||||||
|
return nil, ErrUseRLE
|
||||||
|
}
|
||||||
|
if maxCount == 1 || maxCount < (len(in)>>7) {
|
||||||
|
// Each symbol present maximum once or too well distributed.
|
||||||
|
return nil, ErrIncompressible
|
||||||
|
}
|
||||||
|
s.optimalTableLog()
|
||||||
|
err = s.normalizeCount()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
err = s.writeCount()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if false {
|
||||||
|
err = s.validateNorm()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err = s.buildCTable()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
err = s.compress(in)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
s.Out = s.bw.out
|
||||||
|
// Check if we compressed.
|
||||||
|
if len(s.Out) >= len(in) {
|
||||||
|
return nil, ErrIncompressible
|
||||||
|
}
|
||||||
|
return s.Out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// cState contains the compression state of a stream.
|
||||||
|
type cState struct {
|
||||||
|
bw *bitWriter
|
||||||
|
stateTable []uint16
|
||||||
|
state uint16
|
||||||
|
}
|
||||||
|
|
||||||
|
// init will initialize the compression state to the first symbol of the stream.
|
||||||
|
func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) {
|
||||||
|
c.bw = bw
|
||||||
|
c.stateTable = ct.stateTable
|
||||||
|
|
||||||
|
nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16
|
||||||
|
im := int32((nbBitsOut << 16) - first.deltaNbBits)
|
||||||
|
lu := (im >> nbBitsOut) + first.deltaFindState
|
||||||
|
c.state = c.stateTable[lu]
|
||||||
|
}
|
||||||
|
|
||||||
|
// encode the output symbol provided and write it to the bitstream.
|
||||||
|
func (c *cState) encode(symbolTT symbolTransform) {
|
||||||
|
nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
|
||||||
|
dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
|
||||||
|
c.bw.addBits16NC(c.state, uint8(nbBitsOut))
|
||||||
|
c.state = c.stateTable[dstState]
|
||||||
|
}
|
||||||
|
|
||||||
|
// encode the output symbol provided and write it to the bitstream.
|
||||||
|
func (c *cState) encodeZero(symbolTT symbolTransform) {
|
||||||
|
nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
|
||||||
|
dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
|
||||||
|
c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut))
|
||||||
|
c.state = c.stateTable[dstState]
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush will write the tablelog to the output and flush the remaining full bytes.
|
||||||
|
func (c *cState) flush(tableLog uint8) {
|
||||||
|
c.bw.flush32()
|
||||||
|
c.bw.addBits16NC(c.state, tableLog)
|
||||||
|
c.bw.flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
// compress is the main compression loop that will encode the input from the last byte to the first.
|
||||||
|
func (s *Scratch) compress(src []byte) error {
|
||||||
|
if len(src) <= 2 {
|
||||||
|
return errors.New("compress: src too small")
|
||||||
|
}
|
||||||
|
tt := s.ct.symbolTT[:256]
|
||||||
|
s.bw.reset(s.Out)
|
||||||
|
|
||||||
|
// Our two states each encodes every second byte.
|
||||||
|
// Last byte encoded (first byte decoded) will always be encoded by c1.
|
||||||
|
var c1, c2 cState
|
||||||
|
|
||||||
|
// Encode so remaining size is divisible by 4.
|
||||||
|
ip := len(src)
|
||||||
|
if ip&1 == 1 {
|
||||||
|
c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
|
||||||
|
c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
|
||||||
|
c1.encodeZero(tt[src[ip-3]])
|
||||||
|
ip -= 3
|
||||||
|
} else {
|
||||||
|
c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
|
||||||
|
c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
|
||||||
|
ip -= 2
|
||||||
|
}
|
||||||
|
if ip&2 != 0 {
|
||||||
|
c2.encodeZero(tt[src[ip-1]])
|
||||||
|
c1.encodeZero(tt[src[ip-2]])
|
||||||
|
ip -= 2
|
||||||
|
}
|
||||||
|
src = src[:ip]
|
||||||
|
|
||||||
|
// Main compression loop.
|
||||||
|
switch {
|
||||||
|
case !s.zeroBits && s.actualTableLog <= 8:
|
||||||
|
// We can encode 4 symbols without requiring a flush.
|
||||||
|
// We do not need to check if any output is 0 bits.
|
||||||
|
for ; len(src) >= 4; src = src[:len(src)-4] {
|
||||||
|
s.bw.flush32()
|
||||||
|
v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
|
||||||
|
c2.encode(tt[v0])
|
||||||
|
c1.encode(tt[v1])
|
||||||
|
c2.encode(tt[v2])
|
||||||
|
c1.encode(tt[v3])
|
||||||
|
}
|
||||||
|
case !s.zeroBits:
|
||||||
|
// We do not need to check if any output is 0 bits.
|
||||||
|
for ; len(src) >= 4; src = src[:len(src)-4] {
|
||||||
|
s.bw.flush32()
|
||||||
|
v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
|
||||||
|
c2.encode(tt[v0])
|
||||||
|
c1.encode(tt[v1])
|
||||||
|
s.bw.flush32()
|
||||||
|
c2.encode(tt[v2])
|
||||||
|
c1.encode(tt[v3])
|
||||||
|
}
|
||||||
|
case s.actualTableLog <= 8:
|
||||||
|
// We can encode 4 symbols without requiring a flush
|
||||||
|
for ; len(src) >= 4; src = src[:len(src)-4] {
|
||||||
|
s.bw.flush32()
|
||||||
|
v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
|
||||||
|
c2.encodeZero(tt[v0])
|
||||||
|
c1.encodeZero(tt[v1])
|
||||||
|
c2.encodeZero(tt[v2])
|
||||||
|
c1.encodeZero(tt[v3])
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
for ; len(src) >= 4; src = src[:len(src)-4] {
|
||||||
|
s.bw.flush32()
|
||||||
|
v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
|
||||||
|
c2.encodeZero(tt[v0])
|
||||||
|
c1.encodeZero(tt[v1])
|
||||||
|
s.bw.flush32()
|
||||||
|
c2.encodeZero(tt[v2])
|
||||||
|
c1.encodeZero(tt[v3])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush final state.
|
||||||
|
// Used to initialize state when decoding.
|
||||||
|
c2.flush(s.actualTableLog)
|
||||||
|
c1.flush(s.actualTableLog)
|
||||||
|
|
||||||
|
s.bw.close()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeCount will write the normalized histogram count to header.
|
||||||
|
// This is read back by readNCount.
|
||||||
|
func (s *Scratch) writeCount() error {
|
||||||
|
var (
|
||||||
|
tableLog = s.actualTableLog
|
||||||
|
tableSize = 1 << tableLog
|
||||||
|
previous0 bool
|
||||||
|
charnum uint16
|
||||||
|
|
||||||
|
maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3
|
||||||
|
|
||||||
|
// Write Table Size
|
||||||
|
bitStream = uint32(tableLog - minTablelog)
|
||||||
|
bitCount = uint(4)
|
||||||
|
remaining = int16(tableSize + 1) /* +1 for extra accuracy */
|
||||||
|
threshold = int16(tableSize)
|
||||||
|
nbBits = uint(tableLog + 1)
|
||||||
|
)
|
||||||
|
if cap(s.Out) < maxHeaderSize {
|
||||||
|
s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize)
|
||||||
|
}
|
||||||
|
outP := uint(0)
|
||||||
|
out := s.Out[:maxHeaderSize]
|
||||||
|
|
||||||
|
// stops at 1
|
||||||
|
for remaining > 1 {
|
||||||
|
if previous0 {
|
||||||
|
start := charnum
|
||||||
|
for s.norm[charnum] == 0 {
|
||||||
|
charnum++
|
||||||
|
}
|
||||||
|
for charnum >= start+24 {
|
||||||
|
start += 24
|
||||||
|
bitStream += uint32(0xFFFF) << bitCount
|
||||||
|
out[outP] = byte(bitStream)
|
||||||
|
out[outP+1] = byte(bitStream >> 8)
|
||||||
|
outP += 2
|
||||||
|
bitStream >>= 16
|
||||||
|
}
|
||||||
|
for charnum >= start+3 {
|
||||||
|
start += 3
|
||||||
|
bitStream += 3 << bitCount
|
||||||
|
bitCount += 2
|
||||||
|
}
|
||||||
|
bitStream += uint32(charnum-start) << bitCount
|
||||||
|
bitCount += 2
|
||||||
|
if bitCount > 16 {
|
||||||
|
out[outP] = byte(bitStream)
|
||||||
|
out[outP+1] = byte(bitStream >> 8)
|
||||||
|
outP += 2
|
||||||
|
bitStream >>= 16
|
||||||
|
bitCount -= 16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
count := s.norm[charnum]
|
||||||
|
charnum++
|
||||||
|
max := (2*threshold - 1) - remaining
|
||||||
|
if count < 0 {
|
||||||
|
remaining += count
|
||||||
|
} else {
|
||||||
|
remaining -= count
|
||||||
|
}
|
||||||
|
count++ // +1 for extra accuracy
|
||||||
|
if count >= threshold {
|
||||||
|
count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
|
||||||
|
}
|
||||||
|
bitStream += uint32(count) << bitCount
|
||||||
|
bitCount += nbBits
|
||||||
|
if count < max {
|
||||||
|
bitCount--
|
||||||
|
}
|
||||||
|
|
||||||
|
previous0 = count == 1
|
||||||
|
if remaining < 1 {
|
||||||
|
return errors.New("internal error: remaining<1")
|
||||||
|
}
|
||||||
|
for remaining < threshold {
|
||||||
|
nbBits--
|
||||||
|
threshold >>= 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if bitCount > 16 {
|
||||||
|
out[outP] = byte(bitStream)
|
||||||
|
out[outP+1] = byte(bitStream >> 8)
|
||||||
|
outP += 2
|
||||||
|
bitStream >>= 16
|
||||||
|
bitCount -= 16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out[outP] = byte(bitStream)
|
||||||
|
out[outP+1] = byte(bitStream >> 8)
|
||||||
|
outP += (bitCount + 7) / 8
|
||||||
|
|
||||||
|
if charnum > s.symbolLen {
|
||||||
|
return errors.New("internal error: charnum > s.symbolLen")
|
||||||
|
}
|
||||||
|
s.Out = out[:outP]
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// symbolTransform contains the state transform for a symbol.
|
||||||
|
type symbolTransform struct {
|
||||||
|
deltaFindState int32
|
||||||
|
deltaNbBits uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
// String prints values as a human readable string.
|
||||||
|
func (s symbolTransform) String() string {
|
||||||
|
return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState)
|
||||||
|
}
|
||||||
|
|
||||||
|
// cTable contains tables used for compression.
|
||||||
|
type cTable struct {
|
||||||
|
tableSymbol []byte
|
||||||
|
stateTable []uint16
|
||||||
|
symbolTT []symbolTransform
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocCtable will allocate tables needed for compression.
|
||||||
|
// If existing tables a re big enough, they are simply re-used.
|
||||||
|
func (s *Scratch) allocCtable() {
|
||||||
|
tableSize := 1 << s.actualTableLog
|
||||||
|
// get tableSymbol that is big enough.
|
||||||
|
if cap(s.ct.tableSymbol) < tableSize {
|
||||||
|
s.ct.tableSymbol = make([]byte, tableSize)
|
||||||
|
}
|
||||||
|
s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
|
||||||
|
|
||||||
|
ctSize := tableSize
|
||||||
|
if cap(s.ct.stateTable) < ctSize {
|
||||||
|
s.ct.stateTable = make([]uint16, ctSize)
|
||||||
|
}
|
||||||
|
s.ct.stateTable = s.ct.stateTable[:ctSize]
|
||||||
|
|
||||||
|
if cap(s.ct.symbolTT) < 256 {
|
||||||
|
s.ct.symbolTT = make([]symbolTransform, 256)
|
||||||
|
}
|
||||||
|
s.ct.symbolTT = s.ct.symbolTT[:256]
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildCTable will populate the compression table so it is ready to be used.
|
||||||
|
func (s *Scratch) buildCTable() error {
|
||||||
|
tableSize := uint32(1 << s.actualTableLog)
|
||||||
|
highThreshold := tableSize - 1
|
||||||
|
var cumul [maxSymbolValue + 2]int16
|
||||||
|
|
||||||
|
s.allocCtable()
|
||||||
|
tableSymbol := s.ct.tableSymbol[:tableSize]
|
||||||
|
// symbol start positions
|
||||||
|
{
|
||||||
|
cumul[0] = 0
|
||||||
|
for ui, v := range s.norm[:s.symbolLen-1] {
|
||||||
|
u := byte(ui) // one less than reference
|
||||||
|
if v == -1 {
|
||||||
|
// Low proba symbol
|
||||||
|
cumul[u+1] = cumul[u] + 1
|
||||||
|
tableSymbol[highThreshold] = u
|
||||||
|
highThreshold--
|
||||||
|
} else {
|
||||||
|
cumul[u+1] = cumul[u] + v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Encode last symbol separately to avoid overflowing u
|
||||||
|
u := int(s.symbolLen - 1)
|
||||||
|
v := s.norm[s.symbolLen-1]
|
||||||
|
if v == -1 {
|
||||||
|
// Low proba symbol
|
||||||
|
cumul[u+1] = cumul[u] + 1
|
||||||
|
tableSymbol[highThreshold] = byte(u)
|
||||||
|
highThreshold--
|
||||||
|
} else {
|
||||||
|
cumul[u+1] = cumul[u] + v
|
||||||
|
}
|
||||||
|
if uint32(cumul[s.symbolLen]) != tableSize {
|
||||||
|
return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
|
||||||
|
}
|
||||||
|
cumul[s.symbolLen] = int16(tableSize) + 1
|
||||||
|
}
|
||||||
|
// Spread symbols
|
||||||
|
s.zeroBits = false
|
||||||
|
{
|
||||||
|
step := tableStep(tableSize)
|
||||||
|
tableMask := tableSize - 1
|
||||||
|
var position uint32
|
||||||
|
// if any symbol > largeLimit, we may have 0 bits output.
|
||||||
|
largeLimit := int16(1 << (s.actualTableLog - 1))
|
||||||
|
for ui, v := range s.norm[:s.symbolLen] {
|
||||||
|
symbol := byte(ui)
|
||||||
|
if v > largeLimit {
|
||||||
|
s.zeroBits = true
|
||||||
|
}
|
||||||
|
for range v {
|
||||||
|
tableSymbol[position] = symbol
|
||||||
|
position = (position + step) & tableMask
|
||||||
|
for position > highThreshold {
|
||||||
|
position = (position + step) & tableMask
|
||||||
|
} /* Low proba area */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we have gone through all positions
|
||||||
|
if position != 0 {
|
||||||
|
return errors.New("position!=0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build table
|
||||||
|
table := s.ct.stateTable
|
||||||
|
{
|
||||||
|
tsi := int(tableSize)
|
||||||
|
for u, v := range tableSymbol {
|
||||||
|
// TableU16 : sorted by symbol order; gives next state value
|
||||||
|
table[cumul[v]] = uint16(tsi + u)
|
||||||
|
cumul[v]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build Symbol Transformation Table
|
||||||
|
{
|
||||||
|
total := int16(0)
|
||||||
|
symbolTT := s.ct.symbolTT[:s.symbolLen]
|
||||||
|
tableLog := s.actualTableLog
|
||||||
|
tl := (uint32(tableLog) << 16) - (1 << tableLog)
|
||||||
|
for i, v := range s.norm[:s.symbolLen] {
|
||||||
|
switch v {
|
||||||
|
case 0:
|
||||||
|
case -1, 1:
|
||||||
|
symbolTT[i].deltaNbBits = tl
|
||||||
|
symbolTT[i].deltaFindState = int32(total - 1)
|
||||||
|
total++
|
||||||
|
default:
|
||||||
|
maxBitsOut := uint32(tableLog) - highBits(uint32(v-1))
|
||||||
|
minStatePlus := uint32(v) << maxBitsOut
|
||||||
|
symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
|
||||||
|
symbolTT[i].deltaFindState = int32(total - v)
|
||||||
|
total += v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if total != int16(tableSize) {
|
||||||
|
return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// countSimple will create a simple histogram in s.count.
|
||||||
|
// Returns the biggest count.
|
||||||
|
// Does not update s.clearCount.
|
||||||
|
func (s *Scratch) countSimple(in []byte) (max int) {
|
||||||
|
for _, v := range in {
|
||||||
|
s.count[v]++
|
||||||
|
}
|
||||||
|
m, symlen := uint32(0), s.symbolLen
|
||||||
|
for i, v := range s.count[:] {
|
||||||
|
if v == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if v > m {
|
||||||
|
m = v
|
||||||
|
}
|
||||||
|
symlen = uint16(i) + 1
|
||||||
|
}
|
||||||
|
s.symbolLen = symlen
|
||||||
|
return int(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// minTableLog provides the minimum logSize to safely represent a distribution.
|
||||||
|
func (s *Scratch) minTableLog() uint8 {
|
||||||
|
minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1
|
||||||
|
minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2
|
||||||
|
if minBitsSrc < minBitsSymbols {
|
||||||
|
return uint8(minBitsSrc)
|
||||||
|
}
|
||||||
|
return uint8(minBitsSymbols)
|
||||||
|
}
|
||||||
|
|
||||||
|
// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
|
||||||
|
func (s *Scratch) optimalTableLog() {
|
||||||
|
tableLog := s.TableLog
|
||||||
|
minBits := s.minTableLog()
|
||||||
|
maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2
|
||||||
|
if maxBitsSrc < tableLog {
|
||||||
|
// Accuracy can be reduced
|
||||||
|
tableLog = maxBitsSrc
|
||||||
|
}
|
||||||
|
if minBits > tableLog {
|
||||||
|
tableLog = minBits
|
||||||
|
}
|
||||||
|
// Need a minimum to safely represent all symbol values
|
||||||
|
if tableLog < minTablelog {
|
||||||
|
tableLog = minTablelog
|
||||||
|
}
|
||||||
|
if tableLog > maxTableLog {
|
||||||
|
tableLog = maxTableLog
|
||||||
|
}
|
||||||
|
s.actualTableLog = tableLog
|
||||||
|
}
|
||||||
|
|
||||||
|
var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}
|
||||||
|
|
||||||
|
// normalizeCount will normalize the count of the symbols so
|
||||||
|
// the total is equal to the table size.
|
||||||
|
func (s *Scratch) normalizeCount() error {
|
||||||
|
var (
|
||||||
|
tableLog = s.actualTableLog
|
||||||
|
scale = 62 - uint64(tableLog)
|
||||||
|
step = (1 << 62) / uint64(s.br.remain())
|
||||||
|
vStep = uint64(1) << (scale - 20)
|
||||||
|
stillToDistribute = int16(1 << tableLog)
|
||||||
|
largest int
|
||||||
|
largestP int16
|
||||||
|
lowThreshold = (uint32)(s.br.remain() >> tableLog)
|
||||||
|
)
|
||||||
|
|
||||||
|
for i, cnt := range s.count[:s.symbolLen] {
|
||||||
|
// already handled
|
||||||
|
// if (count[s] == s.length) return 0; /* rle special case */
|
||||||
|
|
||||||
|
if cnt == 0 {
|
||||||
|
s.norm[i] = 0
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if cnt <= lowThreshold {
|
||||||
|
s.norm[i] = -1
|
||||||
|
stillToDistribute--
|
||||||
|
} else {
|
||||||
|
proba := (int16)((uint64(cnt) * step) >> scale)
|
||||||
|
if proba < 8 {
|
||||||
|
restToBeat := vStep * uint64(rtbTable[proba])
|
||||||
|
v := uint64(cnt)*step - (uint64(proba) << scale)
|
||||||
|
if v > restToBeat {
|
||||||
|
proba++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if proba > largestP {
|
||||||
|
largestP = proba
|
||||||
|
largest = i
|
||||||
|
}
|
||||||
|
s.norm[i] = proba
|
||||||
|
stillToDistribute -= proba
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if -stillToDistribute >= (s.norm[largest] >> 1) {
|
||||||
|
// corner case, need another normalization method
|
||||||
|
return s.normalizeCount2()
|
||||||
|
}
|
||||||
|
s.norm[largest] += stillToDistribute
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Secondary normalization method.
|
||||||
|
// To be used when primary method fails.
|
||||||
|
func (s *Scratch) normalizeCount2() error {
|
||||||
|
const notYetAssigned = -2
|
||||||
|
var (
|
||||||
|
distributed uint32
|
||||||
|
total = uint32(s.br.remain())
|
||||||
|
tableLog = s.actualTableLog
|
||||||
|
lowThreshold = total >> tableLog
|
||||||
|
lowOne = (total * 3) >> (tableLog + 1)
|
||||||
|
)
|
||||||
|
for i, cnt := range s.count[:s.symbolLen] {
|
||||||
|
if cnt == 0 {
|
||||||
|
s.norm[i] = 0
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if cnt <= lowThreshold {
|
||||||
|
s.norm[i] = -1
|
||||||
|
distributed++
|
||||||
|
total -= cnt
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if cnt <= lowOne {
|
||||||
|
s.norm[i] = 1
|
||||||
|
distributed++
|
||||||
|
total -= cnt
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
s.norm[i] = notYetAssigned
|
||||||
|
}
|
||||||
|
toDistribute := (1 << tableLog) - distributed
|
||||||
|
|
||||||
|
if (total / toDistribute) > lowOne {
|
||||||
|
// risk of rounding to zero
|
||||||
|
lowOne = (total * 3) / (toDistribute * 2)
|
||||||
|
for i, cnt := range s.count[:s.symbolLen] {
|
||||||
|
if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
|
||||||
|
s.norm[i] = 1
|
||||||
|
distributed++
|
||||||
|
total -= cnt
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
toDistribute = (1 << tableLog) - distributed
|
||||||
|
}
|
||||||
|
if distributed == uint32(s.symbolLen)+1 {
|
||||||
|
// all values are pretty poor;
|
||||||
|
// probably incompressible data (should have already been detected);
|
||||||
|
// find max, then give all remaining points to max
|
||||||
|
var maxV int
|
||||||
|
var maxC uint32
|
||||||
|
for i, cnt := range s.count[:s.symbolLen] {
|
||||||
|
if cnt > maxC {
|
||||||
|
maxV = i
|
||||||
|
maxC = cnt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.norm[maxV] += int16(toDistribute)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if total == 0 {
|
||||||
|
// all of the symbols were low enough for the lowOne or lowThreshold
|
||||||
|
for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
|
||||||
|
if s.norm[i] > 0 {
|
||||||
|
toDistribute--
|
||||||
|
s.norm[i]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
vStepLog = 62 - uint64(tableLog)
|
||||||
|
mid = uint64((1 << (vStepLog - 1)) - 1)
|
||||||
|
rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
|
||||||
|
tmpTotal = mid
|
||||||
|
)
|
||||||
|
for i, cnt := range s.count[:s.symbolLen] {
|
||||||
|
if s.norm[i] == notYetAssigned {
|
||||||
|
var (
|
||||||
|
end = tmpTotal + uint64(cnt)*rStep
|
||||||
|
sStart = uint32(tmpTotal >> vStepLog)
|
||||||
|
sEnd = uint32(end >> vStepLog)
|
||||||
|
weight = sEnd - sStart
|
||||||
|
)
|
||||||
|
if weight < 1 {
|
||||||
|
return errors.New("weight < 1")
|
||||||
|
}
|
||||||
|
s.norm[i] = int16(weight)
|
||||||
|
tmpTotal = end
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateNorm validates the normalized histogram table.
|
||||||
|
func (s *Scratch) validateNorm() (err error) {
|
||||||
|
var total int
|
||||||
|
for _, v := range s.norm[:s.symbolLen] {
|
||||||
|
if v >= 0 {
|
||||||
|
total += int(v)
|
||||||
|
} else {
|
||||||
|
total -= int(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if err == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
|
||||||
|
for i, v := range s.norm[:s.symbolLen] {
|
||||||
|
fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
if total != (1 << s.actualTableLog) {
|
||||||
|
return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog)
|
||||||
|
}
|
||||||
|
for i, v := range s.count[s.symbolLen:] {
|
||||||
|
if v != 0 {
|
||||||
|
return fmt.Errorf("warning: Found symbol out of range, %d after cut", i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
376
vendor/github.com/klauspost/compress/fse/decompress.go
generated
vendored
Normal file
376
vendor/github.com/klauspost/compress/fse/decompress.go
generated
vendored
Normal file
@@ -0,0 +1,376 @@
|
|||||||
|
package fse
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
tablelogAbsoluteMax = 15
|
||||||
|
)
|
||||||
|
|
||||||
|
// Decompress a block of data.
|
||||||
|
// You can provide a scratch buffer to avoid allocations.
|
||||||
|
// If nil is provided a temporary one will be allocated.
|
||||||
|
// It is possible, but by no way guaranteed that corrupt data will
|
||||||
|
// return an error.
|
||||||
|
// It is up to the caller to verify integrity of the returned data.
|
||||||
|
// Use a predefined Scratch to set maximum acceptable output size.
|
||||||
|
func Decompress(b []byte, s *Scratch) ([]byte, error) {
|
||||||
|
s, err := s.prepare(b)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
s.Out = s.Out[:0]
|
||||||
|
err = s.readNCount()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
err = s.buildDtable()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
err = s.decompress()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.Out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readNCount will read the symbol distribution so decoding tables can be constructed.
|
||||||
|
func (s *Scratch) readNCount() error {
|
||||||
|
var (
|
||||||
|
charnum uint16
|
||||||
|
previous0 bool
|
||||||
|
b = &s.br
|
||||||
|
)
|
||||||
|
iend := b.remain()
|
||||||
|
if iend < 4 {
|
||||||
|
return errors.New("input too small")
|
||||||
|
}
|
||||||
|
bitStream := b.Uint32()
|
||||||
|
nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog
|
||||||
|
if nbBits > tablelogAbsoluteMax {
|
||||||
|
return errors.New("tableLog too large")
|
||||||
|
}
|
||||||
|
bitStream >>= 4
|
||||||
|
bitCount := uint(4)
|
||||||
|
|
||||||
|
s.actualTableLog = uint8(nbBits)
|
||||||
|
remaining := int32((1 << nbBits) + 1)
|
||||||
|
threshold := int32(1 << nbBits)
|
||||||
|
gotTotal := int32(0)
|
||||||
|
nbBits++
|
||||||
|
|
||||||
|
for remaining > 1 {
|
||||||
|
if previous0 {
|
||||||
|
n0 := charnum
|
||||||
|
for (bitStream & 0xFFFF) == 0xFFFF {
|
||||||
|
n0 += 24
|
||||||
|
if b.off < iend-5 {
|
||||||
|
b.advance(2)
|
||||||
|
bitStream = b.Uint32() >> bitCount
|
||||||
|
} else {
|
||||||
|
bitStream >>= 16
|
||||||
|
bitCount += 16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (bitStream & 3) == 3 {
|
||||||
|
n0 += 3
|
||||||
|
bitStream >>= 2
|
||||||
|
bitCount += 2
|
||||||
|
}
|
||||||
|
n0 += uint16(bitStream & 3)
|
||||||
|
bitCount += 2
|
||||||
|
if n0 > maxSymbolValue {
|
||||||
|
return errors.New("maxSymbolValue too small")
|
||||||
|
}
|
||||||
|
for charnum < n0 {
|
||||||
|
s.norm[charnum&0xff] = 0
|
||||||
|
charnum++
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 {
|
||||||
|
b.advance(bitCount >> 3)
|
||||||
|
bitCount &= 7
|
||||||
|
bitStream = b.Uint32() >> bitCount
|
||||||
|
} else {
|
||||||
|
bitStream >>= 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
max := (2*(threshold) - 1) - (remaining)
|
||||||
|
var count int32
|
||||||
|
|
||||||
|
if (int32(bitStream) & (threshold - 1)) < max {
|
||||||
|
count = int32(bitStream) & (threshold - 1)
|
||||||
|
bitCount += nbBits - 1
|
||||||
|
} else {
|
||||||
|
count = int32(bitStream) & (2*threshold - 1)
|
||||||
|
if count >= threshold {
|
||||||
|
count -= max
|
||||||
|
}
|
||||||
|
bitCount += nbBits
|
||||||
|
}
|
||||||
|
|
||||||
|
count-- // extra accuracy
|
||||||
|
if count < 0 {
|
||||||
|
// -1 means +1
|
||||||
|
remaining += count
|
||||||
|
gotTotal -= count
|
||||||
|
} else {
|
||||||
|
remaining -= count
|
||||||
|
gotTotal += count
|
||||||
|
}
|
||||||
|
s.norm[charnum&0xff] = int16(count)
|
||||||
|
charnum++
|
||||||
|
previous0 = count == 0
|
||||||
|
for remaining < threshold {
|
||||||
|
nbBits--
|
||||||
|
threshold >>= 1
|
||||||
|
}
|
||||||
|
if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 {
|
||||||
|
b.advance(bitCount >> 3)
|
||||||
|
bitCount &= 7
|
||||||
|
} else {
|
||||||
|
bitCount -= (uint)(8 * (len(b.b) - 4 - b.off))
|
||||||
|
b.off = len(b.b) - 4
|
||||||
|
}
|
||||||
|
bitStream = b.Uint32() >> (bitCount & 31)
|
||||||
|
}
|
||||||
|
s.symbolLen = charnum
|
||||||
|
|
||||||
|
if s.symbolLen <= 1 {
|
||||||
|
return fmt.Errorf("symbolLen (%d) too small", s.symbolLen)
|
||||||
|
}
|
||||||
|
if s.symbolLen > maxSymbolValue+1 {
|
||||||
|
return fmt.Errorf("symbolLen (%d) too big", s.symbolLen)
|
||||||
|
}
|
||||||
|
if remaining != 1 {
|
||||||
|
return fmt.Errorf("corruption detected (remaining %d != 1)", remaining)
|
||||||
|
}
|
||||||
|
if bitCount > 32 {
|
||||||
|
return fmt.Errorf("corruption detected (bitCount %d > 32)", bitCount)
|
||||||
|
}
|
||||||
|
if gotTotal != 1<<s.actualTableLog {
|
||||||
|
return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<<s.actualTableLog)
|
||||||
|
}
|
||||||
|
b.advance((bitCount + 7) >> 3)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// decSymbol contains information about a state entry,
|
||||||
|
// Including the state offset base, the output symbol and
|
||||||
|
// the number of bits to read for the low part of the destination state.
|
||||||
|
type decSymbol struct {
|
||||||
|
newState uint16
|
||||||
|
symbol uint8
|
||||||
|
nbBits uint8
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocDtable will allocate decoding tables if they are not big enough.
|
||||||
|
func (s *Scratch) allocDtable() {
|
||||||
|
tableSize := 1 << s.actualTableLog
|
||||||
|
if cap(s.decTable) < tableSize {
|
||||||
|
s.decTable = make([]decSymbol, tableSize)
|
||||||
|
}
|
||||||
|
s.decTable = s.decTable[:tableSize]
|
||||||
|
|
||||||
|
if cap(s.ct.tableSymbol) < 256 {
|
||||||
|
s.ct.tableSymbol = make([]byte, 256)
|
||||||
|
}
|
||||||
|
s.ct.tableSymbol = s.ct.tableSymbol[:256]
|
||||||
|
|
||||||
|
if cap(s.ct.stateTable) < 256 {
|
||||||
|
s.ct.stateTable = make([]uint16, 256)
|
||||||
|
}
|
||||||
|
s.ct.stateTable = s.ct.stateTable[:256]
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildDtable will build the decoding table.
|
||||||
|
func (s *Scratch) buildDtable() error {
|
||||||
|
tableSize := uint32(1 << s.actualTableLog)
|
||||||
|
highThreshold := tableSize - 1
|
||||||
|
s.allocDtable()
|
||||||
|
symbolNext := s.ct.stateTable[:256]
|
||||||
|
|
||||||
|
// Init, lay down lowprob symbols
|
||||||
|
s.zeroBits = false
|
||||||
|
{
|
||||||
|
largeLimit := int16(1 << (s.actualTableLog - 1))
|
||||||
|
for i, v := range s.norm[:s.symbolLen] {
|
||||||
|
if v == -1 {
|
||||||
|
s.decTable[highThreshold].symbol = uint8(i)
|
||||||
|
highThreshold--
|
||||||
|
symbolNext[i] = 1
|
||||||
|
} else {
|
||||||
|
if v >= largeLimit {
|
||||||
|
s.zeroBits = true
|
||||||
|
}
|
||||||
|
symbolNext[i] = uint16(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Spread symbols
|
||||||
|
{
|
||||||
|
tableMask := tableSize - 1
|
||||||
|
step := tableStep(tableSize)
|
||||||
|
position := uint32(0)
|
||||||
|
for ss, v := range s.norm[:s.symbolLen] {
|
||||||
|
for i := 0; i < int(v); i++ {
|
||||||
|
s.decTable[position].symbol = uint8(ss)
|
||||||
|
position = (position + step) & tableMask
|
||||||
|
for position > highThreshold {
|
||||||
|
// lowprob area
|
||||||
|
position = (position + step) & tableMask
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if position != 0 {
|
||||||
|
// position must reach all cells once, otherwise normalizedCounter is incorrect
|
||||||
|
return errors.New("corrupted input (position != 0)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build Decoding table
|
||||||
|
{
|
||||||
|
tableSize := uint16(1 << s.actualTableLog)
|
||||||
|
for u, v := range s.decTable {
|
||||||
|
symbol := v.symbol
|
||||||
|
nextState := symbolNext[symbol]
|
||||||
|
symbolNext[symbol] = nextState + 1
|
||||||
|
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
|
||||||
|
s.decTable[u].nbBits = nBits
|
||||||
|
newState := (nextState << nBits) - tableSize
|
||||||
|
if newState >= tableSize {
|
||||||
|
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
|
||||||
|
}
|
||||||
|
if newState == uint16(u) && nBits == 0 {
|
||||||
|
// Seems weird that this is possible with nbits > 0.
|
||||||
|
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
|
||||||
|
}
|
||||||
|
s.decTable[u].newState = newState
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// decompress will decompress the bitstream.
|
||||||
|
// If the buffer is over-read an error is returned.
|
||||||
|
func (s *Scratch) decompress() error {
|
||||||
|
br := &s.bits
|
||||||
|
if err := br.init(s.br.unread()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var s1, s2 decoder
|
||||||
|
// Initialize and decode first state and symbol.
|
||||||
|
s1.init(br, s.decTable, s.actualTableLog)
|
||||||
|
s2.init(br, s.decTable, s.actualTableLog)
|
||||||
|
|
||||||
|
// Use temp table to avoid bound checks/append penalty.
|
||||||
|
var tmp = s.ct.tableSymbol[:256]
|
||||||
|
var off uint8
|
||||||
|
|
||||||
|
// Main part
|
||||||
|
if !s.zeroBits {
|
||||||
|
for br.off >= 8 {
|
||||||
|
br.fillFast()
|
||||||
|
tmp[off+0] = s1.nextFast()
|
||||||
|
tmp[off+1] = s2.nextFast()
|
||||||
|
br.fillFast()
|
||||||
|
tmp[off+2] = s1.nextFast()
|
||||||
|
tmp[off+3] = s2.nextFast()
|
||||||
|
off += 4
|
||||||
|
// When off is 0, we have overflowed and should write.
|
||||||
|
if off == 0 {
|
||||||
|
s.Out = append(s.Out, tmp...)
|
||||||
|
if len(s.Out) >= s.DecompressLimit {
|
||||||
|
return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for br.off >= 8 {
|
||||||
|
br.fillFast()
|
||||||
|
tmp[off+0] = s1.next()
|
||||||
|
tmp[off+1] = s2.next()
|
||||||
|
br.fillFast()
|
||||||
|
tmp[off+2] = s1.next()
|
||||||
|
tmp[off+3] = s2.next()
|
||||||
|
off += 4
|
||||||
|
if off == 0 {
|
||||||
|
s.Out = append(s.Out, tmp...)
|
||||||
|
// When off is 0, we have overflowed and should write.
|
||||||
|
if len(s.Out) >= s.DecompressLimit {
|
||||||
|
return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.Out = append(s.Out, tmp[:off]...)
|
||||||
|
|
||||||
|
// Final bits, a bit more expensive check
|
||||||
|
for {
|
||||||
|
if s1.finished() {
|
||||||
|
s.Out = append(s.Out, s1.final(), s2.final())
|
||||||
|
break
|
||||||
|
}
|
||||||
|
br.fill()
|
||||||
|
s.Out = append(s.Out, s1.next())
|
||||||
|
if s2.finished() {
|
||||||
|
s.Out = append(s.Out, s2.final(), s1.final())
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s.Out = append(s.Out, s2.next())
|
||||||
|
if len(s.Out) >= s.DecompressLimit {
|
||||||
|
return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return br.close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// decoder keeps track of the current state and updates it from the bitstream.
|
||||||
|
type decoder struct {
|
||||||
|
state uint16
|
||||||
|
br *bitReader
|
||||||
|
dt []decSymbol
|
||||||
|
}
|
||||||
|
|
||||||
|
// init will initialize the decoder and read the first state from the stream.
|
||||||
|
func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) {
|
||||||
|
d.dt = dt
|
||||||
|
d.br = in
|
||||||
|
d.state = in.getBits(tableLog)
|
||||||
|
}
|
||||||
|
|
||||||
|
// next returns the next symbol and sets the next state.
|
||||||
|
// At least tablelog bits must be available in the bit reader.
|
||||||
|
func (d *decoder) next() uint8 {
|
||||||
|
n := &d.dt[d.state]
|
||||||
|
lowBits := d.br.getBits(n.nbBits)
|
||||||
|
d.state = n.newState + lowBits
|
||||||
|
return n.symbol
|
||||||
|
}
|
||||||
|
|
||||||
|
// finished returns true if all bits have been read from the bitstream
|
||||||
|
// and the next state would require reading bits from the input.
|
||||||
|
func (d *decoder) finished() bool {
|
||||||
|
return d.br.finished() && d.dt[d.state].nbBits > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// final returns the current state symbol without decoding the next.
|
||||||
|
func (d *decoder) final() uint8 {
|
||||||
|
return d.dt[d.state].symbol
|
||||||
|
}
|
||||||
|
|
||||||
|
// nextFast returns the next symbol and sets the next state.
|
||||||
|
// This can only be used if no symbols are 0 bits.
|
||||||
|
// At least tablelog bits must be available in the bit reader.
|
||||||
|
func (d *decoder) nextFast() uint8 {
|
||||||
|
n := d.dt[d.state]
|
||||||
|
lowBits := d.br.getBitsFast(n.nbBits)
|
||||||
|
d.state = n.newState + lowBits
|
||||||
|
return n.symbol
|
||||||
|
}
|
||||||
144
vendor/github.com/klauspost/compress/fse/fse.go
generated
vendored
Normal file
144
vendor/github.com/klauspost/compress/fse/fse.go
generated
vendored
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
// Copyright 2018 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
|
||||||
|
|
||||||
|
// Package fse provides Finite State Entropy encoding and decoding.
|
||||||
|
//
|
||||||
|
// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
|
||||||
|
// for byte blocks as implemented in zstd.
|
||||||
|
//
|
||||||
|
// See https://github.com/klauspost/compress/tree/master/fse for more information.
|
||||||
|
package fse
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math/bits"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
/*!MEMORY_USAGE :
|
||||||
|
* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
|
||||||
|
* Increasing memory usage improves compression ratio
|
||||||
|
* Reduced memory usage can improve speed, due to cache effect
|
||||||
|
* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
|
||||||
|
maxMemoryUsage = 14
|
||||||
|
defaultMemoryUsage = 13
|
||||||
|
|
||||||
|
maxTableLog = maxMemoryUsage - 2
|
||||||
|
maxTablesize = 1 << maxTableLog
|
||||||
|
defaultTablelog = defaultMemoryUsage - 2
|
||||||
|
minTablelog = 5
|
||||||
|
maxSymbolValue = 255
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// ErrIncompressible is returned when input is judged to be too hard to compress.
|
||||||
|
ErrIncompressible = errors.New("input is not compressible")
|
||||||
|
|
||||||
|
// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
|
||||||
|
ErrUseRLE = errors.New("input is single value repeated")
|
||||||
|
)
|
||||||
|
|
||||||
|
// Scratch provides temporary storage for compression and decompression.
|
||||||
|
type Scratch struct {
|
||||||
|
// Private
|
||||||
|
count [maxSymbolValue + 1]uint32
|
||||||
|
norm [maxSymbolValue + 1]int16
|
||||||
|
br byteReader
|
||||||
|
bits bitReader
|
||||||
|
bw bitWriter
|
||||||
|
ct cTable // Compression tables.
|
||||||
|
decTable []decSymbol // Decompression table.
|
||||||
|
maxCount int // count of the most probable symbol
|
||||||
|
|
||||||
|
// Per block parameters.
|
||||||
|
// These can be used to override compression parameters of the block.
|
||||||
|
// Do not touch, unless you know what you are doing.
|
||||||
|
|
||||||
|
// Out is output buffer.
|
||||||
|
// If the scratch is re-used before the caller is done processing the output,
|
||||||
|
// set this field to nil.
|
||||||
|
// Otherwise the output buffer will be re-used for next Compression/Decompression step
|
||||||
|
// and allocation will be avoided.
|
||||||
|
Out []byte
|
||||||
|
|
||||||
|
// DecompressLimit limits the maximum decoded size acceptable.
|
||||||
|
// If > 0 decompression will stop when approximately this many bytes
|
||||||
|
// has been decoded.
|
||||||
|
// If 0, maximum size will be 2GB.
|
||||||
|
DecompressLimit int
|
||||||
|
|
||||||
|
symbolLen uint16 // Length of active part of the symbol table.
|
||||||
|
actualTableLog uint8 // Selected tablelog.
|
||||||
|
zeroBits bool // no bits has prob > 50%.
|
||||||
|
clearCount bool // clear count
|
||||||
|
|
||||||
|
// MaxSymbolValue will override the maximum symbol value of the next block.
|
||||||
|
MaxSymbolValue uint8
|
||||||
|
|
||||||
|
// TableLog will attempt to override the tablelog for the next block.
|
||||||
|
TableLog uint8
|
||||||
|
}
|
||||||
|
|
||||||
|
// Histogram allows to populate the histogram and skip that step in the compression,
|
||||||
|
// It otherwise allows to inspect the histogram when compression is done.
|
||||||
|
// To indicate that you have populated the histogram call HistogramFinished
|
||||||
|
// with the value of the highest populated symbol, as well as the number of entries
|
||||||
|
// in the most populated entry. These are accepted at face value.
|
||||||
|
// The returned slice will always be length 256.
|
||||||
|
func (s *Scratch) Histogram() []uint32 {
|
||||||
|
return s.count[:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// HistogramFinished can be called to indicate that the histogram has been populated.
|
||||||
|
// maxSymbol is the index of the highest set symbol of the next data segment.
|
||||||
|
// maxCount is the number of entries in the most populated entry.
|
||||||
|
// These are accepted at face value.
|
||||||
|
func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
|
||||||
|
s.maxCount = maxCount
|
||||||
|
s.symbolLen = uint16(maxSymbol) + 1
|
||||||
|
s.clearCount = maxCount != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// prepare will prepare and allocate scratch tables used for both compression and decompression.
|
||||||
|
func (s *Scratch) prepare(in []byte) (*Scratch, error) {
|
||||||
|
if s == nil {
|
||||||
|
s = &Scratch{}
|
||||||
|
}
|
||||||
|
if s.MaxSymbolValue == 0 {
|
||||||
|
s.MaxSymbolValue = 255
|
||||||
|
}
|
||||||
|
if s.TableLog == 0 {
|
||||||
|
s.TableLog = defaultTablelog
|
||||||
|
}
|
||||||
|
if s.TableLog > maxTableLog {
|
||||||
|
return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
|
||||||
|
}
|
||||||
|
if cap(s.Out) == 0 {
|
||||||
|
s.Out = make([]byte, 0, len(in))
|
||||||
|
}
|
||||||
|
if s.clearCount && s.maxCount == 0 {
|
||||||
|
for i := range s.count {
|
||||||
|
s.count[i] = 0
|
||||||
|
}
|
||||||
|
s.clearCount = false
|
||||||
|
}
|
||||||
|
s.br.init(in)
|
||||||
|
if s.DecompressLimit == 0 {
|
||||||
|
// Max size 2GB.
|
||||||
|
s.DecompressLimit = (2 << 30) - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// tableStep returns the next table index.
|
||||||
|
func tableStep(tableSize uint32) uint32 {
|
||||||
|
return (tableSize >> 1) + (tableSize >> 3) + 3
|
||||||
|
}
|
||||||
|
|
||||||
|
func highBits(val uint32) (n uint32) {
|
||||||
|
return uint32(bits.Len32(val) - 1)
|
||||||
|
}
|
||||||
4
vendor/github.com/klauspost/compress/gen.sh
generated
vendored
Normal file
4
vendor/github.com/klauspost/compress/gen.sh
generated
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
cd s2/cmd/_s2sx/ || exit 1
|
||||||
|
go generate .
|
||||||
1
vendor/github.com/klauspost/compress/huff0/.gitignore
generated
vendored
Normal file
1
vendor/github.com/klauspost/compress/huff0/.gitignore
generated
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
/huff0-fuzz.zip
|
||||||
89
vendor/github.com/klauspost/compress/huff0/README.md
generated
vendored
Normal file
89
vendor/github.com/klauspost/compress/huff0/README.md
generated
vendored
Normal file
@@ -0,0 +1,89 @@
# Huff0 entropy compression

This package provides Huff0 encoding and decoding as used in zstd.

[Huff0](https://github.com/Cyan4973/FiniteStateEntropy#new-generation-entropy-coders)
is a Huffman codec designed for modern CPUs; by exploiting out-of-order (OoO) execution across
multiple ALUs (Arithmetic Logic Units), it achieves extremely fast compression and decompression speeds.

It can be used to compress input with many similar values into the smallest number of bytes.
It does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder)
as LZ coders do, but it can be used as a secondary step after compressors (like Snappy) that do not do entropy encoding.

* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0)

## News

This package is used as part of the [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression package,
which ensures that most of its functionality is well tested.

# Usage

This package provides a low-level interface that allows you to compress single independent blocks.

Each block is separate, and there are no built-in integrity checks.
This means that the caller should keep track of block sizes and also do checksums if needed.

Compressing a block is done via the [`Compress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress1X) and
[`Compress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress4X) functions.
You must provide input and will receive the output and maybe an error.

These error values can be returned:

| Error               | Description                                                                  |
|---------------------|------------------------------------------------------------------------------|
| `<nil>`             | Everything ok, output is returned                                            |
| `ErrIncompressible` | Returned when input is judged to be too hard to compress                     |
| `ErrUseRLE`         | Returned from the compressor when the input is a single byte value repeated  |
| `ErrTooBig`         | Returned if the input block exceeds the maximum allowed size (128 KiB)       |
| `(error)`           | An internal error occurred.                                                  |

As can be seen above, some of these errors will be returned even under normal operation, so it is important to handle them.

To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object
that can be re-used for successive calls. Both compression and decompression accept a `Scratch` object, and the same
object can be used for both.

Be aware that when re-using a `Scratch` object, the *output* buffer is also re-used, so if you are still using the previous output
you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output.

The `Scratch` object will retain state that allows re-using previous tables for encoding and decoding.
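
To make the flow above concrete, here is a minimal sketch of compressing one block with a reusable `Scratch`. The fallback behaviour for incompressible and RLE blocks (returning `nil, nil`) is an assumption about what a caller might do, not something the package prescribes:

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/huff0"
)

// compressBlock compresses one independent block with Compress1X.
// A nil, nil return means the caller should store the block in
// another form (raw or run-length encoded).
func compressBlock(in []byte, s *huff0.Scratch) ([]byte, error) {
	s.Out = nil // detach any previous output still held by the caller
	out, _, err := huff0.Compress1X(in, s)
	switch err {
	case nil:
		return out, nil
	case huff0.ErrIncompressible:
		return nil, nil // normal operation: store the block uncompressed
	case huff0.ErrUseRLE:
		return nil, nil // normal operation: store as (value, count) instead
	default:
		return nil, fmt.Errorf("huff0: %w", err)
	}
}

func main() {
	var s huff0.Scratch
	out, err := compressBlock([]byte("aaaabbbbccccddddeeee"), &s)
	fmt.Println(len(out), err)
}
```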

## Tables and re-use

Huff0 allows reusing tables from the previous block to save space, if that is expected to give better or faster results.

The Scratch object allows you to set a [`ReusePolicy`](https://godoc.org/github.com/klauspost/compress/huff0#ReusePolicy)
that controls this behaviour. See the documentation for details. This can be altered between each block.

Do however note that this information is *not* stored in the output block, so it is up to the users of the package to
record whether [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable) should be called,
based on the boolean reported back from the CompressXX call.

If you want to store the table separately from the data, you can access them as `OutData` and `OutTable` on the
[`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object.
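
Extending the sketch above, a caller might record the reuse boolean next to each block; the one-byte header below is invented framing for illustration, not anything huff0 defines:

```go
// compressBlocks frames each block with a hypothetical 1-byte header:
// 1 = block carries a new table (decoder must call ReadTable),
// 0 = block reuses the previous table.
func compressBlocks(blocks [][]byte) ([][]byte, error) {
	var s huff0.Scratch
	s.Reuse = huff0.ReusePolicyAllow // let huff0 decide per block
	var framed [][]byte
	for _, b := range blocks {
		s.Out = nil
		out, reUsed, err := huff0.Compress1X(b, &s)
		if err != nil {
			return nil, err // ErrIncompressible/ErrUseRLE handling omitted for brevity
		}
		hdr := byte(1)
		if reUsed {
			hdr = 0 // table not in this block; decoder keeps its current one
		}
		framed = append(framed, append([]byte{hdr}, out...))
	}
	return framed, nil
}
```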

## Decompressing

The first part of decoding is to initialize the decoding tables through [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable).
You can supply the complete block to `ReadTable`, and it will return the data part of the block,
which can be given to the decompressor.

Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X)
or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function.

For concurrently decompressing content with a fixed table, a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested, which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size.

You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back,
your input was likely corrupted.

It is important to note that a successful decoding does *not* mean your output matches your original input.
There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid.
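
A matching decode-side sketch, assuming each block carries its own table (the header byte from the framing above is omitted); as noted, a nil error does not prove the output matches the original:

```go
// decompressBlock decodes one block that begins with its Huffman table.
func decompressBlock(block []byte, s *huff0.Scratch) ([]byte, error) {
	// ReadTable initializes the decoding tables and returns the
	// remaining data portion of the block.
	s, data, err := huff0.ReadTable(block, s)
	if err != nil {
		return nil, err
	}
	// Decompress1X decodes the data part; an error usually means
	// the input was corrupted.
	return s.Decompress1X(data)
}
```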

# Contributing

Contributions are always welcome. Be aware that adding public functions will require good justification, and breaking
changes will likely not be accepted. If in doubt, open an issue before writing the PR.
224 vendor/github.com/klauspost/compress/huff0/bitreader.go generated vendored Normal file
@@ -0,0 +1,224 @@
// Copyright 2018 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.

package huff0

import (
	"errors"
	"fmt"
	"io"

	"github.com/klauspost/compress/internal/le"
)

// bitReaderBytes reads a bitstream in reverse.
// The last set bit indicates the start of the stream and is used
// for aligning the input.
type bitReaderBytes struct {
	in       []byte
	off      uint // next byte to read is at in[off - 1]
	value    uint64
	bitsRead uint8
}

// init initializes and resets the bit reader.
func (b *bitReaderBytes) init(in []byte) error {
	if len(in) < 1 {
		return errors.New("corrupt stream: too short")
	}
	b.in = in
	b.off = uint(len(in))
	// The highest bit of the last byte indicates where to start
	v := in[len(in)-1]
	if v == 0 {
		return errors.New("corrupt stream, did not find end of stream")
	}
	b.bitsRead = 64
	b.value = 0
	if len(in) >= 8 {
		b.fillFastStart()
	} else {
		b.fill()
		b.fill()
	}
	b.advance(8 - uint8(highBit32(uint32(v))))
	return nil
}

// peekByteFast requires that at least one byte is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReaderBytes) peekByteFast() uint8 {
	got := uint8(b.value >> 56)
	return got
}

func (b *bitReaderBytes) advance(n uint8) {
	b.bitsRead += n
	b.value <<= n & 63
}

// fillFast() will make sure at least 32 bits are available.
// There must be at least 4 bytes available.
func (b *bitReaderBytes) fillFast() {
	if b.bitsRead < 32 {
		return
	}

	// 2 bounds checks.
	low := le.Load32(b.in, b.off-4)
	b.value |= uint64(low) << (b.bitsRead - 32)
	b.bitsRead -= 32
	b.off -= 4
}

// fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read.
func (b *bitReaderBytes) fillFastStart() {
	// Do single re-slice to avoid bounds checks.
	b.value = le.Load64(b.in, b.off-8)
	b.bitsRead = 0
	b.off -= 8
}

// fill() will make sure at least 32 bits are available.
func (b *bitReaderBytes) fill() {
	if b.bitsRead < 32 {
		return
	}
	if b.off >= 4 {
		low := le.Load32(b.in, b.off-4)
		b.value |= uint64(low) << (b.bitsRead - 32)
		b.bitsRead -= 32
		b.off -= 4
		return
	}
	for b.off > 0 {
		b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8)
		b.bitsRead -= 8
		b.off--
	}
}

// finished returns true if all bits have been read from the bit stream.
func (b *bitReaderBytes) finished() bool {
	return b.off == 0 && b.bitsRead >= 64
}

func (b *bitReaderBytes) remaining() uint {
	return b.off*8 + uint(64-b.bitsRead)
}

// close the bitstream and returns an error if out-of-buffer reads occurred.
func (b *bitReaderBytes) close() error {
	// Release reference.
	b.in = nil
	if b.remaining() > 0 {
		return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining())
	}
	if b.bitsRead > 64 {
		return io.ErrUnexpectedEOF
	}
	return nil
}

// bitReaderShifted reads a bitstream in reverse.
// The last set bit indicates the start of the stream and is used
// for aligning the input.
type bitReaderShifted struct {
	in       []byte
	off      uint // next byte to read is at in[off - 1]
	value    uint64
	bitsRead uint8
}

// init initializes and resets the bit reader.
func (b *bitReaderShifted) init(in []byte) error {
	if len(in) < 1 {
		return errors.New("corrupt stream: too short")
	}
	b.in = in
	b.off = uint(len(in))
	// The highest bit of the last byte indicates where to start
	v := in[len(in)-1]
	if v == 0 {
		return errors.New("corrupt stream, did not find end of stream")
	}
	b.bitsRead = 64
	b.value = 0
	if len(in) >= 8 {
		b.fillFastStart()
	} else {
		b.fill()
		b.fill()
	}
	b.advance(8 - uint8(highBit32(uint32(v))))
	return nil
}

// peekBitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
	return uint16(b.value >> ((64 - n) & 63))
}

func (b *bitReaderShifted) advance(n uint8) {
	b.bitsRead += n
	b.value <<= n & 63
}

// fillFast() will make sure at least 32 bits are available.
// There must be at least 4 bytes available.
func (b *bitReaderShifted) fillFast() {
	if b.bitsRead < 32 {
		return
	}

	low := le.Load32(b.in, b.off-4)
	b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
	b.bitsRead -= 32
	b.off -= 4
}

// fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read.
func (b *bitReaderShifted) fillFastStart() {
	b.value = le.Load64(b.in, b.off-8)
	b.bitsRead = 0
	b.off -= 8
}

// fill() will make sure at least 32 bits are available.
func (b *bitReaderShifted) fill() {
	if b.bitsRead < 32 {
		return
	}
	if b.off > 4 {
		low := le.Load32(b.in, b.off-4)
		b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
		b.bitsRead -= 32
		b.off -= 4
		return
	}
	for b.off > 0 {
		b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63)
		b.bitsRead -= 8
		b.off--
	}
}

func (b *bitReaderShifted) remaining() uint {
	return b.off*8 + uint(64-b.bitsRead)
}

// close the bitstream and returns an error if out-of-buffer reads occurred.
func (b *bitReaderShifted) close() error {
	// Release reference.
	b.in = nil
	if b.remaining() > 0 {
		return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining())
	}
	if b.bitsRead > 64 {
		return io.ErrUnexpectedEOF
	}
	return nil
}
102 vendor/github.com/klauspost/compress/huff0/bitwriter.go generated vendored Normal file
@@ -0,0 +1,102 @@
// Copyright 2018 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.

package huff0

// bitWriter will write bits.
// First bit will be LSB of the first byte of output.
type bitWriter struct {
	bitContainer uint64
	nBits        uint8
	out          []byte
}

// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
	b.bitContainer |= uint64(value) << (b.nBits & 63)
	b.nBits += bits
}

// encSymbol will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) encSymbol(ct cTable, symbol byte) {
	enc := ct[symbol]
	b.bitContainer |= uint64(enc.val) << (b.nBits & 63)
	if false {
		if enc.nBits == 0 {
			panic("nbits 0")
		}
	}
	b.nBits += enc.nBits
}

// encTwoSymbols will add up to 32 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
	encA := ct[av]
	encB := ct[bv]
	sh := b.nBits & 63
	combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63))
	b.bitContainer |= combined << sh
	if false {
		if encA.nBits == 0 {
			panic("nbitsA 0")
		}
		if encB.nBits == 0 {
			panic("nbitsB 0")
		}
	}
	b.nBits += encA.nBits + encB.nBits
}

// encFourSymbols adds up to 32 bits from four symbols.
// It will not check if there is space for them,
// so the caller must ensure that b has been flushed recently.
func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) {
	bitsA := encA.nBits
	bitsB := bitsA + encB.nBits
	bitsC := bitsB + encC.nBits
	bitsD := bitsC + encD.nBits
	combined := uint64(encA.val) |
		(uint64(encB.val) << (bitsA & 63)) |
		(uint64(encC.val) << (bitsB & 63)) |
		(uint64(encD.val) << (bitsC & 63))
	b.bitContainer |= combined << (b.nBits & 63)
	b.nBits += bitsD
}

// flush32 will flush out, so there are at least 32 bits available for writing.
func (b *bitWriter) flush32() {
	if b.nBits < 32 {
		return
	}
	b.out = append(b.out,
		byte(b.bitContainer),
		byte(b.bitContainer>>8),
		byte(b.bitContainer>>16),
		byte(b.bitContainer>>24))
	b.nBits -= 32
	b.bitContainer >>= 32
}

// flushAlign will flush remaining full bytes and align to next byte boundary.
func (b *bitWriter) flushAlign() {
	nbBytes := (b.nBits + 7) >> 3
	for i := range nbBytes {
		b.out = append(b.out, byte(b.bitContainer>>(i*8)))
	}
	b.nBits = 0
	b.bitContainer = 0
}

// close will write the alignment bit and write the final byte(s)
// to the output.
func (b *bitWriter) close() {
	// End mark
	b.addBits16Clean(1, 1)
	// flush until next byte.
	b.flushAlign()
}
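
An aside on the end-mark convention shared by this writer and the bit readers above: the single 1 bit written by `close` is what the readers' `init` locates in the last byte to align the stream. A self-contained sketch of that alignment arithmetic, independent of huff0's internal types:

```go
package main

import (
	"fmt"
	"math/bits"
)

// skipBits returns how many bits of the final byte must be skipped
// when reading the stream in reverse: the zero padding plus the
// single 1 end-mark bit appended by the writer.
func skipBits(last byte) (int, error) {
	if last == 0 {
		return 0, fmt.Errorf("corrupt stream, did not find end of stream")
	}
	// bits.Len8(last)-1 is the index of the highest set bit (the end mark),
	// mirroring b.advance(8 - highBit32(v)) in the readers' init.
	return 8 - (bits.Len8(last) - 1), nil
}

func main() {
	n, err := skipBits(0b00010110) // end mark at bit 4; 3 zero pad bits above it
	fmt.Println(n, err)            // 4 <nil>
}
```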
Some files were not shown because too many files have changed in this diff.