首次提交代码

This commit is contained in:
algotao
2025-11-03 14:37:59 +08:00
parent e60f64721c
commit d76c196fb1
311 changed files with 81709 additions and 0 deletions

90
makebloom.go Normal file
View File

@@ -0,0 +1,90 @@
package main
import (
"bufio"
"flag"
"log/slog"
"os"
"git.algo.com.cn/public/bloomtool/internal/bloom"
)
// FalseRate is the default target false-positive rate for the bloom filter:
// 1e-8, i.e. one in a hundred million.
const FalseRate = 1e-8
// RunMakeBloom parses the arguments of the "makebloom" sub-command and builds
// a bloom-filter bitmap file from a text file of device ids.
//
// Flags:
//
//	-d  device id filename (required)
//	-b  bitmap filename for output (required)
//	-e  maximum number of elements, at most 10,000,000,000; 0 (the default)
//	    lets makeBloom estimate it from the size of the input file
//	-r  target false-positive rate, within [0.000000001, 0.01]; defaults to
//	    FalseRate
//
// When a positional argument is present, a required flag is missing, or a
// value is out of range, the usage text is printed and nil is returned without
// building anything. Otherwise the result of makeBloom is returned. The flag
// set is created with flag.ExitOnError, so a flag syntax error terminates the
// process inside fs.Parse rather than being returned.
func RunMakeBloom(args ...string) error {
	const (
		elementsLimit = 10_000_000_000
		minFalseRate  = 0.000000001
		maxFalseRate  = 0.01
	)

	fs := flag.NewFlagSet("makebloom", flag.ExitOnError)
	txtFile := fs.String("d", "", "device id filename")
	bmpFile := fs.String("b", "", "bitmap filename for output")
	elements := fs.Uint64("e", 0, "max elements. (max 100 0000 0000). if 0 then auto")
	falseRate := fs.Float64("r", FalseRate, "false rate (0.01--0.0000 0000 1)")

	if err := fs.Parse(args); err != nil {
		return err
	}

	// The rate check is written as "inside the range" and then negated: NaN
	// compares false against everything, so an "outside the range" test
	// (rate > max || rate < min) would let "-r NaN" through.
	rateOK := *falseRate >= minFalseRate && *falseRate <= maxFalseRate
	if fs.NArg() > 0 || *txtFile == "" || *bmpFile == "" || *elements > elementsLimit || !rateOK {
		fs.Usage()
		return nil
	}

	return makeBloom(*txtFile, *bmpFile, *elements, *falseRate)
}
// makeBloom builds a bloom filter from the lines of txtFile and saves it to
// bmpFile.
//
// Every line of txtFile is added to the filter as one element, exactly as
// bufio.Scanner returns it (line terminator stripped). elements is the
// capacity the filter is sized for; when it is 0 the capacity is estimated
// from the size of txtFile. falseRate is the target false-positive rate passed
// to bloom.NewWithEstimates.
//
// It returns the first error from opening, stat-ing or reading txtFile, or
// from saving bmpFile; each error is logged before it is returned. If reading
// fails, bmpFile is not written.
func makeBloom(txtFile string, bmpFile string, elements uint64, falseRate float64) error {
	// Open the device id file.
	slog.Info("open source file", "filename", txtFile)
	tfile, err := os.Open(txtFile)
	if err != nil {
		slog.Error("open source file error", "err", err)
		return err
	}
	defer tfile.Close()

	fstat, err := tfile.Stat()
	if err != nil {
		slog.Error("source file stat error", "err", err)
		return err
	}

	// Pick the filter capacity. When the caller did not give one, estimate the
	// number of lines from the file size: in theory each line is a 32-byte MD5
	// plus a 1- or 2-byte line ending, so dividing by 30 over-estimates the
	// count as a safety factor, and another 10000 is added on top.
	maxElements := elements
	if maxElements == 0 {
		maxElements = uint64(fstat.Size()/30) + 10000
	}

	// Create the bloom filter.
	bloombmp := bloom.NewWithEstimates(maxElements, falseRate)

	// Read the file line by line and add each line to the filter.
	scanner := bufio.NewScanner(tfile)
	lineCount := 1 // 1-based number of the line currently being added
	for scanner.Scan() {
		if lineCount%100000 == 0 {
			slog.Info("read line", "lineno", lineCount)
		}
		bloombmp.AddString(scanner.Text())
		lineCount++
	}
	// Scan also returns false on a read error or an over-long line, not only at
	// end of file. Without this check a partially filled filter would be saved
	// as if it were complete.
	if err := scanner.Err(); err != nil {
		slog.Error("read source file error", "err", err)
		return err
	}

	// Save the bitmap file.
	slog.Info("save bitmap file", "filename", bmpFile)
	if err := bloombmp.SaveToFile(bmpFile); err != nil {
		slog.Error("save bitmap file error", "err", err)
		return err
	}
	slog.Info("save bitmap file done")
	return nil
}