Files
bloomtool/makebloom.go
2025-11-05 16:41:06 +08:00

91 lines
2.1 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package main
import (
"bufio"
"flag"
"log/slog"
"os"
"git.algo.com.cn/public/bloomtool/internal/bloom"
)
// FalseRate is the default false-positive rate used for the "-r" flag of the
// makebloom command: 1e-8, i.e. one in a hundred million.
//
// NOTE(review): this constant was previously described as "one in ten
// million" (1e-7), which does not match the value — confirm which is intended.
const FalseRate = 1e-8
// RunMakeBloom parses the arguments of the "makebloom" command and, when they
// are valid, builds a bloom filter bitmap by calling makeBloom.
//
// Flags:
//
//	-d  device id filename (required)
//	-o  output bitmap filename (required)
//	-e  max elements, at most 10000000000; 0 lets makeBloom pick a value
//	-r  false-positive rate within [0.000000001, 0.01]; defaults to FalseRate
//
// When a required flag is missing, a value is out of range, or extra
// positional arguments are present, the usage text is printed and nil is
// returned without building anything. Otherwise the result of makeBloom is
// returned.
func RunMakeBloom(args ...string) error {
	const (
		elementsLimit = 10000000000
		maxFalseRate  = 0.01
		minFalseRate  = 0.000000001
	)

	fs := flag.NewFlagSet("makebloom", flag.ExitOnError)
	txtFile := fs.String("d", "", "device id filename")
	outFile := fs.String("o", "", "output bitmap filename")
	elements := fs.Uint64("e", 0, "max elements. (max 100 0000 0000). if 0 then auto")
	falseRate := fs.Float64("r", FalseRate, "false rate (0.01--0.0000 0000 1)")
	if err := fs.Parse(args); err != nil {
		return err
	}

	// The rate is tested as "inside the range" and then negated. The flag
	// parser accepts "NaN", and every ordered comparison with NaN is false, so
	// a pair of "outside the range" tests would let NaN through.
	rateOK := *falseRate >= minFalseRate && *falseRate <= maxFalseRate
	if fs.NArg() > 0 || *txtFile == "" || *outFile == "" ||
		*elements > elementsLimit || !rateOK {
		fs.Usage()
		return nil
	}
	return makeBloom(*txtFile, *outFile, *elements, *falseRate)
}
// makeBloom reads txtFile line by line, adds every line to a new bloom filter
// and saves the filter to outFile.
//
// elements is the capacity passed to the filter; when it is 0 the capacity is
// estimated from the size of txtFile. falseRate is the false-positive rate
// passed to the filter.
//
// It returns the first error met while opening, stat-ing or reading txtFile,
// or while saving outFile. Nothing is saved when reading fails.
func makeBloom(txtFile string, outFile string, elements uint64, falseRate float64) error {
	// Open the device id file.
	slog.Info("open source file", "filename", txtFile)
	tfile, err := os.Open(txtFile)
	if err != nil {
		slog.Error("open source file error", "err", err)
		return err
	}
	defer tfile.Close()

	fstat, err := tfile.Stat()
	if err != nil {
		slog.Error("source file stat error", "err", err)
		return err
	}

	// Estimate the element count with some headroom. In theory each line is a
	// 32-byte MD5 plus 1 or 2 newline bytes; dividing by 30 instead
	// over-estimates the count as a safety factor, and 10000 more are added on
	// top of that.
	maxElements := uint64(0)
	if elements == 0 {
		maxElements = uint64((fstat.Size() / 30)) + 10000
	} else {
		maxElements = elements
	}

	// Create the bloom filter.
	bloombmp := bloom.NewWithEstimates(maxElements, falseRate)

	// Read the file line by line.
	scanner := bufio.NewScanner(tfile)
	lineCount := 1
	for scanner.Scan() {
		if lineCount%100000 == 0 {
			slog.Info("read line", "lineno", lineCount)
		}
		// Add the line to the bloom filter.
		bloombmp.AddString(scanner.Text())
		lineCount++
	}
	// Scan returns false both at end of file and on failure (read error or an
	// over-long line). Without this check a partially filled bitmap would be
	// saved as if it were complete.
	if err := scanner.Err(); err != nil {
		slog.Error("read source file error", "err", err)
		return err
	}

	// Save the bitmap file.
	slog.Info("save bitmap file", "filename", outFile)
	err = bloombmp.SaveToFile(outFile)
	if err != nil {
		slog.Error("save bitmap file error", "err", err)
		return err
	}
	slog.Info("save bitmap file done")
	return nil
}