Files
bloomtool/hittest.go
2025-11-05 16:41:06 +08:00

89 lines
1.8 KiB
Go

package main
import (
"bufio"
"flag"
"fmt"
"log/slog"
"os"
"git.algo.com.cn/public/bloomtool/internal/bloom"
)
// RunHitTest parses the flags of the "hittest" sub-command and runs the hit
// test with them.
//
// Flags:
//
//	-d  device id filename (required)
//	-b  bitmap filename (required)
//	-o  state filename for output (required)
//	-f  filter for hit only
//
// Positional arguments are not accepted. When one is present, or when a
// required flag is missing, the reason and the usage text are written to the
// flag set's output and nil is returned. Malformed flags terminate the process
// because the flag set is created with flag.ExitOnError.
func RunHitTest(args ...string) error {
	fs := flag.NewFlagSet("hittest", flag.ExitOnError)
	txtFile := fs.String("d", "", "device id filename")
	bmpFile := fs.String("b", "", "bitmap filename")
	outStateFile := fs.String("o", "", "state filename for output")
	filter := fs.Bool("f", false, "filter for hit only")
	if err := fs.Parse(args); err != nil {
		return err
	}

	// Tell the user what was wrong with the invocation instead of printing a
	// bare argument count.
	var problem string
	switch {
	case fs.NArg() > 0:
		problem = fmt.Sprintf("unexpected positional arguments: %q", fs.Args())
	case *txtFile == "" || *bmpFile == "" || *outStateFile == "":
		problem = "flags -d, -b and -o are required"
	}
	if problem != "" {
		fmt.Fprintln(fs.Output(), "hittest:", problem)
		fs.Usage()
		// Kept as nil so callers observe the same result as before for an
		// invalid invocation.
		return nil
	}

	return hitTest(*txtFile, *bmpFile, *outStateFile, *filter)
}
// hitTest tests every line of txtFile against the bitmap loaded from bmpFile
// with bloom.LoadFromFile and writes the outcome to stateFile.
//
// With filter set, only the lines that hit are written, one per line.
// Otherwise every line is written, followed by a tab and 1 for a hit or 0 for
// a miss. A progress message is logged each time the line counter reaches a
// multiple of 100000.
//
// A non-nil error is returned when the bitmap or text file cannot be opened,
// when the state file cannot be created, when reading the text file fails, or
// when writing, flushing or closing the state file fails.
func hitTest(txtFile, bmpFile, stateFile string, filter bool) (err error) {
	slog.Info("load bitmap file", "filename", bmpFile)
	bfile, err := bloom.LoadFromFile(bmpFile, false)
	if err != nil {
		slog.Error("open bitmap file error", "err", err)
		return err
	}

	slog.Info("load text file", "filename", txtFile)
	tfile, err := os.Open(txtFile)
	if err != nil {
		slog.Error("open text file error", "err", err)
		return err
	}
	defer tfile.Close()

	slog.Info("create state file", "filename", stateFile)
	sfile, err := os.Create(stateFile)
	if err != nil {
		slog.Error("create state file error", "err", err)
		return err
	}
	// A failed Close on a written file can mean lost data, so report it
	// unless an earlier error is already being returned.
	defer func() {
		if cerr := sfile.Close(); cerr != nil && err == nil {
			slog.Error("close state file error", "err", cerr)
			err = cerr
		}
	}()

	writer := bufio.NewWriter(sfile)

	// Read the text file line by line.
	scanner := bufio.NewScanner(tfile)
	for lineCount := 1; scanner.Scan(); lineCount++ {
		if lineCount%100000 == 0 {
			slog.Info("read line", "lineno", lineCount)
		}

		// Test the line against the bitmap.
		lineText := scanner.Text()
		hit := bfile.TestString(lineText)
		if filter && !hit {
			continue
		}

		// bufio.Writer errors are sticky: once a write fails, every later
		// write returns the same error, so checking the final write of the
		// line is enough.
		writer.WriteString(lineText)
		if !filter {
			if hit {
				writer.WriteString("\t1")
			} else {
				writer.WriteString("\t0")
			}
		}
		if werr := writer.WriteByte('\n'); werr != nil {
			slog.Error("write state file error", "err", werr)
			return werr
		}
	}

	// Flush what was processed even if scanning stopped early, then report
	// the read error first because it is the root cause of a short output.
	flushErr := writer.Flush()
	if serr := scanner.Err(); serr != nil {
		slog.Error("read text file error", "err", serr)
		return serr
	}
	if flushErr != nil {
		slog.Error("flush state file error", "err", flushErr)
		return flushErr
	}
	return nil
}