package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"log/slog"
	"os"

	"git.algo.com.cn/public/bloomtool/internal/bloom"
)

// RunHitTest parses the flags of the "hittest" sub-command and runs the hit
// test with them.
//
// Flags:
//
//	-d  device id filename (required)
//	-b  bitmap filename (required)
//	-o  state filename for output (required)
//	-f  filter for hit only
//
// When a required flag is empty or positional arguments are present, it prints
// the number of positional arguments followed by the usage text and returns
// nil. Otherwise it returns the result of the hit test.
func RunHitTest(args ...string) error {
	fs := flag.NewFlagSet("hittest", flag.ExitOnError)
	txtFile := fs.String("d", "", "device id filename")
	bmpFile := fs.String("b", "", "bitmap filename")
	outStateFile := fs.String("o", "", "state filename for output")
	filter := fs.Bool("f", false, "filter for hit only")

	if err := fs.Parse(args); err != nil {
		return err
	}

	if fs.NArg() > 0 || *txtFile == "" || *bmpFile == "" || *outStateFile == "" {
		// NOTE(review): the positional-argument count print looks like debug
		// residue, and a usage failure is reported as success (nil). Both are
		// kept as-is so existing callers see no behaviour change.
		fmt.Println(fs.NArg())
		fs.Usage()
		return nil
	}

	return hitTest(*txtFile, *bmpFile, *outStateFile, *filter)
}

// hitTest loads the bitmap from bmpFile, tests every line of txtFile against
// it and writes the results to stateFile (created or truncated).
//
// With filter set, only lines that hit are written, one per line. Otherwise
// every line is written as "<line>\t<0|1>", where 1 marks a hit.
//
// It returns the first error encountered while loading the bitmap, opening or
// reading the text file, or creating, writing or closing the state file.
func hitTest(txtFile, bmpFile, stateFile string, filter bool) (err error) {
	slog.Info("load bitmap file", "filename", bmpFile)
	bfile, err := bloom.LoadFromFile(bmpFile, false)
	if err != nil {
		slog.Error("open bitmap file error", "err", err)
		return err
	}

	slog.Info("load text file", "filename", txtFile)
	tfile, err := os.Open(txtFile)
	if err != nil {
		slog.Error("open text file error", "err", err)
		return err
	}
	defer tfile.Close()

	slog.Info("create state file", "filename", stateFile)
	sfile, err := os.Create(stateFile)
	if err != nil {
		slog.Error("create state file error", "err", err)
		return err
	}
	// A failed Close on a file we wrote to can mean lost data, so surface it
	// unless an earlier error is already being returned.
	defer func() {
		if cerr := sfile.Close(); cerr != nil && err == nil {
			slog.Error("close state file error", "err", cerr)
			err = cerr
		}
	}()

	test := func(s string) bool { return bfile.TestString(s) }
	if err := writeHits(tfile, sfile, test, filter); err != nil {
		slog.Error("hit test error", "err", err)
		return err
	}
	return nil
}

// writeHits reads r line by line, applies test to each line and writes the
// result to w through a buffered writer.
//
// With filter set, only lines for which test returns true are written, one per
// line. Otherwise every line is written as "<line>\t<0|1>". Progress is logged
// every 100000 lines. Read errors (including lines longer than bufio.Scanner's
// default token limit) and write errors are returned instead of being dropped.
func writeHits(r io.Reader, w io.Writer, test func(string) bool, filter bool) error {
	writer := bufio.NewWriter(w)

	// Read line by line.
	scanner := bufio.NewScanner(r)
	lineNo := 0
	for scanner.Scan() {
		lineNo++
		if lineNo%100000 == 0 {
			slog.Info("read line", "lineno", lineNo)
		}

		// Test the line against the bitmap.
		line := scanner.Text()
		hit := test(line)
		if filter && !hit {
			continue
		}

		// bufio.Writer errors are sticky: once a write fails, every later
		// write returns the same error, so checking the last write of the
		// record is enough.
		writer.WriteString(line)
		if !filter {
			flag := byte('0')
			if hit {
				flag = '1'
			}
			writer.WriteByte('\t')
			writer.WriteByte(flag)
		}
		if err := writer.WriteByte('\n'); err != nil {
			return fmt.Errorf("write result for line %d: %w", lineNo, err)
		}
	}

	// Flush whatever was produced even if scanning stopped early, then report
	// the scan error first because it is the root cause.
	flushErr := writer.Flush()
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("read line %d: %w", lineNo+1, err)
	}
	if flushErr != nil {
		return fmt.Errorf("flush results: %w", flushErr)
	}
	return nil
}