package main import ( "fmt" "os" "path/filepath" "strings" "testing" "git.algo.com.cn/public/bloomtool/internal/bloom" ) // TestBloomFilterBasic 测试布隆过滤器基本功能 func TestBloomFilterBasic(t *testing.T) { // 创建布隆过滤器 bf := bloom.NewWithEstimates(1000, 0.01) // 测试添加元素 testData := []string{"apple", "banana", "cherry", "date"} for _, data := range testData { bf.AddString(data) } // 刷新缓冲区确保所有元素都被处理 bf.Flush() // 测试存在的元素 for _, data := range testData { if !bf.TestString(data) { t.Errorf("Expected element %s to be present", data) } } // 测试不存在的元素(可能有假阳性,但不应该总是假阳性) nonExistentData := []string{"elderberry", "fig", "grape"} falsePositives := 0 for _, data := range nonExistentData { if bf.TestString(data) { falsePositives++ } } // 假阳性率应该合理 falsePositiveRate := float64(falsePositives) / float64(len(nonExistentData)) if falsePositiveRate > 0.1 { // 允许一定的假阳性率 t.Errorf("False positive rate too high: %f", falsePositiveRate) } } // TestBloomFilterStatistics 测试布隆过滤器统计信息 func TestBloomFilterStatistics(t *testing.T) { bf := bloom.NewWithEstimates(1000, 0.01) // 初始状态 stat := bf.GetStat() if stat.ElementsAdded != 0 { t.Errorf("Expected 0 elements added, got %d", stat.ElementsAdded) } if stat.ElementsMax != 1000 { t.Errorf("Expected 1000 max elements, got %d", stat.ElementsMax) } // 添加元素 testData := []string{"apple", "banana", "cherry"} for _, data := range testData { bf.AddString(data) } // 刷新缓冲区 bf.Flush() // 添加后的状态 stat = bf.GetStat() if stat.ElementsAdded != uint64(len(testData)) { t.Errorf("Expected %d elements added, got %d", len(testData), stat.ElementsAdded) } } // TestBloomFilterSaveAndLoad 测试文件保存和加载 func TestBloomFilterSaveAndLoad(t *testing.T) { // 创建临时目录 tempDir := t.TempDir() testFile := filepath.Join(tempDir, "test.bmp") // 创建布隆过滤器并添加数据 originalBf := bloom.NewWithEstimates(1000, 0.01) testData := []string{"apple", "banana", "cherry", "date", "elderberry"} for _, data := range testData { originalBf.AddString(data) } // 确保所有元素都被处理 originalBf.Flush() // 保存到文件 err := originalBf.SaveToFile(testFile) if err != nil { t.Fatalf("Failed to save bloom filter: %v", err) } // 从文件加载 loadedBf, err := bloom.LoadFromFile(testFile, false) if err != nil { t.Fatalf("Failed to load bloom filter: %v", err) } // 验证加载的数据 originalStat := originalBf.GetStat() loadedStat := loadedBf.GetStat() if originalStat.ElementsAdded != loadedStat.ElementsAdded { t.Errorf("Elements count mismatch: original=%d, loaded=%d", originalStat.ElementsAdded, loadedStat.ElementsAdded) } if originalStat.ElementsMax != loadedStat.ElementsMax { t.Errorf("Max elements mismatch: original=%d, loaded=%d", originalStat.ElementsMax, loadedStat.ElementsMax) } // 验证数据一致性 for _, data := range testData { if !loadedBf.TestString(data) { t.Errorf("Loaded bloom filter missing element: %s", data) } } } // TestBloomFilterAndOperation 测试AND操作 func TestBloomFilterAndOperation(t *testing.T) { // 创建两个布隆过滤器 bf1 := bloom.NewWithEstimates(1000, 0.01) bf2 := bloom.NewWithEstimates(1000, 0.01) // 添加数据 commonData := []string{"apple", "banana"} // 共同元素 onlyInBf1 := []string{"cherry", "date"} // 只在bf1中 onlyInBf2 := []string{"elderberry", "fig"} // 只在bf2中 for _, data := range commonData { bf1.AddString(data) bf2.AddString(data) } for _, data := range onlyInBf1 { bf1.AddString(data) } for _, data := range onlyInBf2 { bf2.AddString(data) } // 刷新缓冲区 bf1.Flush() bf2.Flush() // 执行AND操作 result, err := bf1.And(bf2) if err != nil { t.Fatalf("AND operation failed: %v", err) } // 验证结果:共同元素应该存在 missingCommon := false for _, data := range commonData { if !result.TestString(data) { missingCommon = true t.Logf("Common element %s missing from AND result", data) } } // 由于布隆过滤器的假阳性特性,我们检查是否有共同元素存在 if missingCommon { t.Error("Not all common elements found in AND result") } // 验证AND操作确实减少了某些元素 bf1Test := bf1.TestString("cherry") resultTest := result.TestString("cherry") if bf1Test && !resultTest { t.Log("AND operation correctly removed element present in only one filter") } } // TestBloomFilterOrOperation 测试OR操作 func TestBloomFilterOrOperation(t *testing.T) { // 创建两个布隆过滤器 bf1 := bloom.NewWithEstimates(1000, 0.01) bf2 := bloom.NewWithEstimates(1000, 0.01) // 添加数据 commonData := []string{"apple", "banana"} // 共同元素 onlyInBf1 := []string{"cherry", "date"} // 只在bf1中 onlyInBf2 := []string{"elderberry", "fig"} // 只在bf2中 allData := append(append(commonData, onlyInBf1...), onlyInBf2...) for _, data := range commonData { bf1.AddString(data) bf2.AddString(data) } for _, data := range onlyInBf1 { bf1.AddString(data) } for _, data := range onlyInBf2 { bf2.AddString(data) } // 刷新缓冲区 bf1.Flush() bf2.Flush() // 执行OR操作 result, err := bf1.Or(bf2) if err != nil { t.Fatalf("OR operation failed: %v", err) } // 验证结果:所有元素应该存在 missingElements := 0 for _, data := range allData { if !result.TestString(data) { missingElements++ t.Logf("Element %s missing from OR result", data) } } // 由于布隆过滤器的特性,允许少量元素缺失 if missingElements > len(allData)/2 { t.Errorf("Too many elements (%d/%d) missing from OR result", missingElements, len(allData)) } // 验证OR操作确实增加了某些元素 resultTest := result.TestString("cherry") bf1Test := bf1.TestString("cherry") if bf1Test && resultTest { t.Log("OR operation correctly preserved element from first filter") } } // TestBloomFilterOperationErrors 测试操作错误处理 func TestBloomFilterOperationErrors(t *testing.T) { // 创建两个不兼容的布隆过滤器(不同的误判率) bf1 := bloom.NewWithEstimates(1000, 0.01) bf2 := bloom.NewWithEstimates(1000, 0.02) // 不同的误判率 // 测试AND操作错误 _, err := bf1.And(bf2) if err == nil { t.Error("Expected AND operation to fail with different false positive rates") } // 测试OR操作错误 _, err = bf1.Or(bf2) if err == nil { t.Error("Expected OR operation to fail with different false positive rates") } // 创建不同参数数量的过滤器(这会导致不同的k值) bf3 := bloom.NewWithEstimates(2000, 0.01) // 不同的最大元素数 // 测试AND操作错误 _, err = bf1.And(bf3) if err == nil { t.Log("Note: Different element counts resulted in same k value, AND operation succeeded") } else { t.Log("AND operation correctly failed with different parameters") } // 测试OR操作错误 _, err = bf1.Or(bf3) if err == nil { t.Log("Note: Different element counts resulted in same k value, OR operation succeeded") } else { t.Log("OR operation correctly failed with different parameters") } } // TestRunMakeBloom 测试makebloom命令 func TestRunMakeBloom(t *testing.T) { // 创建临时目录 tempDir := t.TempDir() // 创建测试数据文件 dataFile := filepath.Join(tempDir, "test_data.txt") outputFile := filepath.Join(tempDir, "test_output.bmp") testData := "apple\nbanana\ncherry\ndate\nelderberry\n" err := os.WriteFile(dataFile, []byte(testData), 0644) if err != nil { t.Fatalf("Failed to create test data file: %v", err) } // 运行makebloom命令 err = RunMakeBloom("-d", dataFile, "-o", outputFile, "-e", "1000", "-r", "0.01") if err != nil { t.Fatalf("RunMakeBloom failed: %v", err) } // 验证输出文件存在 if _, err := os.Stat(outputFile); os.IsNotExist(err) { t.Error("Output file was not created") } // 加载并验证bitmap bf, err := bloom.LoadFromFile(outputFile, false) if err != nil { t.Fatalf("Failed to load created bitmap: %v", err) } // 验证数据 lines := strings.Split(strings.TrimSpace(testData), "\n") for _, line := range lines { if !bf.TestString(line) { t.Errorf("Element %s not found in created bitmap", line) } } } // TestRunHitTest 测试hittest命令 func TestRunHitTest(t *testing.T) { // 创建临时目录 tempDir := t.TempDir() // 创建测试bitmap bitmapFile := filepath.Join(tempDir, "test.bmp") dataFile := filepath.Join(tempDir, "test_data.txt") resultFile := filepath.Join(tempDir, "test_result.txt") // 创建原始数据 originalData := []string{"apple", "banana", "cherry", "date"} // 创建bitmap bf := bloom.NewWithEstimates(1000, 0.01) for _, data := range originalData { bf.AddString(data) } bf.Flush() err := bf.SaveToFile(bitmapFile) if err != nil { t.Fatalf("Failed to save bitmap: %v", err) } // 创建测试数据文件 testData := "apple\nbanana\nelderberry\nfig\n" err = os.WriteFile(dataFile, []byte(testData), 0644) if err != nil { t.Fatalf("Failed to create test data file: %v", err) } // 运行hittest命令 err = RunHitTest("-d", dataFile, "-b", bitmapFile, "-o", resultFile) if err != nil { t.Fatalf("RunHitTest failed: %v", err) } // 验证结果文件 resultContent, err := os.ReadFile(resultFile) if err != nil { t.Fatalf("Failed to read result file: %v", err) } lines := strings.Split(strings.TrimSpace(string(resultContent)), "\n") // 应该找到apple和banana,找不到elderberry和fig expectedHits := 2 actualHits := 0 for _, line := range lines { if strings.Contains(line, "apple") || strings.Contains(line, "banana") { actualHits++ } } if actualHits != expectedHits { t.Errorf("Expected %d hits, got %d", expectedHits, actualHits) } } // TestRunInfo 测试info命令 func TestRunInfo(t *testing.T) { // 创建临时目录 tempDir := t.TempDir() bitmapFile := filepath.Join(tempDir, "test.bmp") // 创建测试bitmap bf := bloom.NewWithEstimates(1000, 0.01) bf.AddString("apple") bf.AddString("banana") err := bf.SaveToFile(bitmapFile) if err != nil { t.Fatalf("Failed to save bitmap: %v", err) } // 运行info命令(这里只是测试不报错,实际输出需要手动验证) err = RunInfo("-b", bitmapFile) if err != nil { t.Errorf("RunInfo failed: %v", err) } } // TestRunAnd 测试and命令 func TestRunAnd(t *testing.T) { // 创建临时目录 tempDir := t.TempDir() // 创建两个bitmap文件 bitmapFile1 := filepath.Join(tempDir, "test1.bmp") bitmapFile2 := filepath.Join(tempDir, "test2.bmp") outputFile := filepath.Join(tempDir, "and_result.bmp") // 创建第一个bitmap bf1 := bloom.NewWithEstimates(1000, 0.01) bf1.AddString("apple") bf1.AddString("banana") bf1.AddString("cherry") bf1.Flush() err := bf1.SaveToFile(bitmapFile1) if err != nil { t.Fatalf("Failed to save bitmap1: %v", err) } // 创建第二个bitmap bf2 := bloom.NewWithEstimates(1000, 0.01) bf2.AddString("banana") bf2.AddString("cherry") bf2.AddString("date") bf2.Flush() err = bf2.SaveToFile(bitmapFile2) if err != nil { t.Fatalf("Failed to save bitmap2: %v", err) } // 运行and命令 err = RunAnd("-b1", bitmapFile1, "-b2", bitmapFile2, "-o", outputFile) if err != nil { t.Fatalf("RunAnd failed: %v", err) } // 验证结果 resultBf, err := bloom.LoadFromFile(outputFile, false) if err != nil { t.Fatalf("Failed to load result bitmap: %v", err) } // banana和cherry应该存在 if !resultBf.TestString("banana") || !resultBf.TestString("cherry") { t.Error("Expected common elements missing from AND result") } } // TestRunOr 测试or命令 func TestRunOr(t *testing.T) { // 创建临时目录 tempDir := t.TempDir() // 创建两个bitmap文件 bitmapFile1 := filepath.Join(tempDir, "test1.bmp") bitmapFile2 := filepath.Join(tempDir, "test2.bmp") outputFile := filepath.Join(tempDir, "or_result.bmp") // 创建第一个bitmap bf1 := bloom.NewWithEstimates(1000, 0.01) bf1.AddString("apple") bf1.AddString("banana") bf1.Flush() err := bf1.SaveToFile(bitmapFile1) if err != nil { t.Fatalf("Failed to save bitmap1: %v", err) } // 创建第二个bitmap bf2 := bloom.NewWithEstimates(1000, 0.01) bf2.AddString("cherry") bf2.AddString("date") bf2.Flush() err = bf2.SaveToFile(bitmapFile2) if err != nil { t.Fatalf("Failed to save bitmap2: %v", err) } // 运行or命令 err = RunOr("-b1", bitmapFile1, "-b2", bitmapFile2, "-o", outputFile) if err != nil { t.Fatalf("RunOr failed: %v", err) } // 验证结果 resultBf, err := bloom.LoadFromFile(outputFile, false) if err != nil { t.Fatalf("Failed to load result bitmap: %v", err) } // 所有元素应该存在 testElements := []string{"apple", "banana", "cherry", "date"} for _, element := range testElements { if !resultBf.TestString(element) { t.Errorf("Element %s missing from OR result", element) } } } // TestBloomFilterEdgeCases 测试边界情况 func TestBloomFilterEdgeCases(t *testing.T) { // 测试空字符串 bf := bloom.NewWithEstimates(100, 0.01) bf.AddString("") bf.Flush() if !bf.TestString("") { t.Error("Empty string should be testable") } // 测试长字符串 longString := strings.Repeat("a", 10000) bf.AddString(longString) bf.Flush() if !bf.TestString(longString) { t.Error("Long string should be testable") } // 测试特殊字符 specialString := "测试中文字符符!@#$%^&*()" bf.AddString(specialString) bf.Flush() if !bf.TestString(specialString) { t.Error("Special characters should be testable") } } // TestBloomFilterConcurrency 测试并发安全性 func TestBloomFilterConcurrency(t *testing.T) { bf := bloom.NewWithEstimates(10000, 0.01) // 使用多个goroutine并发添加元素 done := make(chan bool, 10) for i := 0; i < 10; i++ { go func(id int) { for j := 0; j < 100; j++ { data := fmt.Sprintf("item_%d_%d", id, j) bf.AddString(data) } done <- true }(i) } // 等待所有goroutine完成 for i := 0; i < 10; i++ { <-done } // 刷新所有缓冲区 bf.Flush() // 验证所有元素都存在 for i := 0; i < 10; i++ { for j := 0; j < 100; j++ { data := fmt.Sprintf("item_%d_%d", i, j) if !bf.TestString(data) { t.Errorf("Concurrent element %s not found", data) } } } } // BenchmarkBloomFilterAdd 性能测试:添加元素 func BenchmarkBloomFilterAdd(b *testing.B) { bf := bloom.NewWithEstimates(1000000, 0.01) b.ResetTimer() for i := 0; i < b.N; i++ { bf.AddString(fmt.Sprintf("item_%d", i)) } bf.Flush() } // BenchmarkBloomFilterTest 性能测试:测试元素 func BenchmarkBloomFilterTest(b *testing.B) { bf := bloom.NewWithEstimates(100000, 0.01) // 预先添加一些元素 for i := 0; i < 1000; i++ { bf.AddString(fmt.Sprintf("item_%d", i)) } bf.Flush() b.ResetTimer() for i := 0; i < b.N; i++ { bf.TestString(fmt.Sprintf("item_%d", i%1000)) } } // BenchmarkBloomFilterAndOperation 性能测试:AND操作 func BenchmarkBloomFilterAndOperation(b *testing.B) { bf1 := bloom.NewWithEstimates(10000, 0.01) bf2 := bloom.NewWithEstimates(10000, 0.01) // 预先添加元素 for i := 0; i < 1000; i++ { bf1.AddString(fmt.Sprintf("item_%d", i)) bf2.AddString(fmt.Sprintf("item_%d", i)) } bf1.Flush() bf2.Flush() b.ResetTimer() for i := 0; i < b.N; i++ { _, err := bf1.And(bf2) if err != nil { b.Fatal(err) } } } // BenchmarkBloomFilterOrOperation 性能测试:OR操作 func BenchmarkBloomFilterOrOperation(b *testing.B) { bf1 := bloom.NewWithEstimates(10000, 0.01) bf2 := bloom.NewWithEstimates(10000, 0.01) // 预先添加元素 for i := 0; i < 1000; i++ { bf1.AddString(fmt.Sprintf("item_%d", i)) bf2.AddString(fmt.Sprintf("item_%d", i+1000)) } bf1.Flush() bf2.Flush() b.ResetTimer() for i := 0; i < b.N; i++ { _, err := bf1.Or(bf2) if err != nil { b.Fatal(err) } } }