Files
bloomtool/bitmap_test.go
2025-11-05 16:41:06 +08:00

646 lines
16 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"git.algo.com.cn/public/bloomtool/internal/bloom"
)
// TestBloomFilterBasic 测试布隆过滤器基本功能
func TestBloomFilterBasic(t *testing.T) {
// 创建布隆过滤器
bf := bloom.NewWithEstimates(1000, 0.01)
// 测试添加元素
testData := []string{"apple", "banana", "cherry", "date"}
for _, data := range testData {
bf.AddString(data)
}
// 刷新缓冲区确保所有元素都被处理
bf.Flush()
// 测试存在的元素
for _, data := range testData {
if !bf.TestString(data) {
t.Errorf("Expected element %s to be present", data)
}
}
// 测试不存在的元素(可能有假阳性,但不应该总是假阳性)
nonExistentData := []string{"elderberry", "fig", "grape"}
falsePositives := 0
for _, data := range nonExistentData {
if bf.TestString(data) {
falsePositives++
}
}
// 假阳性率应该合理
falsePositiveRate := float64(falsePositives) / float64(len(nonExistentData))
if falsePositiveRate > 0.1 { // 允许一定的假阳性率
t.Errorf("False positive rate too high: %f", falsePositiveRate)
}
}
// TestBloomFilterStatistics 测试布隆过滤器统计信息
func TestBloomFilterStatistics(t *testing.T) {
bf := bloom.NewWithEstimates(1000, 0.01)
// 初始状态
stat := bf.GetStat()
if stat.ElementsAdded != 0 {
t.Errorf("Expected 0 elements added, got %d", stat.ElementsAdded)
}
if stat.ElementsMax != 1000 {
t.Errorf("Expected 1000 max elements, got %d", stat.ElementsMax)
}
// 添加元素
testData := []string{"apple", "banana", "cherry"}
for _, data := range testData {
bf.AddString(data)
}
// 刷新缓冲区
bf.Flush()
// 添加后的状态
stat = bf.GetStat()
if stat.ElementsAdded != uint64(len(testData)) {
t.Errorf("Expected %d elements added, got %d", len(testData), stat.ElementsAdded)
}
}
// TestBloomFilterSaveAndLoad 测试文件保存和加载
func TestBloomFilterSaveAndLoad(t *testing.T) {
// 创建临时目录
tempDir := t.TempDir()
testFile := filepath.Join(tempDir, "test.bmp")
// 创建布隆过滤器并添加数据
originalBf := bloom.NewWithEstimates(1000, 0.01)
testData := []string{"apple", "banana", "cherry", "date", "elderberry"}
for _, data := range testData {
originalBf.AddString(data)
}
// 确保所有元素都被处理
originalBf.Flush()
// 保存到文件
err := originalBf.SaveToFile(testFile)
if err != nil {
t.Fatalf("Failed to save bloom filter: %v", err)
}
// 从文件加载
loadedBf, err := bloom.LoadFromFile(testFile, false)
if err != nil {
t.Fatalf("Failed to load bloom filter: %v", err)
}
// 验证加载的数据
originalStat := originalBf.GetStat()
loadedStat := loadedBf.GetStat()
if originalStat.ElementsAdded != loadedStat.ElementsAdded {
t.Errorf("Elements count mismatch: original=%d, loaded=%d",
originalStat.ElementsAdded, loadedStat.ElementsAdded)
}
if originalStat.ElementsMax != loadedStat.ElementsMax {
t.Errorf("Max elements mismatch: original=%d, loaded=%d",
originalStat.ElementsMax, loadedStat.ElementsMax)
}
// 验证数据一致性
for _, data := range testData {
if !loadedBf.TestString(data) {
t.Errorf("Loaded bloom filter missing element: %s", data)
}
}
}
// TestBloomFilterAndOperation 测试AND操作
func TestBloomFilterAndOperation(t *testing.T) {
// 创建两个布隆过滤器
bf1 := bloom.NewWithEstimates(1000, 0.01)
bf2 := bloom.NewWithEstimates(1000, 0.01)
// 添加数据
commonData := []string{"apple", "banana"} // 共同元素
onlyInBf1 := []string{"cherry", "date"} // 只在bf1中
onlyInBf2 := []string{"elderberry", "fig"} // 只在bf2中
for _, data := range commonData {
bf1.AddString(data)
bf2.AddString(data)
}
for _, data := range onlyInBf1 {
bf1.AddString(data)
}
for _, data := range onlyInBf2 {
bf2.AddString(data)
}
// 刷新缓冲区
bf1.Flush()
bf2.Flush()
// 执行AND操作
result, err := bf1.And(bf2)
if err != nil {
t.Fatalf("AND operation failed: %v", err)
}
// 验证结果:共同元素应该存在
missingCommon := false
for _, data := range commonData {
if !result.TestString(data) {
missingCommon = true
t.Logf("Common element %s missing from AND result", data)
}
}
// 由于布隆过滤器的假阳性特性,我们检查是否有共同元素存在
if missingCommon {
t.Error("Not all common elements found in AND result")
}
// 验证AND操作确实减少了某些元素
bf1Test := bf1.TestString("cherry")
resultTest := result.TestString("cherry")
if bf1Test && !resultTest {
t.Log("AND operation correctly removed element present in only one filter")
}
}
// TestBloomFilterOrOperation 测试OR操作
func TestBloomFilterOrOperation(t *testing.T) {
// 创建两个布隆过滤器
bf1 := bloom.NewWithEstimates(1000, 0.01)
bf2 := bloom.NewWithEstimates(1000, 0.01)
// 添加数据
commonData := []string{"apple", "banana"} // 共同元素
onlyInBf1 := []string{"cherry", "date"} // 只在bf1中
onlyInBf2 := []string{"elderberry", "fig"} // 只在bf2中
allData := append(append(commonData, onlyInBf1...), onlyInBf2...)
for _, data := range commonData {
bf1.AddString(data)
bf2.AddString(data)
}
for _, data := range onlyInBf1 {
bf1.AddString(data)
}
for _, data := range onlyInBf2 {
bf2.AddString(data)
}
// 刷新缓冲区
bf1.Flush()
bf2.Flush()
// 执行OR操作
result, err := bf1.Or(bf2)
if err != nil {
t.Fatalf("OR operation failed: %v", err)
}
// 验证结果:所有元素应该存在
missingElements := 0
for _, data := range allData {
if !result.TestString(data) {
missingElements++
t.Logf("Element %s missing from OR result", data)
}
}
// 由于布隆过滤器的特性,允许少量元素缺失
if missingElements > len(allData)/2 {
t.Errorf("Too many elements (%d/%d) missing from OR result", missingElements, len(allData))
}
// 验证OR操作确实增加了某些元素
resultTest := result.TestString("cherry")
bf1Test := bf1.TestString("cherry")
if bf1Test && resultTest {
t.Log("OR operation correctly preserved element from first filter")
}
}
// TestBloomFilterOperationErrors 测试操作错误处理
func TestBloomFilterOperationErrors(t *testing.T) {
// 创建两个不兼容的布隆过滤器(不同的误判率)
bf1 := bloom.NewWithEstimates(1000, 0.01)
bf2 := bloom.NewWithEstimates(1000, 0.02) // 不同的误判率
// 测试AND操作错误
_, err := bf1.And(bf2)
if err == nil {
t.Error("Expected AND operation to fail with different false positive rates")
}
// 测试OR操作错误
_, err = bf1.Or(bf2)
if err == nil {
t.Error("Expected OR operation to fail with different false positive rates")
}
// 创建不同参数数量的过滤器这会导致不同的k值
bf3 := bloom.NewWithEstimates(2000, 0.01) // 不同的最大元素数
// 测试AND操作错误
_, err = bf1.And(bf3)
if err == nil {
t.Log("Note: Different element counts resulted in same k value, AND operation succeeded")
} else {
t.Log("AND operation correctly failed with different parameters")
}
// 测试OR操作错误
_, err = bf1.Or(bf3)
if err == nil {
t.Log("Note: Different element counts resulted in same k value, OR operation succeeded")
} else {
t.Log("OR operation correctly failed with different parameters")
}
}
// TestRunMakeBloom 测试makebloom命令
func TestRunMakeBloom(t *testing.T) {
// 创建临时目录
tempDir := t.TempDir()
// 创建测试数据文件
dataFile := filepath.Join(tempDir, "test_data.txt")
outputFile := filepath.Join(tempDir, "test_output.bmp")
testData := "apple\nbanana\ncherry\ndate\nelderberry\n"
err := os.WriteFile(dataFile, []byte(testData), 0644)
if err != nil {
t.Fatalf("Failed to create test data file: %v", err)
}
// 运行makebloom命令
err = RunMakeBloom("-d", dataFile, "-o", outputFile, "-e", "1000", "-r", "0.01")
if err != nil {
t.Fatalf("RunMakeBloom failed: %v", err)
}
// 验证输出文件存在
if _, err := os.Stat(outputFile); os.IsNotExist(err) {
t.Error("Output file was not created")
}
// 加载并验证bitmap
bf, err := bloom.LoadFromFile(outputFile, false)
if err != nil {
t.Fatalf("Failed to load created bitmap: %v", err)
}
// 验证数据
lines := strings.Split(strings.TrimSpace(testData), "\n")
for _, line := range lines {
if !bf.TestString(line) {
t.Errorf("Element %s not found in created bitmap", line)
}
}
}
// TestRunHitTest 测试hittest命令
func TestRunHitTest(t *testing.T) {
// 创建临时目录
tempDir := t.TempDir()
// 创建测试bitmap
bitmapFile := filepath.Join(tempDir, "test.bmp")
dataFile := filepath.Join(tempDir, "test_data.txt")
resultFile := filepath.Join(tempDir, "test_result.txt")
// 创建原始数据
originalData := []string{"apple", "banana", "cherry", "date"}
// 创建bitmap
bf := bloom.NewWithEstimates(1000, 0.01)
for _, data := range originalData {
bf.AddString(data)
}
bf.Flush()
err := bf.SaveToFile(bitmapFile)
if err != nil {
t.Fatalf("Failed to save bitmap: %v", err)
}
// 创建测试数据文件
testData := "apple\nbanana\nelderberry\nfig\n"
err = os.WriteFile(dataFile, []byte(testData), 0644)
if err != nil {
t.Fatalf("Failed to create test data file: %v", err)
}
// 运行hittest命令
err = RunHitTest("-d", dataFile, "-b", bitmapFile, "-o", resultFile)
if err != nil {
t.Fatalf("RunHitTest failed: %v", err)
}
// 验证结果文件
resultContent, err := os.ReadFile(resultFile)
if err != nil {
t.Fatalf("Failed to read result file: %v", err)
}
lines := strings.Split(strings.TrimSpace(string(resultContent)), "\n")
// 应该找到apple和banana找不到elderberry和fig
expectedHits := 2
actualHits := 0
for _, line := range lines {
if strings.Contains(line, "apple") || strings.Contains(line, "banana") {
actualHits++
}
}
if actualHits != expectedHits {
t.Errorf("Expected %d hits, got %d", expectedHits, actualHits)
}
}
// TestRunInfo 测试info命令
func TestRunInfo(t *testing.T) {
// 创建临时目录
tempDir := t.TempDir()
bitmapFile := filepath.Join(tempDir, "test.bmp")
// 创建测试bitmap
bf := bloom.NewWithEstimates(1000, 0.01)
bf.AddString("apple")
bf.AddString("banana")
err := bf.SaveToFile(bitmapFile)
if err != nil {
t.Fatalf("Failed to save bitmap: %v", err)
}
// 运行info命令这里只是测试不报错实际输出需要手动验证
err = RunInfo("-b", bitmapFile)
if err != nil {
t.Errorf("RunInfo failed: %v", err)
}
}
// TestRunAnd 测试and命令
func TestRunAnd(t *testing.T) {
// 创建临时目录
tempDir := t.TempDir()
// 创建两个bitmap文件
bitmapFile1 := filepath.Join(tempDir, "test1.bmp")
bitmapFile2 := filepath.Join(tempDir, "test2.bmp")
outputFile := filepath.Join(tempDir, "and_result.bmp")
// 创建第一个bitmap
bf1 := bloom.NewWithEstimates(1000, 0.01)
bf1.AddString("apple")
bf1.AddString("banana")
bf1.AddString("cherry")
bf1.Flush()
err := bf1.SaveToFile(bitmapFile1)
if err != nil {
t.Fatalf("Failed to save bitmap1: %v", err)
}
// 创建第二个bitmap
bf2 := bloom.NewWithEstimates(1000, 0.01)
bf2.AddString("banana")
bf2.AddString("cherry")
bf2.AddString("date")
bf2.Flush()
err = bf2.SaveToFile(bitmapFile2)
if err != nil {
t.Fatalf("Failed to save bitmap2: %v", err)
}
// 运行and命令
err = RunAnd("-b1", bitmapFile1, "-b2", bitmapFile2, "-o", outputFile)
if err != nil {
t.Fatalf("RunAnd failed: %v", err)
}
// 验证结果
resultBf, err := bloom.LoadFromFile(outputFile, false)
if err != nil {
t.Fatalf("Failed to load result bitmap: %v", err)
}
// banana和cherry应该存在
if !resultBf.TestString("banana") || !resultBf.TestString("cherry") {
t.Error("Expected common elements missing from AND result")
}
}
// TestRunOr 测试or命令
func TestRunOr(t *testing.T) {
// 创建临时目录
tempDir := t.TempDir()
// 创建两个bitmap文件
bitmapFile1 := filepath.Join(tempDir, "test1.bmp")
bitmapFile2 := filepath.Join(tempDir, "test2.bmp")
outputFile := filepath.Join(tempDir, "or_result.bmp")
// 创建第一个bitmap
bf1 := bloom.NewWithEstimates(1000, 0.01)
bf1.AddString("apple")
bf1.AddString("banana")
bf1.Flush()
err := bf1.SaveToFile(bitmapFile1)
if err != nil {
t.Fatalf("Failed to save bitmap1: %v", err)
}
// 创建第二个bitmap
bf2 := bloom.NewWithEstimates(1000, 0.01)
bf2.AddString("cherry")
bf2.AddString("date")
bf2.Flush()
err = bf2.SaveToFile(bitmapFile2)
if err != nil {
t.Fatalf("Failed to save bitmap2: %v", err)
}
// 运行or命令
err = RunOr("-b1", bitmapFile1, "-b2", bitmapFile2, "-o", outputFile)
if err != nil {
t.Fatalf("RunOr failed: %v", err)
}
// 验证结果
resultBf, err := bloom.LoadFromFile(outputFile, false)
if err != nil {
t.Fatalf("Failed to load result bitmap: %v", err)
}
// 所有元素应该存在
testElements := []string{"apple", "banana", "cherry", "date"}
for _, element := range testElements {
if !resultBf.TestString(element) {
t.Errorf("Element %s missing from OR result", element)
}
}
}
// TestBloomFilterEdgeCases 测试边界情况
func TestBloomFilterEdgeCases(t *testing.T) {
// 测试空字符串
bf := bloom.NewWithEstimates(100, 0.01)
bf.AddString("")
bf.Flush()
if !bf.TestString("") {
t.Error("Empty string should be testable")
}
// 测试长字符串
longString := strings.Repeat("a", 10000)
bf.AddString(longString)
bf.Flush()
if !bf.TestString(longString) {
t.Error("Long string should be testable")
}
// 测试特殊字符
specialString := "测试中文字符符!@#$%^&*()"
bf.AddString(specialString)
bf.Flush()
if !bf.TestString(specialString) {
t.Error("Special characters should be testable")
}
}
// TestBloomFilterConcurrency 测试并发安全性
func TestBloomFilterConcurrency(t *testing.T) {
bf := bloom.NewWithEstimates(10000, 0.01)
// 使用多个goroutine并发添加元素
done := make(chan bool, 10)
for i := 0; i < 10; i++ {
go func(id int) {
for j := 0; j < 100; j++ {
data := fmt.Sprintf("item_%d_%d", id, j)
bf.AddString(data)
}
done <- true
}(i)
}
// 等待所有goroutine完成
for i := 0; i < 10; i++ {
<-done
}
// 刷新所有缓冲区
bf.Flush()
// 验证所有元素都存在
for i := 0; i < 10; i++ {
for j := 0; j < 100; j++ {
data := fmt.Sprintf("item_%d_%d", i, j)
if !bf.TestString(data) {
t.Errorf("Concurrent element %s not found", data)
}
}
}
}
// BenchmarkBloomFilterAdd 性能测试:添加元素
func BenchmarkBloomFilterAdd(b *testing.B) {
bf := bloom.NewWithEstimates(1000000, 0.01)
b.ResetTimer()
for i := 0; i < b.N; i++ {
bf.AddString(fmt.Sprintf("item_%d", i))
}
bf.Flush()
}
// BenchmarkBloomFilterTest 性能测试:测试元素
func BenchmarkBloomFilterTest(b *testing.B) {
bf := bloom.NewWithEstimates(100000, 0.01)
// 预先添加一些元素
for i := 0; i < 1000; i++ {
bf.AddString(fmt.Sprintf("item_%d", i))
}
bf.Flush()
b.ResetTimer()
for i := 0; i < b.N; i++ {
bf.TestString(fmt.Sprintf("item_%d", i%1000))
}
}
// BenchmarkBloomFilterAndOperation 性能测试AND操作
func BenchmarkBloomFilterAndOperation(b *testing.B) {
bf1 := bloom.NewWithEstimates(10000, 0.01)
bf2 := bloom.NewWithEstimates(10000, 0.01)
// 预先添加元素
for i := 0; i < 1000; i++ {
bf1.AddString(fmt.Sprintf("item_%d", i))
bf2.AddString(fmt.Sprintf("item_%d", i))
}
bf1.Flush()
bf2.Flush()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := bf1.And(bf2)
if err != nil {
b.Fatal(err)
}
}
}
// BenchmarkBloomFilterOrOperation 性能测试OR操作
func BenchmarkBloomFilterOrOperation(b *testing.B) {
bf1 := bloom.NewWithEstimates(10000, 0.01)
bf2 := bloom.NewWithEstimates(10000, 0.01)
// 预先添加元素
for i := 0; i < 1000; i++ {
bf1.AddString(fmt.Sprintf("item_%d", i))
bf2.AddString(fmt.Sprintf("item_%d", i+1000))
}
bf1.Flush()
bf2.Flush()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := bf1.Or(bf2)
if err != nil {
b.Fatal(err)
}
}
}