什么是布隆过滤器?
以下摘自百度百科
布隆过滤器(BloomFilter)是1970年由布隆提出的。它实际上是一个很长的二进制向量和一系列随机映射函数。布隆过滤器可以用于检索一个元素是否在一个集合中。它的优点是空间效率和查询时间都比一般的算法要好得多,缺点是有一定的误识别率和删除困难。
以下为借鉴其他资料实现的布隆过滤器代码
package com.zmx.common.common.utils.bitmap; import java.util.BitSet; import java.util.concurrent.atomic.AtomicInteger; public class BloomFileter { private int size; private BitSet bitSet; private double clearRate; private boolean isClearRate = false; private int[] seeds; private AtomicInteger count = new AtomicInteger(0); public BloomFileter(){} public BloomFileter(int capacity, double clearRate) { this(SeedsEnum.MIDDLE, capacity, clearRate); } public BloomFileter(SeedsEnum seedsEnum, int capacity, double clearRate) { this.size = seedsEnum.seeds.length * capacity; this.seeds = seedsEnum.seeds; this.bitSet = new BitSet(this.size); this.clearRate = clearRate; this.isClearRate = clearRate > 0; } private void add(String element){ //检查是否需要清除旧数据 checkNeedClear(); for (int i = 0;i < seeds.length; i++){ //计算hash值 int hashCode = hash(element, seeds[i]); setBitSet(hashCode); } } private boolean contain(String element) { boolean exit = true; for (int i = 0; i < seeds.length; i++) { //计算hash值 int hashCode = hash(element, seeds[i]); if (!bitSet.get(hashCode)) { exit = false; break; } } return exit; } private boolean addIfNoExit(String element) { //检查是否需要清除旧数据 checkNeedClear(); boolean exit = true; int[] hashCodes = new int[seeds.length]; for (int i = 0; i < seeds.length; i++) { //计算hash值 int hashCode = hash(element, seeds[i]); hashCodes[i] = hashCode; //重点说明下这段代码,只要有一次hash不存在,就说明这个元素不存在。 //遍历整个hash判断,如果存在就去bitSet中判断当前hash是否为true,如果为fale,说明不存在, //那么当前hash值及之前都要存入bitSet中,if(exit)会自动将剩余hash值放入bitSet中 //如此便可完成存在返回true,不存在就放入bitSet中,返回false if (exit) { if (!bitSet.get(hashCode)) { exit = false; //补充原有 for (int j = 0; j < i + 1; j++) { setBitSet(hashCodes[j]); } } } else { setBitSet(hashCode); } } return exit; } private void checkNeedClear() { if (isClearRate) { if (usedRate() >= clearRate) { synchronized (this) { if (usedRate() >= clearRate) { bitSet.clear(); count.set(0); } } } } } private void setBitSet(int hashCode) { //总数量原则自增 count.incrementAndGet(); bitSet.set(hashCode, true); 
} private double usedRate() { return (double) this.count.intValue() / this.size; } private int hash(String element, int seed) { char[] chars = element.toCharArray(); int hashCode = 0; for (int i = 0; i < chars.length; i++) { hashCode = i * hashCode + chars[i]; } hashCode = hashCode * seed % size; // 防止溢出变成负数 return Math.abs(hashCode); } public enum SeedsEnum { VERY_SMALL(new int[]{2, 3, 5, 7}), SMALL(new int[]{2, 3, 5, 7, 11, 13, 17, 19}), MIDDLE(new int[]{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53}), HIGH(new int[]{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131}); private int[] seeds; private SeedsEnum(int[] seeds) { this.seeds = seeds; } public int[] getSeeds() { return seeds; } public void setSeeds(int[] seeds) { this.seeds = seeds; } } public static void main(String[] args) { BloomFileter bloomFileter = new BloomFileter(1100000, 0.9); long addStart = System.currentTimeMillis(); bloomFileter.add("爱"); bloomFileter.add("意"); bloomFileter.add("随"); bloomFileter.add("风"); bloomFileter.add("起"); System.out.println(bloomFileter.addIfNoExit("风")); System.out.println(bloomFileter.addIfNoExit("起")); System.out.println(bloomFileter.addIfNoExit("意")); System.out.println(bloomFileter.addIfNoExit("难")); System.out.println(bloomFileter.addIfNoExit("平")); for (int i = 0; i < 1000000; i++) { bloomFileter.add(String.valueOf(i)); } System.out.println("存储元素用时:" + "" + (System.currentTimeMillis() - addStart)); System.out.println("----------------over----------------"); System.out.println("bloomFileter占用内存:" + bloomFileter.bitSet.size() / 1024 +"KB"); System.out.println("bloomFileter占用内存:" + bloomFileter.bitSet.size() / (1024 * 1024) +"MB"); System.out.println("bloomFileter占用内存:" + bloomFileter.bitSet.size() / (1024 * 1024 * 1024) +"MB"); System.out.println("------------------------------------"); System.out.println(bloomFileter.addIfNoExit("难")); 
System.out.println(bloomFileter.addIfNoExit("平")); } }
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)