自定义实现布隆过滤器

自定义实现布隆过滤器,第1张

自定义实现布隆过滤器

什么是布隆过滤器?

以下摘自百度百科

布隆过滤器(Bloom Filter)是1970年由布隆提出的。它实际上是一个很长的二进制向量和一系列随机映射函数。布隆过滤器可以用于检索一个元素是否在一个集合中。它的优点是空间效率和查询时间都比一般的算法要好得多,缺点是有一定的误识别率,并且删除元素比较困难。

以下为借鉴其他资料实现的布隆过滤器代码

package com.zmx.common.common.utils.bitmap;

import java.util.BitSet;
import java.util.concurrent.atomic.AtomicInteger;


/**
 * A simple Bloom filter for strings backed by a {@link BitSet}.
 *
 * <p>The filter owns {@code seeds.length * capacity} bits. Every element is mapped to one bit
 * per seed; an element is reported as present only when all of its bits are set, so lookups
 * may yield false positives but never false negatives (until the filter is cleared).
 *
 * <p>When a positive {@code clearRate} is configured, the filter wipes itself before an
 * insertion once {@code (insertedElements * seeds.length) / size} reaches that rate.
 *
 * <p>{@link #add}, {@link #contain} and {@link #addIfNoExit} are synchronized on this instance
 * because {@link BitSet} itself offers no thread-safety.
 */
public class BloomFileter {

    /** Capacity used by the no-argument constructor. */
    private static final int DEFAULT_CAPACITY = 100000;

    /** Total number of bits in the filter ({@code seeds.length * capacity}). */
    private final int size;

    /** Bit storage. */
    private final BitSet bitSet;

    /** Usage ratio at which the filter is wiped; only honoured when {@link #isClearRate}. */
    private final double clearRate;

    /** Whether automatic clearing is enabled ({@code clearRate > 0}). */
    private final boolean isClearRate;

    /** Private copy of the hash seeds, one per hash function. */
    private final int[] seeds;

    /** Usage counter: grows by {@code seeds.length} for every element that was not yet present. */
    private final AtomicInteger count = new AtomicInteger(0);

    /**
     * Creates a filter with {@link SeedsEnum#MIDDLE} seeds, a default capacity and automatic
     * clearing disabled.
     */
    public BloomFileter() {
        this(SeedsEnum.MIDDLE, DEFAULT_CAPACITY, 0);
    }

    /**
     * Creates a filter with {@link SeedsEnum#MIDDLE} seeds.
     *
     * @param capacity  expected number of elements; must be positive
     * @param clearRate usage ratio that triggers a wipe; values {@code <= 0} disable clearing
     * @throws IllegalArgumentException if the capacity is not positive or the bit count overflows
     */
    public BloomFileter(int capacity, double clearRate) {
        this(SeedsEnum.MIDDLE, capacity, clearRate);
    }

    /**
     * Creates a filter with the given seed set.
     *
     * @param seedsEnum seed set; one hash function is used per seed
     * @param capacity  expected number of elements; must be positive
     * @param clearRate usage ratio that triggers a wipe; values {@code <= 0} disable clearing
     * @throws NullPointerException     if {@code seedsEnum} is null
     * @throws IllegalArgumentException if the seed set is empty, the capacity is not positive,
     *                                  or {@code seeds.length * capacity} exceeds the int range
     */
    public BloomFileter(SeedsEnum seedsEnum, int capacity, double clearRate) {
        if (seedsEnum == null) {
            throw new NullPointerException("seedsEnum must not be null");
        }
        int[] configured = seedsEnum.getSeeds();
        if (configured == null || configured.length == 0) {
            throw new IllegalArgumentException("seed set must contain at least one seed");
        }
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity must be positive: " + capacity);
        }
        // Compute in long so an oversized capacity is rejected instead of silently wrapping.
        long bits = (long) configured.length * capacity;
        if (bits > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("capacity too large for " + configured.length
                    + " seeds: " + capacity);
        }
        // Copy the seeds: the enum exposes a setter, and this filter's size must keep matching
        // the number of hash functions it was built with.
        this.seeds = configured.clone();
        this.size = (int) bits;
        this.bitSet = new BitSet(this.size);
        this.clearRate = clearRate;
        this.isClearRate = clearRate > 0;
    }

    /**
     * Adds an element to the filter. Adding an element that is already reported as present
     * does not change the usage counter.
     *
     * @param element element to add; must not be null
     * @throws NullPointerException if {@code element} is null
     */
    public synchronized void add(String element) {
        addIfNoExit(element);
    }

    /**
     * Tests whether an element may have been added.
     *
     * @param element element to look up; must not be null
     * @return {@code false} if the element was definitely not added since the last clear,
     *         {@code true} if it was added or is a false positive
     * @throws NullPointerException if {@code element} is null
     */
    public synchronized boolean contain(String element) {
        for (int seed : seeds) {
            if (!bitSet.get(hash(element, seed))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds the element unless the filter already reports it as present.
     *
     * @param element element to add; must not be null
     * @return {@code true} if the element was already reported as present (nothing changed),
     *         {@code false} if it was absent and has now been added
     * @throws NullPointerException if {@code element} is null
     */
    public synchronized boolean addIfNoExit(String element) {
        if (element == null) {
            throw new NullPointerException("element must not be null");
        }
        // Wipe stale data first if the configured usage ratio has been reached.
        checkNeedClear();

        boolean present = true;
        for (int seed : seeds) {
            int index = hash(element, seed);
            if (!bitSet.get(index)) {
                // A single unset bit proves the element was absent.
                present = false;
                bitSet.set(index);
            }
        }
        if (!present) {
            count.addAndGet(seeds.length);
        }
        return present;
    }

    /** Clears all bits and resets the usage counter once the usage ratio reaches the limit. */
    private void checkNeedClear() {
        if (isClearRate && usedRate() >= clearRate) {
            bitSet.clear();
            count.set(0);
        }
    }

    /** Returns the usage counter divided by the total number of bits. */
    private double usedRate() {
        return (double) count.get() / size;
    }

    /**
     * Maps an element to a bit index in {@code [0, size)} for the given seed.
     *
     * <p>The seed takes part in every mixing step, so different seeds yield different
     * functions of the characters rather than multiples of one shared value.
     */
    private int hash(String element, int seed) {
        if (element == null) {
            throw new NullPointerException("element must not be null");
        }
        int hashCode = 0;
        for (int i = 0; i < element.length(); i++) {
            hashCode = seed * hashCode + element.charAt(i);
        }
        // Final multiply keeps single-character inputs from landing on the same bit for all seeds.
        int index = (hashCode * seed) % size;
        // int arithmetic may have wrapped to a negative value; fold it back into range.
        return index < 0 ? index + size : index;
    }

    /** Predefined seed sets; more seeds mean more hash functions and more bits per element. */
    public enum SeedsEnum {

        /** 4 hash functions. */
        VERY_SMALL(new int[]{2, 3, 5, 7}),

        /** 8 hash functions. */
        SMALL(new int[]{2, 3, 5, 7, 11, 13, 17, 19}),

        /** 16 hash functions. */
        MIDDLE(new int[]{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53}),

        /** 32 hash functions. */
        HIGH(new int[]{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
                101, 103, 107, 109, 113, 127, 131});

        private int[] seeds;

        private SeedsEnum(int[] seeds) {
            this.seeds = seeds;
        }

        /** Returns the seed array currently held by this constant. */
        public int[] getSeeds() {
            return seeds;
        }

        /**
         * Replaces the seed array held by this constant. The change is visible to every filter
         * constructed from this constant afterwards.
         */
        public void setSeeds(int[] seeds) {
            this.seeds = seeds;
        }
    }

    /** Small demo: inserts a few strings plus one million numbers and prints timings and memory. */
    public static void main(String[] args) {
        BloomFileter bloomFileter = new BloomFileter(1100000, 0.9);
        long addStart = System.currentTimeMillis();
        bloomFileter.add("爱");
        bloomFileter.add("意");
        bloomFileter.add("随");
        bloomFileter.add("风");
        bloomFileter.add("起");

        System.out.println(bloomFileter.addIfNoExit("风"));
        System.out.println(bloomFileter.addIfNoExit("起"));
        System.out.println(bloomFileter.addIfNoExit("意"));
        System.out.println(bloomFileter.addIfNoExit("难"));
        System.out.println(bloomFileter.addIfNoExit("平"));
        for (int i = 0; i < 1000000; i++) {
            bloomFileter.add(String.valueOf(i));
        }
        System.out.println("存储元素用时:" + "" + (System.currentTimeMillis() - addStart));
        System.out.println("----------------over----------------");

        // BitSet.size() is a bit count; convert to bytes before scaling to KB/MB/GB.
        long usedBytes = bloomFileter.bitSet.size() / 8L;
        System.out.println("bloomFileter占用内存:" + usedBytes / 1024 + "KB");
        System.out.println("bloomFileter占用内存:" + usedBytes / (1024 * 1024) + "MB");
        System.out.println("bloomFileter占用内存:" + usedBytes / (1024L * 1024 * 1024) + "GB");

        System.out.println("------------------------------------");

        System.out.println(bloomFileter.addIfNoExit("难"));
        System.out.println(bloomFileter.addIfNoExit("平"));
    }
}

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/5637175.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-12-16
下一篇 2022-12-16

发表评论

登录后才能评论

评论列表(0条)

保存