红黑树hashmap-RbtHashMap实现

红黑树hashmap-RbtHashMap实现,第1张

      在之前的文章(自定义固定长度map)中实现过一个固定长度的map,其目的主要是为了实现固定长度在插入和删除过程中避免new和delete的内存调用,重复利用内存,但是它有一个很明显的缺陷,就是因为是固定长度冲突率会比标准库的map高,当冲突率高到一定长度时,其查找和删除效率会直线下降,所以为了进一步优化它的实现,就把发生冲突的元素由链表维护改为红黑树维护,这样最坏的情况下所有的元素的key的hash值都一样也只是退化为std::map而不是一个链表,因此带来此种情况下插入和删除性能的极大的提升。

      

std::unordered_map和之前的实现方式hash+单链表

新的实现方式hash+RBTree

     通过对比可以很直观的看到,当最坏的情况下所有的元素都为同一个hash值的时候,两者的区别一个退化为链表而新的实现则为一棵红黑树,查找效率有着天壤之别。

完整源码实现链接:

gitee地址https://gitee.com/DGuco/dsalgorithm/tree/master/rbtarr_mapgithub地址https://github.com/DGuco/DSAlgorithm/tree/master/rbtarr_map

测试用例:

//
// Created by DGuco on 2022/4/13.
// email:1139140929@qq.com
//
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#include "rbthash_map.h"
#include "rb_tree.h"


using namespace rbt_hash;
using namespace std;
#define TEST_COUNT 1000000
#define REMOVE_COUNT 50000
#define RB_COUNT 5000
#define HASH_CONFLICT_COUNT 5
#define HASH_CONFLICT_COUNT1 10000
#define COMSUME_KEEP_TIME 60 * 1000 * 1000
void testRBTree()
{
    printf("==========================test rbtree start===================================\n");
    // 设置种子
    srand( (unsigned)time( NULL ) );
    int *a= new int[RB_COUNT];
    RBTNode* node = new RBTNode[RB_COUNT];
    RBTreetree(node,0);
    cout << "== 原始数据: ";
    int i = 0;
    std::unordered_map stdmap;
    for(i=0; i < RB_COUNT; i++)
    {
        int key = rand();
        while (1)
        {
            if(stdmap.find(key) == stdmap.end())
            {
                break;
            }else
            {
                key = rand();
            }
        }
        a[i] = key;
        stdmap.insert(std::make_pair(key,1));
        cout << a[i] <<" ";
        node[i].init_rb();
        node[i].set_key(a[i]);
    }
    cout << endl;

    for(i=0; i*> resList;
    resList.clear();
    tree.preOrder(resList);
    std::list*>::iterator it = resList.begin();
    for(;it != resList.end();it++)
    {
        cout << (*it)->get_key() << " ";
    }
    cout << "\n== 中序遍历: ";
    resList.clear();
    tree.inOrder(resList);
    it = resList.begin();
    for(;it != resList.end();it++)
    {
        cout << (*it)->get_key() << " ";
    }
    cout << "\n== 后序遍历: ";
    resList.clear();
    tree.postOrder(resList);
    it = resList.begin();
    for(;it != resList.end();it++)
    {
        cout << (*it)->get_key() << " ";
    }
    cout << endl;
    cout << "== 最小值: " << tree.minimum()->get_key() << endl;
    cout << "== 最大值: " << tree.maximum()->get_key() << endl;
//    cout << "== 树的详细信息: " << endl;
//    tree.print();
    printf("== test rbtree insert done\n");

    for(i=0; i= 0;
    }

    int a;
};

void testInsert()
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    printf("==========================test insert start===================================\n");
    RbtHashMap* testMap = new RbtHashMap();
    std::unordered_map stdmap;
    while(true)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            testMap->insert(key,key);
        }

        if(stdmap.size() >= TEST_COUNT)
        {
            break;
        }
    }

    if(stdmap.size() != testMap->size())
    {
        printf("test insert failed stdsize = %d,rbtsize = %d\n",stdmap.size(),testMap->size());
        exit(0);
    }

    RbtHashMap::iterator it = testMap->begin();
    for(;it != testMap->end();it++)
    {
        std::unordered_map::iterator  stdit = stdmap.find(it->first);
        if(stdit == stdmap.end())
        {
            printf("test insert failed never insert key = %d\n",it->first);
            exit(0);
        }
        if(it->second->a != stdit->second.a)
        {
            printf("test insert failed stdvalue= %d,rbtvalue = %d\n",stdit->second.a,it->second->a);
            exit(0);
        }
        //printf("it->first = %d,it->second = %d\n",it->first,it->second->a);
        stdmap.erase(it->first);
    }
    if(stdmap.size() > 0)
    {
        printf("test insert failed look map not all,last =  %d\n",stdmap.size());
        exit(0);
    }
    printf("==========================test insert done===================================\n");
    delete testMap;
    testMap = NULL;
}

void testremove()
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    printf("==========================test remove begin===================================\n");
    RbtHashMap* testMap = new RbtHashMap();
    std::unordered_map stdmap;
    while(true)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            testMap->insert(key,key);
        }

        if(stdmap.size() >= TEST_COUNT)
        {
            break;
        }
    }

    std::unordered_map::iterator stdit = stdmap.begin();
    int count = 0;
    for(;stdit != stdmap.end();)
    {
        if(!testMap->erase_check(stdit->first))
        {
            printf("test remove failed never remove key = %d\n",stdit->first);
            exit(0);
        }
        stdit = stdmap.erase(stdit);
        count++;
        if(count >= REMOVE_COUNT)
        {
            break;
        }
    }

    if(stdmap.size() != testMap->size())
    {
        printf("test remove failed stdsize = %d,rbtsize = %d\n",stdmap.size(),testMap->size());
        exit(0);
    }

    RbtHashMap::iterator it = testMap->begin();
    count = 0;
    for(;it != testMap->end();it++)
    {
        count++;
        std::unordered_map::iterator  stdit = stdmap.find(it->first);
        if(stdit == stdmap.end())
        {
            printf("test remove failed never lost key = %d\n",it->first);
            exit(0);
        }
        if(it->second->a != stdit->second.a)
        {
            printf("test remove failed stdvalue= %d,rbtvalue = %d\n",stdit->second.a,it->second->a);
            exit(0);
        }
        stdmap.erase(it->first);
    }
    if(stdmap.size() > 0)
    {
        printf("test remove failed look map not all,last =  %d\n",stdmap.size());
        exit(0);
    }

    RbtHashMap::iterator  beginit = testMap->begin();
    int erasecount = 0;
    int size = testMap->size();
    for(;beginit != testMap->end();)
    {
        beginit = testMap->erase(beginit);
        erasecount++;
    }
    if(erasecount != size || testMap->size() != 0)
    {
        printf("test remove failed size = %d,erasecount = %d\n",size,erasecount);
        exit(0);
    }
    printf("==========================test remove done===================================\n");
    delete testMap;
    testMap = NULL;
}

void testmempool()
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    printf("==========================test mempool begin===================================\n");
    RbtHashMap* testMap = new RbtHashMap();
    std::unordered_map stdmap;
    while(true)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            testMap->insert(key,key);
        }

        if(stdmap.size() >= TEST_COUNT)
        {
            break;
        }
    }

    std::unordered_map::iterator stdit = stdmap.begin();
    int count = 0;
    for(;stdit != stdmap.end();)
    {
        if(!testMap->erase_check(stdit->first))
        {
            printf("test mempool failed never remove key = %d\n",stdit->first);
            exit(0);
        }
        stdit = stdmap.erase(stdit);
        count++;
        if(count >= REMOVE_COUNT)
        {
            break;
        }
    }

    if(stdmap.size() != testMap->size())
    {
        printf("test mempool failed stdsize = %d,rbtsize = %d\n",stdmap.size(),testMap->size());
        exit(0);
    }

    while(true)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            if(!testMap->insert(key,key))
            {
                printf("test mempool failed some mem not reuse\n",stdmap.size(),testMap->size());
            }
        }

        if(stdmap.size() >= TEST_COUNT)
        {
            break;
        }
    }
    if(stdmap.size() != testMap->size())
    {
        printf("test mempool failed stdsize = %d,rbtsize = %d\n",stdmap.size(),testMap->size());
        exit(0);
    }

    printf("==========================test mempool done===================================\n");
    delete testMap;
    testMap = NULL;
}

// 获取当前微秒
time_t GetUSTime()
{
    struct timeval tmval = {0};
    int nRetCode = gettimeofday(&tmval, NULL);
    if (nRetCode != 0)
    {
        return 0;
    }
    return ((tmval.tv_sec * 1000 * 1000) + tmval.tv_usec);
}

void testformance()
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    time_t start = 0;
    time_t end = 0;
    unsigned long long res = 0;
    printf("---------------------------test performance begin---------------------------------------------\n");
    RbtHashMap* testMap = new RbtHashMap();
    start = GetUSTime();
    for(int i = 0;i < TEST_COUNT;i++)
    {
        testMap->insert(i * HASH_CONFLICT_COUNT, ValueType(i));
    }
    end = GetUSTime();
    printf("RbtHashMap[conflict count = %d], insert use  %ld ms\n",TEST_COUNT,HASH_CONFLICT_COUNT,(end - start) / 1000);
    start = GetUSTime();
    res = 0;
    for(int i = 0;i < TEST_COUNT;i++)
    {
        res += testMap->find(i * HASH_CONFLICT_COUNT)->second->a;
    }
    end = GetUSTime();
    printf("RbtHashMap[conflict count = %d] find use  %ld ms,res = %llu\n",TEST_COUNT,HASH_CONFLICT_COUNT,(end - start) / 1000,res);
    testMap->clear();
    start = GetUSTime();
    for(int i = 0;i < TEST_COUNT;i++)
    {
        testMap->insert(i * HASH_CONFLICT_COUNT1, ValueType(i));
    }
    end = GetUSTime();
    printf("RbtHashMap[conflict count = %d], insert use  %ld ms\n",TEST_COUNT,HASH_CONFLICT_COUNT1,(end - start) / 1000);
    start = GetUSTime();
    res = 0;
    for(int i = 0;i < TEST_COUNT;i++)
    {
        res += testMap->find(i * HASH_CONFLICT_COUNT1)->second->a;
    }
    end = GetUSTime();
    printf("RbtHashMap[conflict count = %d] find use  %ld ms,res = %llu\n",TEST_COUNT,HASH_CONFLICT_COUNT1,(end - start) / 1000,res);
    delete testMap;
    testMap = NULL;
    printf("------------------------------------------------------------------------\n");
    {
        std::map stdtMap;
        start = GetUSTime();
        for(int i = 0;i < TEST_COUNT;i++)
        {
            stdtMap.insert(std::make_pair(i * HASH_CONFLICT_COUNT, ValueType (i)));
        }
        end = GetUSTime();
        printf("std::map insert use  %ld ms\n",(end - start) / 1000);
        start = GetUSTime();
        res = 0;
        for(int i = 0;i < TEST_COUNT;i++)
        {
            res += stdtMap.find(i * HASH_CONFLICT_COUNT)->second.a;
        }
        end = GetUSTime();
        printf("std::map find use  %ld ms,res = %llu\n",(end - start) / 1000,res);
    }
    printf("------------------------------------------------------------------------\n");
    {
        std::unordered_map testUnorderMap;
        start = GetUSTime();
        for(int i = 0;i < TEST_COUNT;i++)
        {
            testUnorderMap.insert(std::make_pair(i * HASH_CONFLICT_COUNT, ValueType(i)));
        }
        end = GetUSTime();
        printf("std::unordered_map insert use  %ld ms\n",(end - start) / 1000);
        res = 0;
        start = GetUSTime();
        for(int i = 0;i < TEST_COUNT;i++)
        {
            res += testUnorderMap.find(i * HASH_CONFLICT_COUNT)->second.a;
        }
        end = GetUSTime();
        printf("std::unordered_map find use  %ld ms,res = %llu\n",(end - start) / 1000,res);
    }
    printf("-----------------------------test performance done-------------------------------------------\n");
    return;
}

RbtHashMap* g_testMap = new RbtHashMap();
std::unordered_map g_testUnorderMap;
std::mutex g_mtx;
bool g_exit = 0;
bool g_print1 = 0;
bool g_print2 = 0;
void* consume_insert(void* data)
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    unsigned int count = 0;
    while(true && !g_exit)
    {
        if(g_print1)
        {
            printf("consume inserting .....\n");
            g_print1 = 0;
        }
        {
            std::lock_guard lck(g_mtx);
            if(g_testUnorderMap.size() >= TEST_COUNT)
            {
                continue;
            }else
            {
                int key = rand();
                if(g_testUnorderMap.find(key) == g_testUnorderMap.end())
                {
                    g_testUnorderMap.insert(std::make_pair(key,ValueType(key)));
                    if(!g_testMap->insert(key,key))
                    {
                        g_exit = 1;
                        printf("consume_insert insert failed testmap key = %d\n", key);
                        return 0;
                    }
                    count++;
                }
            }
        }
    }
    printf("consume_insert insert count = %d\n", count);
}

void* consume_remove(void* data)
{
    unsigned int count = 0;
    while (true && !g_exit)
    {
        if(g_print2)
        {
            printf("consume removing .....\n");
            g_print2 = 0;
        }
        {
            std::lock_guard lck(g_mtx);
            std::unordered_map::iterator it = g_testUnorderMap.begin();
            if (it != g_testUnorderMap.end())
            {
                if (g_testMap->find(it->first) == g_testMap->end()) {
                    g_exit = 1;
                    printf("consume_remove find failed testmap lost key = %d\n", it->first);
                    return 0;
                }
                int key = it->first;
                g_testUnorderMap.erase(key);
                if (!g_testMap->erase_check(key))
                {
                    g_exit = 1;
                    printf("consume_remove erase failed testmap lost key = %d\n", key);
                    return 0;
                }
                count++;
            }
        }
    }
    printf("consume_insert remove count = %d\n", count);
}


void testconsume()
{
    printf("-----------------------------test consume begin-------------------------------------------\n");
    printf("test consume need time %d S,please waiting\n",COMSUME_KEEP_TIME / 1000 / 1000);
    pthread_t t1,t2;
    time_t start = GetUSTime();
    pthread_create(&t1,0,consume_insert,NULL);
    pthread_create(&t2,0,consume_remove,NULL);
    time_t lasttime = COMSUME_KEEP_TIME;
    time_t printstart = start;
    while (true)
    {
        time_t now = GetUSTime();
        if(now - printstart >= 20 * 1000 * 1000)
        {
            lasttime = lasttime - (now - printstart);
            printf("test consume last time %ld S,please waiting\n",lasttime / 1000 / 1000);
            g_print1 = 1;
            g_print2 = 1;
            printstart = now;
        }
        time_t keep = now - start;
        if(keep >= COMSUME_KEEP_TIME)
        {
            g_exit = 1;
            break;
        }
        usleep(10);
    }
    pthread_join(t1,NULL);
    pthread_join(t2,NULL);
    printf("test consume keep time %d S\n",COMSUME_KEEP_TIME / 1000 / 1000);
    printf("-----------------------------test consume done-------------------------------------------\n");
}

int main()
{
    testRBTree();
    testInsert();
    testremove();
    testmempool();
    testformance();
    testconsume();
    std::cout << "Test Done,Hello, World!" << std::endl;
}

测试结果:

/home/dguco/Workspace/github/dsalgorithm/rbtarr_map/cmake-build-debug/rbtarr_map
==========================test rbtree start===================================
== 原始数据: ....100000个数太长了,此处省略
== 前序遍历: ....100000个数太长了,此处省略
== 后序遍历: ....100000个数太长了,此处省略
== 最大值: 2147408880
== test rbtree insert done
== test rbtree remove done
==========================test rbtree done===================================
==========================test insert start===================================
==========================test insert done===================================
==========================test remove begin===================================
==========================test remove done===================================
==========================test mempool begin===================================
==========================test mempool done===================================
---------------------------test performance begin---------------------------------------------
RbtHashMap[conflict count = 5], insert use  219 ms
RbtHashMap[conflict count = 5] find use  51 ms,res = 499999500000
RbtHashMap[conflict count = 10000], insert use  649 ms
RbtHashMap[conflict count = 10000] find use  75 ms,res = 499999500000
------------------------------------------------------------------------
std::map insert use  704 ms
std::map find use  308 ms,res = 499999500000
------------------------------------------------------------------------
std::unordered_map insert use  159 ms
std::unordered_map find use  62 ms,res = 499999500000
-----------------------------test performance done-------------------------------------------
-----------------------------test consume begin-------------------------------------------
test consume need time 60 S,please waiting
test consume last time 39 S,please waiting
consume inserting .....
consume removing .....
test consume last time 19 S,please waiting
consume inserting .....
consume removing .....
consume_insert remove count = 88001355
consume_insert insert count = 88008711
test consume keep time 60 S
-----------------------------test consume done-------------------------------------------
Test Done,Hello, World!

Process finished with exit code 0

       测试结果可以看到在插入和删除以及查找等 *** 作上,这里都要比标准库的要好(当然这里的插入和删除没有像标准库那样有new和delete的开销),但是查找大家都是一样的都不会有内存开销,甚至当冲突率达到10000(1000000个元素每10000个元素插入同一个hash中)它的查找效率依然比std::map高,几乎和unordered_map保持持平。

参考文章

基础优化-让哈希表更公平一些

红黑树(一)之 原理和算法详细介绍

红黑树(四)之 C++的实现

红黑树动画演示

漫画:什么是红黑树?

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/1294910.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-06-10
下一篇 2022-06-10

发表评论

登录后才能评论

评论列表(0条)

保存