apriori算法_python

概述apriori算法

下面是内存溢出 jb51.cc 通过网络收集整理的代码片段。

内存溢出小编现在分享给大家，也给大家做个参考。

"""Description     : Simple Python implementation of the Apriori AlgorithmUsage:    $python apriori.py -f DATASET.csv -s minSupport  -c minConfIDence    $python apriori.py -f DATASET.csv -s 0.15 -c 0.6"""import sysfrom itertools import chain,combinationsfrom collections import defaultdictfrom optparse import OptionParserdef subsets(arr):    """ Returns non empty subsets of arr"""    return chain(*[combinations(arr,i + 1) for i,a in enumerate(arr)])def returnItemsWithMinSupport(itemSet,transactionList,minSupport,freqSet):        """calculates the support for items in the itemSet and returns a subset       of the itemSet each of whose elements satisfIEs the minimum support"""        _itemSet = set()        localSet = defaultdict(int)        for item in itemSet:                for transaction in transactionList:                        if item.issubset(transaction):                                freqSet[item] += 1                                localSet[item] += 1        for item,count in localSet.items():                support = float(count)/len(transactionList)                if support >= minSupport:                        _itemSet.add(item)        return _itemSetdef joinSet(itemSet,length):        """Join a set with itself and returns the n-element itemsets"""        return set([i.union(j) for i in itemSet for j in itemSet if len(i.union(j)) == length])def getItemSetTransactionList(data_iterator):    transactionList = List()    itemSet = set()    for record in data_iterator:        transaction = froZenset(record)        transactionList.append(transaction)        for item in transaction:            itemSet.add(froZenset([item]))              # Generate 1-itemSets    return itemSet,transactionListdef runApriori(data_iter,minConfIDence):    """    run the apriori algorithm. data_iter is a record iterator    Return both:     - items (tuple,support)     - rules ((pretuple,posttuple),confIDence)    """    itemSet,transactionList = getItemSetTransactionList(data_iter)    freqSet = defaultdict(int)    largeSet = dict()    # Global dictionary which stores (key=n-itemSets,value=support)    # which satisfy minSupport    assocrules = dict()    # Dictionary which stores Association Rules    oneCSet = returnItemsWithMinSupport(itemSet,freqSet)    currentLSet = oneCSet    k = 2    while(currentLSet != set([])):        largeSet[k-1] = currentLSet        currentLSet = joinSet(currentLSet,k)        currentCSet = returnItemsWithMinSupport(currentLSet,freqSet)        currentLSet = currentCSet        k = k + 1    def getSupport(item):            """local function which Returns the support of an item"""            return float(freqSet[item])/len(transactionList)    toRetItems = []    for key,value in largeSet.items():        toRetItems.extend([(tuple(item),getSupport(item))                           for item in value])    toRetRules = []    for key,value in largeSet.items()[1:]:        for item in value:            _subsets = map(froZenset,[x for x in subsets(item)])            for element in _subsets:                remain = item.difference(element)                if len(remain) > 0:                    confIDence = getSupport(item)/getSupport(element)                    if confIDence >= minConfIDence:                        toRetRules.append(((tuple(element),tuple(remain)),confIDence))    return toRetItems,toRetRulesdef printResults(items,rules):    """prints the generated itemsets and the confIDence rules"""    for item,support in items:        print "item: %s,%.3f" % (str(item),support)    print "\n------------------------ RulES:"    for rule,confIDence in rules:        pre,post = rule        print "Rule: %s ==> %s,%.3f" % (str(pre),str(post),confIDence)def dataFromfile(fname):        """Function which reads from the file and yIElds a generator"""        file_iter = open(fname,'rU')        for line in file_iter:                line = line.strip().rstrip(',')                         # Remove trailing comma                record = froZenset(line.split(','))                yIEld recordif __name__ == "__main__":    optparser = OptionParser()    optparser.add_option('-f','--inputfile',dest='input',help='filename containing csv',default=None)    optparser.add_option('-s','--minSupport',dest='minS',help='minimum support value',default=0.15,type='float')    optparser.add_option('-c','--minConfIDence',dest='minC',help='minimum confIDence value',default=0.6,type='float')    (options,args) = optparser.parse_args()    infile = None    if options.input is None:            infile = sys.stdin    elif options.input is not None:            infile = dataFromfile(options.input)    else:            print 'No dataset filename specifIEd,system with exit\n'            sys.exit('System will exit')    minSupport = options.minS    minConfIDence = options.minC    items,rules = runApriori(infile,minConfIDence)    printResults(items,rules)

以上是内存溢出(jb51.cc)为你收集整理的全部代码内容，希望文章能够帮你解决所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错，欢迎将内存溢出网站推荐给程序员好友。

总结

以上是内存溢出为你收集整理的apriori算法全部内容，希望文章能够帮你解决apriori算法所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错，欢迎将内存溢出网站推荐给程序员好友。

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/langs/1198809.html