下面是内存溢出 jb51.cc 通过网络收集整理的代码片段。
内存溢出小编现在分享给大家,也给大家做个参考。
"""Simple Python implementation of the Apriori algorithm.

Usage:
    $ python apriori.py -f DATASET.csv -s minSupport -c minConfidence
    $ python apriori.py -f DATASET.csv -s 0.15 -c 0.6

When -f is omitted, comma-separated transactions are read from standard
input, one transaction per line.
"""

import sys
from itertools import chain, combinations
from collections import defaultdict
from optparse import OptionParser


def subsets(arr):
    """Return an iterator over every non-empty subset of ``arr``.

    Each subset is produced as a tuple, smallest subsets first.
    """
    members = list(arr)
    return chain.from_iterable(
        combinations(members, size) for size in range(1, len(members) + 1)
    )


def returnItemsWithMinSupport(itemSet, transactionList, minSupport, freqSet):
    """Return the members of ``itemSet`` that reach the minimum support.

    Args:
        itemSet: iterable of candidate itemsets (frozensets).
        transactionList: list of transactions (frozensets).
        minSupport: minimum fraction of transactions that must contain an
            itemset for it to be kept.
        freqSet: mapping of itemset -> occurrence count; updated in place
            with the counts found here so supports can be looked up later.

    Returns:
        The set of itemsets whose support is >= ``minSupport``.
    """
    total = len(transactionList)
    frequent = set()
    for item in itemSet:
        count = sum(
            1 for transaction in transactionList if item.issubset(transaction)
        )
        if not count:
            # Itemsets that never occur are neither counted nor returned,
            # which also avoids dividing by zero for an empty dataset.
            continue
        freqSet[item] += count
        if float(count) / total >= minSupport:
            frequent.add(item)
    return frequent


def joinSet(itemSet, length):
    """Join ``itemSet`` with itself and return the unions of size ``length``."""
    joined = set()
    for left in itemSet:
        for right in itemSet:
            candidate = left.union(right)
            if len(candidate) == length:
                joined.add(candidate)
    return joined


def getItemSetTransactionList(data_iterator):
    """Materialise the records into 1-itemsets and a transaction list.

    Returns:
        A pair ``(itemSet, transactionList)`` where ``itemSet`` holds one
        single-element frozenset per distinct item and ``transactionList``
        holds every record as a frozenset, in input order.
    """
    transactionList = []
    itemSet = set()
    for record in data_iterator:
        transaction = frozenset(record)
        transactionList.append(transaction)
        # Generate the 1-itemsets.
        itemSet.update(frozenset([item]) for item in transaction)
    return itemSet, transactionList


def runApriori(data_iter, minConfIDence, minSupport=0.15):
    """Run the Apriori algorithm over an iterable of records.

    Args:
        data_iter: iterable of records; each record is an iterable of items.
        minConfIDence: minimum confidence a rule needs to be reported.
        minSupport: minimum support an itemset needs to be considered
            frequent. Defaults to 0.15, the command-line default.

    Returns:
        A pair ``(items, rules)``:
          - items: list of ``(itemset_tuple, support)``
          - rules: list of ``((pre_tuple, post_tuple), confidence)``
    """
    itemSet, transactionList = getItemSetTransactionList(data_iter)

    freqSet = defaultdict(int)
    # Maps n -> set of frequent n-itemsets.
    largeSet = {}

    currentLSet = returnItemsWithMinSupport(
        itemSet, transactionList, minSupport, freqSet
    )
    k = 2
    while currentLSet:
        largeSet[k - 1] = currentLSet
        candidates = joinSet(currentLSet, k)
        currentLSet = returnItemsWithMinSupport(
            candidates, transactionList, minSupport, freqSet
        )
        k += 1

    def getSupport(item):
        """Return the support of an itemset already counted in freqSet."""
        return float(freqSet[item]) / len(transactionList)

    sizes = sorted(largeSet)

    toRetItems = []
    for size in sizes:
        toRetItems.extend(
            (tuple(item), getSupport(item)) for item in largeSet[size]
        )

    toRetRules = []
    for size in sizes:
        if size < 2:
            # A 1-itemset has no non-empty proper subset, so it yields no rule.
            continue
        for item in largeSet[size]:
            for element in map(frozenset, subsets(item)):
                remain = item.difference(element)
                if not remain:
                    continue
                confidence = getSupport(item) / getSupport(element)
                if confidence >= minConfIDence:
                    toRetRules.append(
                        ((tuple(element), tuple(remain)), confidence)
                    )
    return toRetItems, toRetRules


def printResults(items, rules):
    """Print the frequent itemsets followed by the association rules."""
    for item, support in items:
        print("item: %s,%.3f" % (str(item), support))
    print("\n------------------------ RulES:")
    for rule, confidence in rules:
        pre, post = rule
        print("Rule: %s ==> %s,%.3f" % (str(pre), str(post), confidence))


def _recordsFromLines(lines):
    """Yield one frozenset of comma-separated fields per input line."""
    for line in lines:
        line = line.strip().rstrip(',')  # Remove trailing comma
        yield frozenset(line.split(','))


def dataFromfile(fname):
    """Yield one record (frozenset of fields) per line of the file ``fname``.

    The file is closed once the generator is exhausted or discarded.
    """
    with open(fname) as file_iter:
        for record in _recordsFromLines(file_iter):
            yield record


def main(argv=None):
    """Parse the command line, run Apriori and print the results.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    optparser = OptionParser()
    optparser.add_option('-f', '--inputfile', dest='input',
                         help='filename containing csv', default=None)
    optparser.add_option('-s', '--minSupport', dest='minS',
                         help='minimum support value', default=0.15,
                         type='float')
    optparser.add_option('-c', '--minConfIDence', dest='minC',
                         help='minimum confIDence value', default=0.6,
                         type='float')
    (options, args) = optparser.parse_args(argv)

    if options.input is None:
        # Parse stdin like a file; passing raw lines through would turn
        # every character of a line into an item.
        records = _recordsFromLines(sys.stdin)
    else:
        records = dataFromfile(options.input)

    items, rules = runApriori(records, options.minC, minSupport=options.minS)
    printResults(items, rules)


if __name__ == "__main__":
    main()
以上是内存溢出(jb51.cc)为你收集整理的全部代码内容,希望文章能够帮你解决所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
总结:以上是内存溢出为你收集整理的 Apriori 算法全部内容,希望文章能够帮你解决 Apriori 算法所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)