1- import collections
1+ from collections import Counter , defaultdict
2+ from candidateGen import *
3+ import pdb
24
35def getTransaction (inputfile ):
46 transaction_set = []
@@ -14,52 +16,39 @@ def getTransaction(inputfile):
1416 return []
1517
def Apriori(transaction_set, minsupport):
    """Mine frequent itemsets level by level with the Apriori scheme.

    Args:
        transaction_set: iterable of transactions, each an iterable of
            hashable items.
        minsupport: minimum support as a fraction; an itemset is kept when
            (number of transactions containing it) / (number of
            transactions) >= minsupport.

    Returns:
        A list of dicts, one per level. The first dict maps every frequent
        single item to its count; each later dict maps a frequent itemset
        (a tuple, in the item order produced by candidateGen) to its count.
        The last dict is always empty: it is the level that ended the
        search.
    """
    # Build one set per transaction up front: the level loop below tests
    # containment against every transaction repeatedly, and a duplicated
    # item inside a single transaction must only count once towards support.
    baskets = [set(transaction) for transaction in transaction_set]
    transaction_count = len(baskets)

    # Level 1: count how many transactions contain each individual item.
    item_counts = defaultdict(int)
    for basket in baskets:
        for item in basket:
            item_counts[item] += 1

    # The comprehension body never runs for an empty input, so dividing by
    # transaction_count is safe here.
    first_set = {item: count for item, count in item_counts.items()
                 if float(count) / transaction_count >= minsupport}
    freqset = [first_set]

    # candidateGen comes from the star-import at the top of the file and is
    # handed itemsets as lists. Level-1 keys are bare items, so each one is
    # wrapped rather than passed through list(), which would split a
    # multi-character string item into characters.
    # NOTE(review): assumes candidateGen returns an iterable of item
    # sequences -- confirm against the candidateGen module.
    current_itemsets = [[item] for item in first_set]

    while freqset[-1]:
        # Pair each candidate's tuple key with a set for containment tests.
        candidates = [(tuple(candidate), set(candidate))
                      for candidate in candidateGen(current_itemsets)]

        # Fresh counter per level so counts from earlier levels cannot leak
        # into this one and keep the loop alive.
        candidate_counts = Counter()
        for basket in baskets:
            for key, members in candidates:
                # A candidate is supported when all its items are present.
                if members <= basket:
                    candidate_counts[key] += 1

        new_freqset = {key: count for key, count in candidate_counts.items()
                       if float(count) / transaction_count >= minsupport}
        freqset.append(new_freqset)
        current_itemsets = [list(key) for key in new_freqset]

    return freqset
6351
64- key = set .keys ()
65- print key
if __name__ == "__main__":
    # Script entry point: mine 'retail.dat' at 0.01% minimum support and
    # print the number of levels Apriori returned.
    transaction = getTransaction('retail.dat')
    # Call form of print parses on both Python 2 and Python 3.
    print(len(Apriori(transaction, 0.0001)))
0 commit comments