Skip to content

Commit 2f903ee

Browse files
committed
run failed
1 parent 1eb70a1 commit 2f903ee

File tree

3 files changed

+40
-43
lines changed

3 files changed

+40
-43
lines changed

candidateGen.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
11
import sys
22
import os
33
import itertools
4+
from collections import defaultdict
45
import pdb
56

6-
def candidateGen(inputfile):
7+
def candidateGen(freqset):
8+
'''
9+
params:
10+
*freqset: dict
11+
'''
12+
713
# Set of new candidates
14+
print "type: " + str(type(freqset))
815
candidate_set = []
916

10-
# Get frequent itemsets from inputfile
11-
freqset = getFreqSet(inputfile)
12-
if freqset == []:
17+
if not freqset:
1318
return []
1419

1520
# Join step
@@ -52,19 +57,22 @@ def writeCandidate():
5257
print "Input file not found"
5358
return
5459

60+
# Get frequent itemsets from inputfile
61+
freqset = getFreqSet(inputfile)
62+
5563
# Generate candidates
56-
candidate_set = candidateGen(inputfile)
64+
candidate_set = candidateGen(freqset)
5765

5866
# Write output
5967
try:
6068
with open(outputfile, "wt") as file:
61-
for candidate in candidate_set:
62-
for item in candidate:
63-
file.write(item + ' ')
64-
file.write('\n')
69+
for key in candidate_set:
70+
line = ' '.join(key)
71+
file.write(line)
6572
file.close()
6673
except:
6774
print "Cannot write file"
6875
return
6976

70-
writeCandidate()
77+
if __name__ == "__main__":
78+
writeCandidate()

candidateGen.pyc

1.97 KB
Binary file not shown.

myApriori.py

Lines changed: 22 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
import collections
1+
from collections import Counter, defaultdict
2+
from candidateGen import *
3+
import pdb
24

35
def getTransaction(inputfile):
46
transaction_set = []
@@ -14,52 +16,39 @@ def getTransaction(inputfile):
1416
return []
1517

1618
def Apriori(transaction_set, minsupport):
17-
init_set = collections.defaultdict(lambda: 0)
19+
init_set = defaultdict(int)
20+
transaction_count = 0
1821

1922
# Create initial itemsets
2023
for transaction in transaction_set:
2124
for item in transaction:
2225
init_set[item] = init_set[item] + 1
23-
26+
transaction_count += 1
2427

2528
# Find first frequent set
26-
transaction_len = len(transaction_set)
2729
first_set = {item: frequency for (item, frequency) in init_set.iteritems()
28-
if float(init_set[item]) / transaction_len >= minsupport}
30+
if float(init_set[item]) / transaction_count >= minsupport}
2931
freqset = []
3032
freqset.append(first_set)
33+
candidate_dict = Counter()
3134

32-
while len(freqset[-1]) != 0:
33-
pass
34-
35-
return init_set
3635

37-
def candidateGen(freqset):
38-
# Set of new candidate
39-
candidate_set = []
36+
while len(freqset[-1]) != 0:
37+
current_candidate = candidateGen([list(x) for x in freqset[-1].keys()])
38+
pdb.set_trace()
39+
for transaction in transaction_set:
4040

41-
# Join step
42-
keylist = freqset.keys()
43-
for i in range(len(keylist) - 1):
44-
for j in range(i + 1, len(freqset)):
45-
item1 = freqset[i]
46-
item2 = freqset[j]
47-
if cmp(item1[:-1], item2[:-2]) == 0 and item1[-1] != item2[-1]:
48-
newset = item1
41+
for candidate_item in current_candidate:
42+
if candidate_item in transaction:
43+
candidate_dict[tuple(candidate_item)] += 1
4944

50-
#transaction = getTransaction('retail.dat')
51-
#print len(Apriori(transaction, 0.0001))
45+
new_freqset = { item: freq for item, freq in candidate_dict.iteritems()
46+
if float(freq)/transaction_len >= minsupport }
5247

48+
freqset.append(new_freqset)
5349

54-
set = {i:i + 1 for i in range(1, 11)}
55-
setb = {a: b for a, b in set.iteritems()
56-
if set[a] >= 8}
57-
setc = []
58-
setc.append(set)
59-
setc.append({})
60-
if not setc[-1]:
61-
print 'fsdfsd'
62-
print setc
50+
return freqset
6351

64-
key = set.keys()
65-
print key
52+
if __name__ == "__main__":
53+
transaction = getTransaction('retail.dat')
54+
print len(Apriori(transaction, 0.0001))

0 commit comments

Comments
 (0)