File tree Expand file tree Collapse file tree 2 files changed +47
-0
lines changed Expand file tree Collapse file tree 2 files changed +47
-0
lines changed Original file line number Diff line number Diff line change
1
+ The shortest yet efficient PrefixSpan implementation in Python 3, with a core of only 20 lines.
2
+
3
+ # Usage
4
+ Just replace the variable `db` with your own sequences, and the variable `minsup` with your own minimum support threshold.
5
+
6
+ # Features
7
+ Based on the state-of-the-art [PrefixSpan](http://www.cs.sfu.ca/~jpei/publications/span.pdf) algorithm.
Original file line number Diff line number Diff line change
1
+ #! /usr/bin/env python3
2
+
3
+ from collections import defaultdict
4
+
5
# Sequence database to mine: one list of items per sequence.
# Replace with your own sequences.
db = [
    [0, 1, 2, 3, 4],
    [1, 1, 1, 3, 4],
    [2, 1, 2, 2, 0],
    [1, 1, 1, 2, 2],
]

# Minimum support: a pattern is kept only if it occurs in at least this
# many sequences of ``db``.
minsup = 2

# Output accumulator filled by ``mine_rec``; each entry is a
# ``(pattern, sequence_indices)`` pair.
results = []
15
+
16
def mine_rec(patt, mdb):
    """Recursively extend the pattern ``patt`` over its projected database.

    Reads the module-level ``db`` (the sequences) and ``minsup`` (the
    support threshold), and appends every frequent extension it finds to
    the module-level ``results`` list as ``(pattern, sequence_indices)``.

    Args:
        patt: List of items forming the current prefix pattern
            (``[]`` for the initial call).
        mdb: Projected database for ``patt``: a list of ``(i, stoppos)``
            pairs, where ``i`` indexes a sequence in ``db`` and
            ``stoppos`` is the position in that sequence from which
            scanning resumes. Each sequence index is expected to appear
            at most once, as is the case for every list this function
            builds itself.

    Returns:
        None. Results are accumulated in ``results``.
    """
    # Map each item to the projected database obtained by appending that
    # item to ``patt``.
    occurs = defaultdict(list)

    for i, stoppos in mdb:
        seq = db[i]
        for j in range(stoppos, len(seq)):
            entries = occurs[seq[j]]
            # Record only the first occurrence of an item per sequence;
            # entries for one sequence are appended consecutively, so
            # checking the last entry is enough to detect a repeat.
            if not entries or entries[-1][0] != i:
                entries.append((i, j + 1))

    for c, newmdb in occurs.items():
        # One entry per supporting sequence, so the length is the support.
        if len(newmdb) >= minsup:
            newpatt = patt + [c]

            results.append((newpatt, [i for i, _ in newmdb]))
            mine_rec(newpatt, newmdb)
37
+
38
# Start from the empty pattern with every sequence projected at position 0,
# then print all frequent patterns collected in ``results``.
initial_projection = [(seq_index, 0) for seq_index, _ in enumerate(db)]
mine_rec([], initial_projection)

print(results)
You can’t perform that action at this time.
0 commit comments