Skip to content

Commit 4bb70b1

Browse files
Add Project: Page Rank
1 parent 943881e commit 4bb70b1

File tree

1 file changed

+97
-0
lines changed

1 file changed

+97
-0
lines changed

PageRank/Page Rank_stage_5.py

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import numpy as np
2+
import numpy.linalg as la
3+
from io import StringIO
4+
5+
def print_mat(mat):
    """
    Print a matrix (or vector) with every entry formatted to 3 decimals.

    :param mat: array-like accepted by ``np.savetxt`` (1-D or 2-D)
    :return: None; the formatted text is written to standard output
    """
    # np.savetxt writes to a file-like object, so render into an in-memory
    # buffer first and then print the whole text at once.
    stream = StringIO()
    np.savetxt(stream, mat, fmt="%.3f")

    # savetxt ends every row with a newline and print() adds one more, so the
    # output is followed by a blank line.
    print(stream.getvalue())
10+
11+
# -----------------------------------
12+
13+
def get_convergent_vector(L, r_0, threshold=0.01):
    """
    Repeatedly apply ``L`` to a vector until successive iterates are close.

    :param L: transition matrix
    :param r_0: initial vector
    :param threshold: parameter for convergence condition; iteration stops
        once the norm of the difference between two successive iterates is
        strictly below this value
    :return: convergent vector. This is the iterate *before* the final
        multiplication, i.e. the vector ``r`` for which
        ``norm(L @ r - r) < threshold`` first holds.
    """
    r_cur = r_0

    # NOTE: there is no iteration cap; the loop only ends when the
    # convergence condition below is met.
    while True:
        r_next = np.matmul(L, r_cur)

        # check convergence condition is met or not
        if la.norm(r_next - r_cur) < threshold:
            break

        r_cur = r_next

    return r_cur
35+
36+
# -----------------------------------
37+
def get_matrix_with_damping(matrix, damping=0.5):
    """
    Blend a link matrix with a uniform matrix using a damping factor.

    Computes ``damping * matrix + (1 - damping) * J / n`` where ``J`` is the
    ``n x n`` matrix of ones and ``n`` is the number of rows of ``matrix``.

    :param matrix: 2-D numpy array; it must be square (``n x n``) for the
        addition with the ``n x n`` uniform matrix to be well-formed
    :param damping: weight given to ``matrix``; ``1 - damping`` is spread
        uniformly over all entries
    :return: new ``n x n`` numpy array; ``matrix`` itself is not modified
    """
    # get the size of matrix (number of rows)
    n, _ = matrix.shape

    return matrix * damping + (1 - damping) * np.ones((n, n)) / n
43+
44+
45+
46+
# -----------------------------------
47+
48+
# get the size (number of websites)
n = int(input())

# get the names of the websites; position idx corresponds to entry idx of
# the pagerank vector computed below
websites = input().split()

# get transition matrix: n input lines, each a whitespace-separated row of
# floats
matrix = [[float(value) for value in input().split()] for _ in range(n)]

# get the name of target website
target = input()

# convert to numpy array
matrix = np.array(matrix)

matrix_with_damping = get_matrix_with_damping(matrix, damping=0.5)

# initial vector: 100 split evenly across all websites
r_0 = (np.ones(n) / n) * 100

# compute pagerank
r_pagerank = get_convergent_vector(L=matrix_with_damping, r_0=r_0, threshold=0.01)

# output result

# map every website name to its pagerank value
web_pagerank_dict = {websites[idx]: r_pagerank[idx] for idx in range(n)}

# remove target from dictionary so it is not listed twice; this raises
# KeyError when the target is not one of the given website names
del web_pagerank_dict[target]

# target is always on the top; the remaining websites follow in descending
# order of pagerank, ties broken by name in descending order as well
result = [target] + sorted(
    web_pagerank_dict,
    key=lambda w: (web_pagerank_dict[w], w),
    reverse=True,
)

# output top 5 results
for website in result[:5]:
    print(website)
97+

0 commit comments

Comments
 (0)