66
77
def download(url_, path_):
    """Download ``url_`` into the local file ``path_`` if it is not there yet.

    The parent directory of ``path_`` is created when missing.  A file that
    already exists locally is left untouched and no request is made.

    Responses whose ``Content-Type`` starts with ``text`` are written as
    UTF-8 text; everything else (including responses without a
    ``Content-Type`` header) is written as raw bytes.

    Args:
        url_: Address of the resource to fetch.
        path_: Local destination path of the downloaded file.

    Returns:
        None. Success or failure is reported on stdout; a non-200 status
        code is printed together with the URL and nothing is written.
    """
    folder_path = os.path.dirname(path_)
    # dirname() is "" for a bare filename, and makedirs("") raises, so only
    # create a directory when there is one to create.  exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)

    # Only files that are missing locally are downloaded.
    if os.path.exists(path_):
        return

    response = requests.get(url_)
    if response.status_code != 200:
        print(f"\033[91m无法下载文件链接:\033[0m{url_} , \033[91m状态码:\033[0m{response.status_code} ")
        return

    # Not every server sends Content-Type; treat a missing header as binary
    # instead of failing with KeyError.
    content_type = response.headers.get('Content-Type', '')
    if content_type.startswith('text'):
        with open(path_, 'w', encoding="utf-8") as file:
            file.write(response.text)
    else:
        # Non-text resources are stored byte-for-byte.
        with open(path_, 'wb') as file:
            file.write(response.content)
    print(f"\033[32m已下载 \033[0m{path_} ")
2524
2625
2726class OSCourseware :
2827 BASE_URL = "https://jyywiki.cn"
29- SOURCE_FILE_TYPE = (
30- ".png" , ".jpg" , ".gif" , ".webp" , "jpeg" ,
31- ".js" , ".css" , ".html" ,
32- ".c" , ".h" , ".cpp" , ".py" , ".sh" , ".S" , ".lua" , ".txt"
33- )
28+ BASE_DIR = ""
3429 COURSEWARE_DIR = "Courseware"
3530 WITHOUT_DOWNLOAD = [
3631 "https://jyywiki.cn/pages/OS/2022/Labs/lab-makefile.png" , # 404
37- "https://jyywiki.cn/index.html" # unnecessary
32+ "https://jyywiki.cn/OS/2021/slides/Slides_Author" , # 404
33+ "https://jyywiki.cn/index.html" # unnecessary
3834 ]
3935 KEY_YEAR = {'A' : "2021" , 'B' : "2022" , 'C' : "2023" , 'D' : "ALL" , '' : "2023" }
4036 year_input = ''
41- current_dir = ''
42- sources_url_path_pairs = {} # 不用去重
37+ sources_url_path_pairs = {}
4338
4439 def __init__ (self ):
45- self .current_dir = os .path .join (os .getcwd (), self .COURSEWARE_DIR )
4640 self .file_download_option ()
4741 self .file_download ()
48- print ("下载完成 " )
42+ print ("\033 [32m下载完成 " )
4943
5044 def file_download_option (self ):
5145 def build_courseware_url_path (year_ ):
5246 url_ = f'{ self .BASE_URL } /OS/{ year_ } '
53- path_ = f'{ self .current_dir } \\ OS\\ { year_ } \\ index.html'
47+ path_ = f'{ os . path . join ( self .BASE_DIR , self . COURSEWARE_DIR ) } \\ OS\\ { year_ } \\ index.html'
5448 return {url_ : path_ }
5549
50+ self .BASE_DIR = os .getcwd ()
5651 self .year_input = input ("无法下载的文件会提示。下载成功后提示“下载成功”\n " +
5752 "通过选项下载对应年份课件,回车默认下载2023年课件,输入其他符号则退出\n " +
5853 "\033 [32mA\033 [0m:2021 \033 [32mB\033 [0m:2022 "
@@ -67,7 +62,7 @@ def build_courseware_url_path(year_):
6762 if self .year_input != "2023" :
6863 self .WITHOUT_DOWNLOAD .append (f'{ self .BASE_URL } /OS/2023/index.html' )
6964 else :
70- print ("输入非法 ,程序退出" )
65+ print ("\033 [91m输入非法 ,程序退出" )
7166 sys .exit ()
7267
7368 def file_download (self ):
@@ -93,21 +88,22 @@ def file_analyse(self, filepath):
9388 _links_tags = soup .find_all (href = True ) + soup .find_all (src = True )
9489 _links_attr = []
9590 for link in _links_tags :
96- _links_attr .extend ([link .get ("href" ), link .get ("src" )])
91+ _links_attr .extend ([link .get ("href" ), link .get ("src" ), link . get ( "data" ) ])
9792 _links_attr = list (set (_links_attr )) # 去除重复的元素
9893
9994 # 补全完整的文件地址和链接
10095 for link in _links_attr :
101- if link is None or link .startswith (("http" , "data" )): # data是ipynb.html文件资源
102- continue
103- if link .endswith (self .SOURCE_FILE_TYPE ):
96+ if link is not None and not link .startswith (("http" , "data" )): # data是ipynb.html文件资源
10497 # 以filepath指定的文件为参照补全文件中的网址以及在本地存储的地址
10598 path = os .path .normpath (os .path .join (os .path .dirname (filepath ), link .replace ("/" , "\\ " )))
106- relative_path = path .split (os .getcwd () + os .sep + self .COURSEWARE_DIR )[1 ]
107- url = urljoin (self .BASE_URL , relative_path .replace ("\\ " , "/" ))
108- if url not in self .WITHOUT_DOWNLOAD :
109- self .sources_url_path_pairs .update ({url : path })
110- self .WITHOUT_DOWNLOAD .append (url )
99+ try :
100+ relative_path = path .split (self .BASE_DIR + os .sep + self .COURSEWARE_DIR )[1 ]
101+ url = urljoin (self .BASE_URL , relative_path .replace ("\\ " , "/" ))
102+ if url not in self .WITHOUT_DOWNLOAD :
103+ self .sources_url_path_pairs .update ({url : path })
104+ self .WITHOUT_DOWNLOAD .append (url )
105+ except IndexError :
106+ continue
111107
112108
113109courseware = OSCourseware ()
0 commit comments