From 55c98bb50d570211f9e3a67305a949384a22dd6c Mon Sep 17 00:00:00 2001
From: Denperidge <denperidge@gmail.com>
Date: Fri, 20 Jan 2023 14:18:18 +0100
Subject: [PATCH] Created Repo class, which downloads the repo files

---
 .env.example |   3 +-
 .gitignore   |   1 +
 app/index.py |  23 +++++------
 app/repo.py  | 109 ++++++++++++++++++++++++++++++++++-----------------
 4 files changed, 87 insertions(+), 49 deletions(-)

diff --git a/.env.example b/.env.example
index a5960f1..1cb2336 100644
--- a/.env.example
+++ b/.env.example
@@ -1,2 +1 @@
-USERORORG=Denperidge-Redpencil
-REPOS=mu-project,mu-cl-resources
\ No newline at end of file
+REPOS=Denperidge-Redpencil/mu-project,Denperidge-Redpencil/mu-cl-resources
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index a17bd9d..742752a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 __pycache__
 .env
 docs/
+tmp/
diff --git a/app/index.py b/app/index.py
index 8893e27..2ab1388 100644
--- a/app/index.py
+++ b/app/index.py
@@ -3,7 +3,7 @@
 
 from dotenv import load_dotenv
 
-from repo import get_repos, get_file_contents, get_markdown_from_repo_direcetory
+from repo import get_repos, get_file_contents, download_and_unzip, Repo
 from colourstring import ok, nok
 from sections import sections, RepoSection, Section
 from table import setup_table, add_and_print
@@ -24,38 +24,39 @@
 
 
 if __name__ == "__main__":
-    username = environ.get('USERORORG')
+    userOrOrgFallback = environ.get('USERORORG') or None  # Used as default repo owner
     reponames = environ.get('REPOS').split(',')
 
-    repos: list = [ repo for repo in get_repos(username) if repo['name'] in reponames ]
+    repos: list = [ Repo(repo, owner=userOrOrgFallback) for repo in reponames ]
 
     for repo in repos:
-        reponame = repo['name']
-        main = repo['default_branch']
 
         # Start the row  with the repo name
         repoHeaderLength = len(headers[0])
-        reponamePadded = reponame[:repoHeaderLength].center(repoHeaderLength)
-        table = add_and_print(table, f"\n| {reponamePadded} |", f"Checking {reponame}...")
+        reponamePadded = repo.name[:repoHeaderLength].center(repoHeaderLength)
+        table = add_and_print(table, f"\n| {reponamePadded} |", f"Checking {repo.name}...")
 
-        readme_content = get_file_contents(username, reponame, main)
+        #readme_content = get_file_contents(username, reponame, main)
 
         # Go over every section
         for i, section_id in enumerate(sections):
             repoSection = RepoSection(sections[section_id])
             
+            """
             # Check if its in the README
             section_in_readme = repoSection.section.found_in(readme_content)
             if section_in_readme:
                 repoSection.sourceContent = readme_content
                 add_to_docs(reponame, repoSection.section, repoSection.output)
-            
+            """
 
             # TODO check if its in a file somewhere
             section_in_file = False
-
-            print(get_markdown_from_repo_direcetory(username, reponame, "", main))
             
+
+            #print(get_markdown_from_repo_direcetory(username, reponame, "", main))
+
+
             input()
 
 
diff --git a/app/repo.py b/app/repo.py
index c6d62f6..6eef10d 100644
--- a/app/repo.py
+++ b/app/repo.py
@@ -1,5 +1,72 @@
 from urllib import request, error
 from json import loads
+from os import mkdir
+from os.path import join, exists
+from shutil import rmtree
+from zipfile import ZipFile
+
+tmp_dir = "tmp/"
+if exists(tmp_dir):
+    rmtree(tmp_dir)
+mkdir(tmp_dir)
+
+class Repo():
+    def __init__(self, path:str=None, owner:str=None, reponame:str=None, branch:str=None) -> None:
+        # If the path is provided
+        if path:
+            # If it also contains a branch (after "@"), split it off and keep it in the branch variable for later
+            if "@" in path:
+                splitPath = path.split("@")
+                branch = splitPath[1]
+                path = splitPath[0]
+            
+            # Extract the path info into owner & reponame
+            print(path)
+            
+            try:
+                owner, reponame = path.split("/")
+            except ValueError:
+                raise ValueError(f"{path} is missing the user/org name, or the repo name!")
+
+        if owner and reponame:
+            self.owner = owner
+            self.name = reponame
+            
+        else:
+            raise ValueError("The repo path or (userOrOrg and repoName) have to be defined!")
+
+        # If the branch has either been explicitly defined or extracted from path
+        if branch:
+            self.branch = branch
+        else:
+            self.branch = get_repo_data(self.path)['default_branch']
+        
+        download_and_unzip(self.zip_url, self.tmp_files)
+
+
+    @property
+    def path(self):
+        return self.owner + "/" + self.name
+
+    @property
+    def zip_url(self):
+        return f"https://github.com/{self.path}/archive/refs/heads/{self.branch}.zip"
+    
+    @property
+    def tmp_files(self):
+        # Follows GitHub zip file naming
+        return f"{tmp_dir}/{self.name}-{self.branch}"
+    
+    
+
+
+def download_and_unzip(url, dest):
+    filename, res = request.urlretrieve(url, dest + ".zip")
+    ZipFile(filename).extractall(tmp_dir)
+    #ZipFile()
+    #with request.urlopen(url) as req:
+     #   ZipFile(StringIO() req.read()).extractall(dest)
+
 
 def get_file_contents(username: str, reponame: str, branch: str, path: str="README.md") -> str:
     url = f"https://raw.githubusercontent.com/{username}/{reponame}/{branch}/{path}"
@@ -11,47 +78,17 @@ def get_file_contents(username: str, reponame: str, branch: str, path: str="READ
             return ''
         else: 
             raise err
-    print(url)
-    print(data)
     return data
 
 
-def get_repos(username: str) -> list:
-    with request.urlopen(f"https://api.github.com/users/{username}/repos") as req:
+def get_repos(userOrOrg: str) -> list:
+    with request.urlopen(f"https://api.github.com/users/{userOrOrg}/repos") as req:
         repos = loads(req.read())
     return repos
 
 
-def get_repo_contents(username: str, reponame: str, path:str="", branch: str = ""):
-    url = f"https://api.github.com/repos/{username}/{reponame}/contents/{path}" 
-    if branch != "":
-        url += f"?ref={branch}"
+def get_repo_data(userAndReponame: str):
+    with request.urlopen(f"https://api.github.com/repos/{userAndReponame}") as req:
+        repo = loads(req.read())
+    return repo
 
-    try:
-        with request.urlopen(url) as req:
-            data = req.read().decode(req.headers.get_content_charset())  # https://stackoverflow.com/a/19156107
-    except error.HTTPError as err:
-        if err.code == 404:
-            return ''
-        else: 
-            raise err
-    return loads(data)
-
-def get_markdown_from_repo_direcetory(username: str, reponame: str, path:str="", branch: str = ""):
-    contents = get_repo_contents(username, reponame, path, branch)
-
-    all_markdown = ""
-
-
-    for file in contents:
-        print(file['name'])
-        print(file)
-        if file["type"] == 'dir':
-            print("dir")
-            all_markdown += "\n" + get_markdown_from_repo_direcetory(username, reponame, file["path"], branch)
-        elif file["name"].lower().endswith(".md"):
-            print("Markdwon f")
-            all_markdown += "\n" + get_file_contents(username, reponame, branch, file["path"])
-    
-    return all_markdown
-        
\ No newline at end of file