From 55c98bb50d570211f9e3a67305a949384a22dd6c Mon Sep 17 00:00:00 2001 From: Denperidge Date: Fri, 20 Jan 2023 14:18:18 +0100 Subject: [PATCH] Created Repo class, which downloads the repo files --- .env.example | 3 +- .gitignore | 1 + app/index.py | 23 +++++------ app/repo.py | 109 ++++++++++++++++++++++++++++++++++----------------- 4 files changed, 87 insertions(+), 49 deletions(-) diff --git a/.env.example b/.env.example index a5960f1..1cb2336 100644 --- a/.env.example +++ b/.env.example @@ -1,2 +1 @@ -USERORORG=Denperidge-Redpencil -REPOS=mu-project,mu-cl-resources \ No newline at end of file +REPOS=Denperidge-Redpencil/mu-project,Denperidge-Redpencil/mu-cl-resources \ No newline at end of file diff --git a/.gitignore b/.gitignore index a17bd9d..742752a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__ .env docs/ +tmp/ diff --git a/app/index.py b/app/index.py index 8893e27..2ab1388 100644 --- a/app/index.py +++ b/app/index.py @@ -3,7 +3,7 @@ from dotenv import load_dotenv -from repo import get_repos, get_file_contents, get_markdown_from_repo_direcetory +from repo import get_repos, get_file_contents, download_and_unzip, Repo from colourstring import ok, nok from sections import sections, RepoSection, Section from table import setup_table, add_and_print @@ -24,38 +24,39 @@ if __name__ == "__main__": - username = environ.get('USERORORG') + userOrOrgFallback = environ.get('USERORORG') or None # Used as default repo owner reponames = environ.get('REPOS').split(',') - repos: list = [ repo for repo in get_repos(username) if repo['name'] in reponames ] + repos: list = [ Repo(repo, owner=userOrOrgFallback) for repo in reponames ] for repo in repos: - reponame = repo['name'] - main = repo['default_branch'] # Start the row with the repo name repoHeaderLength = len(headers[0]) - reponamePadded = reponame[:repoHeaderLength].center(repoHeaderLength) - table = add_and_print(table, f"\n| {reponamePadded} |", f"Checking {reponame}...") + reponamePadded = repo.name[:repoHeaderLength].center(repoHeaderLength) + table = add_and_print(table, f"\n| {reponamePadded} |", f"Checking {repo.name}...") - readme_content = get_file_contents(username, reponame, main) + #readme_content = get_file_contents(username, reponame, main) # Go over every section for i, section_id in enumerate(sections): repoSection = RepoSection(sections[section_id]) + """ # Check if its in the README section_in_readme = repoSection.section.found_in(readme_content) if section_in_readme: repoSection.sourceContent = readme_content add_to_docs(reponame, repoSection.section, repoSection.output) - + """ # TODO check if its in a file somewhere section_in_file = False - - print(get_markdown_from_repo_direcetory(username, reponame, "", main)) + + #print(get_markdown_from_repo_direcetory(username, reponame, "", main)) + + input() diff --git a/app/repo.py b/app/repo.py index c6d62f6..6eef10d 100644 --- a/app/repo.py +++ b/app/repo.py @@ -1,5 +1,72 @@ from urllib import request, error from json import loads +from os import mkdir +from os.path import join, exists +from shutil import rmtree +from zipfile import ZipFile + +tmp_dir = "tmp/" +if exists(tmp_dir): + rmtree(tmp_dir) +mkdir(tmp_dir) + +class Repo(): + def __init__(self, path:str=None, owner:str=None, reponame:str=None, branch:str=None) -> None: + # If the path is provided + if path: + # And contains the branch, remove it and set the branch variable to handle later + if "@" in path: + splitPath = path.split("@") + branch = splitPath[1] + path = splitPath[0] + + # Extract the path info into userOrOrg & reponame + print(path) + + try: + owner, reponame = path.split("/") + except ValueError: + raise ValueError(f"{path} is missing the user/org name, or the repo name!") + + if owner and reponame: + self.owner = owner + self.name = reponame + + else: + raise ValueError("The repo path or (userOrOrg and repoName) have to be defined!") + + # If the path has either been explicitly defined or extracted from path + if branch: + self.branch = branch + else: + self.branch = get_repo_data(self.path)['default_branch'] + + download_and_unzip(self.zip_url, self.tmp_files) + + + @property + def path(self): + return self.owner + "/" + self.name + + @property + def zip_url(self): + return f"https://github.com/{self.path}/archive/refs/heads/{self.branch}.zip" + + @property + def tmp_files(self): + # Follows GitHub zip file naming + return f"{tmp_dir}/{self.name}-{self.branch}" + + + + +def download_and_unzip(url, dest): + filename, res = request.urlretrieve(url, dest + ".zip") + ZipFile(filename).extractall(tmp_dir) + #ZipFile() + #with request.urlopen(url) as req: + # ZipFile(StringIO() req.read()).extractall(dest) + def get_file_contents(username: str, reponame: str, branch: str, path: str="README.md") -> str: url = f"https://raw.githubusercontent.com/{username}/{reponame}/{branch}/{path}" @@ -11,47 +78,17 @@ def get_file_contents(username: str, reponame: str, branch: str, path: str="READ return '' else: raise err - print(url) - print(data) return data -def get_repos(username: str) -> list: - with request.urlopen(f"https://api.github.com/users/{username}/repos") as req: +def get_repos(userOrOrg: str) -> list: + with request.urlopen(f"https://api.github.com/users/{userOrOrg}/repos") as req: repos = loads(req.read()) return repos -def get_repo_contents(username: str, reponame: str, path:str="", branch: str = ""): - url = f"https://api.github.com/repos/{username}/{reponame}/contents/{path}" - if branch != "": - url += f"?ref={branch}" +def get_repo_data(userAndReponame: str): + with request.urlopen(f"https://api.github.com/repos/{userAndReponame}") as req: + repo = loads(req.read()) + return repo - try: - with request.urlopen(url) as req: - data = req.read().decode(req.headers.get_content_charset()) # https://stackoverflow.com/a/19156107 - except error.HTTPError as err: - if err.code == 404: - return '' - else: - raise err - return loads(data) - -def get_markdown_from_repo_direcetory(username: str, reponame: str, path:str="", branch: str = ""): - contents = get_repo_contents(username, reponame, path, branch) - - all_markdown = "" - - - for file in contents: - print(file['name']) - print(file) - if file["type"] == 'dir': - print("dir") - all_markdown += "\n" + get_markdown_from_repo_direcetory(username, reponame, file["path"], branch) - elif file["name"].lower().endswith(".md"): - print("Markdwon f") - all_markdown += "\n" + get_file_contents(username, reponame, branch, file["path"]) - - return all_markdown - \ No newline at end of file