Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions application/api/user/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@
),
"name": fields.String(required=True, description="Job name"),
"data": fields.String(required=True, description="Data to process"),
"repo_url": fields.String(description="GitHub repository URL"),
},
)
)
Expand All @@ -377,11 +378,18 @@
return missing_fields

try:
if "repo_url" in data:
source_data = data["repo_url"]
loader = "github"

Check warning on line 383 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L381-L383

Added lines #L381 - L383 were not covered by tests
else:
source_data = data["data"]
loader = data["source"]

Check warning on line 386 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L385-L386

Added lines #L385 - L386 were not covered by tests

task = ingest_remote.delay(
source_data=data["data"],
source_data=source_data,
job_name=data["name"],
user=data["user"],
loader=data["source"],
loader=loader,
)
except Exception as err:
return make_response(jsonify({"success": False, "error": str(err)}), 400)
Expand Down
53 changes: 53 additions & 0 deletions application/parser/remote/github_loader.py
Original file line number Diff line number Diff line change
@@ -1,0 +1,53 @@
import base64
import requests
from typing import List
from application.parser.remote.base import BaseRemote
from langchain_core.documents import Document

class GitHubLoader(BaseRemote):
    """Remote loader that turns the files of a GitHub repository into Documents.

    The repository tree is walked through the GitHub REST "contents"
    endpoint; every file found is then fetched individually and wrapped in
    a ``Document`` whose metadata records the file path and a link to it.
    """

    # Root of the GitHub REST endpoints used by this loader.
    API_ROOT = "https://api.github.com/repos"
    # Seconds to wait for GitHub. ``requests`` applies no timeout by default,
    # so without this a stalled connection would block ingestion forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, access_token=None):
        """Create a loader.

        Args:
            access_token: Optional GitHub token (a string). When provided it
                is sent as an ``Authorization`` header on every request;
                when omitted, requests are made unauthenticated (the
                previous, hard-coded behaviour).
        """
        self.access_token = access_token
        self.headers = (
            {"Authorization": f"token {access_token}"} if access_token else {}
        )

    @staticmethod
    def _parse_repo_name(repo_url: str) -> str:
        """Reduce a repository URL (or ``owner/repo`` string) to ``owner/repo``.

        Trailing slashes, a ``.git`` suffix and extra path segments such as
        ``/tree/main`` are discarded so the result can be placed directly
        into an API URL.

        Raises:
            ValueError: If no ``owner/repo`` pair can be extracted.
        """
        tail = repo_url.strip().split("github.com/")[-1]
        segments = [part for part in tail.split("/") if part]
        if len(segments) < 2:
            raise ValueError(f"Not a GitHub repository URL: {repo_url!r}")
        owner, repo = segments[0], segments[1]
        if repo.endswith(".git"):
            repo = repo[: -len(".git")]
        return f"{owner}/{repo}"

    def _contents_url(self, repo_url: str, path: str) -> str:
        """Build the "contents" API URL for ``path`` inside ``repo_url``."""
        # Imported locally so the class does not rely on the module's
        # top-of-file import block being changed.
        from urllib.parse import quote

        # Escape characters such as "#" and "?" that would otherwise be
        # parsed as a fragment/query and truncate the path; "/" is kept.
        return f"{self.API_ROOT}/{repo_url}/contents/{quote(path)}"

    def fetch_file_content(self, repo_url: str, file_path: str) -> str:
        """Fetch one file and return its text prefixed with its file name.

        Args:
            repo_url: Repository in ``owner/repo`` form.
            file_path: Path of the file inside the repository.

        Returns:
            ``"Filename: <file_path>\\n\\n<content>"``. Base64 payloads are
            decoded as UTF-8; any other encoding is passed through as-is.

        Raises:
            requests.HTTPError: If GitHub answers with a 4xx/5xx status.
            Exception: Whatever base64/UTF-8 decoding raises (for example
                ``UnicodeDecodeError`` for binary files); it is printed and
                re-raised unchanged.
        """
        response = requests.get(
            self._contents_url(repo_url, file_path),
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail loudly on error statuses instead of falling through and
        # implicitly returning None.
        response.raise_for_status()

        content = response.json()
        if content.get("encoding") == "base64":
            try:
                decoded_content = base64.b64decode(content["content"]).decode("utf-8")
            except Exception as e:
                print(f"Error decoding content for {file_path}: {e}")
                raise
            return f"Filename: {file_path}\n\n{decoded_content}"
        return f"Filename: {file_path}\n\n{content['content']}"

    def fetch_repo_files(self, repo_url: str, path: str = "") -> List[str]:
        """Recursively list the paths of all files below ``path``.

        Args:
            repo_url: Repository in ``owner/repo`` form.
            path: Directory to list; the empty string means the repo root.

        Returns:
            Paths of every entry of type ``"file"``, descending into entries
            of type ``"dir"``. Other entry types are ignored.

        Raises:
            requests.HTTPError: If GitHub answers with a 4xx/5xx status
                (unknown repository, rate limit, ...). Previously the error
                payload was iterated as if it were a listing.
        """
        response = requests.get(
            self._contents_url(repo_url, path),
            # NOTE(review): the "raw" media type is kept from the original
            # request; confirm it is really wanted for directory listings.
            headers={**self.headers, "Accept": "application/vnd.github.v3.raw"},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        files = []
        for item in response.json():
            if item["type"] == "file":
                files.append(item["path"])
            elif item["type"] == "dir":
                files.extend(self.fetch_repo_files(repo_url, item["path"]))
        return files

    def load_data(self, repo_url: str) -> List[Document]:
        """Load every UTF-8 decodable file of a repository as a Document.

        Args:
            repo_url: Repository URL (``https://github.com/owner/repo``,
                optionally with a trailing slash, ``.git`` suffix or extra
                path segments) or a bare ``owner/repo`` string.

        Returns:
            One ``Document`` per file, with ``title`` (the file path) and
            ``source`` (a github.com link) in its metadata.

        Raises:
            ValueError: If ``repo_url`` does not contain ``owner/repo``.
            requests.HTTPError: If any GitHub request fails.
        """
        repo_name = self._parse_repo_name(repo_url)
        documents = []
        for file_path in self.fetch_repo_files(repo_name):
            try:
                content = self.fetch_file_content(repo_name, file_path)
            except UnicodeDecodeError:
                # Binary files (images, archives, ...) cannot become text
                # documents; skip them rather than abort the whole repo.
                continue
            documents.append(
                Document(
                    page_content=content,
                    metadata={
                        "title": file_path,
                        # NOTE(review): assumes the default branch is
                        # "main"; links break for repos using another one.
                        "source": f"https://github.com/{repo_name}/blob/main/{file_path}",
                    },
                )
            )
        return documents

Check warning on line 53 in application/parser/remote/github_loader.py

View check run for this annotation

Codecov / codecov/patch

application/parser/remote/github_loader.py#L53

Added line #L53 was not covered by tests
2 changes: 2 additions & 0 deletions application/parser/remote/remote_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from application.parser.remote.crawler_loader import CrawlerLoader
from application.parser.remote.web_loader import WebLoader
from application.parser.remote.reddit_loader import RedditPostsLoaderRemote
from application.parser.remote.github_loader import GitHubLoader


class RemoteCreator:
Expand All @@ -10,6 +11,7 @@ class RemoteCreator:
"sitemap": SitemapLoader,
"crawler": CrawlerLoader,
"reddit": RedditPostsLoaderRemote,
"github": GitHubLoader,
}

@classmethod
Expand Down
1 change: 1 addition & 0 deletions frontend/src/locale/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
"train": "Train",
"link": "Link",
"urlLink": "URL Link",
"repoUrl": "Repository URL",
"reddit": {
"id": "Client ID",
"secret": "Client Secret",
Expand Down
1 change: 1 addition & 0 deletions frontend/src/locale/es.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
"train": "Entrenar",
"link": "Enlace",
"urlLink": "Enlace URL",
"repoUrl": "URL del Repositorio",
"reddit": {
"id": "ID de Cliente",
"secret": "Secreto de Cliente",
Expand Down
1 change: 1 addition & 0 deletions frontend/src/locale/jp.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
"train": "トレーニング",
"link": "リンク",
"urlLink": "URLリンク",
"repoUrl": "リポジトリURL",
"reddit": {
"id": "クライアントID",
"secret": "クライアントシークレット",
Expand Down
1 change: 1 addition & 0 deletions frontend/src/locale/zh.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
"train": "训练",
"link": "链接",
"urlLink": "URL 链接",
"repoUrl": "存储库 URL",
"reddit": {
"id": "客户端 ID",
"secret": "客户端密钥",
Expand Down
34 changes: 33 additions & 1 deletion frontend/src/upload/Upload.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ function Upload({
const [docName, setDocName] = useState('');
const [urlName, setUrlName] = useState('');
const [url, setUrl] = useState('');
const [repoUrl, setRepoUrl] = useState(''); // P3f93
const [redditData, setRedditData] = useState({
client_id: '',
client_secret: '',
Expand All @@ -48,6 +49,7 @@ function Upload({
// { label: 'Sitemap', value: 'sitemap' },
{ label: 'Link', value: 'url' },
{ label: 'Reddit', value: 'reddit' },
{ label: 'GitHub', value: 'github' }, // P3f93
];

const [urlType, setUrlType] = useState<{ label: string; value: string }>({
Expand Down Expand Up @@ -238,6 +240,9 @@ function Upload({
formData.set('name', 'other');
formData.set('data', JSON.stringify(redditData));
}
if (urlType.value === 'github') {
formData.append('repo_url', repoUrl); // Pdeac
}
const apiHost = import.meta.env.VITE_API_HOST;
const xhr = new XMLHttpRequest();
xhr.upload.addEventListener('progress', (event) => {
Expand Down Expand Up @@ -376,7 +381,7 @@ function Upload({
size="w-full"
rounded="3xl"
/>
{urlType.label !== 'Reddit' ? (
{urlType.label !== 'Reddit' && urlType.label !== 'GitHub' ? (
<>
<Input
placeholder={`Enter ${t('modals.uploadDoc.name')}`}
Expand All @@ -403,6 +408,33 @@ function Upload({
</span>
</div>
</>
) : urlType.label === 'GitHub' ? ( // P3f93
<>
<Input
placeholder={`Enter ${t('modals.uploadDoc.name')}`}
type="text"
value={urlName}
onChange={(e) => setUrlName(e.target.value)}
borderVariant="thin"
></Input>
<div className="relative bottom-12 left-2 mt-[-20px]">
<span className="bg-white px-2 text-xs text-gray-4000 dark:bg-outer-space dark:text-silver">
{t('modals.uploadDoc.name')}
</span>
</div>
<Input
placeholder={t('modals.uploadDoc.repoUrl')}
type="text"
value={repoUrl}
onChange={(e) => setRepoUrl(e.target.value)}
borderVariant="thin"
></Input>
<div className="relative bottom-12 left-2 mt-[-20px]">
<span className="bg-white px-2 text-xs text-gray-4000 dark:bg-outer-space dark:text-silver">
{t('modals.uploadDoc.repoUrl')}
</span>
</div>
</>
) : (
<div className="flex flex-col gap-1 mt-2">
<div>
Expand Down
Loading