Skip to content

Commit

Permalink
chore: add more languages
Browse files Browse the repository at this point in the history
  • Loading branch information
Junyi-99 committed Oct 22, 2024
1 parent 312261a commit 8e35e3b
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 11 deletions.
10 changes: 7 additions & 3 deletions src/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,15 @@
]

LANGUAGES = [
'"Jupyter Notebook"',
"Python",
"Shell",
"Dotenv",
"Text",
"JavaScript",
"Python",
"TypeScript",
"Dockerfile",
"Markdown",
'"Jupyter Notebook"',
"Shell",
"Java",
"Go",
"C%2B%2B",
Expand Down
29 changes: 21 additions & 8 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm

from configs import KEYWORDS, REGEX_LIST
from configs import KEYWORDS, REGEX_LIST, LANGUAGES
from manager import CookieManager, DatabaseManager, ProgressManager
from utils import check_key

Expand All @@ -29,7 +29,7 @@ class APIKeyLeakageScanner:
"""
Scan GitHub for available OpenAI API Keys
"""
def __init__(self, db_file: str, keywords: list):
def __init__(self, db_file: str, keywords: list, languages: list):
self.db_file = db_file
self.progress = ProgressManager()
self.driver: webdriver.Chrome | None = None
Expand All @@ -38,12 +38,13 @@ def __init__(self, db_file: str, keywords: list):

self.dbmgr = DatabaseManager(self.db_file)

self.keywords = keywords
# self.keywords = keywords
self.languages = languages
self.candidate_urls = []
for regex, too_many_results, _ in REGEX_LIST:
for keyword in self.keywords:
for language in self.languages:
if too_many_results: # if the regex yields too many results, narrow the search with an additional condition
self.candidate_urls.append(f"https://github.com/search?q={keyword}+AND+(/{regex.pattern}/)&type=code&ref=advsearch")
self.candidate_urls.append(f"https://github.com/search?q=(/{regex.pattern}/)+language:{language}&type=code&ref=advsearch")
else: # if the regex does not yield too many results, the regex alone is enough
self.candidate_urls.append(f"https://github.com/search?q=(/{regex.pattern}/)&type=code&ref=advsearch")

Expand Down Expand Up @@ -154,6 +155,8 @@ def _process_url(self, url: str):
new_apis = list(set(new_apis))
apis_found.extend(new_apis)
rich.print(f" 🟢 Found {len(matches)} matches in the expanded page, adding them to the list")
for match in matches:
rich.print(f" '{match}'")
break

self.check_api_keys_and_save(apis_found)
Expand Down Expand Up @@ -221,10 +224,12 @@ def __del__(self):



def main(from_iter: int | None = None, check_existed_keys_only: bool = False, keywords: list | None = None):
def main(from_iter: int | None = None, check_existed_keys_only: bool = False, keywords: list | None = None, languages: list | None = None):
if keywords is None:
keywords = KEYWORDS.copy()
leakage = APIKeyLeakageScanner("github.db", keywords)
if languages is None:
languages = LANGUAGES.copy()
leakage = APIKeyLeakageScanner("github.db", keywords, languages)

if not check_existed_keys_only:
leakage.login_to_github()
Expand Down Expand Up @@ -263,6 +268,13 @@ def main(from_iter: int | None = None, check_existed_keys_only: bool = False, ke
default=KEYWORDS,
help="Keywords to search",
)
parser.add_argument(
"-l",
"--languages",
nargs="+",
default=LANGUAGES,
help="Languages to search",
)
args = parser.parse_args()

if args.debug:
Expand All @@ -271,5 +283,6 @@ def main(from_iter: int | None = None, check_existed_keys_only: bool = False, ke
main(
from_iter=args.from_iter,
check_existed_keys_only=args.check_existed_keys_only,
keywords=args.keywords
keywords=args.keywords,
languages=args.languages,
)

0 comments on commit 8e35e3b

Please sign in to comment.