forked from raphaelmansuy/code2prompt
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
79370e5
commit 60bf191
Showing
13 changed files
with
226 additions
and
91 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from .c_style import strip_c_style_comments | ||
from .html_style import strip_html_style_comments | ||
from .python_style import strip_python_style_comments | ||
from .shell_style import strip_shell_style_comments | ||
from .sql_style import strip_sql_style_comments | ||
from .matlab_style import strip_matlab_style_comments | ||
from .r_style import strip_r_style_comments | ||
from .strip_comments import strip_comments |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import re | ||
|
||
def strip_c_style_comments(code: str) -> str:
    """Remove ``//`` line comments and ``/* ... */`` block comments from *code*.

    Single- and double-quoted literals are matched by the same pattern so
    that comment markers appearing inside them are left untouched.

    Args:
        code: Source text to process.

    Returns:
        The text with every matched comment replaced by an empty string.
    """
    comment_or_literal = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        flags=re.MULTILINE | re.DOTALL,
    )

    def _replace(found):
        text = found.group(0)
        # Quoted literals are kept verbatim; everything else is a comment.
        if text[:1] in ("'", '"'):
            return text
        return ""

    return comment_or_literal.sub(_replace, code)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import re | ||
|
||
def strip_html_style_comments(code: str) -> str:
    """Remove every ``<!-- ... -->`` comment from *code*.

    The match is non-greedy and may span multiple lines.

    Args:
        code: Markup text to process.

    Returns:
        The text with all matched comments deleted.
    """
    html_comment = re.compile(r"<!--.*?-->", flags=re.DOTALL)
    return html_comment.sub("", code)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import re | ||
|
||
def strip_matlab_style_comments(code: str) -> str:
    """Remove ``%`` end-of-line comments from *code*.

    Single- and double-quoted literals are matched by the same pattern so a
    ``%`` inside them is not treated as a comment start.

    Args:
        code: Source text to process.

    Returns:
        The text with every matched comment replaced by an empty string.
    """
    comment_or_literal = re.compile(
        r'%.*?$|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        flags=re.MULTILINE | re.DOTALL,
    )

    def _replace(found):
        text = found.group(0)
        is_literal = text.startswith("'") or text.startswith('"')
        return text if is_literal else ""

    return comment_or_literal.sub(_replace, code)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import re | ||
|
||
def strip_python_style_comments(code: str) -> str:
    """Remove ``#`` comments and triple-quoted strings from *code*.

    Ordinary single- and double-quoted literals are matched by the same
    pattern and kept, so a ``#`` inside them is not treated as a comment.
    Every triple-quoted string is removed, whether or not it is a docstring.

    Args:
        code: Source text to process.

    Returns:
        The text with matched comments and triple-quoted strings deleted.
    """
    removable_prefixes = ("#", "'''", '"""')
    token = re.compile(
        r'(?s)#.*?$|\'\'\'.*?\'\'\'|""".*?"""|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.MULTILINE,
    )

    def _replace(found):
        text = found.group(0)
        if text.startswith(removable_prefixes):
            return ""
        return text

    return token.sub(_replace, code)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import re | ||
|
||
def strip_r_style_comments(code: str) -> str:
    """Remove ``#`` end-of-line comments from *code*.

    Single- and double-quoted literals are matched by the same pattern so a
    ``#`` inside them is left in place.

    Args:
        code: Source text to process.

    Returns:
        The text with every matched comment replaced by an empty string.
    """
    quote_chars = ("'", '"')
    comment_or_literal = re.compile(
        r'#.*?$|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        flags=re.MULTILINE | re.DOTALL,
    )

    def _replace(found):
        text = found.group(0)
        if not text.startswith(quote_chars):
            return ""
        return text

    return comment_or_literal.sub(_replace, code)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
def strip_shell_style_comments(code: str) -> str:
    """Remove ``#`` comments and ``: '...'`` comment blocks from shell code.

    Lines whose stripped text starts with ``#!`` are always kept. A ``#``
    only starts a comment when it is outside quotes and begins a word, so
    ``"a # b"``, ``$#`` and ``${#var}`` survive. Lines left empty after
    comment removal are dropped.

    Args:
        code: Shell source text.

    Returns:
        The stripped source, with leading and trailing whitespace removed.
    """
    kept_lines = []
    in_multiline_comment = False
    for line in code.split("\n"):
        stripped = line.strip()
        if stripped.startswith("#!"):  # Preserve shebang lines
            kept_lines.append(line)
        elif in_multiline_comment:
            if stripped.endswith("'"):
                in_multiline_comment = False
        elif stripped.startswith(": '"):
            # A block that opens and closes on the same line must not swallow
            # the lines that follow; a bare ": '" (length 3) only opens it.
            closes_here = len(stripped) > 3 and stripped.endswith("'")
            in_multiline_comment = not closes_here
        elif "#" in line:
            code_part = _cut_shell_comment(line)
            if code_part.strip():
                kept_lines.append(code_part)
        else:
            kept_lines.append(line)
    return "\n".join(kept_lines).strip()


def _cut_shell_comment(line: str) -> str:
    """Return *line* truncated just before the first ``#`` that starts a comment."""
    active_quote = ""
    index = 0
    length = len(line)
    while index < length:
        char = line[index]
        if active_quote:
            # Backslash escapes are only honoured inside double quotes.
            if char == "\\" and active_quote == '"':
                index += 2
                continue
            if char == active_quote:
                active_quote = ""
        elif char == "\\":
            # Skip the escaped character so "\#" is never a comment start.
            index += 2
            continue
        elif char in ("'", '"'):
            active_quote = char
        elif char == "#" and (
            index == 0 or line[index - 1].isspace() or line[index - 1] in ";&|()"
        ):
            return line[:index]
        index += 1
    return line
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import re | ||
|
||
def strip_sql_style_comments(code: str) -> str:
    """Remove ``--`` line comments and ``/* ... */`` block comments from *code*.

    Single- and double-quoted literals are matched by the same pattern so
    that comment markers inside them are preserved.

    Args:
        code: SQL text to process.

    Returns:
        The text with every matched comment replaced by an empty string.
    """
    comment_or_literal = re.compile(
        r'--.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        flags=re.MULTILINE | re.DOTALL,
    )

    def _replace(found):
        text = found.group(0)
        keep = text.startswith(("'", '"'))
        if keep:
            return text
        return ""

    return comment_or_literal.sub(_replace, code)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
from .c_style import strip_c_style_comments | ||
from .html_style import strip_html_style_comments | ||
from .python_style import strip_python_style_comments | ||
from .shell_style import strip_shell_style_comments | ||
from .sql_style import strip_sql_style_comments | ||
from .matlab_style import strip_matlab_style_comments | ||
from .r_style import strip_r_style_comments | ||
|
||
def strip_comments(code: str, language: str) -> str:
    """Strip comments from *code* using the stripper that handles *language*.

    Args:
        code: Source text to process.
        language: Language identifier; compared as-is against the names
            listed in the table below.

    Returns:
        The output of the matching stripper, or *code* unchanged when
        *language* is not listed.
    """
    # Ordered (language names, stripper) pairs; the first group containing
    # ``language`` wins.
    strippers = (
        (
            ["c", "cpp", "java", "javascript", "csharp", "php", "go", "rust", "kotlin", "swift", "scala", "dart"],
            strip_c_style_comments,
        ),
        (["python", "ruby", "perl"], strip_python_style_comments),
        (["bash", "powershell", "shell"], strip_shell_style_comments),
        (["html", "xml"], strip_html_style_comments),
        (["sql", "plsql", "tsql"], strip_sql_style_comments),
        (["matlab", "octave"], strip_matlab_style_comments),
        (["r"], strip_r_style_comments),
    )
    for names, stripper in strippers:
        if language in names:
            return stripper(code)
    return code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from pathlib import Path | ||
from fnmatch import fnmatch | ||
|
||
def parse_gitignore(gitignore_path):
    """Read ignore patterns from the file at *gitignore_path*.

    Blank lines and lines whose first character is ``#`` are skipped; every
    other line is added with surrounding whitespace removed.

    Args:
        gitignore_path: Path-like object providing ``exists()`` and ``open()``.

    Returns:
        A set of pattern strings; empty when the file does not exist.
    """
    if not gitignore_path.exists():
        return set()
    entries = set()
    with gitignore_path.open("r", encoding="utf-8") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            # The comment check uses the raw line, so an indented "#" line
            # is kept as a pattern.
            if cleaned and not raw_line.startswith("#"):
                entries.add(cleaned)
    return entries
|
||
def is_ignored(file_path: Path, gitignore_patterns: list, base_path: Path) -> bool:
    """Report whether *file_path* matches any of *gitignore_patterns*.

    The path is made relative to *base_path* and compared with ``fnmatch``.
    A pattern with a leading ``/`` is tested against the relative path and
    its parent directory. Any other pattern is tested against each ancestor
    directory, the file name joined onto each ancestor, and the full
    relative path. Trailing ``/`` characters on a pattern are dropped first.

    Args:
        file_path: Path of the file to test; must lie under *base_path*.
        gitignore_patterns: Iterable of pattern strings.
        base_path: Directory the patterns are relative to.

    Returns:
        True on the first matching pattern, otherwise False.

    Raises:
        ValueError: Propagated from ``Path.relative_to`` when *file_path*
            is not located under *base_path*.
    """
    rel = file_path.relative_to(base_path)
    rel_text = str(rel)
    for raw_pattern in gitignore_patterns:
        pat = raw_pattern.rstrip("/")
        if pat.startswith("/"):
            anchored = pat[1:]
            if fnmatch(rel_text, anchored) or fnmatch(str(rel.parent), anchored):
                return True
            continue
        for ancestor in rel.parents:
            if fnmatch(str(ancestor / rel.name), pat) or fnmatch(str(ancestor), pat):
                return True
        if fnmatch(rel_text, pat):
            return True
    return False
|
||
def is_filtered(file_path, include_pattern="", exclude_pattern="", case_sensitive=False):
    """Decide whether a file passes the include/exclude name filters.

    Both pattern arguments are comma-separated lists of ``fnmatch``-style
    globs applied to the file's name only (not its directory). Empty items
    are ignored, and ``None`` is treated like an empty string.

    Args:
        file_path: Path-like object exposing a ``name`` attribute.
        include_pattern: Globs the name must match; empty means "match all".
        exclude_pattern: Globs the name must not match.
        case_sensitive: When False, the name and the patterns are lowercased
            before matching. When True, both are compared exactly as given.

    Returns:
        True when the name satisfies the include filter and matches no
        exclude pattern, otherwise False.
    """
    # fnmatchcase never applies OS-specific case folding, so the
    # case_sensitive flag alone decides how names are compared.
    from fnmatch import fnmatchcase

    def split_patterns(spec):
        parts = [item.strip() for item in (spec or "").split(",")]
        parts = [item for item in parts if item]
        # Lowercasing patterns in case-sensitive mode would make any pattern
        # containing uppercase characters impossible to match.
        if case_sensitive:
            return parts
        return [item.lower() for item in parts]

    def matches_any(name, patterns):
        return any(fnmatchcase(name, pattern) for pattern in patterns)

    file_name = file_path.name if case_sensitive else file_path.name.lower()
    include_patterns = split_patterns(include_pattern)
    exclude_patterns = split_patterns(exclude_pattern)

    if include_patterns and not matches_any(file_name, include_patterns):
        return False
    return not matches_any(file_name, exclude_patterns)
|
||
def is_binary(file_path):
    """Guess whether the file at *file_path* is binary.

    Only the first 1024 bytes are inspected; the file counts as binary when
    that chunk contains a NUL byte.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True when a NUL byte is found in the leading chunk. False otherwise,
        including when the file cannot be opened (an error message is
        printed in that case).
    """
    try:
        with open(file_path, "rb") as handle:
            head = handle.read(1024)
    except IOError:
        print(f"Error: The file at {file_path} could not be opened.")
        return False
    return b"\x00" in head
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.