Skip to content

feat: made --recurse-submodules optional #221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/gitingest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@
@click.option("--exclude-pattern", "-e", multiple=True, help="Patterns to exclude")
@click.option("--include-pattern", "-i", multiple=True, help="Patterns to include")
@click.option("--branch", "-b", default=None, help="Branch to clone and ingest")
@click.option("--include-submodules", is_flag=True, help="Include git submodules in the analysis")
def main(
source: str,
output: Optional[str],
max_size: int,
exclude_pattern: Tuple[str, ...],
include_pattern: Tuple[str, ...],
branch: Optional[str],
include_submodules: bool,
):
"""
Main entry point for the CLI. This function is called when the CLI is run as a script.
Expand All @@ -46,9 +48,11 @@ def main(
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
branch : str, optional
The branch to clone (optional).
include_submodules : bool
Whether to include git submodules in the analysis.
"""
# Main entry point for the CLI. This function is called when the CLI is run as a script.
asyncio.run(_async_main(source, output, max_size, exclude_pattern, include_pattern, branch))
asyncio.run(_async_main(source, output, max_size, exclude_pattern, include_pattern, branch, include_submodules))


async def _async_main(
Expand All @@ -58,6 +62,7 @@ async def _async_main(
exclude_pattern: Tuple[str, ...],
include_pattern: Tuple[str, ...],
branch: Optional[str],
include_submodules: bool,
) -> None:
"""
Analyze a directory or repository and create a text dump of its contents.
Expand All @@ -80,6 +85,8 @@ async def _async_main(
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
branch : str, optional
The branch to clone (optional).
include_submodules : bool
Whether to include git submodules in the analysis.

Raises
------
Expand All @@ -93,7 +100,7 @@ async def _async_main(

if not output:
output = OUTPUT_FILE_NAME
summary, _, _ = await ingest_async(source, max_size, include_patterns, exclude_patterns, branch, output=output)
summary, _, _ = await ingest_async(source, max_size, include_patterns, exclude_patterns, branch, include_submodules, output=output)

click.echo(f"Analysis complete! Output written to: {output}")
click.echo("\nSummary:")
Expand Down
7 changes: 6 additions & 1 deletion src/gitingest/cloning.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class CloneConfig:
The specific commit hash to check out after cloning (default is None).
branch : str, optional
The branch to clone (default is None).
include_submodules : bool
Whether to include submodules when cloning (default is False).
subpath : str
The subpath to clone from the repository (default is "/").
"""
Expand All @@ -37,6 +39,7 @@ class CloneConfig:
local_path: str
commit: Optional[str] = None
branch: Optional[str] = None
include_submodules: bool = False
subpath: str = "/"
blob: bool = False

Expand Down Expand Up @@ -81,7 +84,9 @@ async def clone_repo(config: CloneConfig) -> None:
raise ValueError("Repository not found, make sure it is public")

clone_cmd = ["git", "clone", "--single-branch"]
# TODO re-enable --recurse-submodules

if config.include_submodules:
clone_cmd.append("--recurse-submodules")

if partial_clone:
clone_cmd += ["--filter=blob:none", "--sparse"]
Expand Down
6 changes: 6 additions & 0 deletions src/gitingest/query_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class ParsedQuery: # pylint: disable=too-many-instance-attributes
ignore_patterns: Optional[Set[str]] = None
include_patterns: Optional[Set[str]] = None
pattern_type: Optional[str] = None
include_submodules: bool = False

def extact_clone_config(self) -> CloneConfig:
"""
Expand All @@ -68,6 +69,7 @@ def extact_clone_config(self) -> CloneConfig:
branch=self.branch,
subpath=self.subpath,
blob=self.type == "blob",
include_submodules=self.include_submodules,
)


Expand All @@ -77,6 +79,7 @@ async def parse_query(
from_web: bool,
include_patterns: Optional[Union[str, Set[str]]] = None,
ignore_patterns: Optional[Union[str, Set[str]]] = None,
include_submodules: bool = False,
) -> ParsedQuery:
"""
Parse the input source (URL or path) to extract relevant details for the query.
Expand All @@ -97,6 +100,8 @@ async def parse_query(
Patterns to include, by default None. Can be a set of strings or a single string.
ignore_patterns : Union[str, Set[str]], optional
Patterns to ignore, by default None. Can be a set of strings or a single string.
include_submodules : bool
Whether to include git submodules in the analysis. Defaults to False.

Returns
-------
Expand Down Expand Up @@ -139,6 +144,7 @@ async def parse_query(
max_file_size=max_file_size,
ignore_patterns=ignore_patterns_set,
include_patterns=parsed_include,
include_submodules=include_submodules,
)


Expand Down
8 changes: 8 additions & 0 deletions src/gitingest/repository_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ async def ingest_async(
include_patterns: Optional[Union[str, Set[str]]] = None,
exclude_patterns: Optional[Union[str, Set[str]]] = None,
branch: Optional[str] = None,
include_submodules: bool = False,
output: Optional[str] = None,
) -> Tuple[str, str, str]:
"""
Expand All @@ -39,6 +40,8 @@ async def ingest_async(
Pattern or set of patterns specifying which files to exclude. If `None`, no files are excluded.
branch : str, optional
The branch to clone and ingest. If `None`, the default branch is used.
include_submodules : bool
Whether to include git submodules in the analysis. Defaults to False.
output : str, optional
File path where the summary and content should be written. If `None`, the results are not written to a file.

Expand All @@ -64,6 +67,7 @@ async def ingest_async(
from_web=False,
include_patterns=include_patterns,
ignore_patterns=exclude_patterns,
include_submodules=include_submodules,
)

if parsed_query.url:
Expand Down Expand Up @@ -102,6 +106,7 @@ def ingest(
include_patterns: Optional[Union[str, Set[str]]] = None,
exclude_patterns: Optional[Union[str, Set[str]]] = None,
branch: Optional[str] = None,
include_submodules: bool = False,
output: Optional[str] = None,
) -> Tuple[str, str, str]:
"""
Expand All @@ -124,6 +129,8 @@ def ingest(
Pattern or set of patterns specifying which files to exclude. If `None`, no files are excluded.
branch : str, optional
The branch to clone and ingest. If `None`, the default branch is used.
include_submodules : bool
Whether to include git submodules in the analysis. Defaults to False.
output : str, optional
File path where the summary and content should be written. If `None`, the results are not written to a file.

Expand All @@ -146,6 +153,7 @@ def ingest(
include_patterns=include_patterns,
exclude_patterns=exclude_patterns,
branch=branch,
include_submodules=include_submodules,
output=output,
)
)
5 changes: 5 additions & 0 deletions src/server/query_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ async def process_query(
pattern_type: str = "exclude",
pattern: str = "",
is_index: bool = False,
include_submodules: bool = False,
) -> _TemplateResponse:
"""
Process a query by parsing input, cloning a repository, and generating a summary.
Expand All @@ -40,6 +41,8 @@ async def process_query(
Pattern to include or exclude in the query, depending on the pattern type.
is_index : bool
Flag indicating whether the request is for the index page (default is False).
include_submodules : bool
Flag indicating whether to include submodules in the query (default is False).

Returns
-------
Expand Down Expand Up @@ -71,6 +74,7 @@ async def process_query(
"default_file_size": slider_position,
"pattern_type": pattern_type,
"pattern": pattern,
"include_submodules": include_submodules,
}

try:
Expand All @@ -80,6 +84,7 @@ async def process_query(
from_web=True,
include_patterns=include_patterns,
ignore_patterns=exclude_patterns,
include_submodules=include_submodules,
)
if not parsed_query.url:
raise ValueError("The 'url' parameter is required.")
Expand Down
4 changes: 4 additions & 0 deletions src/server/routers/dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ async def process_catch_all(
max_file_size: int = Form(...),
pattern_type: str = Form(...),
pattern: str = Form(...),
include_submodules: bool = Form(...),
) -> HTMLResponse:
"""
Process the form submission with user input for query parameters.
Expand All @@ -69,6 +70,8 @@ async def process_catch_all(
The type of pattern used for the query, specified by the user.
pattern : str
The pattern string used in the query, specified by the user.
include_submodules : bool
The flag indicating whether to include submodules in the query, specified by the user.

Returns
-------
Expand All @@ -83,4 +86,5 @@ async def process_catch_all(
pattern_type,
pattern,
is_index=False,
include_submodules=include_submodules,
)
4 changes: 4 additions & 0 deletions src/server/routers/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ async def index_post(
max_file_size: int = Form(...),
pattern_type: str = Form(...),
pattern: str = Form(...),
include_submodules: bool = Form(...),
) -> HTMLResponse:
"""
Process the form submission with user input for query parameters.
Expand All @@ -67,6 +68,8 @@ async def index_post(
The type of pattern used for the query, specified by the user.
pattern : str
The pattern string used in the query, specified by the user.
include_submodules : bool
The flag indicating whether to include submodules in the query, specified by the user.

Returns
-------
Expand All @@ -81,4 +84,5 @@ async def index_post(
pattern_type,
pattern,
is_index=True,
include_submodules=include_submodules,
)
35 changes: 35 additions & 0 deletions src/server/templates/components/git_form.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@
element.classList.toggle('hover:text-gray-500');
});
}
/**
 * Mirror the checkbox state into the hidden "include_submodules" field(s).
 *
 * The template renders more than one hidden input with this name, so every
 * match is updated instead of only the first one `querySelector` would
 * return; otherwise the copies could disagree about the current state.
 *
 * @param {HTMLInputElement} checkbox - Checkbox whose `checked` state is copied.
 */
function updateSubmodulesValue(checkbox) {
    // Serialise the boolean as the literal string "true" or "false".
    const serialized = String(checkbox.checked);
    document.querySelectorAll('input[name="include_submodules"]').forEach((field) => {
        field.value = serialized;
    });
}
// Once the document has been parsed, set the visible checkbox from the value
// held by the first hidden "include_submodules" input.
document.addEventListener('DOMContentLoaded', () => {
    const submodulesField = document.querySelector('input[name="include_submodules"]');
    const submodulesToggle = document.getElementById("include_submodules");
    const shouldBeChecked = submodulesField.value === "true";
    submodulesToggle.checked = shouldBeChecked;
});
</script>
<div class="relative">
<div class="w-full h-full absolute inset-0 bg-gray-900 rounded-xl translate-y-2 translate-x-2"></div>
Expand Down Expand Up @@ -45,6 +54,7 @@
</div>
<input type="hidden" name="pattern_type" value="exclude">
<input type="hidden" name="pattern" value="">
<input type="hidden" name="include_submodules" value="false">
</form>
<div class="mt-4 relative z-20 flex flex-wrap gap-4 items-start">
<!-- Pattern selector -->
Expand Down Expand Up @@ -95,6 +105,31 @@
required
value="{{ default_file_size }}"
class="w-full h-3 bg-[#FAFAFA] bg-no-repeat bg-[length:50%_100%] bg-[#ebdbb7] appearance-none border-[3px] border-gray-900 rounded-sm focus:outline-none bg-gradient-to-r from-[#FE4A60] to-[#FE4A60] [&::-webkit-slider-thumb]:w-5 [&::-webkit-slider-thumb]:h-7 [&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:bg-white [&::-webkit-slider-thumb]:rounded-sm [&::-webkit-slider-thumb]:cursor-pointer [&::-webkit-slider-thumb]:border-solid [&::-webkit-slider-thumb]:border-[3px] [&::-webkit-slider-thumb]:border-gray-900 [&::-webkit-slider-thumb]:shadow-[3px_3px_0_#000] ">
<div class="mt-3 flex items-center">
<div class="relative inline-block">
<input type="checkbox"
id="include_submodules"
onchange="updateSubmodulesValue(this)"
class="peer absolute opacity-0 w-5 h-5 cursor-pointer z-20">
<div class="w-5 h-5 bg-gray-900 absolute translate-y-[3px] translate-x-[3px] rounded peer-checked:translate-y-[1px] peer-checked:translate-x-[1px] transition-transform duration-200">
</div>
<div class="w-5 h-5 border-[3px] border-gray-900 bg-[#fff4da] rounded relative peer-checked:bg-[#FE4A60] peer-focus:outline-none peer-checked:translate-y-[2px] peer-checked:translate-x-[2px] transition-all duration-200">
<svg class="w-full h-full text-white hidden peer-checked:block p-[2px]"
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="4"
stroke-linecap="round"
stroke-linejoin="round">
<polyline points="20 6 9 17 4 12"></polyline>
</svg>
</div>
</div>
<label for="include_submodules"
class="ml-2 text-gray-700 font-medium cursor-pointer select-none">Include submodules</label>
<input type="hidden" name="include_submodules" value="false">
</div>
</div>
</div>
{% if show_examples %}
Expand Down
6 changes: 6 additions & 0 deletions tests/test_flow_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ async def test_remote_repository_analysis(request):
"max_file_size": "243",
"pattern_type": "exclude",
"pattern": "",
"include_submodules": "false",
}

response = client.post("/", data=form_data)
Expand All @@ -82,6 +83,7 @@ async def test_invalid_repository_url(request):
"max_file_size": "243",
"pattern_type": "exclude",
"pattern": "",
"include_submodules": "false",
}

response = client.post("/", data=form_data)
Expand All @@ -98,6 +100,7 @@ async def test_large_repository(request):
"max_file_size": "243",
"pattern_type": "exclude",
"pattern": "",
"include_submodules": "false",
}

response = client.post("/", data=form_data)
Expand All @@ -116,6 +119,7 @@ def make_request():
"max_file_size": "243",
"pattern_type": "exclude",
"pattern": "",
"include_submodules": "false",
}
response = client.post("/", data=form_data)
assert response.status_code == 200, f"Request failed: {response.text}"
Expand All @@ -136,6 +140,7 @@ async def test_large_file_handling(request):
"max_file_size": "1",
"pattern_type": "exclude",
"pattern": "",
"include_submodules": "false",
}

response = client.post("/", data=form_data)
Expand All @@ -152,6 +157,7 @@ async def test_repository_with_patterns(request):
"max_file_size": "243",
"pattern_type": "include",
"pattern": "*.md",
"include_submodules": "false",
}

response = client.post("/", data=form_data)
Expand Down