Skip to content

Commit

Permalink
- Modify the process_files function to handle processing of a single file or a directory
Browse files Browse the repository at this point in the history

- Update the `should_process_file` function to simplify and clarify the criteria for processing a file
- Remove unnecessary comments from the code
- Improve code readability and maintainability
  • Loading branch information
raphaelmansuy committed Jun 28, 2024
1 parent 76db915 commit 30480bc
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 46 deletions.
98 changes: 77 additions & 21 deletions code2prompt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,78 @@
from code2prompt.process_files import process_files
from code2prompt.write_output import write_output

VERSION = "0.5.0" # Define the version of the CLI tool
VERSION = "0.5.0" # Define the version of the CLI tool


@click.command()
@click.version_option(VERSION, '-v', '--version', message='code2prompt version %(version)s')
@click.option("--path", "-p", type=click.Path(exists=True), required=True, help="Path to the directory to navigate.")
@click.option("--output", "-o", type=click.Path(), help="Name of the output Markdown file.")
@click.option("--gitignore", "-g", type=click.Path(exists=True), help="Path to the .gitignore file.")
@click.option("--filter", "-f", type=str, help='Comma-separated filter patterns to include files (e.g., "*.py,*.js").')
@click.option("--exclude", "-e", type=str, help='Comma-separated patterns to exclude files (e.g., "*.txt,*.md").')
@click.option("--case-sensitive", is_flag=True, help="Perform case-sensitive pattern matching.")
@click.option("--suppress-comments", "-s", is_flag=True, help="Strip comments from the code files.", default=False)
@click.option("--line-number", "-ln", is_flag=True, help="Add line numbers to source code blocks.", default=False)
@click.option("--no-codeblock", is_flag=True, help="Disable wrapping code inside markdown code blocks.")
@click.option("--template", "-t", type=click.Path(exists=True), help="Path to a Jinja2 template file for custom prompt generation.")
@click.option("--tokens", is_flag=True, help="Display the token count of the generated prompt.")
@click.option("--encoding", type=click.Choice(['cl100k_base', 'p50k_base', 'p50k_edit', 'r50k_base']),
default='cl100k_base', help="Specify the tokenizer encoding to use.")
@click.version_option(
VERSION, "-v", "--version", message="code2prompt version %(version)s"
)
@click.option(
"--path",
"-p",
type=click.Path(exists=True),
required=True,
help="Path to the directory or file to process.",
)
@click.option(
"--output", "-o", type=click.Path(), help="Name of the output Markdown file."
)
@click.option(
"--gitignore",
"-g",
type=click.Path(exists=True),
help="Path to the .gitignore file.",
)
@click.option(
"--filter",
"-f",
type=str,
help='Comma-separated filter patterns to include files (e.g., "*.py,*.js").',
)
@click.option(
"--exclude",
"-e",
type=str,
help='Comma-separated patterns to exclude files (e.g., "*.txt,*.md").',
)
@click.option(
"--case-sensitive", is_flag=True, help="Perform case-sensitive pattern matching."
)
@click.option(
"--suppress-comments",
"-s",
is_flag=True,
help="Strip comments from the code files.",
default=False,
)
@click.option(
"--line-number",
"-ln",
is_flag=True,
help="Add line numbers to source code blocks.",
default=False,
)
@click.option(
"--no-codeblock",
is_flag=True,
help="Disable wrapping code inside markdown code blocks.",
)
@click.option(
"--template",
"-t",
type=click.Path(exists=True),
help="Path to a Jinja2 template file for custom prompt generation.",
)
@click.option(
"--tokens", is_flag=True, help="Display the token count of the generated prompt."
)
@click.option(
"--encoding",
type=click.Choice(["cl100k_base", "p50k_base", "p50k_edit", "r50k_base"]),
default="cl100k_base",
help="Specify the tokenizer encoding to use.",
)
def create_markdown_file(**options):
"""
Creates a Markdown file based on the provided options.
Expand All @@ -41,13 +96,14 @@ def create_markdown_file(**options):
"""
files_data = process_files(options)
content = generate_content(files_data, options)
if options['tokens']:
token_count = count_tokens(content, options['encoding'])

if options["tokens"]:
token_count = count_tokens(content, options["encoding"])
click.echo(f"Token count: {token_count}")

write_output(content, options['output'])

write_output(content, options["output"])


if __name__ == "__main__":
# pylint: disable=no-value-for-parameter
create_markdown_file()
create_markdown_file()
35 changes: 20 additions & 15 deletions code2prompt/process_files.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,34 @@
from pathlib import Path
from code2prompt.get_gitignore_patterns import get_gitignore_patterns
from code2prompt.process_file import process_file
from code2prompt.should_process_file import should_process_file


from pathlib import Path


def process_files(options):
    """
    Processes a single file, or every entry beneath a directory, based on the provided path.

    Args:
        options (dict): A dictionary of options. The keys read here are 'path',
            'gitignore', 'suppress_comments', 'line_number' and 'no_codeblock';
            the whole dictionary is also forwarded to should_process_file.

    Returns:
        list: The truthy results returned by process_file, one per accepted file
            (documented upstream as dictionaries of processed file data).
    """
    path = Path(options['path'])

    # Decide once whether we were given a file or a directory. For a single
    # file, its parent directory acts as the root used for gitignore lookup
    # and for the relative comparisons done by should_process_file.
    if path.is_file():
        root_path = path.parent
        candidates = [path]
    else:
        root_path = path
        candidates = path.rglob("*")

    gitignore_patterns = get_gitignore_patterns(root_path, options['gitignore'])

    files_data = []
    for file_path in candidates:
        if not should_process_file(file_path, gitignore_patterns, root_path, options):
            continue
        result = process_file(
            file_path,
            options['suppress_comments'],
            options['line_number'],
            options['no_codeblock'],
        )
        # process_file may return a falsy value; such results are dropped.
        if result:
            files_data.append(result)

    return files_data
15 changes: 5 additions & 10 deletions code2prompt/should_process_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,18 @@
from code2prompt.utils.is_filtered import is_filtered
from code2prompt.utils.is_ignored import is_ignored


def should_process_file(file_path, gitignore_patterns, root_path, options):
"""
Determine whether a file should be processed based on several criteria.
Checks if the file is indeed a file, not ignored according to gitignore patterns,
matches the filter criteria, is not excluded, is case sensitive if specified,
and is not a binary file.
Args:
file_path (Path): The path to the file being considered.
gitignore_patterns (set): A set of patterns to ignore files.
root_path (Path): The root path of the project for relative comparisons.
options (dict): A dictionary of options including filter, exclude, and case sensitivity settings.
file_path (Path): The path to the file being considered.
gitignore_patterns (set): A set of patterns to ignore files.
root_path (Path): The root path of the project for relative comparisons.
options (dict): A dictionary of options including filter, exclude, and case sensitivity settings.
Returns:
bool: True if the file should be processed, False otherwise.
bool: True if the file should be processed, False otherwise.
"""
return (
file_path.is_file()
Expand Down

0 comments on commit 30480bc

Please sign in to comment.