Skip to content

Conversation

danclaytondev
Copy link
Contributor

@danclaytondev danclaytondev commented Jan 21, 2025

This PR adds a --line-numbers flag that includes line numbers in the output.

This is useful for prompting LLMs to make changes to files, or for asking where in a project certain bits of code might be.

I have found models to be very poor at counting which line some code is on (getting them to suggest a git diff is not easy), so this will hopefully help.

The output is in the same format as running cat -n, hence the use of -n as the shorthand CLI option.

A test is included.

@simonw simonw added the enhancement New feature or request label Feb 14, 2025
@simonw
Copy link
Owner

simonw commented Feb 14, 2025

This is really neat:

files-to-prompt . --line-numbers -c -e py tests
<documents>
<document index="1">
<source>./files_to_prompt/__init__.py</source>
<document_content>

</document_content>
</document>
<document index="2">
<source>./files_to_prompt/__main__.py</source>
<document_content>
1  from .cli import cli
2  
3  if __name__ == "__main__":
4      cli()
</document_content>
</document>
<document index="3">
<source>./files_to_prompt/cli.py</source>
<document_content>
  1  import os
  2  from fnmatch import fnmatch
  3  
  4  import click
  5  
  6  global_index = 1
  7  
  8  
  9  def should_ignore(path, gitignore_rules):
 10      for rule in gitignore_rules:
 11          if fnmatch(os.path.basename(path), rule):
 12              return True
 13          if os.path.isdir(path) and fnmatch(os.path.basename(path) + "/", rule):
 14              return True
 15      return False
 16  
 17  
 18  def read_gitignore(path):
 19      gitignore_path = os.path.join(path, ".gitignore")
 20      if os.path.isfile(gitignore_path):
 21          with open(gitignore_path, "r") as f:
 22              return [
 23                  line.strip() for line in f if line.strip() and not line.startswith("#")
 24              ]
 25      return []
 26  
 27  
 28  def add_line_numbers(content):
 29      lines = content.splitlines()
 30  
 31      padding = len(str(len(lines)))
 32  
 33      numbered_lines = [f"{i+1:{padding}}  {line}" for i, line in enumerate(lines)]
 34      return "\n".join(numbered_lines)
 35  
 36  
 37  def print_path(writer, path, content, xml, line_numbers):
 38      if xml:
 39          print_as_xml(writer, path, content, line_numbers)
 40      else:
 41          print_default(writer, path, content, line_numbers)
 42  
 43  
 44  def print_default(writer, path, content, line_numbers):
 45      writer(path)
 46      writer("---")
 47      if line_numbers:
 48          content = add_line_numbers(content)
 49      writer(content)
 50      writer("")
 51      writer("---")
 52  
 53  
 54  def print_as_xml(writer, path, content, line_numbers):
 55      global global_index
 56      writer(f'<document index="{global_index}">')
 57      writer(f"<source>{path}</source>")
 58      writer("<document_content>")
 59      if line_numbers:
 60          content = add_line_numbers(content)
 61      writer(content)
 62      writer("</document_content>")
 63      writer("</document>")
 64      global_index += 1
 65  
 66  
 67  def process_path(
 68      path,
 69      extensions,
 70      include_hidden,
 71      ignore_gitignore,
 72      gitignore_rules,
 73      ignore_patterns,
 74      writer,
 75      claude_xml,
 76      line_numbers=False,
 77  ):
 78      if os.path.isfile(path):
 79          try:
 80              with open(path, "r") as f:
 81                  print_path(writer, path, f.read(), claude_xml, line_numbers)
 82          except UnicodeDecodeError:
 83              warning_message = f"Warning: Skipping file {path} due to UnicodeDecodeError"
 84              click.echo(click.style(warning_message, fg="red"), err=True)
 85      elif os.path.isdir(path):
 86          for root, dirs, files in os.walk(path):
 87              if not include_hidden:
 88                  dirs[:] = [d for d in dirs if not d.startswith(".")]
 89                  files = [f for f in files if not f.startswith(".")]
 90  
 91              if not ignore_gitignore:
 92                  gitignore_rules.extend(read_gitignore(root))
 93                  dirs[:] = [
 94                      d
 95                      for d in dirs
 96                      if not should_ignore(os.path.join(root, d), gitignore_rules)
 97                  ]
 98                  files = [
 99                      f
100                      for f in files
101                      if not should_ignore(os.path.join(root, f), gitignore_rules)
102                  ]
103  
104              if ignore_patterns:
105                  files = [
106                      f
107                      for f in files
108                      if not any(fnmatch(f, pattern) for pattern in ignore_patterns)
109                  ]
110  
111              if extensions:
112                  files = [f for f in files if f.endswith(extensions)]
113  
114              for file in sorted(files):
115                  file_path = os.path.join(root, file)
116                  try:
117                      with open(file_path, "r") as f:
118                          print_path(
119                              writer, file_path, f.read(), claude_xml, line_numbers
120                          )
121                  except UnicodeDecodeError:
122                      warning_message = (
123                          f"Warning: Skipping file {file_path} due to UnicodeDecodeError"
124                      )
125                      click.echo(click.style(warning_message, fg="red"), err=True)
126  
127  
128  @click.command()
129  @click.argument("paths", nargs=-1, type=click.Path(exists=True))
130  @click.option("extensions", "-e", "--extension", multiple=True)
131  @click.option(
132      "--include-hidden",
133      is_flag=True,
134      help="Include files and folders starting with .",
135  )
136  @click.option(
137      "--ignore-gitignore",
138      is_flag=True,
139      help="Ignore .gitignore files and include all files",
140  )
141  @click.option(
142      "ignore_patterns",
143      "--ignore",
144      multiple=True,
145      default=[],
146      help="List of patterns to ignore",
147  )
148  @click.option(
149      "output_file",
150      "-o",
151      "--output",
152      type=click.Path(writable=True),
153      help="Output to a file instead of stdout",
154  )
155  @click.option(
156      "claude_xml",
157      "-c",
158      "--cxml",
159      is_flag=True,
160      help="Output in XML-ish format suitable for Claude's long context window.",
161  )
162  @click.option(
163      "line_numbers",
164      "-n",
165      "--line-numbers",
166      is_flag=True,
167      help="Add line numbers to the output",
168  )
169  @click.version_option()
170  def cli(
171      paths,
172      extensions,
173      include_hidden,
174      ignore_gitignore,
175      ignore_patterns,
176      output_file,
177      claude_xml,
178      line_numbers,
179  ):
180      """
181      Takes one or more paths to files or directories and outputs every file,
182      recursively, each one preceded with its filename like this:
183  
184      path/to/file.py
185      ----
186      Contents of file.py goes here
187  
188      ---
189      path/to/file2.py
190      ---
191      ...
192  
193      If the `--cxml` flag is provided, the output will be structured as follows:
194  
195      <documents>
196      <document path="path/to/file1.txt">
197      Contents of file1.txt
198      </document>
199  
200      <document path="path/to/file2.txt">
201      Contents of file2.txt
202      </document>
203      ...
204      </documents>
205      """
206      # Reset global_index for pytest
207      global global_index
208      global_index = 1
209      gitignore_rules = []
210      writer = click.echo
211      fp = None
212      if output_file:
213          fp = open(output_file, "w")
214          writer = lambda s: print(s, file=fp)
215      for path in paths:
216          if not os.path.exists(path):
217              raise click.BadArgumentUsage(f"Path does not exist: {path}")
218          if not ignore_gitignore:
219              gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
220          if claude_xml and path == paths[0]:
221              writer("<documents>")
222          process_path(
223              path,
224              extensions,
225              include_hidden,
226              ignore_gitignore,
227              gitignore_rules,
228              ignore_patterns,
229              writer,
230              claude_xml,
231              line_numbers,
232          )
233      if claude_xml:
234          writer("</documents>")
235      if fp:
236          fp.close()
</document_content>
</document>
<document index="4">
<source>./tests/test_files_to_prompt.py</source>
<document_content>
  1  import os
  2  import pytest
  3  
  4  from click.testing import CliRunner
  5  
  6  from files_to_prompt.cli import cli
  7  
  8  
  9  def test_basic_functionality(tmpdir):
 10      runner = CliRunner()
 11      with tmpdir.as_cwd():
 12          os.makedirs("test_dir")
 13          with open("test_dir/file1.txt", "w") as f:
 14              f.write("Contents of file1")
 15          with open("test_dir/file2.txt", "w") as f:
 16              f.write("Contents of file2")
 17  
 18          result = runner.invoke(cli, ["test_dir"])
 19          assert result.exit_code == 0
 20          assert "test_dir/file1.txt" in result.output
 21          assert "Contents of file1" in result.output
 22          assert "test_dir/file2.txt" in result.output
 23          assert "Contents of file2" in result.output
 24  
 25  
 26  def test_include_hidden(tmpdir):
 27      runner = CliRunner()
 28      with tmpdir.as_cwd():
 29          os.makedirs("test_dir")
 30          with open("test_dir/.hidden.txt", "w") as f:
 31              f.write("Contents of hidden file")
 32  
 33          result = runner.invoke(cli, ["test_dir"])
 34          assert result.exit_code == 0
 35          assert "test_dir/.hidden.txt" not in result.output
 36  
 37          result = runner.invoke(cli, ["test_dir", "--include-hidden"])
 38          assert result.exit_code == 0
 39          assert "test_dir/.hidden.txt" in result.output
 40          assert "Contents of hidden file" in result.output
 41  
 42  
 43  def test_ignore_gitignore(tmpdir):
 44      runner = CliRunner()
 45      with tmpdir.as_cwd():
 46          os.makedirs("test_dir")
 47          with open("test_dir/.gitignore", "w") as f:
 48              f.write("ignored.txt")
 49          with open("test_dir/ignored.txt", "w") as f:
 50              f.write("This file should be ignored")
 51          with open("test_dir/included.txt", "w") as f:
 52              f.write("This file should be included")
 53  
 54          result = runner.invoke(cli, ["test_dir"])
 55          assert result.exit_code == 0
 56          assert "test_dir/ignored.txt" not in result.output
 57          assert "test_dir/included.txt" in result.output
 58  
 59          result = runner.invoke(cli, ["test_dir", "--ignore-gitignore"])
 60          assert result.exit_code == 0
 61          assert "test_dir/ignored.txt" in result.output
 62          assert "This file should be ignored" in result.output
 63          assert "test_dir/included.txt" in result.output
 64  
 65  
 66  def test_multiple_paths(tmpdir):
 67      runner = CliRunner()
 68      with tmpdir.as_cwd():
 69          os.makedirs("test_dir1")
 70          with open("test_dir1/file1.txt", "w") as f:
 71              f.write("Contents of file1")
 72          os.makedirs("test_dir2")
 73          with open("test_dir2/file2.txt", "w") as f:
 74              f.write("Contents of file2")
 75          with open("single_file.txt", "w") as f:
 76              f.write("Contents of single file")
 77  
 78          result = runner.invoke(cli, ["test_dir1", "test_dir2", "single_file.txt"])
 79          assert result.exit_code == 0
 80          assert "test_dir1/file1.txt" in result.output
 81          assert "Contents of file1" in result.output
 82          assert "test_dir2/file2.txt" in result.output
 83          assert "Contents of file2" in result.output
 84          assert "single_file.txt" in result.output
 85          assert "Contents of single file" in result.output
 86  
 87  
 88  def test_ignore_patterns(tmpdir):
 89      runner = CliRunner()
 90      with tmpdir.as_cwd():
 91          os.makedirs("test_dir")
 92          with open("test_dir/file_to_ignore.txt", "w") as f:
 93              f.write("This file should be ignored due to ignore patterns")
 94          with open("test_dir/file_to_include.txt", "w") as f:
 95              f.write("This file should be included")
 96  
 97          result = runner.invoke(cli, ["test_dir", "--ignore", "*.txt"])
 98          assert result.exit_code == 0
 99          assert "test_dir/file_to_ignore.txt" not in result.output
100          assert "This file should be ignored due to ignore patterns" not in result.output
101          assert "test_dir/file_to_include.txt" not in result.output
102  
103          result = runner.invoke(cli, ["test_dir", "--ignore", "file_to_ignore.*"])
104          assert result.exit_code == 0
105          assert "test_dir/file_to_ignore.txt" not in result.output
106          assert "This file should be ignored due to ignore patterns" not in result.output
107          assert "test_dir/file_to_include.txt" in result.output
108          assert "This file should be included" in result.output
109  
110  
111  def test_specific_extensions(tmpdir):
112      runner = CliRunner()
113      with tmpdir.as_cwd():
114          # Write one.txt one.py two/two.txt two/two.py three.md
115          os.makedirs("test_dir/two")
116          with open("test_dir/one.txt", "w") as f:
117              f.write("This is one.txt")
118          with open("test_dir/one.py", "w") as f:
119              f.write("This is one.py")
120          with open("test_dir/two/two.txt", "w") as f:
121              f.write("This is two/two.txt")
122          with open("test_dir/two/two.py", "w") as f:
123              f.write("This is two/two.py")
124          with open("test_dir/three.md", "w") as f:
125              f.write("This is three.md")
126  
127          # Try with -e py -e md
128          result = runner.invoke(cli, ["test_dir", "-e", "py", "-e", "md"])
129          assert result.exit_code == 0
130          assert ".txt" not in result.output
131          assert "test_dir/one.py" in result.output
132          assert "test_dir/two/two.py" in result.output
133          assert "test_dir/three.md" in result.output
134  
135  
136  def test_mixed_paths_with_options(tmpdir):
137      runner = CliRunner()
138      with tmpdir.as_cwd():
139          os.makedirs("test_dir")
140          with open("test_dir/.gitignore", "w") as f:
141              f.write("ignored_in_gitignore.txt\n.hidden_ignored_in_gitignore.txt")
142          with open("test_dir/ignored_in_gitignore.txt", "w") as f:
143              f.write("This file should be ignored by .gitignore")
144          with open("test_dir/.hidden_ignored_in_gitignore.txt", "w") as f:
145              f.write("This hidden file should be ignored by .gitignore")
146          with open("test_dir/included.txt", "w") as f:
147              f.write("This file should be included")
148          with open("test_dir/.hidden_included.txt", "w") as f:
149              f.write("This hidden file should be included")
150          with open("single_file.txt", "w") as f:
151              f.write("Contents of single file")
152  
153          result = runner.invoke(cli, ["test_dir", "single_file.txt"])
154          assert result.exit_code == 0
155          assert "test_dir/ignored_in_gitignore.txt" not in result.output
156          assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output
157          assert "test_dir/included.txt" in result.output
158          assert "test_dir/.hidden_included.txt" not in result.output
159          assert "single_file.txt" in result.output
160          assert "Contents of single file" in result.output
161  
162          result = runner.invoke(cli, ["test_dir", "single_file.txt", "--include-hidden"])
163          assert result.exit_code == 0
164          assert "test_dir/ignored_in_gitignore.txt" not in result.output
165          assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output
166          assert "test_dir/included.txt" in result.output
167          assert "test_dir/.hidden_included.txt" in result.output
168          assert "single_file.txt" in result.output
169          assert "Contents of single file" in result.output
170  
171          result = runner.invoke(
172              cli, ["test_dir", "single_file.txt", "--ignore-gitignore"]
173          )
174          assert result.exit_code == 0
175          assert "test_dir/ignored_in_gitignore.txt" in result.output
176          assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output
177          assert "test_dir/included.txt" in result.output
178          assert "test_dir/.hidden_included.txt" not in result.output
179          assert "single_file.txt" in result.output
180          assert "Contents of single file" in result.output
181  
182          result = runner.invoke(
183              cli,
184              ["test_dir", "single_file.txt", "--ignore-gitignore", "--include-hidden"],
185          )
186          assert result.exit_code == 0
187          assert "test_dir/ignored_in_gitignore.txt" in result.output
188          assert "test_dir/.hidden_ignored_in_gitignore.txt" in result.output
189          assert "test_dir/included.txt" in result.output
190          assert "test_dir/.hidden_included.txt" in result.output
191          assert "single_file.txt" in result.output
192          assert "Contents of single file" in result.output
193  
194  
195  def test_binary_file_warning(tmpdir):
196      runner = CliRunner(mix_stderr=False)
197      with tmpdir.as_cwd():
198          os.makedirs("test_dir")
199          with open("test_dir/binary_file.bin", "wb") as f:
200              f.write(b"\xff")
201          with open("test_dir/text_file.txt", "w") as f:
202              f.write("This is a text file")
203  
204          result = runner.invoke(cli, ["test_dir"])
205          assert result.exit_code == 0
206  
207          stdout = result.stdout
208          stderr = result.stderr
209  
210          assert "test_dir/text_file.txt" in stdout
211          assert "This is a text file" in stdout
212          assert "\ntest_dir/binary_file.bin" not in stdout
213          assert (
214              "Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError"
215              in stderr
216          )
217  
218  
219  @pytest.mark.parametrize(
220      "args", (["test_dir"], ["test_dir/file1.txt", "test_dir/file2.txt"])
221  )
222  def test_xml_format_dir(tmpdir, args):
223      runner = CliRunner()
224      with tmpdir.as_cwd():
225          os.makedirs("test_dir")
226          with open("test_dir/file1.txt", "w") as f:
227              f.write("Contents of file1.txt")
228          with open("test_dir/file2.txt", "w") as f:
229              f.write("Contents of file2.txt")
230          result = runner.invoke(cli, args + ["--cxml"])
231          assert result.exit_code == 0
232          actual = result.output
233          expected = """
234  <documents>
235  <document index="1">
236  <source>test_dir/file1.txt</source>
237  <document_content>
238  Contents of file1.txt
239  </document_content>
240  </document>
241  <document index="2">
242  <source>test_dir/file2.txt</source>
243  <document_content>
244  Contents of file2.txt
245  </document_content>
246  </document>
247  </documents>
248  """
249          assert expected.strip() == actual.strip()
250  
251  
252  @pytest.mark.parametrize("arg", ("-o", "--output"))
253  def test_output_option(tmpdir, arg):
254      runner = CliRunner()
255      with tmpdir.as_cwd():
256          os.makedirs("test_dir")
257          with open("test_dir/file1.txt", "w") as f:
258              f.write("Contents of file1.txt")
259          with open("test_dir/file2.txt", "w") as f:
260              f.write("Contents of file2.txt")
261          output_file = "output.txt"
262          result = runner.invoke(
263              cli, ["test_dir", arg, output_file], catch_exceptions=False
264          )
265          assert result.exit_code == 0
266          assert not result.output
267          with open(output_file, "r") as f:
268              actual = f.read()
269          expected = """
270  test_dir/file1.txt
271  ---
272  Contents of file1.txt
273  
274  ---
275  test_dir/file2.txt
276  ---
277  Contents of file2.txt
278  
279  ---
280  """
281          assert expected.strip() == actual.strip()
282  
283  
284  def test_line_numbers(tmpdir):
285      runner = CliRunner()
286      with tmpdir.as_cwd():
287          os.makedirs("test_dir")
288          test_content = "First line\nSecond line\nThird line\nFourth line\n"
289          with open("test_dir/multiline.txt", "w") as f:
290              f.write(test_content)
291  
292          result = runner.invoke(cli, ["test_dir"])
293          assert result.exit_code == 0
294          assert "1  First line" not in result.output
295          assert test_content in result.output
296  
297          result = runner.invoke(cli, ["test_dir", "-n"])
298          assert result.exit_code == 0
299          assert "1  First line" in result.output
300          assert "2  Second line" in result.output
301          assert "3  Third line" in result.output
302          assert "4  Fourth line" in result.output
303  
304          result = runner.invoke(cli, ["test_dir", "--line-numbers"])
305          assert result.exit_code == 0
306          assert "1  First line" in result.output
307          assert "2  Second line" in result.output
308          assert "3  Third line" in result.output
309          assert "4  Fourth line" in result.output
</document_content>
</document>
<document index="5">
<source>tests/test_files_to_prompt.py</source>
<document_content>
  1  import os
  2  import pytest
  3  
  4  from click.testing import CliRunner
  5  
  6  from files_to_prompt.cli import cli
  7  
  8  
  9  def test_basic_functionality(tmpdir):
 10      runner = CliRunner()
 11      with tmpdir.as_cwd():
 12          os.makedirs("test_dir")
 13          with open("test_dir/file1.txt", "w") as f:
 14              f.write("Contents of file1")
 15          with open("test_dir/file2.txt", "w") as f:
 16              f.write("Contents of file2")
 17  
 18          result = runner.invoke(cli, ["test_dir"])
 19          assert result.exit_code == 0
 20          assert "test_dir/file1.txt" in result.output
 21          assert "Contents of file1" in result.output
 22          assert "test_dir/file2.txt" in result.output
 23          assert "Contents of file2" in result.output
 24  
 25  
 26  def test_include_hidden(tmpdir):
 27      runner = CliRunner()
 28      with tmpdir.as_cwd():
 29          os.makedirs("test_dir")
 30          with open("test_dir/.hidden.txt", "w") as f:
 31              f.write("Contents of hidden file")
 32  
 33          result = runner.invoke(cli, ["test_dir"])
 34          assert result.exit_code == 0
 35          assert "test_dir/.hidden.txt" not in result.output
 36  
 37          result = runner.invoke(cli, ["test_dir", "--include-hidden"])
 38          assert result.exit_code == 0
 39          assert "test_dir/.hidden.txt" in result.output
 40          assert "Contents of hidden file" in result.output
 41  
 42  
 43  def test_ignore_gitignore(tmpdir):
 44      runner = CliRunner()
 45      with tmpdir.as_cwd():
 46          os.makedirs("test_dir")
 47          with open("test_dir/.gitignore", "w") as f:
 48              f.write("ignored.txt")
 49          with open("test_dir/ignored.txt", "w") as f:
 50              f.write("This file should be ignored")
 51          with open("test_dir/included.txt", "w") as f:
 52              f.write("This file should be included")
 53  
 54          result = runner.invoke(cli, ["test_dir"])
 55          assert result.exit_code == 0
 56          assert "test_dir/ignored.txt" not in result.output
 57          assert "test_dir/included.txt" in result.output
 58  
 59          result = runner.invoke(cli, ["test_dir", "--ignore-gitignore"])
 60          assert result.exit_code == 0
 61          assert "test_dir/ignored.txt" in result.output
 62          assert "This file should be ignored" in result.output
 63          assert "test_dir/included.txt" in result.output
 64  
 65  
 66  def test_multiple_paths(tmpdir):
 67      runner = CliRunner()
 68      with tmpdir.as_cwd():
 69          os.makedirs("test_dir1")
 70          with open("test_dir1/file1.txt", "w") as f:
 71              f.write("Contents of file1")
 72          os.makedirs("test_dir2")
 73          with open("test_dir2/file2.txt", "w") as f:
 74              f.write("Contents of file2")
 75          with open("single_file.txt", "w") as f:
 76              f.write("Contents of single file")
 77  
 78          result = runner.invoke(cli, ["test_dir1", "test_dir2", "single_file.txt"])
 79          assert result.exit_code == 0
 80          assert "test_dir1/file1.txt" in result.output
 81          assert "Contents of file1" in result.output
 82          assert "test_dir2/file2.txt" in result.output
 83          assert "Contents of file2" in result.output
 84          assert "single_file.txt" in result.output
 85          assert "Contents of single file" in result.output
 86  
 87  
 88  def test_ignore_patterns(tmpdir):
 89      runner = CliRunner()
 90      with tmpdir.as_cwd():
 91          os.makedirs("test_dir")
 92          with open("test_dir/file_to_ignore.txt", "w") as f:
 93              f.write("This file should be ignored due to ignore patterns")
 94          with open("test_dir/file_to_include.txt", "w") as f:
 95              f.write("This file should be included")
 96  
 97          result = runner.invoke(cli, ["test_dir", "--ignore", "*.txt"])
 98          assert result.exit_code == 0
 99          assert "test_dir/file_to_ignore.txt" not in result.output
100          assert "This file should be ignored due to ignore patterns" not in result.output
101          assert "test_dir/file_to_include.txt" not in result.output
102  
103          result = runner.invoke(cli, ["test_dir", "--ignore", "file_to_ignore.*"])
104          assert result.exit_code == 0
105          assert "test_dir/file_to_ignore.txt" not in result.output
106          assert "This file should be ignored due to ignore patterns" not in result.output
107          assert "test_dir/file_to_include.txt" in result.output
108          assert "This file should be included" in result.output
109  
110  
111  def test_specific_extensions(tmpdir):
112      runner = CliRunner()
113      with tmpdir.as_cwd():
114          # Write one.txt one.py two/two.txt two/two.py three.md
115          os.makedirs("test_dir/two")
116          with open("test_dir/one.txt", "w") as f:
117              f.write("This is one.txt")
118          with open("test_dir/one.py", "w") as f:
119              f.write("This is one.py")
120          with open("test_dir/two/two.txt", "w") as f:
121              f.write("This is two/two.txt")
122          with open("test_dir/two/two.py", "w") as f:
123              f.write("This is two/two.py")
124          with open("test_dir/three.md", "w") as f:
125              f.write("This is three.md")
126  
127          # Try with -e py -e md
128          result = runner.invoke(cli, ["test_dir", "-e", "py", "-e", "md"])
129          assert result.exit_code == 0
130          assert ".txt" not in result.output
131          assert "test_dir/one.py" in result.output
132          assert "test_dir/two/two.py" in result.output
133          assert "test_dir/three.md" in result.output
134  
135  
136  def test_mixed_paths_with_options(tmpdir):
137      runner = CliRunner()
138      with tmpdir.as_cwd():
139          os.makedirs("test_dir")
140          with open("test_dir/.gitignore", "w") as f:
141              f.write("ignored_in_gitignore.txt\n.hidden_ignored_in_gitignore.txt")
142          with open("test_dir/ignored_in_gitignore.txt", "w") as f:
143              f.write("This file should be ignored by .gitignore")
144          with open("test_dir/.hidden_ignored_in_gitignore.txt", "w") as f:
145              f.write("This hidden file should be ignored by .gitignore")
146          with open("test_dir/included.txt", "w") as f:
147              f.write("This file should be included")
148          with open("test_dir/.hidden_included.txt", "w") as f:
149              f.write("This hidden file should be included")
150          with open("single_file.txt", "w") as f:
151              f.write("Contents of single file")
152  
153          result = runner.invoke(cli, ["test_dir", "single_file.txt"])
154          assert result.exit_code == 0
155          assert "test_dir/ignored_in_gitignore.txt" not in result.output
156          assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output
157          assert "test_dir/included.txt" in result.output
158          assert "test_dir/.hidden_included.txt" not in result.output
159          assert "single_file.txt" in result.output
160          assert "Contents of single file" in result.output
161  
162          result = runner.invoke(cli, ["test_dir", "single_file.txt", "--include-hidden"])
163          assert result.exit_code == 0
164          assert "test_dir/ignored_in_gitignore.txt" not in result.output
165          assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output
166          assert "test_dir/included.txt" in result.output
167          assert "test_dir/.hidden_included.txt" in result.output
168          assert "single_file.txt" in result.output
169          assert "Contents of single file" in result.output
170  
171          result = runner.invoke(
172              cli, ["test_dir", "single_file.txt", "--ignore-gitignore"]
173          )
174          assert result.exit_code == 0
175          assert "test_dir/ignored_in_gitignore.txt" in result.output
176          assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output
177          assert "test_dir/included.txt" in result.output
178          assert "test_dir/.hidden_included.txt" not in result.output
179          assert "single_file.txt" in result.output
180          assert "Contents of single file" in result.output
181  
182          result = runner.invoke(
183              cli,
184              ["test_dir", "single_file.txt", "--ignore-gitignore", "--include-hidden"],
185          )
186          assert result.exit_code == 0
187          assert "test_dir/ignored_in_gitignore.txt" in result.output
188          assert "test_dir/.hidden_ignored_in_gitignore.txt" in result.output
189          assert "test_dir/included.txt" in result.output
190          assert "test_dir/.hidden_included.txt" in result.output
191          assert "single_file.txt" in result.output
192          assert "Contents of single file" in result.output
193  
194  
195  def test_binary_file_warning(tmpdir):
196      runner = CliRunner(mix_stderr=False)
197      with tmpdir.as_cwd():
198          os.makedirs("test_dir")
199          with open("test_dir/binary_file.bin", "wb") as f:
200              f.write(b"\xff")
201          with open("test_dir/text_file.txt", "w") as f:
202              f.write("This is a text file")
203  
204          result = runner.invoke(cli, ["test_dir"])
205          assert result.exit_code == 0
206  
207          stdout = result.stdout
208          stderr = result.stderr
209  
210          assert "test_dir/text_file.txt" in stdout
211          assert "This is a text file" in stdout
212          assert "\ntest_dir/binary_file.bin" not in stdout
213          assert (
214              "Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError"
215              in stderr
216          )
217  
218  
219  @pytest.mark.parametrize(
220      "args", (["test_dir"], ["test_dir/file1.txt", "test_dir/file2.txt"])
221  )
222  def test_xml_format_dir(tmpdir, args):
223      runner = CliRunner()
224      with tmpdir.as_cwd():
225          os.makedirs("test_dir")
226          with open("test_dir/file1.txt", "w") as f:
227              f.write("Contents of file1.txt")
228          with open("test_dir/file2.txt", "w") as f:
229              f.write("Contents of file2.txt")
230          result = runner.invoke(cli, args + ["--cxml"])
231          assert result.exit_code == 0
232          actual = result.output
233          expected = """
234  <documents>
235  <document index="1">
236  <source>test_dir/file1.txt</source>
237  <document_content>
238  Contents of file1.txt
239  </document_content>
240  </document>
241  <document index="2">
242  <source>test_dir/file2.txt</source>
243  <document_content>
244  Contents of file2.txt
245  </document_content>
246  </document>
247  </documents>
248  """
249          assert expected.strip() == actual.strip()
250  
251  
252  @pytest.mark.parametrize("arg", ("-o", "--output"))
253  def test_output_option(tmpdir, arg):
254      runner = CliRunner()
255      with tmpdir.as_cwd():
256          os.makedirs("test_dir")
257          with open("test_dir/file1.txt", "w") as f:
258              f.write("Contents of file1.txt")
259          with open("test_dir/file2.txt", "w") as f:
260              f.write("Contents of file2.txt")
261          output_file = "output.txt"
262          result = runner.invoke(
263              cli, ["test_dir", arg, output_file], catch_exceptions=False
264          )
265          assert result.exit_code == 0
266          assert not result.output
267          with open(output_file, "r") as f:
268              actual = f.read()
269          expected = """
270  test_dir/file1.txt
271  ---
272  Contents of file1.txt
273  
274  ---
275  test_dir/file2.txt
276  ---
277  Contents of file2.txt
278  
279  ---
280  """
281          assert expected.strip() == actual.strip()
282  
283  
284  def test_line_numbers(tmpdir):
285      runner = CliRunner()
286      with tmpdir.as_cwd():
287          os.makedirs("test_dir")
288          test_content = "First line\nSecond line\nThird line\nFourth line\n"
289          with open("test_dir/multiline.txt", "w") as f:
290              f.write(test_content)
291  
292          result = runner.invoke(cli, ["test_dir"])
293          assert result.exit_code == 0
294          assert "1  First line" not in result.output
295          assert test_content in result.output
296  
297          result = runner.invoke(cli, ["test_dir", "-n"])
298          assert result.exit_code == 0
299          assert "1  First line" in result.output
300          assert "2  Second line" in result.output
301          assert "3  Third line" in result.output
302          assert "4  Fourth line" in result.output
303  
304          result = runner.invoke(cli, ["test_dir", "--line-numbers"])
305          assert result.exit_code == 0
306          assert "1  First line" in result.output
307          assert "2  Second line" in result.output
308          assert "3  Third line" in result.output
309          assert "4  Fourth line" in result.output
</document_content>
</document>
</documents>

@simonw simonw merged commit bc05005 into simonw:main Feb 14, 2025
5 checks passed
simonw added a commit that referenced this pull request Feb 14, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants