Skip to content

Commit 029d3df

Browse files
Merge pull request #2 from andreimerlescu/develop
Added python related nonsense to be ignored when summarizing stuff
2 parents 3a6f819 + a6b893f commit 029d3df

File tree

8 files changed

+829
-170
lines changed

8 files changed

+829
-170
lines changed

.github/workflows/test-summarize.yml

Lines changed: 3 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
strategy:
1717
matrix:
1818
os: [Ubuntu-latest, macOS-latest]
19-
go-version: ['1.23.8', '1.24.2']
19+
go-version: ['1.24.0']
2020
fail-fast: false # Continue testing all combinations even if one fails
2121

2222
steps:
@@ -47,13 +47,12 @@ jobs:
4747
run: |
4848
mkdir -p ${{ github.workspace }}/anotherProject
4949
echo -e "package main\n\nfunc main() {\n println(\"Hello, World!\")\n}" > ${{ github.workspace }}/anotherProject/hello.go
50-
mkdir -p ${{ github.workspace }}/summaries
5150
shell: bash
5251

5352
- name: Test 1 Step 5 Run summarize with command-line arguments
5453
run: |
55-
cd ${{ github.workspace }}/anotherProject
56-
${{ github.workspace }}/summarize -d . -o ${{ github.workspace }}/summaries
54+
cd ${{ github.workspace }}
55+
${{ github.workspace }}/summarize -d anotherProject -o ${{ github.workspace }}/summaries
5756
ls -lh ${{ github.workspace }}/summaries/
5857
shell: bash
5958

@@ -90,49 +89,3 @@ jobs:
9089
echo "Contents of $SUMMARY_FILE:"
9190
cat "$SUMMARY_FILE"
9291
shell: bash
93-
94-
# Step 9: Verify the summary contains the hello.go source code
95-
- name: Test 1 Step 9 Verify summary contains hello.go source code (command-line usage)
96-
run: |
97-
SUMMARY_FILE="${{ steps.find-summary-cli.outputs.summary_file }}"
98-
RANDOM_FILE="hello.go"
99-
RANDOM_FILE_ABS="${{ github.workspace }}/anotherProject/hello.go"
100-
echo "Checking if $SUMMARY_FILE contains the source code of $RANDOM_FILE"
101-
102-
SECTION_START=$(grep -n "^## $RANDOM_FILE$" "$SUMMARY_FILE" | cut -d: -f1)
103-
if [ -z "$SECTION_START" ]; then
104-
echo "Error: Could not find section for $RANDOM_FILE in $SUMMARY_FILE"
105-
echo "Listing all section headers in $SUMMARY_FILE:"
106-
grep "^## " "$SUMMARY_FILE"
107-
exit 1
108-
fi
109-
110-
CODE_START=$((SECTION_START + 3))
111-
CODE_END=$(tail -n +$CODE_START "$SUMMARY_FILE" | grep -n "^\`\`\`$" | head -n 1 | cut -d: -f1 || true)
112-
if [ -z "$CODE_END" ] || [ "$CODE_END" -eq 0 ]; then
113-
echo "Error: Could not find code block end for $RANDOM_FILE in $SUMMARY_FILE"
114-
echo "Dumping lines after section start for debugging (up to 20 lines):"
115-
tail -n +$SECTION_START "$SUMMARY_FILE" | head -n 20
116-
exit 1
117-
fi
118-
CODE_LINES=$((CODE_END - 1))
119-
if [ $CODE_LINES -le 0 ]; then
120-
echo "Error: Invalid code block length ($CODE_LINES lines) for $RANDOM_FILE"
121-
echo "Dumping lines after section start for debugging (up to 20 lines):"
122-
tail -n +$SECTION_START "$SUMMARY_FILE" | head -n 20
123-
exit 1
124-
fi
125-
tail -n +$CODE_START "$SUMMARY_FILE" > temp_code_block.txt
126-
head -n $CODE_LINES temp_code_block.txt > extracted_code.txt
127-
rm temp_code_block.txt
128-
129-
cat "$RANDOM_FILE_ABS" > original_code.txt
130-
131-
diff -wB extracted_code.txt original_code.txt > diff_output.txt
132-
if [ $? -ne 0 ]; then
133-
echo "Error: The source code in the summary does not match the original file"
134-
cat diff_output.txt
135-
exit 1
136-
fi
137-
echo "Success: The source code of $RANDOM_FILE in $SUMMARY_FILE matches the original file"
138-
shell: bash

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ summarize
22
.idea
33
.DS_Store
44
*.log
5-
summaries/
5+
summaries/
6+
bin/

Makefile

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Generic Makefile for Any Go Project (Lines 1-65)
2+
MAIN_PATH=.
3+
APP_NAME := $(shell basename "$(shell realpath $(MAIN_PATH))")
4+
BIN_DIR=bin
5+
6+
# Go build flags
7+
# -s: Strip symbols (reduces binary size)
8+
# -w: Omit DWARF debugging information
9+
LDFLAGS=-ldflags "-s -w"
10+
11+
.PHONY: all clean summary install darwin-amd64 darwin-arm64 linux-amd64 linux-arm64 windows-amd64
12+
13+
# Create build directory if it doesn't exist
14+
$(BIN_DIR):
15+
@mkdir -p $(BIN_DIR)
16+
17+
# Build for all platforms
18+
all: darwin-amd64 darwin-arm64 linux-amd64 linux-arm64 windows-amd64 install
19+
20+
summary:
21+
@if ! command -v summarize > /dev/null; then \
22+
go install github.com/andreimerlescu/summarize@latest; \
23+
fi
24+
@summarize -i "go,Makefile,mod" -debug -print
25+
26+
install: $(BIN_DIR)
27+
@if [ "$(shell go env GOOS)" = "windows" ]; then \
28+
cp $(BIN_DIR)/$(APP_NAME)-$(shell go env GOOS)-$(shell go env GOARCH).exe "$(shell go env GOBIN)/$(APP_NAME).exe"; \
29+
else \
30+
cp $(BIN_DIR)/$(APP_NAME)-$(shell go env GOOS)-$(shell go env GOARCH) "$(shell go env GOBIN)/$(APP_NAME)"; \
31+
fi
32+
@echo "NEW: $(shell which $(APP_NAME))"
33+
34+
# Build for macOS Intel (amd64)
35+
darwin-amd64: $(BIN_DIR)
36+
@GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-darwin-amd64 $(MAIN_PATH)
37+
@echo "NEW: $(BIN_DIR)/$(APP_NAME)-darwin-amd64"
38+
39+
# Build for macOS Silicon (arm64)
40+
darwin-arm64: $(BIN_DIR)
41+
@GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-darwin-arm64 $(MAIN_PATH)
42+
@echo "NEW: $(BIN_DIR)/$(APP_NAME)-darwin-arm64"
43+
44+
# Build for Linux ARM64
45+
linux-arm64: $(BIN_DIR)
46+
@GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-linux-arm64 $(MAIN_PATH)
47+
@echo "NEW: $(BIN_DIR)/$(APP_NAME)-linux-arm64"
48+
49+
# Build for Linux AMD64
50+
linux-amd64: $(BIN_DIR)
51+
@GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-linux-amd64 $(MAIN_PATH)
52+
@echo "NEW: $(BIN_DIR)/$(APP_NAME)-linux-amd64"
53+
54+
# Build for Windows AMD64
55+
windows-amd64: $(BIN_DIR)
56+
@GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-windows-amd64.exe $(MAIN_PATH)
57+
@echo "NEW: $(BIN_DIR)/$(APP_NAME)-windows-amd64.exe"
58+
59+
# Clean build artifacts
60+
clean:
61+
@rm -rf $(BIN_DIR)
62+
@echo "REMOVED: $(BIN_DIR)"
63+
64+
# Project Specific
65+
66+
.PHONY: test
67+
68+
# Run tests
69+
test:
70+
./test.sh

README.md

Lines changed: 174 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,55 @@
11
# Summarize
22

3-
A go utility that will capture files with an extension pattern into a single markdown formatted
4-
file that looks like:
3+
The **Summarize** package was designed for developers who wish to leverage the use of Artificial Intelligence while
4+
working on a project. The `summarize` command gives you a powerful interface that is managed by arguments and environment
5+
variables that define include/exclude extensions, and avoid substrings list while parsing paths. The binary has
6+
concurrency built into it and has limits for the output file. It ignores its default output directory so it won't
7+
recursively build summaries upon itself. It defaults to writing to a new directory that it'll try to create in the
8+
current working directory called `summaries`, which I recommend you add to your `.gitignore` and `.dockerignore`.
9+
10+
I've found it useful to leverage the `make summary` command in all of my projects. This way, if I need to ask an AI a
11+
question about a piece of code, I can capture the source code of the entire directory quickly and then just `cat` the
12+
output file path provided and _voila_! The `-print` argument allows you to display the summary contents in the STDOUT
13+
instead of the `Summary generated: summaries/summary.2025.07.29.08.59.03.UTC.md` that it would normally generate.
14+
15+
The **Environment** can be used to control the native behavior of the `summarize` binary, such that you won't be required
16+
to type the arguments out each time. If you use _JSON_ all the time, you can enable its output format on every command
17+
by using the `SUMMARIZE_ALWAYS_JSON`. If you always want to write the summary, you can use the `SUMMARIZE_ALWAYS_WRITE`
18+
variable. If you want to always print the summary to STDOUT instead of the success message, you can use the variable
19+
`SUMMARIZE_ALWAYS_PRINT`. If you want to compress the rendered summary every time, you can use the variable
20+
`SUMMARIZE_ALWAYS_COMPRESS`. These `SUMMARIZE_ALWAYS_*` environment variables are responsible for customizing the
21+
runtime of the `summarize` application.
22+
23+
When the `summarize` binary runs, it'll do its best to ignore files that it can't render to a text file. This includes
24+
images, videos, binary files, and text files that are commonly linked to secrets.
25+
26+
The developer experience while using `summarize` is designed to enable quick use with just running `summarize` from
27+
wherever you wish to summarize. The `-d` for **source directory** defaults to `.` and the `-o`/`-f` for **output path**
28+
defaults to a new timestamped file (`-f`) in the (`-o`) `summaries/` directory from the `.` context. The `-i` and `-x` are used to
29+
define which file extensions (such as `go,ts,py`) to <b>i</b>nclude and e<b>x</b>clude. The `-s` is used to
30+
**skip** over substrings within a scanned path. Dotfiles can be ignored entirely by passing the `-ndf` flag.
31+
32+
Performance of the application can be tuned using the `-mf=<int>` to assign **Max Files** that will concurrently be
33+
processed. The default is 369. The `-max=<int64>` represents a limit on how large the rendered summary can become.
34+
35+
Once the program finishes running, the rendered file will look similar to:
536

637
```md
738
# Project Summary
839

9-
### `filename.ext`
40+
<AI prompt description>
41+
42+
### `filename.go`
43+
44+
<File Info>
1045

1146
<full source code>
1247

13-
### `filename.ext`
48+
### `filename.cs`
49+
50+
<File Info>
51+
52+
<full source code>
1453

1554
... etc.
1655

@@ -49,19 +88,139 @@ cd ~/work/anotherProject
4988
summarize -d anotherProject -o /home/user/summaries/anotherProject
5089
```
5190

52-
Since `figtree` is designed to be very functional, its lightweight but feature
53-
intense design through simple biology memetics makes it well suited for this program.
54-
5591
## Options
5692

57-
| Name | Argument | Type | Usage |
58-
|-----------------|----------|----------|--------------------------------------------------------|
59-
| `kSourceDir` | -d` | `string` | Source directory path. |
60-
| `kOutputDir` | -o` | `string` | Summary destination output directory path. |
61-
| `kExcludeExt` | `-x` | `list` | Comma separated string list of extensions to exclude. |
62-
| `kSkipContains` | `-s` | `list` | Comma separated string to filename substrings to skip. |
63-
| `kIncludeExt` | `-i` | `list` | Comma separated string of extensions to include. |
64-
| `kFilename` | `-f` | `string` | Summary filename (writes to `-o` dir). |
93+
| Name | Argument | Type | Usage |
94+
|------------------|----------|----------|-------------------------------------------------------------------|
95+
| `kSourceDir` | `-d` | `string` | Source directory path. |
96+
| `kOutputDir` | `-o` | `string` | Summary destination output directory path. |
97+
| `kExcludeExt` | `-x` | `list` | Comma separated string list of extensions to exclude. |
98+
| `kSkipContains` | `-s` | `list` | Comma separated string of filename substrings to skip. |
99+
| `kIncludeExt` | `-i` | `list` | Comma separated string of extensions to include. |
100+
| `kFilename` | `-f` | `string` | Summary filename (writes to `-o` dir). |
101+
| `kVersion` | `-v` | `bool` | When `true`, the binary version is shown |
102+
| `kCompress` | `-gz` | `bool` | When `true`, **gzip** is used on the contents of the summary |
103+
| `kMaxOutputSize` | `-max` | `int64` | Maximum size of the generated summary allowed |
104+
| `kPrint` | `-print` | `bool` | Uses STDOUT to write contents of summary |
105+
| `kWrite` | `-write` | `bool` | Uses the filesystem to save contents of summary |
106+
| `kDebug` | `-debug` | `bool` | When `true`, extra content is written to STDOUT aside from report |
107+
108+
109+
## Environment
110+
111+
| Environment Variable | Type | Default Value | Usage |
112+
|-----------------------------|----------|------------------------|-------------------------------------------------------------------------------------------------------------|
113+
| `SUMMARIZE_CONFIG_FILE` | `String` | `./config.yaml` | Contents of the YAML Configuration to use for [figtree](https://github.com/andreimerlescu/figtree). |
114+
| `SUMMARIZE_IGNORE_CONTAINS` | `List` | \* see below | Add items to this default list by creating your own new list here, they get concatenated. |
115+
| `SUMMARIZE_INCLUDE_EXT` | `List` | \*\* see below \* | Add extensions to include in the summary in this environment variable, comma separated. |
116+
| `SUMMARIZE_EXCLUDE_EXT` | `List` | \*\*\* see below \* \* | Add exclusionary extensions to ignore to this environment variable, comma separated. |
117+
| `SUMMARIZE_ALWAYS_PRINT` | `Bool` | `false` | When `true`, the `-print` will write the summary to STDOUT. |
118+
| `SUMMARIZE_ALWAYS_WRITE` | `Bool` | `false` | When `true`, the `-write` will write to a new file on the disk. |
119+
| `SUMMARIZE_ALWAYS_JSON` | `Bool` | `false` | When `true`, the `-json` flag will render JSON output to the console. |
120+
| `SUMMARIZE_ALWAYS_COMPRESS` | `Bool` | `false` | When `true`, the `-gz` flag will use gzip to compress the summary contents and appends `.gz` to the output. |
121+
122+
123+
### \* Default `SUMMARIZE_IGNORE_CONTAINS` Value
124+
125+
```json
126+
7z,gz,xz,zst,zstd,bz,bz2,bzip2,zip,tar,rar,lz4,lzma,cab,arj,crt,cert,cer,key,pub,asc,pem,p12,pfx,jks,keystore,id_rsa,id_dsa,id_ed25519,id_ecdsa,gpg,pgp,exe,dll,so,dylib,bin,out,o,obj,a,lib,dSYM,class,pyc,pyo,__pycache__,jar,war,ear,apk,ipa,dex,odex,wasm,node,beam,elc,iso,img,dmg,vhd,vdi,vmdk,qcow2,db,sqlite,sqlite3,db3,mdb,accdb,sdf,ldb,log,trace,dump,crash,jpg,jpeg,png,gif,bmp,tiff,tif,webp,ico,svg,heic,heif,raw,cr2,nef,dng,mp3,wav,flac,aac,ogg,wma,m4a,opus,aiff,mp4,avi,mov,mkv,webm,flv,wmv,m4v,3gp,ogv,ttf,otf,woff,woff2,eot,fon,pfb,pfm,pdf,doc,docx,xls,xlsx,ppt,pptx,odt,ods,odp,rtf,suo,sln,user,ncb,pdb,ipch,ilk,tlog,idb,aps,res,iml,idea,vscode,project,classpath,factorypath,prefs,vcxproj,vcproj,filters,xcworkspace,xcuserstate,xcscheme,pbxproj,DS_Store,Thumbs.db,desktop.ini,lock,sum,resolved,tmp,temp,swp,swo,bak,backup,orig,rej,patch,~,old,new,part,incomplete,map,min.js,min.css,bundle.js,bundle.css,chunk.js,dat,data,cache,pid,sock,pack,idx,rev,pickle,pkl,npy,npz,mat,rdata,rds
127+
```
128+
129+
```go
130+
131+
// defaultExclude are the -exc list of extensions that will be skipped automatically
132+
defaultExclude = []string{
133+
// Compressed archives
134+
"7z", "gz", "xz", "zst", "zstd", "bz", "bz2", "bzip2", "zip", "tar", "rar", "lz4", "lzma", "cab", "arj",
135+
136+
// Encryption, certificates, and sensitive keys
137+
"crt", "cert", "cer", "key", "pub", "asc", "pem", "p12", "pfx", "jks", "keystore",
138+
"id_rsa", "id_dsa", "id_ed25519", "id_ecdsa", "gpg", "pgp",
139+
140+
// Binary & executable artifacts
141+
"exe", "dll", "so", "dylib", "bin", "out", "o", "obj", "a", "lib", "dSYM",
142+
"class", "pyc", "pyo", "__pycache__",
143+
"jar", "war", "ear", "apk", "ipa", "dex", "odex",
144+
"wasm", "node", "beam", "elc",
145+
146+
// System and disk images
147+
"iso", "img", "dmg", "vhd", "vdi", "vmdk", "qcow2",
148+
149+
// Database files
150+
"db", "sqlite", "sqlite3", "db3", "mdb", "accdb", "sdf", "ldb",
151+
152+
// Log files
153+
"log", "trace", "dump", "crash",
154+
155+
// Media files - Images
156+
"jpg", "jpeg", "png", "gif", "bmp", "tiff", "tif", "webp", "ico", "svg", "heic", "heif", "raw", "cr2", "nef", "dng",
157+
158+
// Media files - Audio
159+
"mp3", "wav", "flac", "aac", "ogg", "wma", "m4a", "opus", "aiff",
160+
161+
// Media files - Video
162+
"mp4", "avi", "mov", "mkv", "webm", "flv", "wmv", "m4v", "3gp", "ogv",
163+
164+
// Font files
165+
"ttf", "otf", "woff", "woff2", "eot", "fon", "pfb", "pfm",
166+
167+
// Document formats (typically not source code)
168+
"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "rtf",
169+
170+
// IDE/Editor/Tooling artifacts
171+
"suo", "sln", "user", "ncb", "pdb", "ipch", "ilk", "tlog", "idb", "aps", "res",
172+
"iml", "idea", "vscode", "project", "classpath", "factorypath", "prefs",
173+
"vcxproj", "vcproj", "filters", "xcworkspace", "xcuserstate", "xcscheme", "pbxproj",
174+
"DS_Store", "Thumbs.db", "desktop.ini",
175+
176+
// Package manager and build artifacts
177+
"lock", "sum", "resolved", // package-lock.json, go.sum, yarn.lock, etc.
178+
179+
// Temporary and backup files
180+
"tmp", "temp", "swp", "swo", "bak", "backup", "orig", "rej", "patch",
181+
"~", "old", "new", "part", "incomplete",
182+
183+
// Source maps and minified files (usually generated)
184+
"map", "min.js", "min.css", "bundle.js", "bundle.css", "chunk.js",
185+
186+
// Configuration that's typically binary or generated
187+
"dat", "data", "cache", "pid", "sock",
188+
189+
// Version control artifacts (though usually in ignored directories)
190+
"pack", "idx", "rev",
191+
192+
// Other binary formats
193+
"pickle", "pkl", "npy", "npz", "mat", "rdata", "rds",
194+
}
195+
196+
```
197+
198+
### \* \* Default `SUMMARIZE_INCLUDE_EXT`
199+
200+
```json
201+
go,ts,tf,sh,py,js,Makefile,mod,Dockerfile,dockerignore,gitignore,esconfigs,md
202+
```
203+
204+
```go
205+
// defaultInclude are the -inc list of extensions that will be included in the summary
206+
defaultInclude = []string{
207+
"go", "ts", "tf", "sh", "py", "js", "Makefile", "mod", "Dockerfile", "dockerignore", "gitignore", "esconfigs", "md",
208+
}
209+
```
210+
211+
### \* \* \* Default `SUMMARIZE_EXCLUDE_EXT`
212+
213+
```json
214+
.min.js,.min.css,.git/,.svn/,.vscode/,.vs/,.idea/,logs/,secrets/,.venv/,/site-packages,.terraform/,summaries/,node_modules/,/tmp,tmp/,logs/
215+
```
216+
217+
```go
218+
// defaultAvoid are the -avoid list of substrings in file path names to avoid in the summary
219+
defaultAvoid = []string{
220+
".min.js", ".min.css", ".git/", ".svn/", ".vscode/", ".vs/", ".idea/", "logs/", "secrets/",
221+
".venv/", "/site-packages", ".terraform/", "summaries/", "node_modules/", "/tmp", "tmp/", "logs/",
222+
}
223+
```
65224

66225
## Contribution
67226

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
v1.0.0
1+
v1.0.2

go.mod

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
module github.com/andreimerlescu/summarize
22

3-
go 1.23.7
3+
go 1.24.5
44

55
require (
6-
github.com/andreimerlescu/checkfs v1.0.2
7-
github.com/andreimerlescu/figtree/v2 v2.0.3
6+
github.com/andreimerlescu/checkfs v1.0.4
7+
github.com/andreimerlescu/figtree/v2 v2.0.14
88
github.com/andreimerlescu/sema v1.0.0
99
)
1010

1111
require (
12+
github.com/andreimerlescu/bump v1.0.3 // indirect
1213
github.com/go-ini/ini v1.67.0 // indirect
13-
golang.org/x/sys v0.31.0 // indirect
14-
golang.org/x/term v0.30.0 // indirect
14+
golang.org/x/sys v0.33.0 // indirect
15+
golang.org/x/term v0.32.0 // indirect
1516
gopkg.in/yaml.v3 v3.0.1 // indirect
1617
)

0 commit comments

Comments
 (0)