replace files-to-prompt with git ls-files for bloat metrics

files-to-prompt was including untracked files (knowledge/, dev scripts, etc.), which inflated the bloat metrics. Now we use git ls-files to count only tracked source files, which is more accurate and removes an external dependency.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Andrej Karpathy
2026-01-04 01:38:15 +00:00
parent da8b7ea4cb
commit ee79f29fbd
3 changed files with 20 additions and 21 deletions
+20 -6
View File
@@ -160,12 +160,26 @@ Generated: {timestamp}
"""
# bloat metrics: package all of the source code and assess its weight
packaged = run_command('files-to-prompt . -e py -e md -e rs -e html -e toml -e sh --ignore "*target*" --cxml')
num_chars = len(packaged)
num_lines = len(packaged.split('\n'))
num_files = len([x for x in packaged.split('\n') if x.startswith('<source>')])
num_tokens = num_chars // 4 # assume approximately 4 chars per token
# bloat metrics: count lines/chars in git-tracked source files only
extensions = ['py', 'md', 'rs', 'html', 'toml', 'sh']
git_patterns = ' '.join(f"'*.{ext}'" for ext in extensions)
files_output = run_command(f"git ls-files -- {git_patterns}")
file_list = [f for f in (files_output or '').split('\n') if f]
num_files = len(file_list)
num_lines = 0
num_chars = 0
if num_files > 0:
wc_output = run_command(f"git ls-files -- {git_patterns} | xargs wc -lc 2>/dev/null")
if wc_output:
total_line = wc_output.strip().split('\n')[-1]
parts = total_line.split()
if 'total' in parts:
num_lines = int(parts[0])
num_chars = int(parts[1])
elif len(parts) >= 2:
num_lines = int(parts[0])
num_chars = int(parts[1])
num_tokens = num_chars // 4 # assume approximately 4 chars per token
# count dependencies via uv.lock
uv_lock_lines = 0