-
Notifications
You must be signed in to change notification settings - Fork 1
/
code_collector.py
129 lines (111 loc) · 4.36 KB
/
code_collector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
from typing import List, Any
import click
def should_ignore(filepath: str, ignore_paths: List[str]) -> bool:
    """Decide whether a file or directory should be left out of the collection.

    A path is ignored when any of the following holds:

    * it is exactly one of ``ignore_paths`` (a trailing ``/`` on the ignore
      entry is disregarded),
    * it lies underneath one of ``ignore_paths``,
    * its basename is listed in ``ignore_paths``,
    * its basename starts with ``.`` (hidden file or directory).

    Args:
        filepath: Path of the candidate file or directory.
        ignore_paths: Paths or bare names the caller asked to skip.

    Returns:
        True if the path should be skipped, False otherwise.
    """
    for ignore_path in ignore_paths:
        stripped = ignore_path.rstrip("/")
        # Exact match: without this, a file ignored by its full path was still
        # collected and an ignored directory was still listed (just empty).
        if filepath in (ignore_path, stripped):
            return True
        # Anything located below an ignored directory.
        if filepath.startswith(stripped + "/"):
            return True

    basename = os.path.basename(filepath)
    if basename in ignore_paths:
        return True

    # Hidden files and directories are always skipped.
    return basename.startswith(".")
def tree(
    directory: str, prefix: str, ignore_paths: List[str], tree_lines: List[str]
) -> None:
    """Append a textual tree of ``directory`` to ``tree_lines``.

    The directory's own name is written first, followed by its non-ignored
    entries in sorted order; sub-directories are expanded recursively.

    Args:
        directory: Directory to render.
        prefix: Text placed in front of every line produced for this directory.
        ignore_paths: Forwarded to ``should_ignore`` to filter entries.
        tree_lines: Output list; lines are appended to it in place.
    """
    # normpath drops a trailing separator so "src/" is labelled "src/"
    # instead of an empty name.
    name = os.path.basename(os.path.normpath(directory))
    tree_lines.append(f"{prefix}{name}/")
    _append_tree_entries(directory, prefix + " ", ignore_paths, tree_lines)


def _append_tree_entries(
    directory: str, prefix: str, ignore_paths: List[str], tree_lines: List[str]
) -> None:
    """Append one line per visible entry of ``directory``, expanding sub-directories."""
    # Filter first so the "last entry" connector is chosen among the entries
    # that are actually printed.
    visible = [
        item
        for item in sorted(os.listdir(directory))
        if not should_ignore(os.path.join(directory, item), ignore_paths)
    ]
    last_index = len(visible) - 1
    for index, item in enumerate(visible):
        path = os.path.join(directory, item)
        is_last = index == last_index
        connector = "└── " if is_last else "├── "
        if not os.path.isdir(path):
            tree_lines.append(f"{prefix}{connector}{item}")
            continue
        # The directory line is written exactly once, here; the recursion
        # only adds the directory's children.
        tree_lines.append(f"{prefix}{connector}{item}/")
        # Symlinked directories are listed but not expanded, which avoids
        # endless recursion on link cycles (os.walk also skips them by default).
        if os.path.islink(path):
            continue
        _append_tree_entries(
            path,
            prefix + (" " if is_last else "│ "),
            ignore_paths,
            tree_lines,
        )
def generate_tree_structure(paths_to_search: List[str], ignore_paths: List[str]) -> str:
    """Render the project layout for the given paths as newline-joined text.

    Directories are expanded through ``tree``; plain files contribute their
    basename unless ``should_ignore`` rejects them. Paths that are neither a
    directory nor a regular file produce no output.
    """
    lines: List[str] = []
    for entry in paths_to_search:
        if os.path.isdir(entry):
            tree(entry, "", ignore_paths, lines)
            continue
        if not os.path.isfile(entry):
            continue
        if should_ignore(entry, ignore_paths):
            continue
        lines.append(os.path.basename(entry))
    return "\n".join(lines)
def process_file(filepath: str, outfile: Any) -> None:
    """Write one file's path and contents to ``outfile``.

    The entry consists of a ``Filepath:`` header line, the file's text and a
    separator line. A file that cannot be decoded as UTF-8 (for example a
    binary file) does not abort the run; a short placeholder is written in
    place of its contents.

    Args:
        filepath: Path of the file to read.
        outfile: Writable text stream receiving the entry.
    """
    # Read completely before writing so a failed read never leaves a
    # half-written entry in the output.
    try:
        with open(filepath, "r", encoding="utf-8") as infile:
            code = infile.read()
    except UnicodeDecodeError:
        code = "[skipped: not valid UTF-8 text]"
    outfile.write(f"Filepath: {filepath}\n")
    outfile.write(code)
    outfile.write("\n================\n")
def process_directory(
    directory: str, file_formats: List[str], ignore_paths: List[str], outfile: Any
) -> None:
    """Walk ``directory`` and write every matching file to ``outfile``.

    Ignored directories are pruned so they are never descended into.
    Directories and files are visited in sorted order so the output is
    deterministic. The file that ``outfile`` itself writes to is skipped.

    Args:
        directory: Root directory to walk.
        file_formats: Filename suffixes to include; empty means every file.
        ignore_paths: Forwarded to ``should_ignore``.
        outfile: Writable text stream passed on to ``process_file``.
    """
    # Built once instead of once per file.
    suffixes = tuple(file_formats or ())

    # When outfile is a real file, remember its location so the output is not
    # read back into itself if it lives inside the walked directory.
    out_name = getattr(outfile, "name", None)
    output_path = os.path.abspath(out_name) if isinstance(out_name, str) else None

    for root, dirs, files in os.walk(directory):
        # In-place assignment is what makes os.walk honour the pruning and
        # the traversal order.
        dirs[:] = sorted(
            d for d in dirs if not should_ignore(os.path.join(root, d), ignore_paths)
        )
        for file in sorted(files):
            filepath = os.path.join(root, file)
            if should_ignore(filepath, ignore_paths):
                continue
            if suffixes and not filepath.endswith(suffixes):
                continue
            if output_path is not None and os.path.abspath(filepath) == output_path:
                continue
            process_file(filepath, outfile)
def read_code_files(
    paths_to_search: List[str],
    file_formats: List[str],
    ignore_paths: List[str],
    output_file: str,
) -> None:
    """Write the project structure and the selected files' contents to ``output_file``.

    The output starts with the tree produced by ``generate_tree_structure``,
    followed by one entry per collected file. Files given directly are
    filtered by ``should_ignore`` and ``file_formats``; directories are
    delegated to ``process_directory``.
    """
    with open(output_file, "w", encoding="utf-8") as outfile:
        structure = generate_tree_structure(paths_to_search, ignore_paths)
        outfile.writelines(
            [
                "Project Structure:\n",
                structure,
                "\n\nCode Files:\n",
                "================\n",
            ]
        )

        for target in paths_to_search:
            if not os.path.isfile(target):
                if os.path.isdir(target):
                    process_directory(target, file_formats, ignore_paths, outfile)
                continue
            if should_ignore(target, ignore_paths):
                continue
            if file_formats and not target.endswith(tuple(file_formats)):
                continue
            process_file(target, outfile)
# Command-line entry point. The function docstring below is kept short
# because click displays it as the command's help text.
@click.command()
# Any number of positional paths (nargs=-1); click.Path(exists=True) makes
# click check that each given path exists.
@click.argument("paths", nargs=-1, type=click.Path(exists=True))
# Repeatable option (multiple=True); values are matched against the end of
# each file path by the collector.
@click.option(
    "--formats",
    "-f",
    multiple=True,
    help="File formats to include. If not specified, all files are included.",
)
# Repeatable option (multiple=True); no exists=True here, so the paths are
# not checked for existence.
@click.option(
    "--ignore-paths",
    "-i",
    multiple=True,
    type=click.Path(),
    help="Paths (files or directories) to ignore.",
)
@click.option("--output", "-o", default="collected_code.txt", help="Output file name.")
def cli(
    paths: List[str], formats: List[str], ignore_paths: List[str], output: str
) -> None:
    """Collect code from files and directories."""
    # Pass the parsed command-line values straight to the collector.
    read_code_files(paths, formats, ignore_paths, output)
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    cli()