-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcli.py
More file actions
106 lines (85 loc) · 2.92 KB
/
Copy pathcli.py
File metadata and controls
106 lines (85 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import argparse
import json
import os
import sys
import db
def parse_args(argv: list[str]):
    """Parse the ``job-scraper`` command line and return the argparse namespace.

    Global options ``--config`` and ``--data`` default to the ``CONFIG_DIR``
    and ``DATA_DIR`` environment variables (falling back to ``./config`` and
    ``./data``). Exactly one sub-command is required; it is stored in ``cmd``:

    * ``run``: ``--provider`` (groq, anthropic or gemini; default groq),
      the API-key options, and the ``--lite`` flag.
    * ``export``: ``--format`` (json or csv), ``--limit`` (int, default 200)
      and ``--out`` (default ``-``).

    Args:
        argv: Argument strings, without the program name.

    Returns:
        The ``argparse.Namespace`` produced by the parser.
    """
    env = os.environ
    parser = argparse.ArgumentParser(prog="job-scraper")
    parser.add_argument("--config", default=env.get("CONFIG_DIR", "./config"))
    parser.add_argument("--data", default=env.get("DATA_DIR", "./data"))
    commands = parser.add_subparsers(dest="cmd", required=True)

    # "run" sub-command.
    run_cmd = commands.add_parser("run")
    run_cmd.add_argument(
        "--provider",
        choices=["groq", "anthropic", "gemini"],
        default="groq",
    )
    # Each key option defaults to an environment variable; the generic
    # --api-key shares GROQ_API_KEY with --groq-api-key.
    key_options = (
        ("--api-key", "GROQ_API_KEY"),
        ("--groq-api-key", "GROQ_API_KEY"),
        ("--anthropic-api-key", "ANTHROPIC_API_KEY"),
        ("--gemini-api-key", "GEMINI_API_KEY"),
    )
    for flag, env_name in key_options:
        run_cmd.add_argument(flag, default=env.get(env_name, ""))
    run_cmd.add_argument("--lite", action="store_true")

    # "export" sub-command.
    export_cmd = commands.add_parser("export")
    export_cmd.add_argument("--format", choices=["json", "csv"], default="json")
    export_cmd.add_argument("--limit", type=int, default=200)
    export_cmd.add_argument("--out", default="-")

    return parser.parse_args(argv)
def write_json(rows: list[dict], out_path: str):
    """Serialize *rows* as indented JSON followed by a trailing newline.

    Args:
        rows: The records to serialize.
        out_path: Destination file path, or ``"-"`` to write to standard
            output. Files are written as UTF-8.
    """
    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    document = json.dumps(rows, ensure_ascii=False, indent=2) + "\n"
    if out_path != "-":
        with open(out_path, "w", encoding="utf-8") as handle:
            handle.write(document)
        return
    sys.stdout.write(document)
def write_csv(rows: list[dict], out_path: str):
    """Write *rows* as CSV with a header row.

    The columns are the union of all keys found in *rows*, in first-seen
    order. A row that lacks a column gets an empty cell (the
    ``csv.DictWriter`` default).

    Args:
        rows: The records to write; each dict maps column name to value.
        out_path: Destination file path, or ``"-"`` to write to standard
            output. Files are written as UTF-8.
    """
    import csv

    # dict.fromkeys de-duplicates while keeping insertion order, so column
    # discovery is linear instead of a list-membership scan per key.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))

    def _emit(stream):
        # Single code path for both destinations: header first, then rows.
        writer = csv.DictWriter(stream, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    if out_path == "-":
        _emit(sys.stdout)
        return
    # newline="" lets the csv module control line endings itself.
    with open(out_path, "w", encoding="utf-8", newline="") as f:
        _emit(f)
def main(argv: list[str] | None = None):
    """Run the command-line interface and return an exit status.

    Parses the arguments, copies the chosen ``--config`` / ``--data`` values
    into the ``CONFIG_DIR`` / ``DATA_DIR`` environment variables, then
    dispatches on the sub-command:

    * ``run`` imports ``scraper`` and calls ``scraper.run_scrape``.
    * ``export`` initializes ``db``, lists jobs and writes them out as JSON
      or CSV.

    Args:
        argv: Argument strings without the program name; ``sys.argv[1:]`` is
            used when this is ``None``.

    Returns:
        ``0`` after a ``run`` or ``export`` command completes, ``2`` for any
        other command value.
    """
    if argv is None:
        argv = sys.argv[1:]
    opts = parse_args(argv)

    # Set before any command code runs.
    # NOTE(review): presumably db/scraper read these variables - confirm.
    os.environ.update({"CONFIG_DIR": opts.config, "DATA_DIR": opts.data})

    command = opts.cmd
    if command == "export":
        db.init()
        jobs = db.list_jobs(limit=opts.limit)
        emit = write_json if opts.format == "json" else write_csv
        emit(jobs, opts.out)
        return 0

    if command != "run":
        return 2

    # Imported here so that only the "run" command loads the scraper module.
    import scraper

    # The Groq key falls back to the generic --api-key value.
    groq_key = opts.groq_api_key or opts.api_key or ""
    provider_keys = {
        "groq": groq_key,
        "anthropic": opts.anthropic_api_key or "",
        "gemini": opts.gemini_api_key or "",
    }
    scraper.run_scrape(
        opts.api_key or "",
        bool(opts.lite),
        provider=opts.provider,
        api_keys=provider_keys,
    )
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())