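"""Extract structured data from police use-of-force records using a local
LLM served by Ollama.

Adapted from brandonrobertz/nicar2025-ai-py-crash-course. Takes an input
JSON file, a prompt template, and an Ollama model name as optional command
line arguments, then writes the extracted records to a timestamped JSON
file.
"""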
from datetime import datetime
import json
import sys

import ollama

from load_data import load_records
# Input file, prompt template, and model name can all be overridden from
# the command line; fall back to the course defaults otherwise.
try:
    INFILE = sys.argv[1]
except IndexError:
    INFILE = "data/IAPRO_UOF_2010-2020_Pgs.001-350_Requestor_Copy.json"

try:
    PROMPT = sys.argv[2]
except IndexError:
    PROMPT = "prompts/police_files_extract_json.basic.txt"

try:
    MODEL = sys.argv[3]
except IndexError:
    MODEL = "llama3.2"
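
# The prompt file is a plain-text template with a `{rec}` placeholder that
# gets filled with each record via .format(rec=rec) in the loop below. A
# rough sketch of what such a template might look like (the actual wording
# in prompts/police_files_extract_json.basic.txt may differ):
#
#   Extract the key details from the following police record and respond
#   with only a ```json fenced code block:
#
#   {rec}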
# Timestamped output filename so repeated runs don't clobber each other.
OUTFILE = f"output-extract.{int(datetime.now().timestamp())}.json"
# Calling chat() with no messages is a cheap way to check that the model
# is available locally; the Ollama server responds with a 404 if it
# hasn't been pulled yet.
try:
    ollama.chat(MODEL)
except ollama.ResponseError as e:
    if e.status_code == 404:
        print(f"Downloading model: {MODEL}")
        ollama.pull(MODEL)
    else:
        raise
print("Loading prompt", PROMPT)
with open(PROMPT, "r") as f:
prompt_base = f.read()
print("Using prompt template:\n-- BEGIN TEMPLATE --\n", prompt_base, "\n-- END TEMPLATE --")
# Load the source records and run the extraction prompt over each one.
extracted = []
records = load_records(INFILE, encoding="utf-8")
print("Loaded", len(records), "records")
for rec in records:
    # Fill the record into the prompt template and ask the model for a
    # structured extraction; temperature 0.0 keeps the output as
    # deterministic as possible.
    prompt = prompt_base.format(rec=rec)
    response = ollama.chat(
        model=MODEL,
        options={
            "temperature": 0.0,
        },
        messages=[
            {
                'role': 'user',
                'content': prompt,
            },
        ],
    )
    resp_text = response.message.content
    print("Response:", resp_text)
    # The model is expected to wrap its answer in a ```json fence; grab
    # the text between the fence markers and parse it.
    extracted_rec = json.loads(resp_text.split(
        "```json", 1
    )[-1].split("```", 1)[0])
print("Extracted:", extracted_rec)
if isinstance(extracted_rec, list):
for er in extracted_rec:
extracted.append({
"record": rec,
"result": er
})
else:
extracted.append(extracted_rec)
print("Writing", len(extracted), "records to", OUTFILE)
with open(OUTFILE, "w") as f:
f.write(json.dumps(extracted, indent=2))
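
# Example invocation (all three arguments are optional and default to the
# values above):
#   python run_extraction.py \
#       data/IAPRO_UOF_2010-2020_Pgs.001-350_Requestor_Copy.json \
#       prompts/police_files_extract_json.basic.txt \
#       llama3.2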