-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstory.py
78 lines (67 loc) · 3.32 KB
/
story.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import yaml
import datetime
import re
# Fallback cap on the number of entries a search may return; get_story applies
# it when a story does not set its own non-zero match_count_limit.
default_match_count_limit = 10 ** 6
# for local usage from db (single yaml file)
# * intro_text: string. if set, text to display at start of session (optional)
# * intro_stats: bool. if set, display some stats about the story at start of session (optional)
# * match_count_limit: int. if set, maximum number of entries that can be returned by a search (optional)
# * initial_search: string. if set, start the session with an already executed search for this term (optional)
# * entries: sequence of mappings w/ keys. this is the meat of the data
# * id: string. if not set, id will be set to entry's position index in list of entries (optional)
# * date: date|datetime. date for the entry. matches will be returned in order of date ascending
# * text: string. the content of the entry to be displayed
def get_story(file_name):
    """Load a story from a YAML file and apply defaults for downstream users.

    Args:
        file_name: path of the YAML file holding the story mapping.

    Returns:
        The mapping parsed from the file, modified so that:
        * 'match_count_limit' falls back to default_match_count_limit when it
          is missing or 0,
        * every entry has an 'id' (its position in the file's entry list, as
          a string, when none was given),
        * 'entries' is sorted by 'date' ascending.
    """
    def date_sort_key(entry):
        # Python refuses to compare a plain date with a datetime, so a story
        # mixing both would fail to sort. Plain dates are ordered as midnight
        # of that day; any other value is passed through unchanged.
        when = entry['date']
        if isinstance(when, datetime.date) and not isinstance(when, datetime.datetime):
            return datetime.datetime.combine(when, datetime.time.min)
        return when

    # read as UTF-8 explicitly so loading does not depend on the platform's
    # default text encoding
    with open(file_name, 'r', encoding='utf-8') as file:
        story = yaml.safe_load(file)

    # defaults
    if 'match_count_limit' not in story or story['match_count_limit'] == 0:
        story['match_count_limit'] = default_match_count_limit

    # populate entry ids if not set (done before sorting, so the id reflects
    # the entry's position in the file rather than its chronological rank)
    for idx, entry in enumerate(story['entries']):
        if 'id' not in entry:
            entry['id'] = str(idx)

    # ensure entries sorted for downstream users
    story['entries'] = sorted(story['entries'], key=date_sort_key)
    return story
# follow "her story" logic
def search_entries(entries, search_term):
    """Return the entries matching search_term, ordered by date ascending.

    An entry matches when every token of the search term (as produced by
    tokenize_text) is also a token of the entry's 'text'. Substring matches
    of tokens are not allowed.

    Args:
        entries: iterable of entry mappings with 'text' and 'date' keys.
        search_term: string typed by the user.

    Returns:
        A new list with the matching entries sorted by their 'date'.
    """
    # examples:
    # search "one one" matches "one" (each part of term is searched independently)
    # search "one two" matches "three two one" (order does not matter)
    # search "haven't" matches "haven couldn't" (punctuation splits tokens)
    # search "a" does not match "apple" (no partial matches)
    # search "cars" matches "car" (unpluralization)
    # search "car" matches "cars" (unpluralization)
    # search "s" matches "hello" ("s" gets unpluralized into empty list)

    def date_sort_key(entry):
        # A plain date cannot be compared with a datetime, so results mixing
        # both would fail to sort. Plain dates are ordered as midnight of
        # that day; any other value is passed through unchanged.
        when = entry['date']
        if isinstance(when, datetime.date) and not isinstance(when, datetime.datetime):
            return datetime.datetime.combine(when, datetime.time.min)
        return when

    # The search term is the same for every entry: tokenize it once, and keep
    # it as a set since duplicates and order are irrelevant to matching.
    wanted = set(tokenize_text(search_term))

    # An empty `wanted` is a subset of anything, so a term that tokenizes to
    # nothing matches every entry (see the "s" example above).
    results = [
        entry for entry in entries
        if wanted.issubset(tokenize_text(entry['text']))
    ]
    return sorted(results, key=date_sort_key)
# follow "her story" logic
# naive conversion of string into non-plural, alphanumeric parts
def tokenize_text(txt):
    """Split txt into lowercase, unpluralized alphanumeric tokens.

    The text is lowercased, cut at every run of characters that are not
    ASCII letters or digits, and each piece is passed through unpluralize.
    Pieces that end up empty are dropped.

    Args:
        txt: string to tokenize.

    Returns:
        List of non-empty token strings, in order of appearance.
    """
    # The input is already lowercased, so only a-z is needed for letters.
    # Digits are kept so that numeric words are real tokens; if they were
    # stripped, a numeric search term would tokenize to nothing and match
    # every entry.
    pieces = re.split(r'[^a-z0-9]+', txt.lower())
    tokens = (unpluralize(piece) for piece in pieces)
    return [token for token in tokens if token]
# follow "her story" logic
# naive conversion of string into non-plural parts
def unpluralize(w):
    """Naively strip a plural suffix from the word w.

    Words ending in "ss" are returned untouched. Otherwise a trailing "es"
    is removed, or failing that a trailing "s". Words with none of these
    endings are returned as-is. The result may be the empty string
    (e.g. for "s" or "es").
    """
    if w.endswith("ss"):
        return w
    # "es" must be tried before "s" so that both letters are removed
    for suffix in ("es", "s"):
        if w.endswith(suffix):
            return w[:-len(suffix)]
    return w
def get_short_date_and_text(entry):
    """Format an entry as a one-line "<date>: <text preview>" summary.

    Args:
        entry: mapping with a 'date' (date or datetime) and a 'text' string.

    Returns:
        The date as MM/DD/YYYY (followed by ", HH:MM:SS" for datetimes), then
        ": " and the entry text. Text longer than 75 characters is cut to its
        first 75 characters followed by "..".
    """
    when = entry['date']
    # isinstance rather than an exact type comparison, so that datetime
    # subclasses also show their time of day
    if isinstance(when, datetime.datetime):
        stamp = when.strftime("%m/%d/%Y, %H:%M:%S")
    else:
        stamp = when.strftime("%m/%d/%Y")

    text = entry['text']
    preview = text[:75] + '..' if len(text) > 75 else text
    return stamp + ": " + preview