-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathBAMBI.py
91 lines (67 loc) · 2.35 KB
/
BAMBI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 13 17:18:16 2015
@author: jeff
TODO: download the last few years of data and see which names are trending,
then add that as an additional value to the dictionary.
"""
### user settable variables ###
get = 100 # how many names do you want returned?
# The next four values feed the `size` computation further down, i.e. the
# number of names drawn into the intermediate pool by np.random.choice.
# NOTE(review): 999999 looks like a placeholder; the expected numeric
# encoding of the dates / address is not shown in this file -- confirm.
parent1_bday = 999999
parent2_bday = 999999
due_date = 999999
aatc = 999999 # address at time of conception
start_letters = [] # restrict names to those that start with these letters (empty list = no restriction)
sex = 'F' # F or M; compared against the second column of yob2014.txt
### end user settable variables ###
import matplotlib
#matplotlib.use('PS')
import numpy as np
import matplotlib.pyplot as py
import math
import scipy.stats as sps
# Number of names drawn into the intermediate pool further down.
# int() keeps the value usable as a numpy `size` argument even when the
# inputs are floats (or when `/` is true division).
# NOTE(review): raises ZeroDivisionError when due_date / aatc evaluates to 0.
size = int((parent1_bday + parent2_bday) / (due_date / aatc))

p = []      # abundance (count) of each retained name, as float
names = []  # retained names, parallel to p

# Each record is expected to look like "name,sex,count".
with open('yob2014.txt', 'r') as names_in:
    for line in names_in:
        fields = line.rstrip().split(',')

        # Skip blank or truncated records instead of dying on an IndexError.
        if len(fields) < 3 or not fields[0]:
            continue

        name, name_sex, count = fields[0], fields[1], fields[2]
        if name_sex != sex:
            continue

        # An empty start_letters list means "no restriction"; the match on
        # the first letter is case-sensitive.
        if start_letters and name[0] not in start_letters:
            continue

        p.append(float(count))
        names.append(name)
### transform the abundance values ###
# Geometric mean of the name counts collected in p, computed with the
# masked-array variant of scipy's gmean.
geo_mean = sps.mstats.gmean(p)
print 'mean name abundance is', geo_mean
def calc_geo_sd(geo_mean, p):
    """Return the geometric standard deviation of the values in p.

    Computed as exp(sqrt(mean(log(x / geo_mean) ** 2))) over every x in p,
    i.e. the population form (the sum is divided by len(p)).

    geo_mean -- geometric mean of p; must be > 0
    p        -- non-empty sequence of positive numbers

    Raises ValueError if p is empty. math.log also raises ValueError when
    a ratio x / geo_mean is not positive.
    """
    if len(p) == 0:
        raise ValueError('calc_geo_sd needs at least one value')

    # float() forces true division so that integer inputs are not floored
    # to 0 (which math.log rejects) under Python 2.
    geo_mean = float(geo_mean)
    squared_logs = [math.log(x / geo_mean) ** 2 for x in p]
    return math.exp(math.sqrt(sum(squared_logs) / len(p)))
geo_sd = calc_geo_sd(geo_mean, p)
print 'the standard deviation of name abundance is', geo_sd
## get a gaussion distribution of mean = geo_mean and sd = geo_sd
## of length len(p)
dist_param = sps.norm(loc = geo_mean, scale = geo_sd)
dist = dist_param.rvs(size = sum(p))
## now get the probability of these values
print 'wait for it, generating name probabilities...'
temp_hist = py.hist(dist, bins = len(p))
probs = temp_hist[0]
probs = probs / sum(probs) # potentially max(probs)
### generate a pool of possible names ###
possible_names = np.random.choice(names, size = size, p = probs, replace = True)
final_names = np.random.choice(possible_names, size = get, replace = False)
with open('pick_your_kids_name.txt', 'w') as output:
for name in final_names:
print name
print >> output, name