-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRockland_Tests.py
180 lines (132 loc) · 5.01 KB
/
Rockland_Tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
from Materia import *
# Create our datasets
# Performs automatic detection of column datatypes
# Sets datetime index automatically
# Stores data in column major format
DS = Dataset("data/rockland.csv", numHeaderLines=9)
# DS2 = Dataset("data/evergreen.csv")
# DS3 = Dataset("data/bigelow_soilMTP_2017.csv", numHeaderLines=2)
# Use existing metadata in header rows to generate unique column headers
DS.genHeadersFromMetadataRows((1,6))
# Load the flag codes assocaited with our DataSet
# Supports any flag code standard (IOOS, NOAA, etc.)
DS.flagcodes().are({"None":0,
"RV":"Repeat Value",
"MV": "Missing",
"CT": "Failed Complex Test"
"Hrdwr Rng": "Exceeds Hardware Specified Range",
"SI": "Incosistent (Spatial)",
"LI": "Inconsistent (Logical)",
"Spk": "Spike"})
# Pull multiple time series objects out of our DataSet objects
series_max = DS['Air temperature (2-meter) monitor_Maximum']
series_min = DS['Air temperature (2-meter) monitor_Minimum']
series_max_10 = DS['Air temperature (10-meter) monitor_Maximum']
# Define the expected time step for our time series
# Used to find missing values and align series which are between the same dates but may have different numbers of values
series_max.timestep((series_max.beginning(+1)) - series_max.beginning())
series_min.timestep((series_min.beginning(+1)) - series_min.beginning())
'''
----------- TEST DEFINTIONS ---------------
'''
'''
Repeat Value Test.
Checks if there is a flat line in our time series.
The "value" argument is required for a test defintion.
Abstracts out a loop over our time series and allows users
to specify tests in terms of any scalar value in our time series.
'''
def repeat_value_test(value):
n = 3
if not value.isnan():
if value == value.prior(n):
return True
return False
'''
Range test.
Checks if a value is in a user specified range.
'''
def range_test(value):
USR_MAX = 28
USR_MIN = -7.8
return (value < USR_MIN or value > USR_MAX)
'''
Spatial Inconsistency test.
Checks if two datapoints between two spatially similar series are sufficently similar within a certian threshold.
"i" is an optional argument which gives the current index of value
'''
def spatial_inconsistency(value, i):
comp_val = series_max_10.value().at(value)
diff = value - comp_val
avg = (value + comp_val) / 2
threshold_p = 75
exceeds_threshold = abs(diff / avg) * 100.0 > threshold_p
if exceeds_threshold:
return True
return False
'''
Logical inconsistency.
Checks if two values are logically inconsistent. Eg. is a max measurement less than a min measurement.
'''
def logical_inconsistency(max_value):
min_val = series_min.value().at(max_value)
if min_val > max_value:
return True
return False
def test(value, i):
p_5 = value.prior(5)
min_val = series_min.value().at(value)
m_p_5 = min_val.prior(5)
cmp = ((p_5 * (m_p_5 - (value + .5)))/2) + min_val
if value < cmp:
return True
return False
'''
Slope test.
Checks if the slope of a current value and it's value at t-1 is signficantly different than the slope between t-1 and t-n.
'''
def slope_test(value, i):
p_a = value.prior(2)[0]
p_b = value.prior(2)[1]
p_c = value
priorslp = 0.0
if p_a.isScalar() and p_b.isScalar():
# x values
x1 = p_a
x2 = p_b
# y values
y1 = p_a.intIndex()
y2 = p_b.intIndex()
# current slope
priorslp = (y2-y1)/(x2-x1)
# x ad y values for next point
x3 = p_c
y3 = p_c.intIndex()
# next slope
nextslp = (y3-y2)/(x3-x2)
if (abs(nextslp) < .1 #very sharp slope
and abs(nextslp) < abs(priorslp*.01) #big difference between the two slopes
and abs(nextslp) != float("inf") #slope is not a flat line
and abs(priorslp) != float("inf")): #slope is not a flat line
return True
return False
# checks for missing values
series_max.datapoint().flag('Missing Value').missingValueTest(-9999)
# appends flag to our time series at time t when tests return true
series_max.datapoint().flag("CT").when(test)
#
# series_max.datapoint().flag("Repeat Value").when(rv_test)
# series_max.datapoint().flag("Outlier").when(range_test).when( lambda value: (value < -100 or value > 100) )
# series_max.datapoint().flag("Spatial Inconsistency").when(spatial_inconsistency)
# series_max.datapoint().flag("Logical Inconsistency").when(logical_inconsistency)
# series_max.datapoint().flag("Spike").when(slope_test)
# writing out results to csv
with open('rockland_flags.csv', 'w') as f:
f.write('{},{},{}\n'.format("index", series_max._header, "{}_flags".format(series_max._header)))
for i, dt in enumerate(series_max._index):
f.write('{},{},{}\n'.format(dt, series_max._values[i], series_max._flags[i]))
with open('rockland_tests.csv', 'w') as f:
for t in series_max._testHist:
f.write('{}-------\n'.format(t[0]))
f.write('\n{}\n'.format(t[1]))
f.write('\n')