1
1
"""
2
2
this is code for forecasting
3
3
but I modified it and used it as a safety checker for data
4
- for ex: you have a online shop and for some reason some data are
4
+ for ex: you have an online shop and for some reason some data are
5
5
missing (the amount of data is not what you expected it to be)
6
6
then we can use it
7
7
*ps : 1. ofc we can use normal statistic method but in this case
@@ -91,14 +91,14 @@ def interquartile_range_checker(train_user: list) -> float:
91
91
return low_lim
92
92
93
93
94
- def data_safety_checker (list_vote : list , actual_result : float ) -> None :
94
+ def data_safety_checker (list_vote : list , actual_result : float ) -> bool :
95
95
"""
96
96
Used to review all the votes (list result prediction)
97
97
and compare it to the actual result.
98
98
input : list of predictions
99
99
output : True if the safe votes outnumber the unsafe ones, otherwise False
100
- >>> data_safety_checker([2,3, 4],5.0)
101
- Today's data is not safe.
100
+ >>> data_safety_checker([2, 3, 4], 5.0)
101
+ False
102
102
"""
103
103
safe = 0
104
104
not_safe = 0
@@ -107,50 +107,54 @@ def data_safety_checker(list_vote: list, actual_result: float) -> None:
107
107
safe = not_safe + 1
108
108
else :
109
109
if abs (abs (i ) - abs (actual_result )) <= 0.1 :
110
- safe = safe + 1
110
+ safe += 1
111
111
else :
112
- not_safe = not_safe + 1
113
- print ( f"Today's data is { 'not ' if safe <= not_safe else '' } safe." )
112
+ not_safe += 1
113
+ return safe > not_safe
114
114
115
115
116
- # data_input_df = pd.read_csv("ex_data.csv", header=None)
117
- data_input = [[18231 , 0.0 , 1 ], [22621 , 1.0 , 2 ], [15675 , 0.0 , 3 ], [23583 , 1.0 , 4 ]]
118
- data_input_df = pd .DataFrame (data_input , columns = ["total_user" , "total_even" , "days" ])
116
+ if __name__ == "__main__" :
117
+ # data_input_df = pd.read_csv("ex_data.csv", header=None)
118
+ data_input = [[18231 , 0.0 , 1 ], [22621 , 1.0 , 2 ], [15675 , 0.0 , 3 ], [23583 , 1.0 , 4 ]]
119
+ data_input_df = pd .DataFrame (
120
+ data_input , columns = ["total_user" , "total_even" , "days" ]
121
+ )
119
122
120
- """
121
- data column = total user in a day, how much online event held in one day,
122
- what day is that(sunday-saturday)
123
- """
123
+ """
124
+ data column = total user in a day, how much online event held in one day,
125
+ what day is that(sunday-saturday)
126
+ """
124
127
125
- # start normalization
126
- normalize_df = Normalizer ().fit_transform (data_input_df .values )
127
- # split data
128
- total_date = normalize_df [:, 2 ].tolist ()
129
- total_user = normalize_df [:, 0 ].tolist ()
130
- total_match = normalize_df [:, 1 ].tolist ()
131
-
132
- # for svr (input variable = total date and total match)
133
- x = normalize_df [:, [1 , 2 ]].tolist ()
134
- x_train = x [: len (x ) - 1 ]
135
- x_test = x [len (x ) - 1 :]
136
-
137
- # for linear reression & sarimax
138
- trn_date = total_date [: len (total_date ) - 1 ]
139
- trn_user = total_user [: len (total_user ) - 1 ]
140
- trn_match = total_match [: len (total_match ) - 1 ]
141
-
142
- tst_date = total_date [len (total_date ) - 1 :]
143
- tst_user = total_user [len (total_user ) - 1 :]
144
- tst_match = total_match [len (total_match ) - 1 :]
145
-
146
-
147
- # voting system with forecasting
148
- res_vote = []
149
- res_vote .append (
150
- linear_regression_prediction (trn_date , trn_user , trn_match , tst_date , tst_match )
151
- )
152
- res_vote .append (sarimax_predictor (trn_user , trn_match , tst_match ))
153
- res_vote .append (support_vector_regressor (x_train , x_test , trn_user ))
154
-
155
- # check the safety of todays'data^^
156
- data_safety_checker (res_vote , tst_user )
128
+ # start normalization
129
+ normalize_df = Normalizer ().fit_transform (data_input_df .values )
130
+ # split data
131
+ total_date = normalize_df [:, 2 ].tolist ()
132
+ total_user = normalize_df [:, 0 ].tolist ()
133
+ total_match = normalize_df [:, 1 ].tolist ()
134
+
135
+ # for svr (input variable = total date and total match)
136
+ x = normalize_df [:, [1 , 2 ]].tolist ()
137
+ x_train = x [: len (x ) - 1 ]
138
+ x_test = x [len (x ) - 1 :]
139
+
140
+ # for linear regression & sarimax
141
+ trn_date = total_date [: len (total_date ) - 1 ]
142
+ trn_user = total_user [: len (total_user ) - 1 ]
143
+ trn_match = total_match [: len (total_match ) - 1 ]
144
+
145
+ tst_date = total_date [len (total_date ) - 1 :]
146
+ tst_user = total_user [len (total_user ) - 1 :]
147
+ tst_match = total_match [len (total_match ) - 1 :]
148
+
149
+ # voting system with forecasting
150
+ res_vote = [
151
+ linear_regression_prediction (
152
+ trn_date , trn_user , trn_match , tst_date , tst_match
153
+ ),
154
+ sarimax_predictor (trn_user , trn_match , tst_match ),
155
+ support_vector_regressor (x_train , x_test , trn_user ),
156
+ ]
157
+
158
+ # check the safety of today's data (tst_user is a one-element list, so pass its scalar value)
159
+ not_str = "" if data_safety_checker (res_vote , tst_user [0 ]) else "not "
160
+ print (f"Today's data is {not_str}safe." )
0 commit comments