1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
1
5
from typing import List , Union
2
6
3
7
import pandas as pd
@@ -12,9 +16,6 @@ def __init__(
12
16
* ,
13
17
fill_na_method : str = FillNAMethod .Linear .name ,
14
18
fill_na_value : float = 0.0 ,
15
- window : Union [int , float , str ],
16
- start_time : str = None ,
17
- end_time : str = None ,
18
19
):
19
20
if not hasattr (FillNAMethod , fill_na_method ):
20
21
raise InvalidParameterError (
@@ -25,32 +26,14 @@ def __init__(
25
26
26
27
self .fill_na_method = FillNAMethod [fill_na_method ]
27
28
self .fill_na_value = fill_na_value
28
- try :
29
- self .window = int (window )
30
- except TypeError :
31
- raise InvalidParameterError (f"Cannot convert window to int." )
32
- try :
33
- self .start_time = pd .to_datetime (start_time ).tz_localize (None )
34
- self .end_time = pd .to_datetime (end_time ).tz_localize (None )
35
- except Exception as e :
36
- raise InvalidParameterError (
37
- f"Cannot convert start_time or end_time. { str (e )} ."
38
- )
39
- if self .start_time > self .end_time :
40
- raise InvalidParameterError (f"start_time cannot be later than end_time." )
41
29
42
30
def process(self, data: pd.DataFrame):
    """Sort ``data`` by index and by column label, then fill missing values.

    Args:
        data: The frame to normalise. It is not modified in place; sorted
            copies are produced and passed on.

    Returns:
        The result of ``self.fill_na`` applied to the row- and
        column-sorted frame.

    Raises:
        DataFormatError: If ``data`` is not a ``pandas.DataFrame``.
    """
    if not isinstance(data, pd.DataFrame):
        raise DataFormatError("data must be a pandas.DataFrame")
    # Order rows by index, then columns by label. sort_index(axis=1) is used
    # instead of data[sorted(data.columns)] because label-based selection
    # returns every matching column for each requested label, which
    # multiplies columns whenever a label occurs more than once.
    data = data.sort_index().sort_index(axis=1)
    return self.fill_na(data)
54
37
55
38
def fill_na (self , data : pd .DataFrame ):
56
39
if not isinstance (data , pd .DataFrame ):
@@ -72,20 +55,7 @@ def fill_na(self, data: pd.DataFrame):
72
55
method = "linear" , limit_direction = "both" , axis = 0 , limit = len (data )
73
56
)
74
57
elif self .fill_na_method == FillNAMethod .Fixed :
75
- output_series = data .fillna (self .fill_merge_na_value )
58
+ output_series = data .fillna (self .fill_na_value )
76
59
else :
77
60
output_series = data
78
61
return output_series .fillna (0 )
79
-
80
- def truncate_data (self , data : pd .DataFrame ):
81
- if not isinstance (data , pd .DataFrame ):
82
- raise DataFormatError (f"data must be a pandas.DataFrame" )
83
- effective_df = data .loc [self .start_time : self .end_time ]
84
- if len (effective_df ) == 0 :
85
- raise DataFormatError (f"no effective data." )
86
- first_index = effective_df .index [0 ]
87
- start_loc = max (0 , data .index .get_loc (first_index ) - self .window + 1 )
88
- start_index = data .index [start_loc ]
89
- end_index = self .end_time
90
- data = data .loc [start_index :end_index ]
91
- return data , effective_df .index .to_list ()
0 commit comments