Skip to content

Latest commit

 

History

History
308 lines (219 loc) · 8.65 KB

19-aggregration.md

File metadata and controls

308 lines (219 loc) · 8.65 KB

Python Pandas - Aggregations

Once the rolling, expanding and ewm objects are created, several methods are available to perform aggregations on data.

Applying Aggregations on DataFrame

Let us create a DataFrame and apply aggregations on it.

import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(10, 4),
      index = pd.date_range('1/1/2000', periods=10),
      columns = ['A', 'B', 'C', 'D'])

print(df)
                   A         B         C         D
2000-01-01 -0.791015 -0.891116  1.179140  0.575973
2000-01-02 -0.128975  0.685837 -1.722596  0.931329
2000-01-03 -0.291138 -2.729723 -1.280399 -1.385130
2000-01-04 -0.036785  0.016236 -0.848175  0.822850
2000-01-05  1.503470  0.246917 -0.687813 -0.123178
2000-01-06  0.278860  0.589813 -0.454337  0.129977
2000-01-07 -0.149007  0.340031  0.424967 -0.087554
2000-01-08  1.086857  1.811954  2.096407  0.054886
2000-01-09  1.078021 -0.218671 -1.565469 -1.425709
2000-01-10  0.082319 -0.202146  0.965826 -0.415810
r = df.rolling(window=3,min_periods=1)
print(r)
Rolling [window=3,min_periods=1,center=False,axis=0]

We can aggregate by passing a function to the entire DataFrame, or select a column via the standard get item method.

Apply Aggregation on a Whole Dataframe

import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(10, 4),
      index = pd.date_range('1/1/2000', periods=10),
      columns = ['A', 'B', 'C', 'D'])
print(df)
                   A         B         C         D
2000-01-01 -0.266193 -1.438066  0.152435  1.365189
2000-01-02  0.251260 -0.919796  1.515478 -0.525154
2000-01-03  1.059793 -0.559768  0.642240  0.280902
2000-01-04  0.177042  0.011196  1.878813  0.210461
2000-01-05 -0.178306 -0.666430  0.698904 -0.125190
2000-01-06  2.042165 -0.480374 -0.747845  0.698602
2000-01-07 -0.395439 -0.115239 -1.445039 -0.246990
2000-01-08 -1.263854  0.224963  1.413404  1.162996
2000-01-09  1.017146  0.422710 -1.043923 -0.810124
2000-01-10  0.159880  0.490542 -0.187107 -1.715829
r = df.rolling(window=3,min_periods=1)
print(r.aggregate(np.sum))
                   A         B         C         D
2000-01-01 -0.266193 -1.438066  0.152435  1.365189
2000-01-02 -0.014932 -2.357863  1.667913  0.840034
2000-01-03  1.044860 -2.917631  2.310153  1.120937
2000-01-04  1.488094 -1.468368  4.036531 -0.033791
2000-01-05  1.058529 -1.215002  3.219957  0.366173
2000-01-06  2.040901 -1.135608  1.829871  0.783873
2000-01-07  1.468421 -1.262044 -1.493980  0.326423
2000-01-08  0.382873 -0.370651 -0.779480  1.614608
2000-01-09 -0.642146  0.532434 -1.075558  0.105883
2000-01-10 -0.086827  1.138216  0.182374 -1.362956

Apply Aggregation on a Single Column of a Dataframe

import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(10, 4),
      index = pd.date_range('1/1/2000', periods=10),
      columns = ['A', 'B', 'C', 'D'])
print(df)
                   A         B         C         D
2000-01-01  2.403442 -0.298850 -0.772055  0.692327
2000-01-02  0.569511 -0.397439  1.408041  0.211867
2000-01-03 -1.359236  2.071963  0.381805  0.141859
2000-01-04 -1.301256 -1.316403 -1.391709  0.141661
2000-01-05  0.644010 -0.059554  0.942052 -1.851403
2000-01-06 -0.968880  0.993466 -1.375534  0.500339
2000-01-07 -0.003384  0.520629 -0.167359  1.022379
2000-01-08  1.047418  0.007508  1.131528 -0.870450
2000-01-09  0.318589  0.418239  0.904083 -0.556089
2000-01-10 -0.734602  0.119719 -1.079453  0.325532
r = df.rolling(window=3,min_periods=1)
print( r['A'].aggregate(np.sum))
2000-01-01    2.403442
2000-01-02    2.972952
2000-01-03    1.613717
2000-01-04   -2.090981
2000-01-05   -2.016481
2000-01-06   -1.626125
2000-01-07   -0.328253
2000-01-08    0.075155
2000-01-09    1.362623
2000-01-10    0.631405
Freq: D, Name: A, dtype: float64

Apply Aggregation on Multiple Columns of a DataFrame

import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(10, 4),
      index = pd.date_range('1/1/2000', periods=10),
      columns = ['A', 'B', 'C', 'D'])
print(df)
                   A         B         C         D
2000-01-01  0.715572 -0.718455 -0.947755  0.360172
2000-01-02 -0.706988  0.444163 -0.509705  1.184052
2000-01-03 -0.825585  0.761838  0.487173  0.806122
2000-01-04 -1.563731 -1.005096  0.096712 -0.808190
2000-01-05 -0.755445 -1.457809  0.417160 -1.086240
2000-01-06  1.132278 -0.913999 -1.314896 -0.971240
2000-01-07  1.875460 -0.292512 -0.169374 -0.385097
2000-01-08 -0.947759 -0.881101  0.285449 -0.243586
2000-01-09 -0.264749  0.813067  3.178820 -0.547630
2000-01-10  1.029142  0.610409  1.793412 -0.072775
r = df.rolling(window=3,min_periods=1)
print(r[['A','B']].aggregate(np.sum))
                   A         B
2000-01-01  0.715572 -0.718455
2000-01-02  0.008584 -0.274292
2000-01-03 -0.817001  0.487546
2000-01-04 -3.096304  0.200905
2000-01-05 -3.144761 -1.701067
2000-01-06 -1.186898 -3.376904
2000-01-07  2.252292 -2.664320
2000-01-08  2.059978 -2.087611
2000-01-09  0.662951 -0.360546
2000-01-10 -0.183366  0.542375

Apply Multiple Functions on a Single Column of a DataFrame

import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(10, 4),
      index = pd.date_range('1/1/2000', periods=10),
      columns = ['A', 'B', 'C', 'D'])
print(df)
                   A         B         C         D
2000-01-01  0.259099  0.599357 -0.350738  0.757141
2000-01-02  0.719671 -1.084129  1.286505  1.727894
2000-01-03 -0.659442  2.852627  0.418711  1.175051
2000-01-04 -0.238729  0.539222  0.778531  0.538870
2000-01-05  0.119386  1.827120  0.818790  0.410655
2000-01-06  0.509554  1.193381  1.396668  0.156037
2000-01-07 -0.816783 -0.014754  0.720887 -0.136856
2000-01-08  0.516237 -0.574611 -0.407661  0.671428
2000-01-09 -2.150275  1.712370  0.009917 -1.270771
2000-01-10  0.071425  0.447530  0.298403 -0.254088
r = df.rolling(window=3,min_periods=1)
print(r['A'].aggregate([np.sum,np.mean]))
                 sum      mean
2000-01-01  0.259099  0.259099
2000-01-02  0.978770  0.489385
2000-01-03  0.319328  0.106443
2000-01-04 -0.178499 -0.059500
2000-01-05 -0.778784 -0.259595
2000-01-06  0.390211  0.130070
2000-01-07 -0.187843 -0.062614
2000-01-08  0.209008  0.069669
2000-01-09 -2.450821 -0.816940
2000-01-10 -1.562613 -0.520871

Apply Multiple Functions on Multiple Columns of a DataFrame

import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(10, 4),
      index = pd.date_range('1/1/2000', periods=10),
      columns = ['A', 'B', 'C', 'D'])

print(df)
                   A         B         C         D
2000-01-01 -0.145920  1.132413  0.511370 -0.409158
2000-01-02  0.337770  0.313042 -0.683319 -0.173656
2000-01-03  0.418944 -0.815721 -0.122863 -0.087668
2000-01-04  1.314895 -0.409348 -1.098669  0.967980
2000-01-05  0.510874  1.101084  0.055265 -0.461382
2000-01-06  1.600351  1.072330 -1.857883  1.361481
2000-01-07 -0.165195 -1.443047  0.724313  1.313937
2000-01-08  0.862601  0.774186  0.416693 -0.215918
2000-01-09 -0.380396  1.217961  0.573319 -1.106125
2000-01-10 -0.422808 -1.589851  0.289967 -0.700435
r = df.rolling(window=3,min_periods=1)
print(r[['A','B']].aggregate([np.sum,np.mean]))
                   A                   B          
                 sum      mean       sum      mean
2000-01-01 -0.145920 -0.145920  1.132413  1.132413
2000-01-02  0.191849  0.095925  1.445456  0.722728
2000-01-03  0.610793  0.203598  0.629734  0.209911
2000-01-04  2.071609  0.690536 -0.912027 -0.304009
2000-01-05  2.244713  0.748238 -0.123985 -0.041328
2000-01-06  3.426120  1.142040  1.764066  0.588022
2000-01-07  1.946030  0.648677  0.730367  0.243456
2000-01-08  2.297757  0.765919  0.403469  0.134490
2000-01-09  0.317010  0.105670  0.549100  0.183033
2000-01-10  0.059397  0.019799  0.402296  0.134099

Apply Different Functions to Different Columns of a Dataframe

import pandas as pd
import numpy as np
 
df = pd.DataFrame(np.random.randn(3, 4),
      index = pd.date_range('1/1/2000', periods=3),
      columns = ['A', 'B', 'C', 'D'])
print(df)
r = df.rolling(window=3,min_periods=1)
print r.aggregate({'A' : np.sum,'B' : np.mean})