diff --git a/docs/source/readers/bcb.rst b/docs/source/readers/bcb.rst
new file mode 100644
index 00000000..d8c6f3c1
--- /dev/null
+++ b/docs/source/readers/bcb.rst
@@ -0,0 +1,10 @@
+Brazilian Central Bank (BCB)
+------------------------------------
+
+.. py:module:: pandas_datareader.bcb
+
+.. autoclass:: BCBReader
+   :members:
+   :inherited-members: read
+
+
diff --git a/docs/source/readers/index.rst b/docs/source/readers/index.rst
index 310f4175..d7a7693b 100644
--- a/docs/source/readers/index.rst
+++ b/docs/source/readers/index.rst
@@ -22,3 +22,4 @@ Data Readers
     tsp
     world-bank
     yahoo
+    bcb
diff --git a/docs/source/remote_data.rst b/docs/source/remote_data.rst
index 70793d62..09c0faf5 100644
--- a/docs/source/remote_data.rst
+++ b/docs/source/remote_data.rst
@@ -44,6 +44,7 @@ Currently the following sources are supported:
     - :ref:`Tiingo`
     - :ref:`World Bank`
     - :ref:`Yahoo Finance`
+    - :ref:`Brazilian Central Bank (BCB)<remote_data.bcb>`
 
 It should be noted, that various sources support different kinds of data, so not all sources implement the same methods and the data elements returned might also differ.
 
@@ -762,3 +763,27 @@ The following endpoints are available:
 
     dividends = web.DataReader('IBM', 'yahoo-dividends', start, end)
     dividends.head()
+
+.. _remote_data.bcb:
+
+BCB
+====
+
+An interface to the time series published by the
+`Brazilian Central Bank <https://www.bcb.gov.br/en>`__ (BCB).
+It queries the `Brazilian Central Bank web services <https://dadosabertos.bcb.gov.br/>`__
+and returns the requested series as a pandas ``DataFrame``.
+
+.. 
ipython:: python
+
+    import pandas_datareader.data as web
+    from datetime import datetime
+    start = datetime(2021, 1, 4)
+    end = datetime(2021, 9, 1)
+    USD_BRL = web.DataReader(1, "bcb", start, end)
+    USD_BRL.head()
+
+    # Multiple series:
+    # IPCA: 433 IGPM: 189
+    inflation = web.DataReader(["433", "189"], "bcb", start, end)
+    inflation.head()
diff --git a/pandas_datareader/__init__.py b/pandas_datareader/__init__.py
index a792a806..678af089 100644
--- a/pandas_datareader/__init__.py
+++ b/pandas_datareader/__init__.py
@@ -28,6 +28,7 @@
     get_records_iex,
     get_summary_iex,
     get_tops_iex,
+    get_data_bcb,
 )
 
 PKG = os.path.dirname(__file__)
@@ -63,6 +64,7 @@
     "get_iex_data_tiingo",
     "get_data_alphavantage",
     "test",
+    "get_data_bcb",
 ]
 
 
diff --git a/pandas_datareader/bcb.py b/pandas_datareader/bcb.py
new file mode 100644
index 00000000..a3305680
--- /dev/null
+++ b/pandas_datareader/bcb.py
@@ -0,0 +1,115 @@
+"""Reader for the Brazilian Central Bank (BCB) time series web service."""
+
+import pandas as pd
+
+from pandas_datareader._utils import RemoteDataError
+from pandas_datareader.base import _BaseReader
+from pandas_datareader.compat import is_list_like
+
+
+class BCBReader(_BaseReader):
+    """
+    Returns DataFrame of historical data from the Brazilian Central Bank
+    open data service, over date range, start to end.
+
+    Parameters
+    ----------
+    symbols : {int, str, List[str], List[int]}
+        The symbols are integer codes related to available time series.
+        This argument can be obtained in the SGS system site.
+        In this site searches can be executed in order to find out the desired
+        series and use the series code in the symbols argument.
+    start : string, int, date, datetime, Timestamp
+        Starting date. Parses many different kinds of date
+        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
+    end : string, int, date, datetime, Timestamp
+        Ending date
+    retry_count : int, default 3
+        Number of times to retry query request.
+    pause : float, default 0.1
+        Time, in seconds, of the pause between retries.
+    session : Session, default None
+        requests.sessions.Session instance to be used.
+    freq : {str, None}
+        Ignored
+    """
+
+    @property
+    def url(self):
+        """API URL template; ``{}`` is replaced by the series code"""
+        return "https://api.bcb.gov.br/dados/serie/bcdata.sgs.{}/dados"
+
+    @property
+    def params(self):
+        """Parameters to use in API calls"""
+        return {
+            "formato": "json",
+            "dataInicial": self.start.strftime("%d/%m/%Y"),
+            "dataFinal": self.end.strftime("%d/%m/%Y"),
+        }
+
+    def read(self):
+        """Read data from connector
+
+        Returns
+        -------
+        data : DataFrame
+            Indexed by date. If multiple codes are passed in "symbols" then
+            the index of the DataFrame is the outer join of the indices of
+            each series.
+
+        Raises
+        ------
+        RemoteDataError
+            If a response cannot be parsed into dated observations.
+        """
+        try:
+            return self._read()
+        finally:
+            self.close()
+
+    def _read(self):
+        """read data from many URLs if necessary and joins into one DataFrame"""
+        symbols = self.symbols
+        names = symbols if is_list_like(symbols) else [symbols]
+        dfs = [
+            self._read_single_request(n, self.url.format(n), self.params)
+            for n in names
+        ]
+        return pd.concat(dfs, axis=1, join="outer")
+
+    def _read_single_request(self, symbol, url, params):
+        """read one series from the specified URL into a date-indexed DataFrame
+
+        Raises RemoteDataError when the response cannot be parsed by
+        ``pandas.read_json`` or has no ``data`` (date) column.
+        """
+        out = self._read_url_as_StringIO(url, params=params)
+        try:
+            df = pd.read_json(out)
+        except ValueError:
+            # surface the raw response body so the caller sees the API message
+            out.seek(0)
+            msg = out.read()
+            raise RemoteDataError(
+                "message: {}, symbol: {}".format(msg, symbol)
+            ) from None
+
+        # map the API field names onto the column names exposed to callers
+        cns = {"data": "date", "valor": str(symbol), "datafim": "end_date"}
+        df = df.rename(columns=cns)
+
+        if "date" not in df:
+            # e.g. an empty payload: set_index("date") would raise a bare
+            # KeyError, so report it the same way as other bad responses
+            raise RemoteDataError(
+                "message: no dated observations returned, symbol: {}".format(
+                    symbol
+                )
+            )
+
+        df["date"] = pd.to_datetime(df["date"], format="%d/%m/%Y")
+        if "end_date" in df:
+            df["end_date"] = pd.to_datetime(df["end_date"], format="%d/%m/%Y")
+
+        return df.set_index("date")
diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py
index c2d6223a..427d43bb 100644
--- a/pandas_datareader/data.py
+++ b/pandas_datareader/data.py
@@ -34,6 +34,7 @@
     TiingoQuoteReader,
 )
from pandas_datareader.yahoo.actions import YahooActionReader, YahooDivReader +from pandas_datareader.bcb import BCBReader from pandas_datareader.yahoo.components import _get_data as get_components_yahoo from pandas_datareader.yahoo.daily import YahooDailyReader from pandas_datareader.yahoo.options import Options as YahooOptions @@ -61,6 +62,7 @@ "get_dailysummary_iex", "get_data_stooq", "DataReader", + "get_data_bcb", ] @@ -270,6 +272,10 @@ def get_iex_book(*args, **kwargs): return IEXDeep(*args, **kwargs).read() +def get_data_bcb(*args, **kwargs): + return BCBReader(*args, **kwargs).read() + + @deprecate_kwarg("access_key", "api_key") def DataReader( name, @@ -360,6 +366,7 @@ def DataReader( "av-intraday", "econdb", "naver", + "bcb", ] if data_source not in expected_source: @@ -668,6 +675,16 @@ def DataReader( session=session, ).read() + elif data_source == "bcb": + return BCBReader( + symbols=name, + start=start, + end=end, + retry_count=retry_count, + pause=pause, + session=session, + ).read() + else: msg = "data_source=%r is not implemented" % data_source raise NotImplementedError(msg) diff --git a/pandas_datareader/tests/test_bcb.py b/pandas_datareader/tests/test_bcb.py new file mode 100644 index 00000000..95a97db7 --- /dev/null +++ b/pandas_datareader/tests/test_bcb.py @@ -0,0 +1,78 @@ +from datetime import datetime + +import pandas as pd +import pytest + +from pandas_datareader import data as web +from pandas_datareader._utils import RemoteDataError + +pytestmark = pytest.mark.stable + + +class TestBCB(object): + def test_bcb(self): + + start = datetime(2021, 1, 4) + end = datetime(2021, 9, 1) + + df = web.DataReader("1", "bcb", start, end) + ts = df["1"] + + assert ts.index.name == "date" + assert ts.index[0] == pd.to_datetime("2021-01-04") + assert ts.index[-1] == pd.to_datetime("2021-09-01") + assert ts.name == "1" + assert len(ts) == 168 + + def test_bcb_int_symbol(self): + + start = datetime(2021, 1, 4) + end = datetime(2021, 9, 1) + + df = 
web.DataReader(1, "bcb", start, end) + ts = df["1"] + + assert ts.index.name == "date" + assert ts.index[0] == pd.to_datetime("2021-01-04") + assert ts.index[-1] == pd.to_datetime("2021-09-01") + assert ts.name == "1" + assert len(ts) == 168 + + def test_bcb_multi(self): + names = ["433", "189"] + start = datetime(2002, 1, 1) + end = datetime(2021, 1, 1) + + df = web.DataReader(names, "bcb", start, end) + + assert df.index.name == "date" + assert df.index[0] == pd.to_datetime("2002-01-01") + assert df.index[-1] == pd.to_datetime("2021-01-01") + assert list(df.columns) == names + assert df.shape[0] == 229 + + def test_bcb_multi_bad_series(self): + names = ["NOTAREALSERIES", "1", "ALSOFAKE"] + with pytest.raises(RemoteDataError): + web.DataReader(names, data_source="bcb") + + def test_bcb_raises_exception(self): + # Raises an exception when DataReader can't + # get the series. + + start = datetime(2021, 1, 4) + end = datetime(2021, 9, 1) + + with pytest.raises(RemoteDataError): + web.DataReader("NON EXISTENT SERIES", "bcb", start, end) + + def test_bcb_helper(self): + start = datetime(2021, 1, 4) + end = datetime(2021, 9, 1) + df = web.get_data_bcb("1", start, end) + ts = df["1"] + assert ts.index.name == "date" + assert ts.index[0] == pd.to_datetime("2021-01-04") + assert ts.index[-1] == pd.to_datetime("2021-09-01") + assert ts.name == "1" + assert len(ts) == 168