diff --git a/docs/source/whatsnew/v.0.9.1.txt b/docs/source/whatsnew/v.0.9.1.txt index 3ec1cb44..3e149ffd 100644 --- a/docs/source/whatsnew/v.0.9.1.txt +++ b/docs/source/whatsnew/v.0.9.1.txt @@ -19,8 +19,9 @@ Enhancements - Added read_all_boards() method for MOEX that returns data from every trading board (ver. 0.9.0 behaviour) - Docs for MOEX reedited +- Added CSV response serialisation for Tiingo APIs (:issue:`821`) -.. _whatsnew_080.bug_fixes: +.. _whatsnew_091.bug_fixes: Bug Fixes ~~~~~~~~~ @@ -29,4 +30,5 @@ Bug Fixes Contributors ~~~~~~~~~~~~ -- Dmitry Alekseev \ No newline at end of file +- Dmitry Alekseev +- Tomasz Chodakowski \ No newline at end of file diff --git a/pandas_datareader/base.py b/pandas_datareader/base.py index 4d4148df..251ba8cb 100644 --- a/pandas_datareader/base.py +++ b/pandas_datareader/base.py @@ -111,11 +111,11 @@ def _read_one_data(self, url, params): raise NotImplementedError(self._format) return self._read_lines(out) - def _read_url_as_StringIO(self, url, params=None): + def _read_url_as_StringIO(self, url, params=None, headers=None): """ Open url (and retry) """ - response = self._get_response(url, params=params) + response = self._get_response(url, params=params, headers=headers) text = self._sanitize_response(response) out = StringIO() if len(text) == 0: diff --git a/pandas_datareader/tests/test_tiingo.py b/pandas_datareader/tests/test_tiingo.py index a3f559ae..0fd27a63 100644 --- a/pandas_datareader/tests/test_tiingo.py +++ b/pandas_datareader/tests/test_tiingo.py @@ -26,9 +26,18 @@ def symbols(request): return request.param +formats = ["csv", "json"] +format_ids = list(map(str, formats)) + + +@pytest.fixture(params=formats, ids=format_ids) +def formats(request): + return request.param + + @pytest.mark.skipif(TEST_API_KEY is None, reason="TIINGO_API_KEY not set") -def test_tiingo_quote(symbols): - df = TiingoQuoteReader(symbols=symbols).read() +def test_tiingo_quote(symbols, formats): + df = TiingoQuoteReader(symbols=symbols, response_format=formats).read() assert isinstance(df, pd.DataFrame) if isinstance(symbols, str): symbols = [symbols] @@ -36,8 +45,8 @@ def test_tiingo_quote(symbols): @pytest.mark.skipif(TEST_API_KEY is None, reason="TIINGO_API_KEY not set") -def test_tiingo_historical(symbols): - df = TiingoDailyReader(symbols=symbols).read() +def test_tiingo_historical(symbols, formats): + df = TiingoDailyReader(symbols=symbols, response_format=formats).read() assert isinstance(df, pd.DataFrame) if isinstance(symbols, str): symbols = [symbols] @@ -45,8 +54,8 @@ def test_tiingo_historical(symbols): @pytest.mark.skipif(TEST_API_KEY is None, reason="TIINGO_API_KEY not set") -def test_tiingo_iex_historical(symbols): - df = TiingoIEXHistoricalReader(symbols=symbols).read() +def test_tiingo_iex_historical(symbols, formats): + df = TiingoIEXHistoricalReader(symbols=symbols, response_format=formats).read() df.head() assert isinstance(df, pd.DataFrame) if isinstance(symbols, str): @@ -74,7 +83,9 @@ def test_tiingo_no_api_key(symbols): @pytest.mark.skipif( - pd.__version__ == "0.19.2", reason="pandas 0.19.2 does not like this file format" + pd.__version__ == "0.19.2", + reason="pandas 0.19.2 does not like\ + this file format", ) def test_tiingo_stock_symbols(): sym = get_tiingo_symbols() diff --git a/pandas_datareader/tiingo.py b/pandas_datareader/tiingo.py index 1ab5af56..6b375956 100644 --- a/pandas_datareader/tiingo.py +++ b/pandas_datareader/tiingo.py @@ -4,6 +4,8 @@ from pandas_datareader.base import _BaseReader +TIINGO_API_URL_BASE = "https://api.tiingo.com" + def get_tiingo_symbols(): """ @@ -34,11 +36,11 @@ class TiingoIEXHistoricalReader(_BaseReader): ---------- symbols : {str, List[str]} String symbol or list of symbols - start : string, int, date, datetime, Timestamp + start : {string, int, date, datetime, Timestamp} Starting date. Parses many different kind of date - representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to - 20 years before current date. - end : string, int, date, datetime, Timestamp + representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). + Defaults to 20 years before current date. + end : {string, int, date, datetime, Timestamp} Ending date retry_count : int, default 3 Number of times to retry query request. @@ -47,9 +49,14 @@ class TiingoIEXHistoricalReader(_BaseReader): session : Session, default None requests.sessions.Session instance to be used freq : {str, None} - Re-sample frequency. Format is # + (min/hour); e.g. "15min" or "4hour". - If no value is provided, defaults to 5min. The minimum value is "1min". - Units in minutes (min) and hours (hour) are accepted. + Re-sample frequency. Format is #min/hour; e.g. "15min" or "4hour". + If no value is provided, defaults to 5min. The minimum value is "1min". + Units in minutes (min) and hours (hour) are accepted. + response_format : str, default 'json' + Specifies format of response data returned by the underlying + Tiingo REST API. Acceptable values are 'json' and 'csv'. + Use of 'csv' results in smaller message payload, less bandwidth, + and may delay the time when client hits API's bandwidth limit. api_key : str, optional Tiingo API key . If not provided the environmental variable TIINGO_API_KEY is read. The API key is *required*. @@ -65,6 +72,7 @@ def __init__( timeout=30, session=None, freq=None, + response_format="json", api_key=None, ): super().__init__( @@ -83,12 +91,15 @@ def __init__( "environmental variable TIINGO_API_KEY." ) self.api_key = api_key + if response_format not in ["json", "csv"]: + raise ValueError("Acceptable values are 'json' and 'csv'") + self.response_format = response_format self._concat_axis = 0 @property def url(self): """API URL""" - _url = "https://api.tiingo.com/iex/{ticker}/prices" + _url = TIINGO_API_URL_BASE + "/iex/{ticker}/prices" return _url.format(ticker=self._symbol) @property @@ -98,7 +109,7 @@ def params(self): "startDate": self.start.strftime("%Y-%m-%d"), "endDate": self.end.strftime("%Y-%m-%d"), "resampleFreq": self.freq, - "format": "json", + "format": self.response_format, } def _get_crumb(self, *args): @@ -106,15 +117,22 @@ def _get_crumb(self, *args): def _read_one_data(self, url, params): """ read one data from specified URL """ + content_type = ( + "application/json" if self.response_format == "json" else "text/csv" + ) headers = { - "Content-Type": "application/json", + "Content-Type": content_type, "Authorization": "Token " + self.api_key, } - out = self._get_response(url, params=params, headers=headers).json() + out = None + if self.response_format == "json": + out = self._get_response(url, params=params, headers=headers).json() + elif self.response_format == "csv": + out = self._read_url_as_StringIO(url, params=params, headers=headers) return self._read_lines(out) def _read_lines(self, out): - df = pd.DataFrame(out) + df = pd.DataFrame(out) if self.response_format == "json" else pd.read_csv(out) df["symbol"] = self._symbol df["date"] = pd.to_datetime(df["date"]) df = df.set_index(["symbol", "date"]) @@ -140,11 +158,11 @@ class TiingoDailyReader(_BaseReader): ---------- symbols : {str, List[str]} String symbol or list of symbols - start : string, int, date, datetime, Timestamp + start : {string, int, date, datetime, Timestamp} Starting date, timestamp. Parses many different kind of date representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Default starting date is 5 years before current date. - end : string, int, date, datetime, Timestamp + end : {string, int, date, datetime, Timestamp} Ending date, timestamp. Same format as starting date. retry_count : int, default 3 Number of times to retry query request. @@ -154,6 +172,11 @@ class TiingoDailyReader(_BaseReader): requests.sessions.Session instance to be used freq : {str, None} Not used. + response_format : str, default 'json' + Specifies format of response data returned by the underlying + Tiingo REST API. Acceptable values are 'json' and 'csv'. + Use of 'csv' results in smaller message payload, less bandwidth, + and may delay the time when client hits API's bandwidth limit. api_key : str, optional Tiingo API key . If not provided the environmental variable TIINGO_API_KEY is read. The API key is *required*. @@ -169,6 +192,7 @@ def __init__( timeout=30, session=None, freq=None, + response_format="json", api_key=None, ): super(TiingoDailyReader, self).__init__( @@ -186,12 +210,15 @@ def __init__( "environmental variable TIINGO_API_KEY." ) self.api_key = api_key + if response_format not in ["json", "csv"]: + raise ValueError("Acceptable values are 'json' and 'csv'") + self.response_format = response_format self._concat_axis = 0 @property def url(self): """API URL""" - _url = "https://api.tiingo.com/tiingo/daily/{ticker}/prices" + _url = TIINGO_API_URL_BASE + "/tiingo/daily/{ticker}/prices" return _url.format(ticker=self._symbol) @property @@ -200,7 +227,7 @@ def params(self): return { "startDate": self.start.strftime("%Y-%m-%d"), "endDate": self.end.strftime("%Y-%m-%d"), - "format": "json", + "format": self.response_format, } def _get_crumb(self, *args): @@ -208,15 +235,22 @@ def _get_crumb(self, *args): def _read_one_data(self, url, params): """ read one data from specified URL """ + content_type = ( + "application/json" if self.response_format == "json" else "text/csv" + ) headers = { - "Content-Type": "application/json", + "Content-Type": content_type, "Authorization": "Token " + self.api_key, } - out = self._get_response(url, params=params, headers=headers).json() + out = None + if self.response_format == "json": + out = self._get_response(url, params=params, headers=headers).json() + elif self.response_format == "csv": + out = self._read_url_as_StringIO(url, params=params, headers=headers) return self._read_lines(out) def _read_lines(self, out): - df = pd.DataFrame(out) + df = pd.DataFrame(out) if self.response_format == "json" else pd.read_csv(out) df["symbol"] = self._symbol df["date"] = pd.to_datetime(df["date"]) df = df.set_index(["symbol", "date"]) @@ -242,9 +276,9 @@ class TiingoMetaDataReader(TiingoDailyReader): ---------- symbols : {str, List[str]} String symbol or list of symbols - start : string, int, date, datetime, Timestamp + start : {string, int, date, datetime, Timestamp} Not used. - end : string, int, date, datetime, Timestamp + end : {string, int, date, datetime, Timestamp} Not used. retry_count : int, default 3 Number of times to retry query request. @@ -272,14 +306,23 @@ def __init__( api_key=None, ): super(TiingoMetaDataReader, self).__init__( - symbols, start, end, retry_count, pause, timeout, session, freq, api_key + symbols, + start, + end, + retry_count, + pause, + timeout, + session, + freq, + response_format="json", + api_key=api_key, ) self._concat_axis = 1 @property def url(self): """API URL""" - _url = "https://api.tiingo.com/tiingo/daily/{ticker}" + _url = TIINGO_API_URL_BASE + "/tiingo/daily/{ticker}" return _url.format(ticker=self._symbol) @property @@ -300,9 +343,9 @@ class TiingoQuoteReader(TiingoDailyReader): ---------- symbols : {str, List[str]} String symbol or list of symbols - start : string, int, date, datetime, Timestamp + start : {string, int, date, datetime, Timestamp} Not used. - end : string, int, date, datetime, Timestamp + end : {string, int, date, datetime, Timestamp} Not used. retry_count : int, default 3 Number of times to retry query request. @@ -312,6 +355,11 @@ class TiingoQuoteReader(TiingoDailyReader): requests.sessions.Session instance to be used freq : {str, None} Not used. + response_format : str, default 'json' + Specifies format of response data returned by the underlying + Tiingo REST API. Acceptable values are 'json' and 'csv'. + Use of 'csv' results in smaller message payload, less bandwidth, + and may delay the time when client hits API's bandwidth limit. api_key : str, optional Tiingo API key . If not provided the environmental variable TIINGO_API_KEY is read. The API key is *required*. @@ -322,6 +370,33 @@ class TiingoQuoteReader(TiingoDailyReader): the latest data available for each symbol. """ + def __init__( + self, + symbols, + start=None, + end=None, + retry_count=3, + pause=0.1, + timeout=30, + session=None, + freq=None, + response_format="json", + api_key=None, + ): + super(TiingoQuoteReader, self).__init__( + symbols, + start, + end, + retry_count, + pause, + timeout, + session, + freq, + response_format, + api_key, + ) + @property def params(self): - return None + """Parameters to use in API calls""" + return {"format": self.response_format}