diff --git a/docs/source/readers/index.rst b/docs/source/readers/index.rst
index 310f4175..cb30a714 100644
--- a/docs/source/readers/index.rst
+++ b/docs/source/readers/index.rst
@@ -22,3 +22,4 @@ Data Readers
     tsp
     world-bank
     yahoo
+    tse
diff --git a/docs/source/readers/tse.rst b/docs/source/readers/tse.rst
new file mode 100644
index 00000000..40ff8289
--- /dev/null
+++ b/docs/source/readers/tse.rst
@@ -0,0 +1,7 @@
+Tehran Stock Exchange
+------------------------------------
+
+.. py:module:: pandas_datareader.tse
+.. autoclass:: TSEReader
+   :members:
+   :inherited-members: read
diff --git a/docs/source/remote_data.rst b/docs/source/remote_data.rst
index 70793d62..9ead45e0 100644
--- a/docs/source/remote_data.rst
+++ b/docs/source/remote_data.rst
@@ -44,6 +44,7 @@ Currently the following sources are supported:
     - :ref:`Tiingo`
     - :ref:`World Bank`
     - :ref:`Yahoo Finance`
+    - :ref:`Tehran Stock Exchange<remote_data.tse>`
 
 It should be noted, that various sources support different kinds of data, so not all sources implement the same methods and the data elements returned might also differ.
 
@@ -762,3 +763,31 @@ The following endpoints are available:
 
     dividends = web.DataReader('IBM', 'yahoo-dividends', start, end)
     dividends.head()
+
+.. _remote_data.tse:
+
+Tehran Stock Exchange
+=====================
+An interface to structure the information provided by
+`Tehran Stock Exchange <http://www.tsetmc.com>`_
+
+.. ipython:: python
+
+    import pandas_datareader.data as web
+    from datetime import datetime
+    start = datetime(2021, 1, 1)
+    end = datetime.today()
+    f = web.DataReader("نوری", "tse", start, end)
+    f.head()
+
+    # Adjust prices
+    f = web.get_data_tse("نوری", start, end, adjust_price=True)
+    f.head()
+
+    # Resample Close price weekly or monthly
+    f = web.get_data_tse("نوری", start, end, interval="m")
+    f.head()
+
+    # Multiple series:
+    multi = web.DataReader(["نوری", "برکت"], "tse", start, end)
+    multi.head()
\ No newline at end of file
diff --git a/pandas_datareader/__init__.py b/pandas_datareader/__init__.py
index a792a806..69c61016 100644
--- a/pandas_datareader/__init__.py
+++ b/pandas_datareader/__init__.py
@@ -15,6 +15,7 @@
     get_data_quandl,
     get_data_stooq,
     get_data_tiingo,
+    get_data_tse,
     get_data_yahoo,
     get_data_yahoo_actions,
     get_iex_book,
@@ -40,6 +41,7 @@
     "get_components_yahoo",
     "get_data_enigma",
     "get_data_famafrench",
+    "get_data_tse",
     "get_data_yahoo",
     "get_data_yahoo_actions",
     "get_quote_yahoo",
diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py
index c2d6223a..4c8a6c45 100644
--- a/pandas_datareader/data.py
+++ b/pandas_datareader/data.py
@@ -33,6 +33,7 @@
     TiingoIEXHistoricalReader,
     TiingoQuoteReader,
 )
+from pandas_datareader.tse import TSEReader
 from pandas_datareader.yahoo.actions import YahooActionReader, YahooDivReader
 from pandas_datareader.yahoo.components import _get_data as get_components_yahoo
 from pandas_datareader.yahoo.daily import YahooDailyReader
@@ -46,6 +47,7 @@
     "get_data_fred",
     "get_data_moex",
     "get_data_quandl",
+    "get_data_tse",
     "get_data_yahoo",
     "get_data_yahoo_actions",
     "get_nasdaq_symbols",
@@ -270,6 +272,38 @@ def get_iex_book(*args, **kwargs):
     return IEXDeep(*args, **kwargs).read()
 
 
+def get_data_tse(*args, **kwargs):
+    """
+    Tehran stock exchange daily data
+
+    Returns DataFrame of historical data from the Tehran Stock Exchange
+    open data service, over date range, start to end.
+
+    Parameters
+    ----------
+    symbols : {int, str, List[str], List[int]}
+        The symbols can be persian symbol code or instrument id.
+        This argument can be obtained from tsetmc.com site.
+    start : string, int, date, datetime, Timestamp
+        Starting date, default 5 years ago. Parses many different kind of
+        date representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
+    end : string, int, date, datetime, Timestamp
+        Ending date
+    retry_count : int, default 3
+        Number of times to retry query request.
+    pause : float, default 0.1
+        Time, in seconds, of the pause between retries.
+    session : Session, default None
+        requests.sessions.Session instance to be used.
+    adjust_price : bool, default False
+        If True, adjusts all prices in hist_data ('Open', 'High', 'Low',
+        'Close') based on 'Adj Close' and 'Yesterday' price.
+    interval : str, default 'd'
+        Sampling interval: 'd' (daily), 'w' (weekly) or 'm' (monthly).
+    """
+    return TSEReader(*args, **kwargs).read()
+
+
 @deprecate_kwarg("access_key", "api_key")
 def DataReader(
     name,
@@ -360,6 +394,7 @@ def DataReader(
         "av-intraday",
         "econdb",
         "naver",
+        "tse",
     ]
 
     if data_source not in expected_source:
@@ -668,6 +703,19 @@ def DataReader(
             session=session,
         ).read()
 
+    elif data_source == "tse":
+        return TSEReader(
+            symbols=name,
+            start=start,
+            end=end,
+            retry_count=retry_count,
+            pause=pause,
+            session=session,
+            adjust_price=False,
+            chunksize=10,
+            interval="d",
+        ).read()
+
     else:
         msg = "data_source=%r is not implemented" % data_source
         raise NotImplementedError(msg)
diff --git a/pandas_datareader/tests/test_tse.py b/pandas_datareader/tests/test_tse.py
new file mode 100644
index 00000000..92062ddd
--- /dev/null
+++ b/pandas_datareader/tests/test_tse.py
@@ -0,0 +1,57 @@
+from datetime import datetime
+
+import pandas as pd
+import pytest
+
+from pandas_datareader import data as web
+
+pytestmark = pytest.mark.stable
+
+
+class TestTSE(object):
+    @property
+    def start(self):
+        return datetime(2021, 3, 1)
+
+    @property
+    def end(self):
+        return datetime(2021, 9, 15)
+
+    def test_tse(self):
+        df = web.DataReader("نوری", "tse", self.start, self.end)
+        assert df.index.name == "Date"
+        assert df.index[0] == pd.to_datetime(self.start)
+        assert df.index[-1] == pd.to_datetime(self.end)
+        assert len(df) == 123
+
+    def test_tse_int_symbol(self):
+        df = web.DataReader("19040514831923530", "tse", self.start, self.end)
+        assert df.index.name == "Date"
+        assert df.index[0] == pd.to_datetime(self.start)
+        assert df.index[-1] == pd.to_datetime(self.end)
+        assert len(df) == 123
+
+    def test_tse_multi(self):
+        names = ["خصدرا", "زاگرس"]
+        df = web.DataReader(names, "tse", self.start, self.end)
+        assert df.index.name == "Date"
+        assert df.index[0] == pd.to_datetime(self.start)
+        assert df.index[-1] == pd.to_datetime(self.end)
+        assert list(df.columns.get_level_values(1)[0 : len(names)]) == names
+        assert len(df) == 126
+
+    def test_tse_multi_bad_series(self):
+        names = ["NOTAREALSERIES", "نوری", "ALSOFAKE"]
+        with pytest.raises(Exception):
+            web.DataReader(names, data_source="tse")
+
+    def test_tse_raises_exception(self):
+        with pytest.raises(Exception):
+            web.DataReader("NON EXISTENT SERIES", "tse", self.start, self.end)
+
+    def test_tse_helper(self):
+        df = web.get_data_tse("نوری", self.start, self.end)
+        assert df.index.name == "Date"
+        assert df.index[0] == pd.to_datetime(self.start)
+        assert df.index[-1] == pd.to_datetime(self.end)
+        assert len(df) == 123
diff --git a/pandas_datareader/tse.py b/pandas_datareader/tse.py
new file mode 100644
index 00000000..88dfb078
--- /dev/null
+++ b/pandas_datareader/tse.py
@@ -0,0 +1,231 @@
+import pandas as pd
+
+from pandas_datareader.base import _DailyBaseReader
+from pandas_datareader._utils import RemoteDataError, SymbolWarning
+
+_TSE_TICKER_URL = "http://www.tsetmc.com/tsev2/data/Export-txt.aspx"
+_TSE_MARKET_WATCH_INIT_URL = (
+    "http://www.tsetmc.com/tsev2/data/MarketWatchInit.aspx?h=0&r=0"
+)
+# NOTE(review): every key below is the empty string, so this literal collapses
+# to a single entry and no column is ever renamed. The keys look like they were
+# stripped when the patch was copied (presumably the export header's column
+# names) -- restore them from the original change before merging.
+_TSE_FIELD_MAPPINGS = {
+    "": "Date",
+    "": "Open",
+    "": "High",
+    "": "Low",
+    "": "Close",
+    "": "Volume",
+    "": "Value",
+    "": "Count",
+    "": "AdjClose",
+    "": "Yesterday",
+}
+_tse_ticker_cache = None
+
+
+class TSEReader(_DailyBaseReader):
+    """
+    Tehran stock exchange daily data
+
+    Returns DataFrame of historical data from the Tehran Stock Exchange
+    open data service, over date range, start to end.
+
+    Parameters
+    ----------
+    symbols : {int, str, List[str], List[int]}
+        The symbols can be persian symbol code or instrument id.
+        This argument can be obtained from tsetmc.com site.
+    start : string, int, date, datetime, Timestamp
+        Starting date, default 5 years ago. Parses many different kind of
+        date representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
+    end : string, int, date, datetime, Timestamp
+        Ending date
+    retry_count : int, default 3
+        Number of times to retry query request.
+    pause : float, default 0.1
+        Time, in seconds, of the pause between retries.
+    session : Session, default None
+        requests.sessions.Session instance to be used.
+    adjust_price : bool, default False
+        If True, adjusts all prices in hist_data ('Open', 'High', 'Low',
+        'Close') based on 'Adj Close' and 'Yesterday' price.
+    interval : str, default 'd'
+        Sampling interval: 'd' (daily), 'w' (weekly) or 'm' (monthly).
+    """
+
+    def __init__(
+        self,
+        symbols=None,
+        start=None,
+        end=None,
+        retry_count=3,
+        pause=0.1,
+        session=None,
+        adjust_price=False,
+        chunksize=1,
+        interval="d",
+    ):
+        super().__init__(
+            symbols=symbols,
+            start=start,
+            end=end,
+            retry_count=retry_count,
+            pause=pause,
+            session=session,
+            chunksize=chunksize,
+        )
+
+        # Ladder up the wait time between subsequent requests to improve
+        # probability of a successful retry
+        self.pause_multiplier = 2.5
+
+        self.adjust_price = adjust_price
+        self.interval = interval
+
+        if self.interval not in ["d", "w", "m"]:
+            raise ValueError("Invalid interval: valid values are 'd', 'w' and 'm'.")
+
+    @property
+    def url(self):
+        """API URL"""
+        return _TSE_TICKER_URL
+
+    def _get_params(self, symbol):
+        # Resolve the ticker (or numeric instrument id) to the id sent as the
+        # "i" query parameter of the export request.
+        index = self._symbol_search_request(symbol)
+
+        params = {
+            "t": "i",
+            "a": 1,
+            "b": 0,
+            "i": index,
+        }
+        return params
+
+    def _read_one_data(self, url, params):
+        """Read one instrument's data from the given URL into a DataFrame."""
+
+        out = self._read_url_as_StringIO(url, params)
+        try:
+            df = pd.read_csv(out)
+        except ValueError:
+            out.seek(0)
+            msg = out.read()
+            raise RemoteDataError(
+                "message: {}, symbol: {}".format(msg, params["i"])
+            ) from None
+
+        df = df.iloc[::-1].reset_index(drop=True)
+        df = df.rename(columns=_TSE_FIELD_MAPPINGS)
+        df = df.reindex(_TSE_FIELD_MAPPINGS.values(), axis=1)
+
+        if self.adjust_price:
+            df = _adjust_prices(df)
+
+        if "Date" in df:
+            df["Date"] = pd.to_datetime(df["Date"], format="%Y%m%d")
+            df = df.set_index("Date")
+            df = df[self.start : self.end]
+            if self.interval == "w":
+                ohlc = df["Close"].resample("w-sat").ohlc()
+                ohlc["volume"] = df["Volume"].resample("w-sat").sum()
+                df = ohlc
+            elif self.interval == "m":
+                ohlc = df["Close"].resample("m").ohlc()
+                ohlc["volume"] = df["Volume"].resample("m").sum()
+                df = ohlc
+
+        return df
+
+    def _symbol_search_request(self, symbol):
+        """Resolve a symbol to the instrument id used by the export endpoint.
+
+        Numeric symbols are returned unchanged; anything else is looked up in
+        the ticker table downloaded once from the market-watch endpoint and
+        cached at module level.
+        """
+        global _tse_ticker_cache
+
+        if _tse_ticker_cache is None:
+            out = self._read_url_as_StringIO(_TSE_MARKET_WATCH_INIT_URL, params=None)
+            out.seek(0)
+            msg = out.read()
+            # response contain different groups for different data
+            response_groups = msg.split("@")
+            if len(response_groups) < 3:
+                raise RemoteDataError(
+                    "response groups: {}, symbol: {}".format(
+                        len(response_groups), symbol
+                    )
+                ) from None
+
+            symbols_data = response_groups[2].split(";")
+
+            # Build into a local dict and publish it only when complete, so an
+            # error part-way through cannot leave a truncated cache behind.
+            cache = {}
+            for symbol_data in symbols_data:
+                data = symbol_data.split(",")
+                if len(data) < 3:
+                    # Empty or truncated record (e.g. a trailing ";").
+                    continue
+                ticker = self._replace_arabic(data[2]).replace("\u200c", "")
+                cache[ticker] = self._replace_arabic(data[0])
+            _tse_ticker_cache = cache
+
+        try:
+            if str(symbol).isnumeric():
+                index = symbol
+            else:
+                index = _tse_ticker_cache[symbol]
+        except KeyError:
+            raise SymbolWarning("{} not found".format(symbol)) from None
+
+        return index
+
+    def _replace_arabic(self, string: str):
+        return string.replace("ك", "ک").replace("ي", "ی").strip()
+
+
+def _adjust_prices(hist_data, price_list=None):
+    """
+    Return modified DataFrame with adjusted prices based on
+    'Adj Close' and 'Yesterday' price
+    """
+    if hist_data.empty:
+        return hist_data
+    if not isinstance(hist_data.index, pd.RangeIndex):
+        raise TypeError(
+            "Error in adjusting price; index type must be RangeIndex"
+        ) from None
+    if price_list is None:
+        price_list = ["Open", "High", "Low", "Close", "AdjClose", "Yesterday"]
+
+    data = hist_data.copy()
+    step = data.index.step
+    # Previous row's adjusted close; computed once rather than per iteration.
+    prev_adj_close = data["AdjClose"].shift(1)
+    diff = list(data.index[prev_adj_close != data["Yesterday"]])
+    if len(diff) > 0:
+        # The first row always differs (its shifted value is NaN); drop it.
+        diff.pop(0)
+    ratio = 1
+    ratio_list = []
+    for i in diff[::-1]:
+        ratio *= data.loc[i, "Yesterday"] / prev_adj_close.loc[i]
+        ratio_list.insert(0, ratio)
+    for i, k in enumerate(diff):
+        if i == 0:
+            start = data.index.start
+        else:
+            start = diff[i - 1]
+        end = k - step
+        data.loc[start:end, price_list] = round(
+            data.loc[start:end, price_list] * ratio_list[i]
+        )
+
+    return data