diff --git a/.coveragerc b/.coveragerc index cac94c3..96752f1 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,6 +2,9 @@ branch=True source=unasync +[paths] +source = src/unasync + [report] precision = 1 exclude_lines = diff --git a/.coveragerc-py2 b/.coveragerc-py2 index 571cf8a..c69e158 100644 --- a/.coveragerc-py2 +++ b/.coveragerc-py2 @@ -2,6 +2,9 @@ branch=True source=unasync +[paths] +source = src/unasync + [report] precision = 1 exclude_lines = diff --git a/src/unasync/__init__.py b/src/unasync/__init__.py index 52af030..24bd971 100644 --- a/src/unasync/__init__.py +++ b/src/unasync/__init__.py @@ -52,13 +52,18 @@ def isidentifier(s): StringIO = io.StringIO +if hasattr(os, "fspath"): # PY3 + fspath = os.fspath +else: # PY2 + fspath = str + class Rule: """A single set of rules for 'unasync'ing file(s)""" def __init__(self, fromdir, todir, additional_replacements=None): - self.fromdir = fromdir.replace("/", os.sep) - self.todir = todir.replace("/", os.sep) + self.fromdir = fspath(fromdir).replace("/", os.sep) + self.todir = fspath(todir).replace("/", os.sep) # Add any additional user-defined token replacements to our list. self.token_replacements = _ASYNC_TO_SYNC.copy() @@ -69,6 +74,8 @@ def _match(self, filepath): """Determines if a Rule matches a given filepath and if so returns a higher comparable value if the match is more specific. """ + filepath = fspath(filepath) + file_segments = [x for x in filepath.split(os.sep) if x] from_segments = [x for x in self.fromdir.split(os.sep) if x] len_from_segments = len(from_segments) @@ -83,6 +90,7 @@ def _match(self, filepath): return False def _unasync_file(self, filepath): + filepath = fspath(filepath) with open(filepath, "rb") as f: write_kwargs = {} if sys.version_info[0] >= 3: # PY3 # pragma: no branch @@ -100,7 +108,57 @@ def _unasync_file(self, filepath): def _unasync_tokens(self, tokens): # TODO __await__, ...? 
used_space = None + context = None # Can be `None`, `"func_decl"`, `"func_name"`, `"arg_list"`, `"arg_list_end"`, `"return_type"` + brace_depth = 0 + typing_ctx = False + for space, toknum, tokval in tokens: + # Update context state tracker + if context is None and toknum == std_tokenize.NAME and tokval == "def": + context = "func_decl" + elif context == "func_decl" and toknum == std_tokenize.NAME: + context = "func_name" + elif context == "func_name" and toknum == std_tokenize.OP and tokval == "(": + context = "arg_list" + elif context == "arg_list": + if toknum == std_tokenize.OP and tokval in ("(", "["): + brace_depth += 1 + elif ( + toknum == std_tokenize.OP + and tokval in (")", "]") + and brace_depth >= 1 + ): + brace_depth -= 1 + elif toknum == std_tokenize.OP and tokval == ")": + context = "arg_list_end" + elif toknum == std_tokenize.OP and tokval == ":" and brace_depth < 1: + typing_ctx = True + elif toknum == std_tokenize.OP and tokval == "," and brace_depth < 1: + typing_ctx = False + elif ( + context == "arg_list_end" + and toknum == std_tokenize.OP + and tokval == "->" + ): + context = "return_type" + typing_ctx = True + elif context == "return_type": + if toknum == std_tokenize.OP and tokval in ("(", "["): + brace_depth += 1 + elif ( + toknum == std_tokenize.OP + and tokval in (")", "]") + and brace_depth >= 1 + ): + brace_depth -= 1 + elif toknum == std_tokenize.OP and tokval == ":": + context = None + typing_ctx = False + else: # Something unexpected happened - reset state + context = None + brace_depth = 0 + typing_ctx = False + if tokval in ["async", "await"]: # When removing async or await, we want to use the whitespace that # was before async/await before the next token so that @@ -111,8 +169,34 @@ def _unasync_tokens(self, tokens): if toknum == std_tokenize.NAME: tokval = self._unasync_name(tokval) elif toknum == std_tokenize.STRING: - left_quote, name, right_quote = tokval[0], tokval[1:-1], tokval[-1] - tokval = left_quote + 
self._unasync_name(name) + right_quote + # Strings in typing context are forward-references and should be unasyncified + quote = "" + prefix = "" + while ord(tokval[0]) in range(ord("a"), ord("z") + 1): + prefix += tokval[0] + tokval = tokval[1:] + + if tokval.startswith('"""') and tokval.endswith('"""'): + quote = '"""' # Broken syntax highlighters workaround: """ + elif tokval.startswith("'''") and tokval.endswith("'''"): + quote = "'''" # Broken syntax highlighters workaround: ''' + elif tokval.startswith('"') and tokval.endswith('"'): + quote = '"' + elif tokval.startswith( # pragma: no branch + "'" + ) and tokval.endswith("'"): + quote = "'" + assert ( + len(quote) > 0 + ), "Quoting style of string {0!r} unknown".format(tokval) + stringval = tokval[len(quote) : -len(quote)] + if typing_ctx: + stringval = _untokenize( + self._unasync_tokens(_tokenize(StringIO(stringval))) + ) + else: + stringval = self._unasync_name(stringval) + tokval = prefix + quote + stringval + quote elif toknum == std_tokenize.COMMENT and tokval.startswith( _TYPE_COMMENT_PREFIX ): @@ -193,7 +277,7 @@ def _tokenize(f): # Somehow Python 3.5 and below produce the ENDMARKER in a way that # causes superfluous continuation lines to be generated if tok.type != std_tokenize.ENDMARKER: - yield ("", std_tokenize.STRING, " \\\n") + yield (" ", std_tokenize.NEWLINE, "\\\n") last_end = (tok.start[0], 0) space = "" diff --git a/test-requirements.txt b/test-requirements.txt index 2c7cf23..55b2caa 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,2 +1,3 @@ pytest>=4.3.0 -pytest-cov \ No newline at end of file +pytest-cov +pathlib2 ; python_version < '3.5' \ No newline at end of file diff --git a/tests/data/async/typing_py3.py b/tests/data/async/typing_py3.py new file mode 100644 index 0000000..cd4e213 --- /dev/null +++ b/tests/data/async/typing_py3.py @@ -0,0 +1,13 @@ +# fmt: off +# A forward-reference typed function that returns an iterator for an (a)sync iterable +async def aiter1(a: 
"typing.AsyncIterable[int]") -> 'typing.AsyncIterable[int]': + return a.__aiter__() + +# Same as the above but using tripple-quoted strings +async def aiter2(a: """typing.AsyncIterable[int]""") -> r'''typing.AsyncIterable[int]''': + return a.__aiter__() + +# Same as the above but without forward-references +async def aiter3(a: typing.AsyncIterable[int]) -> typing.AsyncIterable[int]: + return a.__aiter__() +# fmt: on diff --git a/tests/data/sync/typing_py3.py b/tests/data/sync/typing_py3.py new file mode 100644 index 0000000..cfad1f0 --- /dev/null +++ b/tests/data/sync/typing_py3.py @@ -0,0 +1,13 @@ +# fmt: off +# A forward-reference typed function that returns an iterator for an (a)sync iterable +def aiter1(a: "typing.Iterable[int]") -> 'typing.Iterable[int]': + return a.__iter__() + +# Same as the above but using tripple-quoted strings +def aiter2(a: """typing.Iterable[int]""") -> r'''typing.Iterable[int]''': + return a.__iter__() + +# Same as the above but without forward-references +def aiter3(a: typing.Iterable[int]) -> typing.Iterable[int]: + return a.__iter__() +# fmt: on diff --git a/tests/test_unasync.py b/tests/test_unasync.py index f4b08eb..511b956 100644 --- a/tests/test_unasync.py +++ b/tests/test_unasync.py @@ -2,6 +2,11 @@ import errno import io import os + +try: + import pathlib +except ImportError: + import pathlib2 as pathlib import shutil import subprocess import sys @@ -15,6 +20,9 @@ SYNC_DIR = os.path.join(TEST_DIR, "sync") TEST_FILES = sorted([f for f in os.listdir(ASYNC_DIR) if f.endswith(".py")]) +if sys.version_info[0] == 2: + TEST_FILES.remove("typing_py3.py") + def list_files(startpath): output = "" @@ -40,6 +48,12 @@ def test_rule_on_short_path(): assert rule._match("/ahip/") is False +def test_rule_with_pathlib_path(): + path_async_base = pathlib.Path("/ahip") + path_sync_base = pathlib.Path("/hip") + unasync.Rule(path_async_base / "tests", path_sync_base / "tests") + + @pytest.mark.parametrize("source_file", TEST_FILES) def 
test_unasync(tmpdir, source_file):