diff --git a/ci.sh b/ci.sh
index c1c2a18..9934548 100755
--- a/ci.sh
+++ b/ci.sh
@@ -4,7 +4,7 @@ set -ex
 
 BLACK_VERSION=22.6.0
 
-pip install -U pip setuptools wheel
+python -m pip install -U pip setuptools wheel
 
 python setup.py sdist --formats=zip
 pip install dist/*.zip
diff --git a/setup.py b/setup.py
index 6a668e4..88f3ecc 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
     include_package_data=True,
     packages=find_packages("src"),
     package_dir={"": "src"},
-    install_requires=[],
+    install_requires=["tokenize_rt"],
     keywords=["async"],
     python_requires=">=3.7",
     classifiers=[
diff --git a/src/unasync/__init__.py b/src/unasync/__init__.py
index 2e59707..ffc9205 100644
--- a/src/unasync/__init__.py
+++ b/src/unasync/__init__.py
@@ -6,6 +6,7 @@
 import sys
 import tokenize as std_tokenize
 
+import tokenize_rt
 from setuptools.command import build_py as orig
 
 from ._version import __version__  # NOQA
@@ -65,35 +66,41 @@ def _match(self, filepath):
     def _unasync_file(self, filepath):
         with open(filepath, "rb") as f:
             encoding, _ = std_tokenize.detect_encoding(f.readline)
-            f.seek(0)
-            tokens = _tokenize(f)
+
+        with open(filepath, "rt", encoding=encoding) as f:
+            tokens = tokenize_rt.src_to_tokens(f.read())
             tokens = self._unasync_tokens(tokens)
-            result = _untokenize(tokens)
+            result = tokenize_rt.tokens_to_src(tokens)
             outfilepath = filepath.replace(self.fromdir, self.todir)
             os.makedirs(os.path.dirname(outfilepath), exist_ok=True)
             with open(outfilepath, "wb") as f:
                 f.write(result.encode(encoding))
 
     def _unasync_tokens(self, tokens):
-        # TODO __await__, ...?
-        used_space = None
-        for space, toknum, tokval in tokens:
-            if tokval in ["async", "await"]:
-                # When removing async or await, we want to use the whitespace that
-                # was before async/await before the next token so that
-                # `print(await stuff)` becomes `print(stuff)` and not
-                # `print( stuff)`
-                used_space = space
+        skip_next = False
+        for i, token in enumerate(tokens):
+            if skip_next:
+                skip_next = False
+                continue
+
+            if token.src in ["async", "await"]:
+                # When removing async or await, we want to skip the following whitespace
+                # so that `print(await stuff)` becomes `print(stuff)` and not `print( stuff)`
+                skip_next = True
             else:
-                if toknum == std_tokenize.NAME:
-                    tokval = self._unasync_name(tokval)
-                elif toknum == std_tokenize.STRING:
-                    left_quote, name, right_quote = tokval[0], tokval[1:-1], tokval[-1]
-                    tokval = left_quote + self._unasync_name(name) + right_quote
-                if used_space is None:
-                    used_space = space
-                yield (used_space, tokval)
-                used_space = None
+                if token.name == "NAME":
+                    token = token._replace(src=self._unasync_name(token.src))
+                elif token.name == "STRING":
+                    left_quote, name, right_quote = (
+                        token.src[0],
+                        token.src[1:-1],
+                        token.src[-1],
+                    )
+                    token = token._replace(
+                        src=left_quote + self._unasync_name(name) + right_quote
+                    )
+
+                yield token
 
     def _unasync_name(self, name):
         if name in self.token_replacements:
@@ -122,31 +129,6 @@ def unasync_files(fpath_list, rules):
 
 
 Token = collections.namedtuple("Token", ["type", "string", "start", "end", "line"])
 
 
-def _tokenize(f):
-    last_end = (1, 0)
-    for tok in std_tokenize.tokenize(f.readline):
-        if tok.type == std_tokenize.ENCODING:
-            continue
-
-        if last_end[0] < tok.start[0]:
-            yield ("", std_tokenize.STRING, " \\\n")
-            last_end = (tok.start[0], 0)
-
-        space = ""
-        if tok.start > last_end:
-            assert tok.start[0] == last_end[0]
-            space = " " * (tok.start[1] - last_end[1])
-        yield (space, tok.type, tok.string)
-
-        last_end = tok.end
-        if tok.type in [std_tokenize.NEWLINE, std_tokenize.NL]:
-            last_end = (tok.end[0] + 1, 0)
-
-
-def _untokenize(tokens):
-    return "".join(space + tokval for space, tokval in tokens)
-
-
 _DEFAULT_RULE = Rule(fromdir="/_async/", todir="/_sync/")
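
Note: the skip-the-next-token trick in _unasync_tokens works because tokenize_rt materializes the whitespace between tokens as explicit UNIMPORTANT_WS tokens, so dropping the keyword plus the token right after it removes both `await` and its trailing space. A minimal standalone sketch of that pattern (illustrative only, not part of this patch; the helper name strip_async_await is hypothetical):

import tokenize_rt

def strip_async_await(src):
    # src_to_tokens gives a lossless token stream in which inter-token
    # whitespace appears as its own UNIMPORTANT_WS token.
    out = []
    skip_next = False
    for token in tokenize_rt.src_to_tokens(src):
        if skip_next:
            # Drop the whitespace token that followed the keyword.
            skip_next = False
            continue
        if token.src in ("async", "await"):
            # Drop the keyword itself and flag its trailing whitespace.
            skip_next = True
            continue
        out.append(token)
    # tokens_to_src reassembles source text from the remaining tokens.
    return tokenize_rt.tokens_to_src(out)

print(strip_async_await("print(await stuff)\n"))  # -> print(stuff)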