Skip to content

Commit 41fa87a

Browse files
committed
Fix os.path.expanduser to work with a bytes path
- adapted documentation for pandas support - added test for pandas.read_table()
1 parent 520914f commit 41fa87a

File tree

5 files changed

+61
-47
lines changed

5 files changed

+61
-47
lines changed

CHANGES.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ The released versions correspond to PyPi releases.
44
## Version 4.1.0 (as yet unreleased)
55

66
#### New Features
7-
* Added some support for pandas (`read_csv`, `read_excel`) to work with
8-
the fake filesystem (see [#531](../../issues/531))
7+
* Added some support for pandas (`read_csv`, `read_excel` and more) to work
8+
with the fake filesystem (see [#531](../../issues/531))
99

1010
#### Fixes
11+
* `os.path.expanduser` now works with a bytes path
1112
* Do not override global warnings setting in `Deprecator`
1213
(see [#526](../../issues/526))
1314
* Make sure filesystem modules in `pathlib` are patched

docs/usage.rst

+9-4
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,10 @@ want to set this to ``False``.
359359
use_known_patches
360360
~~~~~~~~~~~~~~~~~
361361
If this is set to ``True`` (the default), ``pyfakefs`` patches some
362-
libraries that are known to not work out of the box, to be able work with the
363-
fake filesystem. Currently, this includes patches for the ``pandas`` methods
364-
``read_csv`` and ``read_excel`` - more may follow. This flag is
365-
there to be able to disable this functionality in case it causes any
362+
libraries that are known to not work out of the box, to be able to work with
363+
the fake filesystem. Currently, this includes patches for some ``pandas``
364+
read methods like ``read_csv`` and ``read_excel`` - more may follow. This
365+
flag is there to allow disabling this functionality in case it causes any
366366
problems. It may be removed or replaced by a more fine-grained argument in
367367
future releases.
368368

@@ -616,6 +616,11 @@ A list of Python modules that are known to not work correctly with
616616
sufficient demand.
617617
- the ``Pillow`` image library does not work with pyfakefs at least if writing
618618
JPEG files (see `this issue <https://github.com/jmcgeheeiv/pyfakefs/issues/529>`__)
619+
- ``pandas`` (the Python data analysis library) uses its own internal file
620+
system access, written in C, and therefore does not work with pyfakefs out
621+
of the box. ``pyfakefs`` adds some patches so that many of the
622+
``read_xxx`` functions will work with the fake filesystem (including
623+
``read_csv`` and ``read_excel``).
619624
620625
If you are not sure if a module can be handled, or how to do it, you can
621626
always write a new issue, of course!

pyfakefs/fake_filesystem.py

+31-40
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@
112112
from pyfakefs.helpers import (
113113
FakeStatResult, FileBufferIO, NullFileBufferIO,
114114
is_int_type, is_byte_string, is_unicode_string,
115-
make_string_path, IS_WIN, to_string)
115+
make_string_path, IS_WIN, to_string, matching_string
116+
)
116117
from pyfakefs import __version__ # noqa: F401 for upwards compatibility
117118

118119
__pychecker__ = 'no-reimportself'
@@ -958,24 +959,13 @@ def raise_os_error(self, errno, filename=None, winerror=None):
958959
raise OSError(errno, message, filename, winerror)
959960
raise OSError(errno, message, filename)
960961

961-
@staticmethod
962-
def _matching_string(matched, string):
963-
"""Return the string as byte or unicode depending
964-
on the type of matched, assuming string is an ASCII string.
965-
"""
966-
if string is None:
967-
return string
968-
if isinstance(matched, bytes) and isinstance(string, str):
969-
return string.encode(locale.getpreferredencoding(False))
970-
return string
971-
972962
def _path_separator(self, path):
973963
"""Return the path separator as the same type as path"""
974-
return self._matching_string(path, self.path_separator)
964+
return matching_string(path, self.path_separator)
975965

976966
def _alternative_path_separator(self, path):
977967
"""Return the alternative path separator as the same type as path"""
978-
return self._matching_string(path, self.alternative_path_separator)
968+
return matching_string(path, self.alternative_path_separator)
979969

980970
def _starts_with_sep(self, path):
981971
"""Return True if path starts with a path separator."""
@@ -1035,10 +1025,10 @@ def to_str(string):
10351025
path = self.absnormpath(self._original_path(path))
10361026
if path in self.mount_points:
10371027
return self.mount_points[path]
1038-
mount_path = self._matching_string(path, '')
1028+
mount_path = matching_string(path, '')
10391029
drive = self.splitdrive(path)[:1]
10401030
for root_path in self.mount_points:
1041-
root_path = self._matching_string(path, root_path)
1031+
root_path = matching_string(path, root_path)
10421032
if drive and not root_path.startswith(drive):
10431033
continue
10441034
if path.startswith(root_path) and len(root_path) > len(mount_path):
@@ -1377,8 +1367,8 @@ def normpath(self, path):
13771367
is_absolute_path = path.startswith(sep)
13781368
path_components = path.split(sep)
13791369
collapsed_path_components = []
1380-
dot = self._matching_string(path, '.')
1381-
dotdot = self._matching_string(path, '..')
1370+
dot = matching_string(path, '.')
1371+
dotdot = matching_string(path, '..')
13821372
for component in path_components:
13831373
if (not component) or (component == dot):
13841374
continue
@@ -1453,18 +1443,18 @@ def absnormpath(self, path):
14531443
or the root directory if path is empty.
14541444
"""
14551445
path = self.normcase(path)
1456-
cwd = self._matching_string(path, self.cwd)
1446+
cwd = matching_string(path, self.cwd)
14571447
if not path:
14581448
path = self.path_separator
1459-
if path == self._matching_string(path, '.'):
1449+
if path == matching_string(path, '.'):
14601450
path = cwd
14611451
elif not self._starts_with_root_path(path):
14621452
# Prefix relative paths with cwd, if cwd is not root.
1463-
root_name = self._matching_string(path, self.root.name)
1464-
empty = self._matching_string(path, '')
1453+
root_name = matching_string(path, self.root.name)
1454+
empty = matching_string(path, '')
14651455
path = self._path_separator(path).join(
14661456
(cwd != root_name and cwd or empty, path))
1467-
if path == self._matching_string(path, '.'):
1457+
if path == matching_string(path, '.'):
14681458
path = cwd
14691459
return self.normpath(path)
14701460

@@ -1489,7 +1479,7 @@ def splitpath(self, path):
14891479

14901480
starts_with_drive = self._starts_with_drive_letter(path)
14911481
basename = path_components.pop()
1492-
colon = self._matching_string(path, ':')
1482+
colon = matching_string(path, ':')
14931483
if not path_components:
14941484
if starts_with_drive:
14951485
components = basename.split(colon)
@@ -1545,7 +1535,7 @@ def splitdrive(self, path):
15451535
if sep_index2 == -1:
15461536
sep_index2 = len(path)
15471537
return path[:sep_index2], path[sep_index2:]
1548-
if path[1:2] == self._matching_string(path, ':'):
1538+
if path[1:2] == matching_string(path, ':'):
15491539
return path[:2], path[2:]
15501540
return path[:0], path
15511541

@@ -1579,7 +1569,7 @@ def _join_paths_with_drive_support(self, *all_paths):
15791569
result_path = result_path + sep
15801570
result_path = result_path + path_part
15811571
# add separator between UNC and non-absolute path
1582-
colon = self._matching_string(base_path, ':')
1572+
colon = matching_string(base_path, ':')
15831573
if (result_path and result_path[:1] not in seps and
15841574
result_drive and result_drive[-1:] != colon):
15851575
return result_drive + sep + result_path
@@ -1613,7 +1603,7 @@ def joinpaths(self, *paths):
16131603
joined_path_segments.append(sep)
16141604
if path_segment:
16151605
joined_path_segments.append(path_segment)
1616-
return self._matching_string(paths[0], '').join(joined_path_segments)
1606+
return matching_string(paths[0], '').join(joined_path_segments)
16171607

16181608
def _path_components(self, path):
16191609
"""Breaks the path into a list of component names.
@@ -1664,20 +1654,20 @@ def _starts_with_drive_letter(self, file_path):
16641654
`True` if drive letter support is enabled in the filesystem and
16651655
the path starts with a drive letter.
16661656
"""
1667-
colon = self._matching_string(file_path, ':')
1657+
colon = matching_string(file_path, ':')
16681658
return (self.is_windows_fs and len(file_path) >= 2 and
16691659
file_path[:1].isalpha and (file_path[1:2]) == colon)
16701660

16711661
def _starts_with_root_path(self, file_path):
1672-
root_name = self._matching_string(file_path, self.root.name)
1662+
root_name = matching_string(file_path, self.root.name)
16731663
file_path = self._normalize_path_sep(file_path)
16741664
return (file_path.startswith(root_name) or
16751665
not self.is_case_sensitive and file_path.lower().startswith(
16761666
root_name.lower()) or
16771667
self._starts_with_drive_letter(file_path))
16781668

16791669
def _is_root_path(self, file_path):
1680-
root_name = self._matching_string(file_path, self.root.name)
1670+
root_name = matching_string(file_path, self.root.name)
16811671
return (file_path == root_name or not self.is_case_sensitive and
16821672
file_path.lower() == root_name.lower() or
16831673
2 <= len(file_path) <= 3 and
@@ -1860,7 +1850,7 @@ def _resolve_components(self, path_components, raw_io):
18601850
def _valid_relative_path(self, file_path):
18611851
if self.is_windows_fs:
18621852
return True
1863-
slash_dotdot = self._matching_string(
1853+
slash_dotdot = matching_string(
18641854
file_path, self.path_separator + '..')
18651855
while file_path and slash_dotdot in file_path:
18661856
file_path = file_path[:file_path.rfind(slash_dotdot)]
@@ -2026,7 +2016,7 @@ def lresolve(self, path):
20262016

20272017
# remove trailing separator
20282018
path = self._path_without_trailing_separators(path)
2029-
if path == self._matching_string(path, '.'):
2019+
if path == matching_string(path, '.'):
20302020
path = self.cwd
20312021
path = self._original_path(path)
20322022

@@ -2260,8 +2250,8 @@ def remove_object(self, file_path):
22602250

22612251
def make_string_path(self, path):
22622252
path = make_string_path(path)
2263-
os_sep = self._matching_string(path, os.sep)
2264-
fake_sep = self._matching_string(path, self.path_separator)
2253+
os_sep = matching_string(path, os.sep)
2254+
fake_sep = matching_string(path, self.path_separator)
22652255
return path.replace(os_sep, fake_sep)
22662256

22672257
def create_dir(self, directory_path, perm_bits=PERM_DEF):
@@ -2756,8 +2746,7 @@ def makedir(self, dir_name, mode=PERM_DEF):
27562746
parent_dir, _ = self.splitpath(dir_name)
27572747
if parent_dir:
27582748
base_dir = self.normpath(parent_dir)
2759-
ellipsis = self._matching_string(
2760-
parent_dir, self.path_separator + '..')
2749+
ellipsis = matching_string(parent_dir, self.path_separator + '..')
27612750
if parent_dir.endswith(ellipsis) and not self.is_windows_fs:
27622751
base_dir, dummy_dotdot, _ = parent_dir.partition(ellipsis)
27632752
if not self.exists(base_dir):
@@ -3333,8 +3322,8 @@ def _joinrealpath(self, path, rest, seen):
33333322
encountered in the second path.
33343323
Taken from Python source and adapted.
33353324
"""
3336-
curdir = self.filesystem._matching_string(path, '.')
3337-
pardir = self.filesystem._matching_string(path, '..')
3325+
curdir = matching_string(path, '.')
3326+
pardir = matching_string(path, '..')
33383327

33393328
sep = self.filesystem._path_separator(path)
33403329
if self.isabs(rest):
@@ -3385,8 +3374,10 @@ def expanduser(self, path):
33853374
"""Return the argument with an initial component of ~ or ~user
33863375
replaced by that user's home directory.
33873376
"""
3388-
return self._os_path.expanduser(path).replace(
3389-
self._os_path.sep, self.sep)
3377+
path = self._os_path.expanduser(path)
3378+
return path.replace(
3379+
matching_string(path, self._os_path.sep),
3380+
matching_string(path, self.sep))
33903381

33913382
def ismount(self, path):
33923383
"""Return true if the given path is a mount point.

pyfakefs/helpers.py

+11
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,17 @@ def to_string(path):
5757
return path
5858

5959

60+
def matching_string(matched, string):
    """Return ``string`` converted to the string type of ``matched``.

    If ``matched`` is a byte string and ``string`` is a unicode string,
    ``string`` is encoded using the preferred locale encoding; ``string``
    is assumed to contain only ASCII characters. In every other case
    (including ``string`` being ``None`` or already a byte string),
    ``string`` is returned unchanged.
    """
    needs_encoding = isinstance(matched, bytes) and isinstance(string, str)
    if not needs_encoding:
        # Covers None, bytes, and the str/str combination.
        return string
    return string.encode(locale.getpreferredencoding(False))
69+
70+
6071
class FakeStatResult:
6172
"""Mimics os.stat_result for use as return type of `stat()` and similar.
6273
This is needed as `os.stat_result` has no possibility to set

pyfakefs/tests/patched_packages_test.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,18 @@ def setUp(self):
3434
self.setUpPyfakefs()
3535

3636
if pd is not None:
37-
def test_load_csv(self):
37+
def test_read_csv(self):
3838
path = '/foo/bar.csv'
3939
self.fs.create_file(path, contents='1,2,3,4')
4040
df = pd.read_csv(path)
4141
assert (df.columns == ['1', '2', '3', '4']).all()
4242

43+
def test_read_table(self):
    """Check that ``pandas.read_table`` reads from the fake filesystem."""
    path = '/foo/bar.csv'
    # The fake filesystem is an attribute of the test case (``self.fs``),
    # exactly as used by the sibling ``test_read_csv`` test.
    self.fs.create_file(path, contents='1|2|3|4')
    df = pd.read_table(path, delimiter='|')
    assert (df.columns == ['1', '2', '3', '4']).all()
48+
4349
if pd is not None and xlrd is not None:
4450
def test_load_excel(self):
4551
path = '/foo/bar.xlsx'

0 commit comments

Comments
 (0)