You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
344 lines
11 KiB
344 lines
11 KiB
5 months ago
|
"""
|
||
|
This module contains SMBFileSystem class responsible for handling access to
|
||
|
Windows Samba network shares by using package smbprotocol
|
||
|
"""
|
||
|
|
||
|
import datetime
|
||
|
import uuid
|
||
|
from stat import S_ISDIR, S_ISLNK
|
||
|
|
||
|
import smbclient
|
||
|
|
||
|
from .. import AbstractFileSystem
|
||
|
from ..utils import infer_storage_options
|
||
|
|
||
|
# ! pylint: disable=bad-continuation
|
||
|
|
||
|
|
||
|
class SMBFileSystem(AbstractFileSystem):
    """Allow reading and writing to Windows and Samba network shares.

    When using `fsspec.open()` for getting a file-like object the URI
    should be specified as this format:
    ``smb://workgroup;user:password@server:port/share/folder/file.csv``.

    Example::

        >>> import fsspec
        >>> with fsspec.open(
        ...     'smb://myuser:mypassword@myserver.com/' 'share/folder/file.csv'
        ... ) as smbfile:
        ...     df = pd.read_csv(smbfile, sep='|', header=None)

    Note that you need to pass in a valid hostname or IP address for the host
    component of the URL. Do not use the Windows/NetBIOS machine name for the
    host component.

    The first component of the path in the URL points to the name of the shared
    folder. Subsequent path components will point to the directory/folder/file.

    The URL components ``workgroup`` , ``user``, ``password`` and ``port`` may be
    optional.

    .. note::

        This filesystem requires `smbprotocol`_ to be installed, e.g.::

            $ pip install smbprotocol
            # or
            # pip install smbprotocol[kerberos]

    .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements

    Note: if using this with the ``open`` or ``open_files``, with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "smb"

    # pylint: disable=too-many-arguments
    def __init__(
        self,
        host,
        port=None,
        username=None,
        password=None,
        timeout=60,
        encrypt=None,
        share_access=None,
        register_session_retries=5,
        auto_mkdir=False,
        **kwargs,
    ):
        """
        You can use _get_kwargs_from_urls to get some kwargs from
        a reasonable SMB url.

        Authentication will be anonymous or integrated if username/password are not
        given.

        Parameters
        ----------
        host: str
            The remote server name/ip to connect to
        port: int or None
            Port to connect with. Usually 445, sometimes 139.
            When None, 445 is used.
        username: str or None
            Username to connect with. Required if Kerberos auth is not being used.
        password: str or None
            User's password on the server, if using username
        timeout: int
            Connection timeout in seconds
        encrypt: bool
            Whether to force encryption or not, once this has been set to True
            the session cannot be changed back to False.
        share_access: str or None
            Specifies the default access applied to file open operations
            performed with this file system object.
            This affects whether other processes can concurrently open a handle
            to the same file.

            - None (the default): exclusively locks the file until closed.
            - 'r': Allow other handles to be opened with read access.
            - 'w': Allow other handles to be opened with write access.
            - 'd': Allow other handles to be opened with delete access.
        register_session_retries: int
            Maximum number of attempts made to register the session with the
            server when this object is created. If every attempt fails, the
            error raised by the last attempt is propagated.
        auto_mkdir: bool
            Whether, when opening a file, the directory containing it should
            be created (if it doesn't already exist). This is assumed by pyarrow
            and zarr-python code.
        temppath: str, optional (passed via ``kwargs``)
            Directory inside the share in which temporary files are created
            when a file is opened for writing with ``autocommit=False``.
            Defaults to the root of the share.
        """
        super().__init__(**kwargs)
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.timeout = timeout
        self.encrypt = encrypt
        self.temppath = kwargs.pop("temppath", "")
        self.share_access = share_access
        self.register_session_retries = register_session_retries
        self.auto_mkdir = auto_mkdir
        self._connect()

    @property
    def _port(self):
        """Port used for every request: the configured one, or 445 when unset."""
        return 445 if self.port is None else self.port

    def _connect(self):
        """Register an SMB session with the server, retrying on failure.

        Up to ``register_session_retries`` attempts are made, with a 0.1 s
        pause between consecutive attempts. No attempt is made when that
        count is zero or negative.

        Raises
        ------
        Exception
            Whatever the last attempt raised, when no attempt succeeded.
        """
        import time

        last_error = None
        for attempt in range(self.register_session_retries):
            if attempt:
                # Pause between attempts only; there is nothing to wait for
                # after the final one.
                time.sleep(0.1)
            try:
                smbclient.register_session(
                    self.host,
                    username=self.username,
                    password=self.password,
                    port=self._port,
                    encrypt=self.encrypt,
                    connection_timeout=self.timeout,
                )
                return
            except Exception as exc:  # pylint: disable=broad-except
                # Remember the failure so it can be reported if no later
                # attempt succeeds.
                last_error = exc
        if last_error is not None:
            # Surface the connection problem here instead of letting a later
            # file operation fail with a less helpful error.
            raise last_error

    @classmethod
    def _strip_protocol(cls, path):
        """Return only the path component of an SMB URL or path."""
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Extract constructor keyword arguments from an SMB URL.

        Everything ``infer_storage_options`` finds is returned, except the
        ``path`` and ``protocol`` entries.
        """
        # smb://workgroup;user:password@host:port/share/folder/file.csv
        out = infer_storage_options(path)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create the directory ``path``.

        With ``create_parents`` true, missing parent directories are created
        as well (``exist_ok=False`` is passed to ``smbclient.makedirs``);
        otherwise only the final directory is created. Extra ``kwargs`` are
        forwarded to smbclient.
        """
        wpath = _as_unc_path(self.host, path)
        if create_parents:
            smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
        else:
            smbclient.mkdir(wpath, port=self._port, **kwargs)

    def makedirs(self, path, exist_ok=False):
        """Recursively create ``path``; does nothing when ``path`` is only a share."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)

    def rmdir(self, path):
        """Remove the directory ``path``; does nothing when ``path`` is only a share."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.rmdir(wpath, port=self._port)

    def info(self, path, **kwargs):
        """Return a dict describing ``path``.

        The dict has the keys ``name``, ``size``, ``type`` ("directory",
        "link" or "file"), ``uid``, ``gid``, ``time`` (access time) and
        ``mtime``. Directory names get a trailing "/". Extra ``kwargs`` are
        forwarded to ``smbclient.stat``.
        """
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port, **kwargs)
        if S_ISDIR(stats.st_mode):
            stype = "directory"
        elif S_ISLNK(stats.st_mode):
            stype = "link"
        else:
            stype = "file"
        res = {
            "name": path + "/" if stype == "directory" else path,
            "size": stats.st_size,
            "type": stype,
            "uid": stats.st_uid,
            "gid": stats.st_gid,
            "time": stats.st_atime,
            "mtime": stats.st_mtime,
        }
        return res

    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)

    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)

    def ls(self, path, detail=True, **kwargs):
        """List the entries of directory ``path``.

        Returns the entry paths, or one ``info`` dict per entry when
        ``detail`` is true. Extra ``kwargs`` are forwarded to
        ``smbclient.listdir``.
        """
        unc = _as_unc_path(self.host, path)
        listed = smbclient.listdir(unc, port=self._port, **kwargs)
        dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
        if detail:
            dirs = [self.info(d) for d in dirs]
        return dirs

    # pylint: disable=too-many-arguments
    def _open(
        self,
        path,
        mode="rb",
        block_size=-1,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """
        block_size: int or None
            If 0, no buffering, 1, line buffering, >1, buffer that many bytes.
            None or a negative value is passed on as -1.

        Notes
        -----
        By specifying 'share_access' in 'kwargs' it is possible to override the
        default shared access setting applied in the constructor of this object.
        The setting applies both to direct opens and to the temporary file used
        when ``autocommit`` is False.
        """
        if self.auto_mkdir and "w" in mode:
            self.makedirs(self._parent(path), exist_ok=True)
        bls = block_size if block_size is not None and block_size >= 0 else -1
        wpath = _as_unc_path(self.host, path)
        share_access = kwargs.pop("share_access", self.share_access)
        if "w" in mode and autocommit is False:
            # Deferred commit: write to a temporary file in the same share and
            # move it into place when the opener is committed.
            temp = _as_temp_path(self.host, path, self.temppath)
            return SMBFileOpener(
                wpath,
                temp,
                mode,
                port=self._port,
                block_size=bls,
                share_access=share_access,
                **kwargs,
            )
        return smbclient.open_file(
            wpath,
            mode,
            buffering=bls,
            share_access=share_access,
            port=self._port,
            **kwargs,
        )

    def copy(self, path1, path2, **kwargs):
        """Copy within two locations in the same filesystem"""
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        if self.auto_mkdir:
            self.makedirs(self._parent(path2), exist_ok=True)
        smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)

    def _rm(self, path):
        """Remove the file or directory at ``path``.

        Does nothing when ``path`` is only a share.
        """
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            stats = smbclient.stat(wpath, port=self._port)
            if S_ISDIR(stats.st_mode):
                smbclient.rmdir(wpath, port=self._port)
            else:
                smbclient.remove(wpath, port=self._port)

    def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
        """Rename ``path1`` to ``path2``.

        ``recursive`` and ``maxdepth`` are accepted but not used here.
        """
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
|
||
|
|
||
|
|
||
|
def _as_unc_path(host, path):
|
||
|
rpath = path.replace("/", "\\")
|
||
|
unc = f"\\\\{host}{rpath}"
|
||
|
return unc
|
||
|
|
||
|
|
||
|
def _as_temp_path(host, path, temppath):
    """Return the UNC path of a fresh, uniquely named temporary file.

    The file is placed in the same share as ``path``, inside the directory
    ``temppath`` (or directly in the share root when ``temppath`` is empty).
    Its name is a random UUID.

    ``temppath`` may be given with or without leading/trailing separators and
    with either "/" or "\\" as separator; "tmp", "/tmp" and "/tmp/" are
    equivalent.
    """
    # ``path`` is absolute ("/share/..."), so element 0 of the split is empty
    # and element 1 is the share name.
    share = path.split("/")[1]
    # Normalise the directory so that it can neither fuse with the share name
    # (missing leading separator) nor produce a doubled separator.
    temp_dir = (temppath or "").replace("\\", "/").strip("/")
    segments = [share]
    if temp_dir:
        segments.append(temp_dir)
    segments.append(str(uuid.uuid4()))
    return _as_unc_path(host, "/" + "/".join(segments))
|
||
|
|
||
|
|
||
|
def _share_has_path(path):
|
||
|
parts = path.count("/")
|
||
|
if path.endswith("/"):
|
||
|
return parts > 2
|
||
|
return parts > 1
|
||
|
|
||
|
|
||
|
class SMBFileOpener:
    """writes to remote temporary file, move on commit"""

    def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
        """Open the temporary file ``temp`` that stands in for ``path``.

        Parameters
        ----------
        path: str
            Final destination of the data; the target of ``commit``.
        temp: str
            Temporary file that actually receives the writes.
        mode: str
            Mode passed to ``smbclient.open_file``.
        port: int
            Port passed to every smbclient call.
        block_size: int
            Passed to ``smbclient.open_file`` as ``buffering``.
        **kwargs:
            Further keyword arguments for ``smbclient.open_file``.
        """
        self.path = path
        self.temp = temp
        self.mode = mode
        self.block_size = block_size
        self.kwargs = kwargs
        self.smbfile = None
        self._incontext = False
        self.port = port
        self._open()

    def _open(self):
        """Open the temporary file unless an open handle already exists."""
        if self.smbfile is None or self.smbfile.closed:
            self.smbfile = smbclient.open_file(
                self.temp,
                self.mode,
                port=self.port,
                buffering=self.block_size,
                **self.kwargs,
            )

    def commit(self):
        """Move temp file to definitive on success."""
        # TODO: use transaction support in SMB protocol
        smbclient.replace(self.temp, self.path, port=self.port)

    def discard(self):
        """Remove the temp file on failure."""
        smbclient.remove(self.temp, port=self.port)

    def __fspath__(self):
        """Return the final destination path (not the temporary one)."""
        return self.path

    def __iter__(self):
        """Iterate over the underlying file object."""
        return self.smbfile.__iter__()

    def __getattr__(self, item):
        """Delegate unknown attributes to the underlying file object."""
        if item == "smbfile":
            # Only reached when ``smbfile`` has not been set on the instance
            # (e.g. while copying or unpickling). Looking it up through
            # ``self.smbfile`` here would recurse without end.
            raise AttributeError(item)
        return getattr(self.smbfile, item)

    def __enter__(self):
        """Enter the underlying file's context and return what it returns."""
        self._incontext = True
        return self.smbfile.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave the underlying file's context."""
        self._incontext = False
        self.smbfile.__exit__(exc_type, exc_value, traceback)
|