Source code for ubelt._win32_links

"""
For dealing with symlinks, junctions, and hard-links on windows.

Note:
    The terminology used here was written before I really understood the
    difference between symlinks, hardlinks, and junctions. As such it may be
    inconsistent or incorrect in some places. This might be fixed in the
    future.

References:
    .. [SO18883892] https://stackoverflow.com/questions/18883892/batch-file-windows-cmd-exe-test-if-a-directory-is-a-link-symlink
    .. [SO21561850] https://stackoverflow.com/questions/21561850/python-test-for-junction-point-target
    .. [WinTwoFilesSame] http://timgolden.me.uk/python/win32_how_do_i/see_if_two_files_are_the_same_file.html
    .. [SO6260149] https://stackoverflow.com/questions/6260149/os-symlink-support-in-windows
    .. [WinDesktopAA365006] https://msdn.microsoft.com/en-us/library/windows/desktop/aa365006(v=vs.85).aspx
    .. [SU902082] https://superuser.com/a/902082/215232

Weird Behavior:
    - [ ] In many cases using the win32 API seems to result in privilege errors
          but using shell commands does not have this problem.
"""

from __future__ import annotations

import os
import platform
import sys
import typing
import warnings
from os.path import exists, join

from ubelt import util_io, util_path

# ``jwfs`` is the handle to the win32 filesystem bindings used by the helpers
# in this module. It is only imported on Windows; on every other platform it
# is None, and the helpers that need it raise ImportError when called.
if sys.platform.startswith('win32'):
    try:
        import jaraco.windows.filesystem as jwfs
    except ImportError:
        # Use vendored subset of jaraco.windows
        from ubelt import _win32_jaraco as jwfs
else:
    jwfs = None

# NOTE(review): presumably a lazily-filled cache of whether real symlinks can
# be created (None meaning "not determined yet"); the code that reads and
# writes it is not visible in this part of the file -- confirm.
__win32_can_symlink__: bool | None = None










def _win32_symlink2(
    path: str | os.PathLike,
    link: str | os.PathLike,
    allow_fallback: bool = True,
    verbose: int = 0,
) -> str | os.PathLike:
    """
    Create a real symbolic link if possible, otherwise create a junction.

    On most versions of windows you need special privileges to create a real
    symlink. Therefore a real symlink is only attempted when
    ``_win32_can_symlink()`` reports it is possible; otherwise we fall back
    to ``_win32_junction``.

    AFAIK, the main difference between symlinks and junctions is that
    symlinks can reference relative or absolute paths, whereas junctions
    always reference absolute paths. Not 100% on this though. Windows is
    weird.

    Note that junctions will not register as links via `islink`, but I
    believe real symlinks will.

    Args:
        path: location the new link should point to
        link: location of the link to create
        allow_fallback: accepted for API compatibility; it is not consulted
            by the current implementation (the fallback always happens)
        verbose: verbosity level forwarded to the chosen implementation

    Returns:
        whatever the chosen implementation returns
    """
    # Pick the implementation first, then make a single call to it.
    make_link = _win32_symlink if _win32_can_symlink() else _win32_junction
    return make_link(path, link, verbose)
def _win32_junction(
    path: str | os.PathLike,
    link: str | os.PathLike,
    verbose: int = 0,
) -> str | os.PathLike:
    """
    Link ``link`` to ``path`` without needing symlink privileges.

    On older (pre 10) versions of windows we need admin privileges to make
    symlinks, however junctions seem to work.

    For directories we make a junction (soft link) via ``mklink /J`` and for
    everything else we make a hard link via ``jwfs.link``.

    Args:
        path: existing location the link should point to
        link: location of the link to create
        verbose: verbosity level. Any truthy value prints which kind of link
            is made; ``>= 3`` additionally prints the resolved paths and runs
            the ``mklink`` command at verbosity 3.

    Returns:
        the absolute path of the created link

    Raises:
        ImportError: if a hard link is needed but ``jwfs`` is unavailable
        OSError: if the ``mklink`` command exits with a non-zero code

    Example:
        >>> # xdoc: +REQUIRES(WIN32)
        >>> import ubelt as ub
        >>> root = ub.Path.appdir('ubelt', 'win32_junction').ensuredir()
        >>> ub.delete(root)
        >>> ub.ensuredir(root)
        >>> fpath = join(root, 'fpath.txt')
        >>> dpath = join(root, 'dpath')
        >>> fjunc = join(root, 'fjunc.txt')
        >>> djunc = join(root, 'djunc')
        >>> ub.touch(fpath)
        >>> ub.ensuredir(dpath)
        >>> ub.ensuredir(join(root, 'djunc_fake'))
        >>> ub.ensuredir(join(root, 'djunc_fake with space'))
        >>> ub.touch(join(root, 'djunc_fake with space file'))
        >>> _win32_junction(fpath, fjunc)
        >>> _win32_junction(dpath, djunc)
        >>> # thank god colons are not allowed
        >>> djunc2 = join(root, 'djunc2 [with pathological attrs]')
        >>> _win32_junction(dpath, djunc2)
        >>> _win32_is_junction(djunc)
        >>> ub.writeto(join(djunc, 'afile.txt'), 'foo')
        >>> assert ub.readfrom(join(dpath, 'afile.txt')) == 'foo'
        >>> ub.writeto(fjunc, 'foo')
    """
    # junctions store absolute paths
    path = os.path.abspath(path)
    link = os.path.abspath(link)
    if verbose >= 3:
        print(f'_win32_junction {link} -> {path}')

    from ubelt import util_cmd

    if not os.path.isdir(path):
        # Not a directory: a junction is not used here, make a hard link.
        if verbose:
            print('... as hard link')
        if jwfs is None:
            raise ImportError(
                'jaraco.windows.filesystem is required to run _win32_junction'
            )
        try:
            jwfs.link(path, link)  # this seems to be allowed
        except Exception:
            print('Failed to hardlink link={} to path={}'.format(link, path))
            raise
        return link

    # Directory: make a junction (soft link) through the shell.
    if verbose:
        print('... as soft link (junction)')
    # TODO: what is the windows api for this?
    # NOTE: both paths are interpolated into a shell command line.
    command = 'mklink /J "{}" "{}"'.format(link, path)

    # ``3 * verbose >= 3`` parses as ``(3 * verbose) >= 3`` which is just
    # ``verbose >= 1``. Parenthesize so the command only becomes chatty (at
    # level 3) once the caller asked for verbose >= 3, matching the gate used
    # for the path printout above.
    cmd_verbose = 3 * (verbose >= 3)
    info = util_cmd.cmd(command, shell=True, verbose=cmd_verbose)
    if info['ret'] != 0:
        from ubelt import util_repr

        print('Failed command:')
        print(info['command'])
        print(util_repr.urepr(info, nl=1))
        raise OSError(str(info))
    return link
def _win32_is_junction(path: str | os.PathLike) -> bool:
    """
    Determines if a path is a win32 junction

    Note:
        on PyPy this is bugged and will currently return True for a
        symlinked directory.

    Args:
        path: the location to inspect

    Returns:
        bool: True if ``path`` looks like a junction, False otherwise
            (including when nothing exists at ``path``).

    Example:
        >>> # xdoctest: +REQUIRES(WIN32)
        >>> from ubelt._win32_links import _win32_junction, _win32_is_junction
        >>> import ubelt as ub
        >>> root = ub.Path.appdir('ubelt', 'win32_junction').ensuredir()
        >>> ub.delete(root)
        >>> ub.ensuredir(root)
        >>> dpath = root / 'dpath'
        >>> djunc = root / 'djunc'
        >>> dpath.ensuredir()
        >>> _win32_junction(dpath, djunc)
        >>> assert _win32_is_junction(djunc) is True
        >>> assert _win32_is_junction(dpath) is False
        >>> assert _win32_is_junction('notafile') is False
    """
    location = os.fspath(path)

    if exists(location):
        reparse = _is_reparse_point(location)
        if platform.python_implementation() == 'PyPy':
            # Workaround for pypy where os.path.islink will return True
            # for a junction. Can we just rely on it being a reparse point?
            # https://github.com/pypy/pypy/issues/4976
            return reparse
        return reparse and not os.path.islink(location)

    # ``exists`` reported False: the path only counts as a junction when it
    # still registers as a directory that is not a symlink.
    return os.path.isdir(location) and not os.path.islink(location)
def _is_reparse_point(path: str | os.PathLike) -> bool:
    """
    Check if a directory is a reparse point in windows.

    Note:
        a reparse point seems like it could be a junction or symlink.

    Args:
        path: the location to inspect

    Returns:
        the result of ``jwfs.is_reparse_point`` for ``path``

    Raises:
        ImportError: if ``jwfs`` is unavailable

    References:
        .. [SO54678399] https://stackoverflow.com/a/54678399/887074
    """
    if jwfs is not None:
        return jwfs.is_reparse_point(os.fspath(path))

    # A fallback without jaraco was sketched but is not enabled.
    # TODO: test that it is 1-to-1 with the jaraco version (seems to break
    # on pypy?):
    #     import subprocess
    #     child = subprocess.Popen(f'fsutil reparsepoint query "{path}"',
    #                              stdout=subprocess.PIPE)
    #     child.communicate()[0]
    #     return child.returncode == 0
    raise ImportError(
        'jaraco.windows.filesystem is required to run _is_reparse_point'
    )
def _win32_read_junction(path: str | os.PathLike) -> str:
    """
    Returns the location that the junction points to.

    Args:
        path: location of the junction (or symlink) to read

    Returns:
        the substitute name stored in the reparse point, with a leading
        ``?\\`` removed if present

    Raises:
        ImportError: if ``jwfs`` is unavailable
        ValueError: if ``path`` is not a reparse point
        OSError: if the reparse point cannot be opened, queried, or closed
        RuntimeError: if the reparse tag is neither a junction nor a symlink

    Example:
        >>> # xdoc: +REQUIRES(WIN32)
        >>> import ubelt as ub
        >>> root = ub.Path.appdir('ubelt', 'win32_junction').ensuredir()
        >>> ub.delete(root)
        >>> ub.ensuredir(root)
        >>> dpath = join(root, 'dpath')
        >>> djunc = join(root, 'djunc')
        >>> ub.ensuredir(dpath)
        >>> _win32_junction(dpath, djunc)
        >>> path = djunc
        >>> pointed = _win32_read_junction(path)
        >>> print('pointed = {!r}'.format(pointed))
    """
    import ctypes

    # Reparse tag used by junctions / mount points (0xA0000003).
    IO_REPARSE_TAG_MOUNT_POINT = 2684354563

    path = os.fspath(path)
    if jwfs is None:
        raise ImportError(
            'jaraco.windows.filesystem is required to run _win32_read_junction'
        )

    if not jwfs.is_reparse_point(path):
        raise ValueError('not a junction')

    # new version using the windows api
    handle = jwfs.api.CreateFile(
        path,
        0,
        0,
        None,
        jwfs.api.OPEN_EXISTING,
        jwfs.api.FILE_FLAG_OPEN_REPARSE_POINT
        | jwfs.api.FILE_FLAG_BACKUP_SEMANTICS,
        None,
    )
    if handle == jwfs.api.INVALID_HANDLE_VALUE:
        raise OSError('Unable to open reparse point: {!r}'.format(path))

    # The handle is only needed for the query itself, so release it as soon
    # as the query returns -- and also when the query raises -- instead of
    # leaking it on the error paths below.
    try:
        res = jwfs.reparse.DeviceIoControl(
            handle, jwfs.api.FSCTL_GET_REPARSE_POINT, None, 10240
        )
    finally:
        close_result = jwfs.api.CloseHandle(handle)
    jwfs.handle_nonzero_success(close_result)

    # Reinterpret the returned bytes as a REPARSE_DATA_BUFFER structure.
    raw_buffer = ctypes.create_string_buffer(res)
    p_rdb = ctypes.cast(
        raw_buffer, ctypes.POINTER(jwfs.api.REPARSE_DATA_BUFFER)
    )
    rdb = p_rdb.contents

    accepted_tags = [IO_REPARSE_TAG_MOUNT_POINT, jwfs.api.IO_REPARSE_TAG_SYMLINK]
    if rdb.tag not in accepted_tags:
        raise RuntimeError(
            'Expected <2684354563 or 2684354572>, but got %d' % rdb.tag
        )

    subname = rdb.get_substitute_name()
    # probably has something to do with long paths, not sure
    if subname.startswith('?\\'):
        subname = subname[2:]
    return subname
def _win32_rmtree(path: str | os.PathLike, verbose: int = 0) -> None:
    """
    rmtree for win32 that treats junctions like directory symlinks.
    The junction removal portion may not be safe on race conditions.

    There is a known issue [CPythonBug31226]_ that prevents
    :func:`shutil.rmtree` from deleting directories with junctions.

    Args:
        path: directory (or junction) to delete
        verbose: if truthy, print what is being deleted

    References:
        .. [CPythonBug31226] https://bugs.python.org/issue31226
    """
    path = os.fspath(path)

    def _strip_junctions(top: str | os.PathLike) -> None:
        # Walk the tree top-down, removing junctions as they are found and
        # pruning the walk so that it only descends into real directories.
        for dirpath, dirnames, _filenames in os.walk(os.fspath(top)):
            real_dirs = []
            for dname in dirnames:
                candidate = os.path.join(dirpath, dname)
                if not os.path.isdir(candidate):
                    continue
                if _win32_is_junction(candidate):
                    # remove any junctions as we encounter them
                    os.rmdir(candidate)
                elif not os.path.islink(candidate):
                    real_dirs.append(dname)

            # Not sure if necessary, double check, junctions are odd
            for entry in os.listdir(dirpath):
                candidate = os.path.join(dirpath, entry)
                if os.path.isdir(candidate) and _win32_is_junction(candidate):
                    # remove any junctions as we encounter them
                    os.rmdir(candidate)

            # only recurse into real directories
            dirnames[:] = real_dirs

    if _win32_is_junction(path):
        # The root itself is a junction: removing the link is all we do.
        if verbose:
            print('Deleting <JUNCTION> directory="{}"'.format(path))
        os.rmdir(path)
        return

    if verbose:
        print('Deleting directory="{}"'.format(path))

    # first remove all junctions
    _strip_junctions(path)

    # now we can rmtree as normal
    import shutil

    def _report_failure(func, failed_path, exc_info) -> None:
        # Failures are reported on stdout rather than raised.
        print('Error')
        print('func = {!r}'.format(func))
        print('path = {!r}'.format(failed_path))
        print('exc_info = {!r}'.format(exc_info))

    shutil.rmtree(path, onerror=_report_failure)
def _win32_is_hardlinked(
    fpath1: str | os.PathLike,
    fpath2: str | os.PathLike,
) -> bool:
    """
    Test if two hard links point to the same location

    Both paths are opened for reading and compared by their
    (volume serial number, file index high, file index low) triple.

    Args:
        fpath1: first path to compare
        fpath2: second path to compare

    Returns:
        True if both paths report the same identifying triple

    Raises:
        ImportError: if ``jwfs`` is unavailable
        OSError: if either path cannot be opened or queried

    Example:
        >>> # xdoc: +REQUIRES(WIN32)
        >>> import ubelt as ub
        >>> root = ub.Path.appdir('ubelt', 'win32_hardlink').ensuredir()
        >>> ub.delete(root)
        >>> ub.ensuredir(root)
        >>> fpath1 = join(root, 'fpath1')
        >>> fpath2 = join(root, 'fpath2')
        >>> ub.touch(fpath1)
        >>> ub.touch(fpath2)
        >>> fjunc1 = _win32_junction(fpath1, join(root, 'fjunc1'))
        >>> fjunc2 = _win32_junction(fpath2, join(root, 'fjunc2'))
        >>> assert _win32_is_hardlinked(fjunc1, fpath1)
        >>> assert _win32_is_hardlinked(fjunc2, fpath2)
        >>> assert not _win32_is_hardlinked(fjunc2, fpath1)
        >>> assert not _win32_is_hardlinked(fjunc1, fpath2)
    """
    if jwfs is None:
        raise ImportError(
            'jaraco.windows.filesystem is required to run _win32_is_hardlinked'
        )

    # NOTE: jwf.samefile(fpath1, fpath2) seems to behave differently

    def get_read_handle(fpath: str | os.PathLike) -> typing.Any:
        # Normalize path-like objects to str before handing them to the
        # win32 API, as the other helpers in this module do.
        fpath = os.fspath(fpath)
        if os.path.isdir(fpath):
            # Directories are opened with FILE_FLAG_BACKUP_SEMANTICS
            dwFlagsAndAttributes = jwfs.api.FILE_FLAG_BACKUP_SEMANTICS
        else:
            dwFlagsAndAttributes = 0
        hFile = jwfs.api.CreateFile(
            fpath,
            jwfs.api.GENERIC_READ,
            jwfs.api.FILE_SHARE_READ,
            None,
            jwfs.api.OPEN_EXISTING,
            dwFlagsAndAttributes,
            None,
        )
        if hFile == jwfs.api.INVALID_HANDLE_VALUE:
            # Fail here with the offending path rather than later on an
            # unusable handle.
            raise OSError('Unable to open {!r} for reading'.format(fpath))
        return hFile

    def get_unique_id(hFile: typing.Any) -> tuple[int, int, int]:
        info = jwfs.api.BY_HANDLE_FILE_INFORMATION()
        res = jwfs.api.GetFileInformationByHandle(hFile, info)
        jwfs.handle_nonzero_success(res)
        return (
            info.volume_serial_number,
            info.file_index_high,
            info.file_index_low,
        )

    # Nest the cleanup so that each handle that was successfully opened is
    # closed exactly once, even if opening the second path fails.
    hFile1 = get_read_handle(fpath1)
    try:
        hFile2 = get_read_handle(fpath2)
        try:
            return get_unique_id(hFile1) == get_unique_id(hFile2)
        finally:
            jwfs.api.CloseHandle(hFile2)
    finally:
        jwfs.api.CloseHandle(hFile1)
def _win32_dir(
    path: str | os.PathLike,
    star: str = '',
) -> typing.Iterator[tuple[str, str, str | None]]:
    """
    Using the windows cmd shell to get information about a directory

    Runs ``dir /-C`` on ``path`` and parses each listed entry.

    Args:
        path: directory to list
        star: text appended verbatim after the quoted path in the ``dir``
            command

    Yields:
        tuple[str, str, str | None]: ``(type_or_size, name, pointed)`` where
            ``type_or_size`` is the type/size column of the listing, ``name``
            is the entry name, and ``pointed`` is the text between the square
            brackets for ``<JUNCTION>``, ``<SYMLINKD>`` and ``<SYMLINK>``
            entries, or None for every other entry.

    Raises:
        OSError: if the ``dir`` command exits with a non-zero code
    """
    import re

    from ubelt import util_cmd

    wrapper = 'cmd /S /C "{}"'  # the /S will preserve all inner quotes
    command = 'dir /-C "{}"{}'.format(path, star)
    wrapped = wrapper.format(command)
    info = util_cmd.cmd(wrapped, shell=True)
    if info['ret'] != 0:
        from ubelt import util_repr

        print('Failed command:')
        print(info['command'])
        print(util_repr.urepr(info, nl=1))
        raise OSError(str(info))

    # parse the output of dir to get some info
    # Remove header and footer
    lines = info['out'].split('\n')[5:-3]
    link_types = ['<JUNCTION>', '<SYMLINKD>', '<SYMLINK>']
    # The capturing group keeps the runs of spaces, so the name (which may
    # itself contain spaces) can be re-joined losslessly from the tail.
    splitter = re.compile('( +)')
    for line in lines:
        parts = splitter.split(line)
        # Columns alternate with separators: date, time, am/pm, type-or-size
        _date, _, _time, _, _ampm, _, type_or_size, _ = parts[:8]
        name = ''.join(parts[8:])
        # if type is a junction then name will also contain the linked loc
        if name == '.' or name == '..':
            continue
        if type_or_size in link_types:
            # colons cannot be in path names, so use that to find where
            # the name ends
            pos = name.find(':')
            bpos = name[:pos].rfind('[')
            # Slice the target out BEFORE truncating ``name``; doing it in
            # the other order always produced an empty target.
            pointed = name[bpos + 1 : -1]
            name = name[: bpos - 1]
            yield type_or_size, name, pointed
        else:
            yield type_or_size, name, None