Mise à jour de Monitor.py et autres scripts

This commit is contained in:
Debian
2025-07-23 10:46:27 +02:00
parent 7081418ce0
commit 7de3e0fb50
8604 changed files with 2789953 additions and 295 deletions

View File

@@ -0,0 +1,300 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
# @PydevCodeAnalysisIgnore
__all__ = [
"Actor",
"AmbiguousObjectName",
"BadName",
"BadObject",
"BadObjectType",
"BaseIndexEntry",
"Blob",
"BlobFilter",
"BlockingLockFile",
"CacheError",
"CheckoutError",
"CommandError",
"Commit",
"Diff",
"DiffConstants",
"DiffIndex",
"Diffable",
"FetchInfo",
"Git",
"GitCmdObjectDB",
"GitCommandError",
"GitCommandNotFound",
"GitConfigParser",
"GitDB",
"GitError",
"HEAD",
"Head",
"HookExecutionError",
"INDEX",
"IndexEntry",
"IndexFile",
"IndexObject",
"InvalidDBRoot",
"InvalidGitRepositoryError",
"List", # Deprecated - import this from `typing` instead.
"LockFile",
"NULL_TREE",
"NoSuchPathError",
"ODBError",
"Object",
"Optional", # Deprecated - import this from `typing` instead.
"ParseError",
"PathLike",
"PushInfo",
"RefLog",
"RefLogEntry",
"Reference",
"Remote",
"RemoteProgress",
"RemoteReference",
"Repo",
"RepositoryDirtyError",
"RootModule",
"RootUpdateProgress",
"Sequence", # Deprecated - import from `typing`, or `collections.abc` in 3.9+.
"StageType",
"Stats",
"Submodule",
"SymbolicReference",
"TYPE_CHECKING", # Deprecated - import this from `typing` instead.
"Tag",
"TagObject",
"TagReference",
"Tree",
"TreeModifier",
"Tuple", # Deprecated - import this from `typing` instead.
"Union", # Deprecated - import this from `typing` instead.
"UnmergedEntriesError",
"UnsafeOptionError",
"UnsafeProtocolError",
"UnsupportedOperation",
"UpdateProgress",
"WorkTreeRepositoryUnsupported",
"refresh",
"remove_password_if_present",
"rmtree",
"safe_decode",
"to_hex_sha",
]
__version__ = '3.1.44'
from typing import Any, List, Optional, Sequence, TYPE_CHECKING, Tuple, Union
if TYPE_CHECKING:
from types import ModuleType
import warnings
from gitdb.util import to_hex_sha
from git.exc import (
AmbiguousObjectName,
BadName,
BadObject,
BadObjectType,
CacheError,
CheckoutError,
CommandError,
GitCommandError,
GitCommandNotFound,
GitError,
HookExecutionError,
InvalidDBRoot,
InvalidGitRepositoryError,
NoSuchPathError,
ODBError,
ParseError,
RepositoryDirtyError,
UnmergedEntriesError,
UnsafeOptionError,
UnsafeProtocolError,
UnsupportedOperation,
WorkTreeRepositoryUnsupported,
)
from git.types import PathLike
try:
from git.compat import safe_decode # @NoMove
from git.config import GitConfigParser # @NoMove
from git.objects import ( # @NoMove
Blob,
Commit,
IndexObject,
Object,
RootModule,
RootUpdateProgress,
Submodule,
TagObject,
Tree,
TreeModifier,
UpdateProgress,
)
from git.refs import ( # @NoMove
HEAD,
Head,
RefLog,
RefLogEntry,
Reference,
RemoteReference,
SymbolicReference,
Tag,
TagReference,
)
from git.diff import ( # @NoMove
INDEX,
NULL_TREE,
Diff,
DiffConstants,
DiffIndex,
Diffable,
)
from git.db import GitCmdObjectDB, GitDB # @NoMove
from git.cmd import Git # @NoMove
from git.repo import Repo # @NoMove
from git.remote import FetchInfo, PushInfo, Remote, RemoteProgress # @NoMove
from git.index import ( # @NoMove
BaseIndexEntry,
BlobFilter,
CheckoutError,
IndexEntry,
IndexFile,
StageType,
# NOTE: This tells type checkers what util resolves to. We delete it, and it is
# really resolved by __getattr__, which warns. See below on what to use instead.
util,
)
from git.util import ( # @NoMove
Actor,
BlockingLockFile,
LockFile,
Stats,
remove_password_if_present,
rmtree,
)
except GitError as _exc:
raise ImportError("%s: %s" % (_exc.__class__.__name__, _exc)) from _exc
def _warned_import(message: str, fullname: str) -> "ModuleType":
    """Issue a :class:`DeprecationWarning`, then import and return a module.

    :param message:
        Text of the deprecation warning to emit.

    :param fullname:
        Fully qualified name of the module to import.

    :return:
        The module object produced by :func:`importlib.import_module`.
    """
    from importlib import import_module

    # stacklevel=3 attributes the warning to the caller of this helper's caller,
    # rather than to this helper or to the function that invoked it.
    warnings.warn(message, category=DeprecationWarning, stacklevel=3)
    module = import_module(fullname)
    return module
def _getattr(name: str) -> Any:
    """Resolve deprecated or private module attributes on demand, with a warning.

    :param name:
        Attribute name that was not found by normal module attribute lookup.

    :return:
        The submodule the name is an alias for.

    :raise AttributeError:
        If `name` is not one of the aliases handled here.
    """
    # TODO: If __version__ is made dynamic and lazily fetched, put that case right here.

    if name == "util":
        return _warned_import(
            "The expression `git.util` and the import `from git import util` actually "
            "reference git.index.util, and not the git.util module accessed in "
            '`from git.util import XYZ` or `sys.modules["git.util"]`. This potentially '
            "confusing behavior is currently preserved for compatibility, but may be "
            "changed in the future and should not be relied on.",
            fullname="git.index.util",
        )

    # Each entry pairs a package with the submodule names it privately aliases.
    private_aliases = (
        ("git.refs", {"head", "log", "reference", "symbolic", "tag"}),
        ("git.index", {"base", "fun", "typ"}),
    )
    for prefix, names in private_aliases:
        if name in names:
            fullname = f"{prefix}.{name}"
            return _warned_import(
                f"{__name__}.{name} is a private alias of {fullname} and subject to "
                f"immediate removal. Use {fullname} instead.",
                fullname=fullname,
            )

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
# Runtime-only adjustments to the module namespace. Static type checkers treat
# TYPE_CHECKING as true and therefore never see the statements in this branch.
if not TYPE_CHECKING:
    # NOTE: The expression `git.util` gives git.index.util and `from git import util`
    # imports git.index.util, NOT git.util. It may not be feasible to change this until
    # the next major version, to avoid breaking code inadvertently relying on it.
    #
    # - If git.index.util *is* what you want, use (or import from) that, to avoid
    #   confusion.
    #
    # - To use the "real" git.util module, write `from git.util import ...`, or if
    #   necessary access it as `sys.modules["git.util"]`.
    #
    # Note also that `import git.util` technically imports the "real" git.util... but
    # the *expression* `git.util` after doing so is still git.index.util!
    #
    # (This situation differs from that of other indirect-submodule imports that are
    # unambiguously non-public and subject to immediate removal. Here, the public
    # git.util module, though different, makes less discoverable that the expression
    # `git.util` refers to a non-public attribute of the git module.)
    #
    # This had originally come about by a wildcard import. Now that all intended imports
    # are explicit, the intuitive but potentially incompatible binding occurs due to the
    # usual rules for Python submodule bindings. So for now we replace that binding with
    # git.index.util, delete that, and let __getattr__ handle it and issue a warning.
    #
    # For the same runtime behavior, it would be enough to forgo importing util, and
    # delete util as created naturally; __getattr__ would behave the same. But type
    # checkers would not know what util refers to when accessed as an attribute of git.
    del util

    # This is "hidden" to preserve static checking for undefined/misspelled attributes.
    # Installing it as the module-level __getattr__ routes lookups of missing
    # attributes through _getattr.
    __getattr__ = _getattr
# { Initialize git executable path
GIT_OK = None


def refresh(path: Optional[PathLike] = None) -> None:
    """Set the git executable path and record whether initialization succeeded.

    :param path:
        Optional path to the Git executable. If not absolute, it is resolved
        immediately, relative to the current directory.

    :note:
        The `path` parameter is usually omitted and cannot be used to specify a custom
        command whose location is looked up in a path search on each call. See
        :meth:`Git.refresh <git.cmd.Git.refresh>` for details on how to achieve this.

    :note:
        This calls :meth:`Git.refresh <git.cmd.Git.refresh>` and sets other global
        configuration according to the effect of doing so. As such, this function should
        usually be used instead of using :meth:`Git.refresh <git.cmd.Git.refresh>` or
        :meth:`FetchInfo.refresh <git.remote.FetchInfo.refresh>` directly.

    :note:
        This function is called automatically, with no arguments, at import time.
    """
    global GIT_OK

    # Start pessimistic; the flag only becomes True when both refresh steps report
    # success. FetchInfo.refresh() is not called if Git.refresh() reports failure.
    GIT_OK = False
    if Git.refresh(path=path) and FetchInfo.refresh():  # noqa: F405
        GIT_OK = True


# Initialize once at import time; any failure is reported as an ImportError.
try:
    refresh()
except Exception as _exc:
    raise ImportError("Failed to initialize: {0}".format(_exc)) from _exc
# } END initialize git executable path

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,165 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Utilities to help provide compatibility with Python 3.
This module exists for historical reasons. Code outside GitPython may make use of public
members of this module, but is unlikely to benefit from doing so. GitPython continues to
use some of these utilities, in some cases for compatibility across different platforms.
"""
import locale
import os
import sys
import warnings
from gitdb.utils.encoding import force_bytes, force_text # noqa: F401
# typing --------------------------------------------------------------------
from typing import (
Any, # noqa: F401
AnyStr,
Dict, # noqa: F401
IO, # noqa: F401
List,
Optional,
TYPE_CHECKING,
Tuple, # noqa: F401
Type, # noqa: F401
Union,
overload,
)
# ---------------------------------------------------------------------------
# Values served (with a deprecation warning) for the legacy is_<platform> names.
_deprecated_platform_aliases = {
    "is_win": os.name == "nt",
    "is_posix": os.name == "posix",
    "is_darwin": sys.platform == "darwin",
}


def _getattr(name: str) -> Any:
    """Look up a deprecated platform alias, warning about its use.

    :param name:
        Attribute name that was not found by normal module attribute lookup.

    :return:
        The boolean stored for `name` in ``_deprecated_platform_aliases``.

    :raise AttributeError:
        If `name` is not one of the deprecated aliases.
    """
    try:
        alias_value = _deprecated_platform_aliases[name]
    except KeyError:
        # Hide the internal KeyError so callers see an ordinary missing-attribute error.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None
    else:
        warnings.warn(
            f"{__name__}.{name} and other is_<platform> aliases are deprecated. "
            "Write the desired os.name or sys.platform check explicitly instead.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return alias_value
if not TYPE_CHECKING:  # Preserve static checking for undefined/misspelled attributes.
    __getattr__ = _getattr


def __dir__() -> List[str]:
    """List the module's global names followed by the deprecated alias names."""
    listing = list(globals())
    listing.extend(_deprecated_platform_aliases)
    return listing
is_win: bool
"""Deprecated alias for ``os.name == "nt"`` to check for native Windows.
This is deprecated because it is clearer to write out :attr:`os.name` or
:attr:`sys.platform` checks explicitly, especially in cases where it matters which is
used.
:note:
``is_win`` is ``False`` on Cygwin, but is often wrongly assumed ``True``. To detect
Cygwin, use ``sys.platform == "cygwin"``.
"""
is_posix: bool
"""Deprecated alias for ``os.name == "posix"`` to check for Unix-like ("POSIX") systems.
This is deprecated because it is clearer to write out :attr:`os.name` or
:attr:`sys.platform` checks explicitly, especially in cases where it matters which is
used.
:note:
For POSIX systems, more detailed information is available in :attr:`sys.platform`,
while :attr:`os.name` is always ``"posix"`` on such systems, including macOS
(Darwin).
"""
is_darwin: bool
"""Deprecated alias for ``sys.platform == "darwin"`` to check for macOS (Darwin).
This is deprecated because it is clearer to write out :attr:`os.name` or
:attr:`sys.platform` checks explicitly.
:note:
For macOS (Darwin), ``os.name == "posix"`` as in other Unix-like systems, while
``sys.platform == "darwin"``.
"""
defenc = sys.getfilesystemencoding()
"""The encoding used to convert between Unicode and bytes filenames."""


@overload
def safe_decode(s: None) -> None: ...


@overload
def safe_decode(s: AnyStr) -> str: ...


def safe_decode(s: Union[AnyStr, None]) -> Optional[str]:
    """Safely decode a binary string to Unicode.

    :param s:
        Text (returned unchanged), bytes (decoded with :attr:`defenc`), or ``None``.

    :return:
        The decoded text, or ``None`` if `s` is ``None``. Bytes that are not valid in
        :attr:`defenc` are preserved via the ``surrogateescape`` error handler instead
        of raising.

    :raise TypeError:
        If `s` is neither text, bytes, nor ``None``.
    """
    if isinstance(s, str):
        return s
    elif isinstance(s, bytes):
        return s.decode(defenc, "surrogateescape")
    elif s is None:
        return None
    else:
        raise TypeError("Expected bytes or text, but got %r" % (s,))


@overload
def safe_encode(s: None) -> None: ...


@overload
def safe_encode(s: AnyStr) -> bytes: ...


def safe_encode(s: Optional[AnyStr]) -> Optional[bytes]:
    """Safely encode a Unicode string to bytes.

    :param s:
        Text (encoded with :attr:`defenc`), bytes (returned unchanged), or ``None``.

    :return:
        The encoded bytes, or ``None`` if `s` is ``None``.

    :raise TypeError:
        If `s` is neither text, bytes, nor ``None``.
    """
    if isinstance(s, str):
        # Mirror safe_decode's error handler so that text it produced from
        # undecodable bytes (lone surrogates) encodes back to the original bytes
        # instead of raising UnicodeEncodeError.
        return s.encode(defenc, "surrogateescape")
    elif isinstance(s, bytes):
        return s
    elif s is None:
        return None
    else:
        raise TypeError("Expected bytes or text, but got %r" % (s,))
@overload
def win_encode(s: None) -> None: ...


@overload
def win_encode(s: AnyStr) -> bytes: ...


def win_encode(s: Optional[AnyStr]) -> Optional[bytes]:
    """Encode Unicode strings for process arguments on Windows.

    :param s:
        Text (encoded with the locale's preferred encoding), bytes (returned
        unchanged), or ``None``.

    :return:
        The encoded bytes, or ``None`` if `s` is ``None``.

    :raise TypeError:
        If `s` is neither text, bytes, nor ``None``.
    """
    if s is None:
        return None
    if isinstance(s, bytes):
        return s
    if isinstance(s, str):
        encoding = locale.getpreferredencoding(False)
        return s.encode(encoding)
    raise TypeError("Expected bytes or text, but got %r" % (s,))

View File

@@ -0,0 +1,944 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Parser for reading and writing configuration files."""
__all__ = ["GitConfigParser", "SectionConstraint"]
import abc
import configparser as cp
import fnmatch
from functools import wraps
import inspect
from io import BufferedReader, IOBase
import logging
import os
import os.path as osp
import re
import sys
from git.compat import defenc, force_text
from git.util import LockFile
# typing-------------------------------------------------------
from typing import (
Any,
Callable,
Generic,
IO,
List,
Dict,
Sequence,
TYPE_CHECKING,
Tuple,
TypeVar,
Union,
cast,
)
from git.types import Lit_config_levels, ConfigLevels_Tup, PathLike, assert_never, _T
if TYPE_CHECKING:
from io import BytesIO
from git.repo.base import Repo
T_ConfigParser = TypeVar("T_ConfigParser", bound="GitConfigParser")
T_OMD_value = TypeVar("T_OMD_value", str, bytes, int, float, bool)
if sys.version_info[:3] < (3, 7, 2):
# typing.Ordereddict not added until Python 3.7.2.
from collections import OrderedDict
OrderedDict_OMD = OrderedDict
else:
from typing import OrderedDict
OrderedDict_OMD = OrderedDict[str, List[T_OMD_value]] # type: ignore[assignment, misc]
# -------------------------------------------------------------
_logger = logging.getLogger(__name__)
CONFIG_LEVELS: ConfigLevels_Tup = ("system", "user", "global", "repository")
"""The configuration level of a configuration file."""
CONDITIONAL_INCLUDE_REGEXP = re.compile(r"(?<=includeIf )\"(gitdir|gitdir/i|onbranch):(.+)\"")
"""Section pattern to detect conditional includes.
See: https://git-scm.com/docs/git-config#_conditional_includes
"""
class MetaParserBuilder(abc.ABCMeta):  # noqa: B024
    """Utility class wrapping base-class methods into decorators that assure read-only
    properties."""

    def __new__(cls, name: str, bases: Tuple, clsdict: Dict[str, Any]) -> "MetaParserBuilder":
        """Equip all base-class methods with a needs_values decorator, and all non-const
        methods with a :func:`set_dirty_and_flush_changes` decorator in addition to
        that.

        Wrapping only happens when the class body defines ``_mutating_methods_``.
        Public routines of each base that the class body does not itself define are
        wrapped and stored in `clsdict` before the class is created.

        :param name:
            Name of the class being created.

        :param bases:
            Base classes whose public routines are wrapped.

        :param clsdict:
            Namespace of the class body; wrapped methods are added to it.

        :return:
            The newly created class.
        """
        kmm = "_mutating_methods_"
        if kmm in clsdict:
            mutating_methods = clsdict[kmm]
            for base in bases:
                # The loop variable must not be called `name`: that would overwrite the
                # class name passed to super().__new__() below and give the new class
                # the name of the last wrapped method.
                for method_name, method in inspect.getmembers(base, inspect.isroutine):
                    if method_name.startswith("_") or method_name in clsdict:
                        continue
                    method_with_values = needs_values(method)
                    if method_name in mutating_methods:
                        method_with_values = set_dirty_and_flush_changes(method_with_values)
                    # END mutating methods handling

                    clsdict[method_name] = method_with_values
                # END for each name/method pair
            # END for each base
        # END if mutating methods configuration is set

        return super().__new__(cls, name, bases, clsdict)
def needs_values(func: Callable[..., _T]) -> Callable[..., _T]:
    """Return a method that calls ``self.read()`` before delegating to `func`, so
    values are read (on demand) before they are accessed.

    :param func:
        The method to wrap.

    :return:
        A wrapper carrying `func`'s metadata (via :func:`functools.wraps`).
    """

    @wraps(func)
    def read_then_call(self: "GitConfigParser", *args: Any, **kwargs: Any) -> _T:
        self.read()
        result = func(self, *args, **kwargs)
        return result

    return read_then_call
def set_dirty_and_flush_changes(non_const_func: Callable[..., _T]) -> Callable[..., _T]:
    """Return a method that marks the instance dirty and flushes it after each call.

    The wrapper first invokes `non_const_func`. Only if that returns normally is
    ``self._dirty`` set to ``True`` and ``self.write()`` called, so the change goes
    straight to disk.

    :param non_const_func:
        A method that modifies the configuration.

    :return:
        A wrapper that returns whatever `non_const_func` returned.
    """

    # Use functools.wraps, as needs_values does, so that the wrapper keeps the wrapped
    # method's __doc__, __qualname__ and __module__ in addition to its __name__.
    @wraps(non_const_func)
    def flush_changes(self: "GitConfigParser", *args: Any, **kwargs: Any) -> _T:
        rval = non_const_func(self, *args, **kwargs)
        self._dirty = True
        self.write()
        return rval

    # END wrapper method
    return flush_changes
class SectionConstraint(Generic[T_ConfigParser]):
    """Restrict a ConfigParser to option-level calls on one fixed section.

    Every method named in ``_valid_attrs_`` is forwarded to the wrapped parser with
    the section name we were initialized with inserted as the first argument.

    :note:
        If used as a context manager, will release the wrapped ConfigParser.
    """

    __slots__ = ("_config", "_section_name")

    # Names of parser methods that are forwarded with the section name prepended.
    _valid_attrs_ = (
        "get_value",
        "set_value",
        "get",
        "set",
        "getint",
        "getfloat",
        "getboolean",
        "has_option",
        "remove_section",
        "remove_option",
        "options",
    )

    def __init__(self, config: T_ConfigParser, section: str) -> None:
        self._config = config
        self._section_name = section

    def __del__(self) -> None:
        # Release the wrapped parser explicitly on finalization. (The original
        # author notes this explicit call was needed for release to work on Python 3.)
        self._config.release()

    def __getattr__(self, attr: str) -> Any:
        if attr not in self._valid_attrs_:
            return super().__getattribute__(attr)

        def section_bound(*args: Any, **kwargs: Any) -> Any:
            return self._call_config(attr, *args, **kwargs)

        return section_bound

    def _call_config(self, method: str, *args: Any, **kwargs: Any) -> Any:
        """Call the configuration at the given method which must take a section name as
        first argument."""
        bound_method = getattr(self._config, method)
        return bound_method(self._section_name, *args, **kwargs)

    @property
    def config(self) -> T_ConfigParser:
        """return: ConfigParser instance we constrain"""
        return self._config

    def release(self) -> None:
        """Equivalent to :meth:`GitConfigParser.release`, which is called on our
        underlying parser instance."""
        return self._config.release()

    def __enter__(self) -> "SectionConstraint[T_ConfigParser]":
        self._config.__enter__()
        return self

    def __exit__(self, exception_type: str, exception_value: str, traceback: str) -> None:
        self._config.__exit__(exception_type, exception_value, traceback)
class _OMD(OrderedDict_OMD):
    """Ordered multi-dict.

    Each key maps to a list of values. Plain item access reads the last value of
    that list, and plain item assignment replaces the list with a single value.
    """

    def __setitem__(self, key: str, value: _T) -> None:
        super().__setitem__(key, [value])

    def add(self, key: str, value: Any) -> None:
        """Append `value` to the values of `key`, creating the entry if needed."""
        if key in self:
            super().__getitem__(key).append(value)
        else:
            super().__setitem__(key, [value])

    def setall(self, key: str, values: List[_T]) -> None:
        """Replace the whole value list stored for `key`."""
        super().__setitem__(key, values)

    def __getitem__(self, key: str) -> Any:
        stored = super().__getitem__(key)
        return stored[-1]

    def getlast(self, key: str) -> Any:
        """Return the last value stored for `key`."""
        stored = super().__getitem__(key)
        return stored[-1]

    def setlast(self, key: str, value: Any) -> None:
        """Overwrite the last value of `key`, creating the entry if needed."""
        if key in self:
            super().__getitem__(key)[-1] = value
        else:
            super().__setitem__(key, [value])

    def get(self, key: str, default: Union[_T, None] = None) -> Union[_T, None]:
        stored = super().get(key, [default])
        return stored[-1]

    def getall(self, key: str) -> List[_T]:
        """Return the full value list stored for `key`."""
        return super().__getitem__(key)

    def items(self) -> List[Tuple[str, _T]]:  # type: ignore[override]
        """List of (key, last value for key)."""
        return [(key, self[key]) for key in self]

    def items_all(self) -> List[Tuple[str, List[_T]]]:
        """List of (key, list of values for key)."""
        return [(key, self.getall(key)) for key in self]
def get_config_path(config_level: Lit_config_levels) -> str:
    """Return the path of the configuration file for `config_level`.

    :param config_level:
        One of ``"system"``, ``"user"`` or ``"global"``. On Windows
        (``sys.platform == "win32"``), ``"system"`` is treated as ``"global"``.

    :return:
        The configuration file path for that level.

    :raise ValueError:
        If `config_level` is ``"repository"``, since no repository is known here.
    """
    # We do not support an absolute path of the gitconfig on Windows.
    # Use the global config instead.
    if sys.platform == "win32" and config_level == "system":
        config_level = "global"

    if config_level == "system":
        return "/etc/gitconfig"

    if config_level == "user":
        xdg_home = os.environ.get("XDG_CONFIG_HOME")
        config_home = xdg_home or osp.join(os.environ.get("HOME", "~"), ".config")
        user_path = osp.join(config_home, "git", "config")
        return osp.normpath(osp.expanduser(user_path))

    if config_level == "global":
        return osp.normpath(osp.expanduser("~/.gitconfig"))

    if config_level == "repository":
        raise ValueError("No repo to get repository configuration from. Use Repo._get_config_path")

    # Should not reach here. Will raise ValueError if does. Static typing will warn
    # about missing branches.
    assert_never(  # type: ignore[unreachable]
        config_level,
        ValueError(f"Invalid configuration level: {config_level!r}"),
    )
class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder):
"""Implements specifics required to read git style configuration files.
This variation behaves much like the :manpage:`git-config(1)` command, such that the
configuration will be read on demand based on the filepath given during
initialization.
The changes will automatically be written once the instance goes out of scope, but
can be triggered manually as well.
The configuration file will be locked if you intend to change values preventing
other instances to write concurrently.
:note:
The config is case-sensitive even when queried, hence section and option names
must match perfectly.
:note:
If used as a context manager, this will release the locked file.
"""
# { Configuration
t_lock = LockFile
"""The lock type determines the type of lock to use in new configuration readers.
They must be compatible to the :class:`~git.util.LockFile` interface.
A suitable alternative would be the :class:`~git.util.BlockingLockFile`.
"""
re_comment = re.compile(r"^\s*[#;]")
# } END configuration
optvalueonly_source = r"\s*(?P<option>[^:=\s][^:=]*)"
OPTVALUEONLY = re.compile(optvalueonly_source)
OPTCRE = re.compile(optvalueonly_source + r"\s*(?P<vi>[:=])\s*" + r"(?P<value>.*)$")
del optvalueonly_source
_mutating_methods_ = ("add_section", "remove_section", "remove_option", "set")
"""Names of :class:`~configparser.RawConfigParser` methods able to change the
instance."""
def __init__(
    self,
    file_or_files: Union[None, PathLike, "BytesIO", Sequence[Union[PathLike, "BytesIO"]]] = None,
    read_only: bool = True,
    merge_includes: bool = True,
    config_level: Union[Lit_config_levels, None] = None,
    repo: Union["Repo", None] = None,
) -> None:
    """Set up a configuration reader for `file_or_files`, optionally writable.

    :param file_or_files:
        A file path or file object, or a sequence of possibly more than one of them.
        If ``None``, the files are chosen from `config_level` (see below).

    :param read_only:
        If ``True``, the ConfigParser may only read the data, but not change it.
        If ``False``, only a single file path or file object may be given. We will
        write back the changes when they happen, or when the ConfigParser is
        released. This will not happen if other configuration files have been
        included.

    :param merge_includes:
        If ``True``, we will read files mentioned in ``[include]`` sections and
        merge their contents into ours. This makes it impossible to write back an
        individual configuration file. Thus, if you want to modify a single
        configuration file, turn this off to leave the original dataset unaltered
        when reading it.

    :param config_level:
        Only consulted when `file_or_files` is ``None``. If given, the file for that
        level (per :func:`get_config_path`) is used. If also ``None``, a read-only
        parser uses every level except ``"repository"``.

    :param repo:
        Reference to repository to use if ``[includeIf]`` sections are found in
        configuration files.

    :raise ValueError:
        If neither files nor a level are given for a writable parser.
    """
    cp.RawConfigParser.__init__(self, dict_type=_OMD)
    self._dict: Callable[..., _OMD]
    self._defaults: _OMD
    self._sections: _OMD

    # Used in Python 3. Needs to stay in sync with sections for underlying
    # implementation to work.
    if not hasattr(self, "_proxies"):
        self._proxies = self._dict()

    if file_or_files is not None:
        self._file_or_files: Union[PathLike, "BytesIO", Sequence[Union[PathLike, "BytesIO"]]] = file_or_files
    elif config_level is not None:
        self._file_or_files = [get_config_path(config_level)]
    elif read_only:
        self._file_or_files = [
            get_config_path(cast(Lit_config_levels, f)) for f in CONFIG_LEVELS if f != "repository"
        ]
    else:
        raise ValueError("No configuration level or configuration files specified")

    self._read_only = read_only
    self._dirty = False
    self._is_initialized = False
    self._merge_includes = merge_includes
    self._repo = repo
    self._lock: Union["LockFile", None] = None
    self._acquire_lock()
def _acquire_lock(self) -> None:
    """Create the lock object if needed and obtain the lock; no-op when read-only.

    :raise ValueError:
        If this writable parser was given a sequence of files.
    """
    if self._read_only:
        return

    if not self._lock:
        target = self._file_or_files
        if isinstance(target, (str, os.PathLike)):
            lock_path = target
        elif isinstance(target, (tuple, list, Sequence)):
            raise ValueError(
                "Write-ConfigParsers can operate on a single file only, multiple files have been passed"
            )
        else:
            # Neither a path nor a sequence: take the name from the handle/stream.
            lock_path = target.name

        # Initialize lock base - we want to write.
        self._lock = self.t_lock(lock_path)

    self._lock._obtain_lock()
def __del__(self) -> None:
    """Write pending changes if required and release locks.

    This simply delegates to :meth:`release`.
    """
    # NOTE: Only consistent in Python 2.
    self.release()
def __enter__(self) -> "GitConfigParser":
    """Enter the context: (re)acquire the write lock if writable, and return self."""
    self._acquire_lock()
    return self
def __exit__(self, *args: Any) -> None:
    """Leave the context by calling :meth:`release`; exception details are ignored."""
    self.release()
def release(self) -> None:
    """Flush changes and release the configuration write lock. This instance must
    not be used anymore afterwards.

    In Python 3, it's required to explicitly release locks and flush changes, as
    ``__del__`` is not called deterministically anymore.
    """
    if self.read_only:
        return

    # Checking for the lock here makes sure we do not raise during write()
    # in case an invalid parser was created who could not get a lock.
    if self._lock and not self._lock._has_lock():
        return

    try:
        self.write()
    except IOError:
        _logger.error("Exception during destruction of GitConfigParser", exc_info=True)
    except ReferenceError:
        # This happens in Python 3... and usually means that some state cannot be
        # written as the sections dict cannot be iterated. This usually happens when
        # the interpreter is shutting down. Can it be fixed?
        pass
    finally:
        lock = self._lock
        if lock is not None:
            lock._release_lock()
def optionxform(self, optionstr: str) -> str:
    """Do not transform options in any way when writing.

    :param optionstr:
        The option name.

    :return:
        `optionstr`, unchanged.
    """
    return optionstr
def _read(self, fp: Union[BufferedReader, IO[bytes]], fpname: str) -> None:
    """Originally a direct copy of the Python 2.4 version of
    :meth:`RawConfigParser._read <configparser.RawConfigParser._read>`, to ensure it
    uses ordered dicts.

    The ordering bug was fixed in Python 2.4, and dict itself keeps ordering since
    Python 3.7. This has some other changes, especially that it ignores initial
    whitespace, since git uses tabs. (Big comments are removed to be more compact.)

    :param fp:
        Binary file-like object. Each line from ``readline()`` is decoded with
        ``defenc``.

    :param fpname:
        Name reported in the exceptions raised for malformed input.

    :raise configparser.MissingSectionHeaderError:
        If a non-comment line is found before any section header.

    :raise configparser.ParsingError:
        After the whole file has been read, if any line matched neither ``OPTCRE``
        nor ``OPTVALUEONLY``.
    """
    cursect = None  # None, or a dictionary.
    optname = None
    lineno = 0
    is_multi_line = False  # True while inside a quoted value that spans lines.
    e = None  # None, or an exception.

    def string_decode(v: str) -> str:
        # Drop one trailing backslash, then interpret backslash escapes.
        if v and v.endswith("\\"):
            v = v[:-1]
        # END cut trailing escapes to prevent decode error

        return v.encode(defenc).decode("unicode_escape")

    # END string_decode

    while True:
        # We assume to read binary!
        line = fp.readline().decode(defenc)
        if not line:
            # An empty read marks the end of the file.
            break
        lineno = lineno + 1
        # Comment or blank line?
        if line.strip() == "" or self.re_comment.match(line):
            continue
        if line.split(None, 1)[0].lower() == "rem" and line[0] in "rR":
            # No leading whitespace.
            continue

        # Is it a section header?
        mo = self.SECTCRE.match(line.strip())
        if not is_multi_line and mo:
            sectname: str = mo.group("header").strip()
            if sectname in self._sections:
                # Reuse the existing section, so repeated headers are merged.
                cursect = self._sections[sectname]
            elif sectname == cp.DEFAULTSECT:
                cursect = self._defaults
            else:
                cursect = self._dict((("__name__", sectname),))
                self._sections[sectname] = cursect
                self._proxies[sectname] = None
            # So sections can't start with a continuation line.
            optname = None
        # No section header in the file?
        elif cursect is None:
            raise cp.MissingSectionHeaderError(fpname, lineno, line)
        # An option line?
        elif not is_multi_line:
            mo = self.OPTCRE.match(line)
            if mo:
                # We might just have handled the last line, which could contain a quotation we want to remove.
                optname, vi, optval = mo.group("option", "vi", "value")
                # For unquoted values, cut an inline ";" comment if the ";" is
                # preceded by whitespace.
                if vi in ("=", ":") and ";" in optval and not optval.strip().startswith('"'):
                    pos = optval.find(";")
                    if pos != -1 and optval[pos - 1].isspace():
                        optval = optval[:pos]
                optval = optval.strip()
                if optval == '""':
                    optval = ""
                # END handle empty string
                optname = self.optionxform(optname.rstrip())
                # An opening quote without a closing quote starts a multi-line value.
                if len(optval) > 1 and optval[0] == '"' and optval[-1] != '"':
                    is_multi_line = True
                    optval = string_decode(optval[1:])
                # END handle multi-line
                # Preserves multiple values for duplicate optnames.
                cursect.add(optname, optval)
            else:
                # Check if it's an option with no value - it's just ignored by git.
                if not self.OPTVALUEONLY.match(line):
                    # Collect the error; it is raised only after the whole file is read.
                    if not e:
                        e = cp.ParsingError(fpname)
                    e.append(lineno, repr(line))
                continue
        else:
            # Continuation of a multi-line value; a trailing quote ends it.
            line = line.rstrip()
            if line.endswith('"'):
                is_multi_line = False
                line = line[:-1]
            # END handle quotations
            optval = cursect.getlast(optname)
            cursect.setlast(optname, optval + string_decode(line))
        # END parse section or option
    # END while reading

    # If any parsing errors occurred, raise an exception.
    if e:
        raise e
def _has_includes(self) -> Union[bool, int]:
    """Tell whether include merging is enabled and there are paths to include.

    :return:
        ``self._merge_includes`` if that is falsy, otherwise the number of included
        paths. Callers are expected to use the result only for its truthiness.
    """
    return self._merge_includes and len(self._included_paths())
def _included_paths(self) -> List[Tuple[str, str]]:
    """Collect the items of every include section that applies.

    ``[include]`` sections always apply. ``[includeIf "..."]`` sections are only
    considered when a repository was given, and apply when their ``gitdir``,
    ``gitdir/i`` or ``onbranch`` condition matches it.

    :return:
        The list of paths, where each path is a tuple of (option, value).
    """
    collected = []

    for section in self.sections():
        if section == "include":
            collected += self.items(section)

        condition = CONDITIONAL_INCLUDE_REGEXP.search(section)
        if condition is None or self._repo is None:
            continue

        keyword = condition.group(1)
        value = condition.group(2).strip()

        if keyword == "onbranch":
            try:
                branch_name = self._repo.active_branch.name
            except TypeError:
                # Ignore section if active branch cannot be retrieved.
                continue

            if fnmatch.fnmatchcase(branch_name, value):
                collected += self.items(section)

        elif keyword in ("gitdir", "gitdir/i"):
            pattern = osp.expanduser(value)

            # Patterns not anchored at "./" or "/" may match at any depth, and a
            # trailing "/" matches everything below that directory.
            if not pattern.startswith(("./", "/")):
                pattern = "**/" + pattern
            if pattern.endswith("/"):
                pattern += "**"

            # Ensure that glob is always case insensitive if required.
            if keyword.endswith("/i"):
                pattern = re.sub(
                    r"[a-zA-Z]",
                    lambda m: "[{}{}]".format(m.group().lower(), m.group().upper()),
                    pattern,
                )

            if self._repo.git_dir and fnmatch.fnmatchcase(str(self._repo.git_dir), pattern):
                collected += self.items(section)

    return collected
def read(self) -> None:  # type: ignore[override]
    """Read the data stored in the files we have been initialized with.

    This will ignore files that cannot be read, possibly leaving an empty
    configuration.

    The work is done only once: later calls return immediately.

    :raise IOError:
        If a file cannot be handled.
        NOTE(review): for paths, an ``IOError`` raised while opening or parsing is
        caught below and the file is skipped; only file objects are parsed outside
        that handler.
    """
    if self._is_initialized:
        return
    # Set the flag first, so calls made while reading do not start a second read.
    self._is_initialized = True

    files_to_read: List[Union[PathLike, IO]] = [""]
    if isinstance(self._file_or_files, (str, os.PathLike)):
        # For str or Path, as str is a type of Sequence.
        files_to_read = [self._file_or_files]
    elif not isinstance(self._file_or_files, (tuple, list, Sequence)):
        # Could merge with above isinstance once runtime type known.
        files_to_read = [self._file_or_files]
    else:  # For lists or tuples.
        files_to_read = list(self._file_or_files)
    # END ensure we have a copy of the paths to handle

    # Entries already queued; an include path found in here is not queued again.
    seen = set(files_to_read)
    num_read_include_files = 0
    while files_to_read:
        file_path = files_to_read.pop(0)
        file_ok = False  # True only once a path was opened successfully.

        if hasattr(file_path, "seek"):
            # Must be a file-object.
            # TODO: Replace cast with assert to narrow type, once sure.
            file_path = cast(IO[bytes], file_path)
            self._read(file_path, file_path.name)
        else:
            # Assume a path if it is not a file-object.
            file_path = cast(PathLike, file_path)
            try:
                with open(file_path, "rb") as fp:
                    file_ok = True
                    self._read(fp, fp.name)
            except IOError:
                # Skip this file, and also skip include handling for this iteration.
                continue

        # Read includes and append those that we didn't handle yet. We expect all
        # paths to be normalized and absolute (and will ensure that is the case).
        if self._has_includes():
            for _, include_path in self._included_paths():
                if include_path.startswith("~"):
                    include_path = osp.expanduser(include_path)
                if not osp.isabs(include_path):
                    if not file_ok:
                        continue
                    # END ignore relative paths if we don't know the configuration file path
                    file_path = cast(PathLike, file_path)
                    assert osp.isabs(file_path), "Need absolute paths to be sure our cycle checks will work"
                    # Relative includes are resolved against the including file's directory.
                    include_path = osp.join(osp.dirname(file_path), include_path)
                # END make include path absolute
                include_path = osp.normpath(include_path)
                if include_path in seen or not os.access(include_path, os.R_OK):
                    continue
                seen.add(include_path)
                # Insert included file to the top to be considered first.
                files_to_read.insert(0, include_path)
                num_read_include_files += 1
            # END each include path in configuration file
        # END handle includes
    # END for each file object to read

    # If there was no file included, we can safely write back (potentially) the
    # configuration file without altering its meaning.
    if num_read_include_files == 0:
        self._merge_includes = False
def _write(self, fp: IO) -> None:
    """Write an .ini-format representation of the configuration state in
    git compatible format.

    :param fp:
        Binary file-like object; every chunk is encoded with ``defenc`` before
        being written to it.
    """

    def emit(text: str) -> None:
        fp.write(text.encode(defenc))

    def write_section(name: str, section_dict: _OMD) -> None:
        emit("[%s]\n" % name)

        values: Sequence[str]  # Runtime only gets str in tests, but should be whatever _OMD stores.
        v: str
        for key, values in section_dict.items_all():
            # "__name__" is internal bookkeeping, not a real option.
            if key != "__name__":
                for v in values:
                    # Continuation lines of multi-line values are indented with a tab.
                    rendered = self._value_to_string(v).replace("\n", "\n\t")
                    emit("\t%s = %s\n" % (key, rendered))

    if self._defaults:
        write_section(cp.DEFAULTSECT, self._defaults)

    section: _OMD
    for section_name, section in self._sections.items():
        write_section(section_name, section)
def items(self, section_name: str) -> List[Tuple[str, str]]:  # type: ignore[override]
    """List the ``(option, value)`` pairs of a section.

    :param section_name:
        Section to query; passed unchanged to the parent class's ``items``.

    :return:
        list((option, value), ...) pairs of all items in the given section, with
        the ``__name__`` entry left out.
    """
    pairs = []
    for option, value in super().items(section_name):
        if option == "__name__":
            continue
        pairs.append((option, value))
    return pairs
def items_all(self, section_name: str) -> List[Tuple[str, List[str]]]:
    """List every option of a section together with all of its values.

    The result starts out as a copy of the defaults; the section's own options are
    then added on top of it.

    :param section_name:
        Key into ``self._sections``.

    :return:
        list((option, [values...]), ...) pairs of all items in the given section
    """
    merged = _OMD(self._defaults)
    for option, values in self._sections[section_name].items_all():
        if option == "__name__":
            continue
        # Do not add values a second time when they merely repeat the defaults.
        already_present = option in merged and merged.getall(option) == values
        if not already_present:
            for value in values:
                merged.add(option, value)
    return merged.items_all()
@needs_values
def write(self) -> None:
    """Write changes to our file, if there are changes at all.

    Nothing is written when the parser is not dirty, or when include files were
    merged into the loaded values (in that case only a debug message is logged).

    :raise IOError:
        If this is a read-only writer instance or if we could not obtain a file
        lock.

    :raise AssertionError:
        If this parser was created for a list or tuple of files, as there is then
        no single file to write back to.
    """
    self._assure_writable("write")
    if not self._dirty:
        return

    if isinstance(self._file_or_files, (list, tuple)):
        raise AssertionError(
            "Cannot write back if there is not exactly a single file to write to, have %i files"
            % len(self._file_or_files)
        )
    # END assert multiple files

    if self._has_includes():
        # The two sentences are separate literals; the trailing space keeps them
        # from running together in the log output.
        _logger.debug(
            "Skipping write-back of configuration file as include files were merged in. "
            "Set merge_includes=False to prevent this."
        )
        return
    # END stop if we have include files

    fp = self._file_or_files

    # We have a physical file on disk, so get a lock.
    is_file_lock = isinstance(fp, (str, os.PathLike, IOBase))  # TODO: Use PathLike (having dropped 3.5).
    if is_file_lock and self._lock is not None:  # Else raise error?
        self._lock._obtain_lock()

    if not hasattr(fp, "seek"):
        # No ``seek`` attribute: treat it as a path and (re)create the file.
        fp = cast(PathLike, fp)
        with open(fp, "wb") as fp_open:
            self._write(fp_open)
    else:
        # File-like object: rewind and drop the old content before writing.
        fp = cast("BytesIO", fp)
        fp.seek(0)

        # Make sure we do not overwrite into an existing file.
        if hasattr(fp, "truncate"):
            fp.truncate()
        self._write(fp)
def _assure_writable(self, method_name: str) -> None:
    """Refuse to continue if this instance is read-only.

    :param method_name:
        Name of the calling method; it only appears in the error message.

    :raise IOError:
        If :attr:`read_only` is true.
    """
    if not self.read_only:
        return
    raise IOError("Cannot execute non-constant method %s.%s" % (self, method_name))
def add_section(self, section: str) -> None:
"""Assures added options will stay in order.

This simply delegates to the parent class's ``add_section`` and returns whatever
that call returns.

:param section:
Name of the section, passed through unchanged.
"""
return super().add_section(section)
@property
def read_only(self) -> bool:
""":return: ``True`` if this instance is read-only, i.e. it may *not* change the
configuration file (``_assure_writable`` raises :exc:`IOError` for such instances)"""
return self._read_only
# FIXME: Figure out if default or return type can really include bool.
def get_value(
    self,
    section: str,
    option: str,
    default: Union[int, float, str, bool, None] = None,
) -> Union[int, float, str, bool]:
    """Look up a single option and convert it to a Python value.

    If multiple values are specified for this option in the section, the last one
    specified is returned.

    :param section:
        Name of the section to look in.

    :param option:
        Name of the option to read.

    :param default:
        Fallback that is returned as-is (without conversion) when the lookup
        fails for any reason. ``None`` means "no fallback": the lookup error is
        re-raised instead.

    :return:
        A properly typed value: int, float, bool or string

    :raise TypeError:
        In case the value could not be understood.
        Otherwise the exceptions known to the ConfigParser will be raised.
    """
    try:
        raw = self.get(section, option)
    except Exception:
        # Any lookup failure falls back to the default, if one was supplied.
        if default is None:
            raise
        return default

    return self._string_to_value(raw)
def get_values(
    self,
    section: str,
    option: str,
    default: Union[int, float, str, bool, None] = None,
) -> List[Union[int, float, str, bool]]:
    """Look up every value of an option and convert each to a Python value.

    If multiple values are specified for this option in the section, all are
    returned.

    :param section:
        Key into ``self._sections``.

    :param option:
        Name of the option whose values are requested.

    :param default:
        If not ``None``, a one-element list holding this value (unconverted) is
        returned when the lookup fails for any reason. With ``None`` the lookup
        error is re-raised.

    :return:
        A list of properly typed values: int, float, bool or string

    :raise TypeError:
        In case the value could not be understood.
        Otherwise the exceptions known to the ConfigParser will be raised.
    """
    try:
        # Called for its side effect only; presumably makes sure the configuration
        # has been loaded before ``_sections`` is touched - TODO confirm.
        self.sections()
        raw_values = self._sections[section].getall(option)
    except Exception:
        if default is None:
            raise
        return [default]

    return list(map(self._string_to_value, raw_values))
def _string_to_value(self, valuestr: str) -> Union[int, float, str, bool]:
    """Convert a raw configuration value to the most specific Python value.

    Conversions are attempted in this order: int, float, then the
    case-insensitive booleans ``true`` / ``false``. Any other string is returned
    unchanged.

    :param valuestr:
        The raw value, normally a string.

    :return:
        The converted int, float or bool, or `valuestr` itself.

    :raise TypeError:
        If `valuestr` is neither convertible to a number nor a string.
    """
    for numtype in (int, float):
        try:
            val = numtype(valuestr)
            # Skip a conversion that truncated the value.
            if val != float(valuestr):
                continue
            return val
        except (ValueError, TypeError):
            continue
    # END for each numeric type

    # Validate the type before calling any string method, so that unsupported
    # values (e.g. ``None``) raise the documented TypeError and not AttributeError.
    if not isinstance(valuestr, str):
        raise TypeError(
            "Invalid value type: only int, long, float and str are allowed",
            valuestr,
        )

    # Try boolean values as git uses them.
    vl = valuestr.lower()
    if vl == "false":
        return False
    if vl == "true":
        return True

    return valuestr
def _value_to_string(self, value: Union[str, bytes, int, float, bool]) -> str:
    """Render `value` as text for storage in the configuration.

    Numbers and booleans are rendered with :class:`str`; everything else is handed
    to ``force_text``.
    """
    is_number_like = isinstance(value, (int, float, bool))
    return str(value) if is_number_like else force_text(value)
@needs_values
@set_dirty_and_flush_changes
def set_value(self, section: str, option: str, value: Union[str, bytes, int, float, bool]) -> "GitConfigParser":
    """Store `value` under `option` in `section`, creating the section on demand.

    Unlike the default ConfigParser ``set`` method, this does not throw when the
    section is missing.

    :param section:
        Name of the section in which the option resides or should reside.

    :param option:
        Name of the option whose value to set.

    :param value:
        Value to set the option to. It must be a string or convertible to a string.

    :return:
        This instance
    """
    section_missing = not self.has_section(section)
    if section_missing:
        self.add_section(section)

    text = self._value_to_string(value)
    self.set(section, option, text)
    return self
@needs_values
@set_dirty_and_flush_changes
def add_value(self, section: str, option: str, value: Union[str, bytes, int, float, bool]) -> "GitConfigParser":
    """Append `value` to the values of `option` in `section`, creating the section
    on demand.

    Unlike the default ConfigParser ``set`` method, this does not throw when the
    section is missing. The value becomes the new value of the option as returned
    by :meth:`get_value`, and is appended to the list of values returned by
    :meth:`get_values`.

    :param section:
        Name of the section in which the option resides or should reside.

    :param option:
        Name of the option.

    :param value:
        Value to add to option. It must be a string or convertible to a string.

    :return:
        This instance
    """
    section_missing = not self.has_section(section)
    if section_missing:
        self.add_section(section)

    text = self._value_to_string(value)
    self._sections[section].add(option, text)
    return self
def rename_section(self, section: str, new_name: str) -> "GitConfigParser":
    """Move all options of `section` into a new section called `new_name`.

    :param section:
        Name of the existing section.

    :param new_name:
        Name the section should have afterwards.

    :raise ValueError:
        If:

        * `section` doesn't exist.
        * A section with `new_name` does already exist.

    :return:
        This instance
    """
    if not self.has_section(section):
        raise ValueError("Source section '%s' doesn't exist" % section)
    if self.has_section(new_name):
        raise ValueError("Destination section '%s' already exists" % new_name)

    # Create the destination through the parent class, then copy every option
    # together with its complete value list.
    super().add_section(new_name)
    target = self._sections[new_name]
    for option, values in self.items_all(section):
        target.setall(option, values)
    # END for each value to copy

    # Removing the old section is relied upon to write the changes back, which is
    # why this method does not carry the respective decorator itself.
    self.remove_section(section)
    return self

View File

@@ -0,0 +1,71 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Module with our own gitdb implementation - it uses the git command."""
__all__ = ["GitCmdObjectDB", "GitDB"]
from gitdb.base import OInfo, OStream
from gitdb.db import GitDB, LooseObjectDB
from gitdb.exc import BadObject
from git.util import bin_to_hex, hex_to_bin
from git.exc import GitCommandError
# typing-------------------------------------------------
from typing import TYPE_CHECKING
from git.types import PathLike
if TYPE_CHECKING:
from git.cmd import Git
# --------------------------------------------------------
class GitCmdObjectDB(LooseObjectDB):
    """A database representing the default git object store, which includes loose
    objects, pack files and an alternates file.

    Lookups are answered by asking the git command; it will create objects only in
    the loose object database.
    """

    def __init__(self, root_path: PathLike, git: "Git") -> None:
        """Initialize this instance with the root and a git command.

        :param root_path:
            Passed on to the :class:`LooseObjectDB` initializer.

        :param git:
            Git command object used for all object queries.
        """
        super().__init__(root_path)
        self._git = git

    def info(self, binsha: bytes) -> OInfo:
        """Get a git object header (using git itself).

        :param binsha:
            Binary sha of the object to look up.
        """
        header = self._git.get_object_header(bin_to_hex(binsha))
        hexsha, typename, size = header
        return OInfo(hex_to_bin(hexsha), typename, size)

    def stream(self, binsha: bytes) -> OStream:
        """Get git object data as a stream supporting ``read()`` (using git itself).

        :param binsha:
            Binary sha of the object to stream.
        """
        result = self._git.stream_object_data(bin_to_hex(binsha))
        hexsha, typename, size, stream = result
        return OStream(hex_to_bin(hexsha), typename, size, stream)

    # { Interface

    def partial_to_complete_sha_hex(self, partial_hexsha: str) -> bytes:
        """
        :return:
            Full binary 20 byte sha from the given partial hexsha

        :raise gitdb.exc.AmbiguousObjectName:

        :raise gitdb.exc.BadObject:

        :note:
            Currently we only raise :exc:`~gitdb.exc.BadObject` as git does not
            communicate ambiguous objects separately.
        """
        try:
            # The type name and size from the header are not needed here.
            hexsha, _typename, _size = self._git.get_object_header(partial_hexsha)
            return hex_to_bin(hexsha)
        except (GitCommandError, ValueError) as err:
            raise BadObject(partial_hexsha) from err
        # END handle exceptions

    # } END interface

View File

@@ -0,0 +1,775 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["DiffConstants", "NULL_TREE", "INDEX", "Diffable", "DiffIndex", "Diff"]
import enum
import re
import warnings
from git.cmd import handle_process_output
from git.compat import defenc
from git.objects.blob import Blob
from git.objects.util import mode_str_to_int
from git.util import finalize_process, hex_to_bin
# typing ------------------------------------------------------------------
from typing import (
Any,
Iterator,
List,
Match,
Optional,
Tuple,
TYPE_CHECKING,
TypeVar,
Union,
cast,
)
from git.types import Literal, PathLike
if TYPE_CHECKING:
from subprocess import Popen
from git.cmd import Git
from git.objects.base import IndexObject
from git.objects.commit import Commit
from git.objects.tree import Tree
from git.repo.base import Repo
Lit_change_type = Literal["A", "D", "C", "M", "R", "T", "U"]
# ------------------------------------------------------------------------
# ``enum.unique`` makes class creation fail if two members share the same value.
@enum.unique
class DiffConstants(enum.Enum):
"""Special objects for :meth:`Diffable.diff`.
See the :meth:`Diffable.diff` method's ``other`` parameter, which accepts various
values including these.
:note:
These constants are also available as attributes of the :mod:`git.diff` module,
the :class:`Diffable` class and its subclasses and instances, and the top-level
:mod:`git` module.
"""
# The member values are assigned by ``enum.auto()``; :meth:`Diffable.diff`
# recognizes the members by identity (``is``), not by value.
NULL_TREE = enum.auto()
"""Stand-in indicating you want to compare against the empty tree in diffs.
Also accessible as :const:`git.NULL_TREE`, :const:`git.diff.NULL_TREE`, and
:const:`Diffable.NULL_TREE`.
"""
INDEX = enum.auto()
"""Stand-in indicating you want to diff against the index.
Also accessible as :const:`git.INDEX`, :const:`git.diff.INDEX`, and
:const:`Diffable.INDEX`, as well as :const:`Diffable.Index`. The latter has been
kept for backward compatibility and made an alias of this, so it may still be used.
"""
# Module-level aliases of the enum members. The ``Literal[...]`` annotations pin
# each name to exactly one member for static type checkers.
NULL_TREE: Literal[DiffConstants.NULL_TREE] = DiffConstants.NULL_TREE
"""Stand-in indicating you want to compare against the empty tree in diffs.
See :meth:`Diffable.diff`, which accepts this as a value of its ``other`` parameter.
This is an alias of :const:`DiffConstants.NULL_TREE`, which may also be accessed as
:const:`git.NULL_TREE` and :const:`Diffable.NULL_TREE`.
"""
INDEX: Literal[DiffConstants.INDEX] = DiffConstants.INDEX
"""Stand-in indicating you want to diff against the index.
See :meth:`Diffable.diff`, which accepts this as a value of its ``other`` parameter.
This is an alias of :const:`DiffConstants.INDEX`, which may also be accessed as
:const:`git.INDEX` and :const:`Diffable.INDEX`, as well as :const:`Diffable.Index`.
"""
_octal_byte_re = re.compile(rb"\\([0-9]{3})")

# One escape sequence inside a quoted path: a backslash followed by either a
# three-digit octal byte value (at most ``\377``) or one of ``n``, ``t``, ``"``, ``\``.
_quoted_escape_re = re.compile(rb'\\([0-3][0-7]{2}|[nt"\\])')

# Replacement bytes for the single-character escapes matched above.
_SIMPLE_ESCAPES = {b"n": b"\n", b"t": b"\t", b'"': b'"', b"\\": b"\\"}


def _octal_repl(matchobj: Match) -> bytes:
    """Return the single byte denoted by the octal digits in group 1 of `matchobj`."""
    return bytes((int(matchobj.group(1), 8),))


def _escape_repl(matchobj: Match) -> bytes:
    """Return the bytes denoted by one escape sequence matched by ``_quoted_escape_re``."""
    token = matchobj.group(1)
    if token in _SIMPLE_ESCAPES:
        return _SIMPLE_ESCAPES[token]
    return bytes((int(token, 8),))


def decode_path(path: bytes, has_ab_prefix: bool = True) -> Optional[bytes]:
    """Turn a path as printed in a diff header into the raw path bytes.

    :param path:
        Path as it appears in the diff output, possibly wrapped in double quotes
        with backslash escapes, and possibly prefixed with ``a/`` or ``b/``.

    :param has_ab_prefix:
        If ``True``, the (unescaped) path must start with ``a/`` or ``b/``, and
        that prefix is stripped.

    :return:
        The decoded path, or ``None`` if `path` is ``/dev/null``.
    """
    if path == b"/dev/null":
        return None

    if path.startswith(b'"') and path.endswith(b'"'):
        # Undo all escapes in one left-to-right pass. Replacing them one kind
        # after another would re-read the second half of an escaped backslash as
        # the start of a new escape (e.g. ``\\n`` would wrongly become a newline).
        path = _quoted_escape_re.sub(_escape_repl, path[1:-1])
    else:
        # Unquoted paths still get bare octal escapes decoded.
        path = _octal_byte_re.sub(_octal_repl, path)

    if has_ab_prefix:
        assert path.startswith(b"a/") or path.startswith(b"b/")
        path = path[2:]

    return path
class Diffable:
    """Common interface for all objects that can be diffed against another object of
    compatible type.

    :note:
        Subclasses require a :attr:`repo` member, as it is the case for
        :class:`~git.objects.base.Object` instances. For practical reasons we do not
        derive from :class:`~git.objects.base.Object`.
    """

    __slots__ = ()

    repo: "Repo"
    """Repository to operate on. Must be provided by subclass or sibling class."""

    NULL_TREE = NULL_TREE
    """Stand-in indicating you want to compare against the empty tree in diffs.

    Pass it as the ``other`` argument of :meth:`diff`.

    This is the same as :const:`DiffConstants.NULL_TREE`, and may also be accessed as
    :const:`git.NULL_TREE` and :const:`git.diff.NULL_TREE`.
    """

    INDEX = INDEX
    """Stand-in indicating you want to diff against the index.

    Pass it as the ``other`` argument of :meth:`diff`.

    This is the same as :const:`DiffConstants.INDEX`, and may also be accessed as
    :const:`git.INDEX` and :const:`git.diff.INDEX`, as well as
    :const:`Diffable.Index`, which is kept for backward compatibility (it is now
    defined as an alias of this).
    """

    Index = INDEX
    """Stand-in indicating you want to diff against the index
    (same as :const:`~Diffable.INDEX`).

    This is an alias of :const:`~Diffable.INDEX`, for backward compatibility. See
    :const:`~Diffable.INDEX` and :meth:`diff` for details.

    :note:
        Although always meant for use as an opaque constant, this was formerly defined
        as a class. Its usage is unchanged, but static type annotations that attempt
        to permit only this object must be changed to avoid new mypy errors. This was
        previously not possible to do, though ``Type[Diffable.Index]`` approximated it.
        It is now possible to do precisely, using ``Literal[DiffConstants.INDEX]``.
    """

    def _process_diff_args(
        self,
        args: List[Union[PathLike, "Diffable"]],
    ) -> List[Union[PathLike, "Diffable"]]:
        """Hook that lets subclasses adjust the argument list of the git command.

        It is called right before git command execution; this base implementation
        returns `args` unchanged.

        :return:
            Possibly altered version of the given args list.
        """
        return args

    def diff(
        self,
        other: Union[DiffConstants, "Tree", "Commit", str, None] = INDEX,
        paths: Union[PathLike, List[PathLike], Tuple[PathLike, ...], None] = None,
        create_patch: bool = False,
        **kwargs: Any,
    ) -> "DiffIndex[Diff]":
        """Create diffs between two items being trees, trees and index or an index and
        the working tree. Detects renames automatically.

        :param other:
            This is the item to compare us with.

            * If ``None``, we will be compared to the working tree.
            * If a :class:`~git.types.Tree_ish` or string, it will be compared against
              the respective tree.
            * If :const:`INDEX`, it will be compared against the index.
            * If :const:`NULL_TREE`, it will compare against the empty tree.

            This parameter defaults to :const:`INDEX` (rather than ``None``) so that the
            method will not by default fail on bare repositories.

        :param paths:
            A single path or a list/tuple of paths to limit the diff to.

        :param create_patch:
            If ``True``, the returned :class:`Diff` contains a detailed patch that if
            applied makes the self to other. Patches are somewhat costly as blobs have
            to be read and diffed.

        :param kwargs:
            Additional arguments passed to :manpage:`git-diff(1)`, such as ``R=True`` to
            swap both sides of the diff.

        :return:
            A :class:`DiffIndex` representing the computed diff.

        :note:
            On a bare repository, `other` needs to be provided as :const:`INDEX`, or as
            an instance of :class:`~git.objects.tree.Tree` or
            :class:`~git.objects.commit.Commit`, or a git command error will occur.
        """
        # Options that do not depend on what we compare against:
        # full shas and full index paths (not only filenames).
        options: List[Union[PathLike, Diffable]] = ["--abbrev=40", "--full-index"]

        # Rename detection ('-M') is on unless the caller controls it explicitly.
        caller_controls_renames = any(flag in kwargs for flag in ("find_renames", "no_renames", "M"))
        if not caller_controls_renames:
            options.append("-M")

        if create_patch:
            options += ["-p", "--no-ext-diff"]
        else:
            options += ["--raw", "-z"]

        # Ensure we never see colored output.
        # Fixes: https://github.com/gitpython-developers/GitPython/issues/172
        options.append("--no-color")

        if paths is not None and not isinstance(paths, (tuple, list)):
            paths = [paths]

        # Pick the git sub-command and the arguments that go directly after `self`.
        diff_cmd = self.repo.git.diff
        leading: List[Any] = []
        if other is INDEX:
            leading = ["--cached"]
        elif other is NULL_TREE:
            leading = ["--root", "-r"]  # Recursive diff-tree.
            diff_cmd = self.repo.git.diff_tree
        elif other is not None:
            leading = [other, "-r"]  # Recursive diff-tree.
            diff_cmd = self.repo.git.diff_tree

        args: List[Union[PathLike, Diffable]] = [self, *leading, *options]

        # paths is a list or tuple here, or None.
        if paths:
            args.append("--")
            args.extend(paths)
        # END paths handling

        kwargs["as_process"] = True
        proc = diff_cmd(*self._process_diff_args(args), **kwargs)

        if create_patch:
            parse = Diff._index_from_patch_format
        else:
            parse = Diff._index_from_raw_format
        index = parse(self.repo, proc)

        proc.wait()
        return index
T_Diff = TypeVar("T_Diff", bound="Diff")


class DiffIndex(List[T_Diff]):
    R"""An index for diffs, allowing a list of :class:`Diff`\s to be queried by the diff
    properties.

    The class improves the diff handling convenience.
    """

    change_type = ("A", "C", "D", "R", "M", "T")
    """Change type invariant identifying possible ways a blob can have changed:

    * ``A`` = Added
    * ``C`` = Copied
    * ``D`` = Deleted
    * ``R`` = Renamed
    * ``M`` = Modified
    * ``T`` = Changed in the type
    """

    def iter_change_type(self, change_type: Lit_change_type) -> Iterator[T_Diff]:
        """Iterate over the diffs of one particular kind.

        A diff is yielded if its own ``change_type`` equals `change_type`, or if the
        flag or blob comparison that corresponds to `change_type` is truthy.

        :param change_type:
            Member of :attr:`DiffIndex.change_type`, namely:

            * 'A' for added paths
            * 'C' for copied paths
            * 'D' for deleted paths
            * 'R' for renamed paths
            * 'M' for paths with modified data
            * 'T' for changed in the type paths

        :return:
            Iterator yielding :class:`Diff` instances that match the given `change_type`

        :raise ValueError:
            If `change_type` is not listed in :attr:`DiffIndex.change_type`.
        """
        if change_type not in self.change_type:
            raise ValueError("Invalid change type: %s" % change_type)

        for candidate in self:
            if candidate.change_type == change_type:
                matches = True
            elif change_type == "A":
                matches = candidate.new_file
            elif change_type == "D":
                matches = candidate.deleted_file
            elif change_type == "C":
                matches = candidate.copied_file
            elif change_type == "R":
                matches = candidate.renamed_file
            elif change_type == "M":
                # Modified means: both sides have a blob, and the blobs differ.
                matches = candidate.a_blob and candidate.b_blob and candidate.a_blob != candidate.b_blob
            else:
                matches = False

            if matches:
                yield candidate
        # END for each diff
class Diff:
"""A Diff contains diff information between two Trees.
It contains two sides a and b of the diff. Members are prefixed with "a" and "b"
respectively to indicate that.
Diffs keep information about the changed blob objects, the file mode, renames,
deletions and new files.
There are a few cases where ``None`` has to be expected as member variable value:
New File::
a_mode is None
a_blob is None
a_path is None
Deleted File::
b_mode is None
b_blob is None
b_path is None
Working Tree Blobs:
When comparing to working trees, the working tree blob will have a null hexsha
as a corresponding object does not yet exist. The mode will be null as well. The
path will be available, though.
If it is listed in a diff, the working tree version of the file must differ from
the version in the index or tree, and hence has been modified.
"""
# Precompiled regex matching the header that introduces one file in patch output.
# Only the ``diff --git`` line with its two fallback paths is mandatory; every
# following header line (mode change, rename, new/deleted file, copy, index,
# ``---``/``+++``) sits in its own optional group. The pattern is compiled with
# ``re.VERBOSE``, which is why literal spaces are spelled ``[ ]``.
re_header = re.compile(
rb"""
^diff[ ]--git
[ ](?P<a_path_fallback>"?[ab]/.+?"?)[ ](?P<b_path_fallback>"?[ab]/.+?"?)\n
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
(?:^similarity[ ]index[ ]\d+%\n
^rename[ ]from[ ](?P<rename_from>.*)\n
^rename[ ]to[ ](?P<rename_to>.*)(?:\n|$))?
(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
(?:^similarity[ ]index[ ]\d+%\n
^copy[ ]from[ ].*\n
^copy[ ]to[ ](?P<copied_file_name>.*)(?:\n|$))?
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
(?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
(?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
""",
re.VERBOSE | re.MULTILINE,
)
# These can be used for comparisons.
# They are the all-zero sha in hexadecimal (40 characters) and binary (20 bytes) form.
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
# The slot names are also what ``__eq__`` and ``__hash__`` iterate over.
__slots__ = (
"a_blob",
"b_blob",
"a_mode",
"b_mode",
"a_rawpath",
"b_rawpath",
"new_file",
"deleted_file",
"copied_file",
"raw_rename_from",
"raw_rename_to",
"diff",
"change_type",
"score",
)
def __init__(
self,
repo: "Repo",
a_rawpath: Optional[bytes],
b_rawpath: Optional[bytes],
a_blob_id: Union[str, bytes, None],
b_blob_id: Union[str, bytes, None],
a_mode: Union[bytes, str, None],
b_mode: Union[bytes, str, None],
new_file: bool,
deleted_file: bool,
copied_file: bool,
raw_rename_from: Optional[bytes],
raw_rename_to: Optional[bytes],
diff: Union[str, bytes, None],
change_type: Optional[Lit_change_type],
score: Optional[int],
) -> None:
"""Store the given diff data and create :class:`Blob` objects for both sides.

Modes are converted with ``mode_str_to_int``; a falsy mode is stored as ``None``.
A blob id that is ``None`` or equal to :attr:`NULL_HEX_SHA` results in ``None``
for the respective blob. Empty rename paths are stored as ``None``.

If `a_rawpath` is the path of one of ``repo.submodules`` and that submodule's
module exists, the blobs are created in the submodule's repository instead of
`repo`.

:raise AssertionError:
If a raw path or raw rename path is neither ``None`` nor :class:`bytes`.
"""
assert a_rawpath is None or isinstance(a_rawpath, bytes)
assert b_rawpath is None or isinstance(b_rawpath, bytes)
self.a_rawpath = a_rawpath
self.b_rawpath = b_rawpath
self.a_mode = mode_str_to_int(a_mode) if a_mode else None
self.b_mode = mode_str_to_int(b_mode) if b_mode else None
# Determine whether this diff references a submodule. If it does then
# we need to overwrite "repo" to the corresponding submodule's repo instead.
if repo and a_rawpath:
for submodule in repo.submodules:
if submodule.path == a_rawpath.decode(defenc, "replace"):
if submodule.module_exists():
repo = submodule.module()
break
# The raw paths and modes were assigned above because the ``a_path`` and
# ``b_path`` properties used below decode them.
self.a_blob: Union["IndexObject", None]
if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
self.a_blob = None
else:
self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)
self.b_blob: Union["IndexObject", None]
if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
self.b_blob = None
else:
self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)
self.new_file: bool = new_file
self.deleted_file: bool = deleted_file
self.copied_file: bool = copied_file
# Be clear and use None instead of empty strings.
assert raw_rename_from is None or isinstance(raw_rename_from, bytes)
assert raw_rename_to is None or isinstance(raw_rename_to, bytes)
self.raw_rename_from = raw_rename_from or None
self.raw_rename_to = raw_rename_to or None
self.diff = diff
self.change_type: Union[Lit_change_type, None] = change_type
self.score = score
def __eq__(self, other: object) -> bool:
    """Return whether `other` holds an equal value in every slot of this diff.

    An object that lacks one of the slot attributes (``None``, a string, ...)
    compares unequal instead of raising :exc:`AttributeError`.
    """
    absent = object()  # Placeholder for a missing attribute; equal to no real value.
    for name in self.__slots__:
        if getattr(self, name) != getattr(other, name, absent):
            return False
    # END for each name
    return True


def __ne__(self, other: object) -> bool:
    """Return the negation of ``self == other``."""
    return not (self == other)


def __hash__(self) -> int:
    """Return a hash built from the values of all slots, consistent with ``__eq__``."""
    return hash(tuple(getattr(self, n) for n in self.__slots__))
def __str__(self) -> str:
    """Render a human-readable, multi-line summary of this diff.

    The first line is the path of the ``a`` blob, or of the ``b`` blob if there is
    no ``a`` blob. If neither blob exists, ``a_path`` or ``b_path`` is used. It is
    underlined with ``=`` and followed by one line per side (mode and hexsha, or
    ``None``), the new/deleted/copied/renamed notes that apply, and the diff text
    if there is any.
    """
    if self.a_blob:
        headline = "%s" % self.a_blob.path
    elif self.b_blob:
        headline = "%s" % self.b_blob.path
    else:
        # No blob on either side: fall back to the stored paths rather than
        # emitting the unformatted "%s" placeholder.
        headline = "%s" % (self.a_path or self.b_path or "")

    msg = ""
    line_length = 0
    for blob, side in zip((self.a_blob, self.b_blob), ("lhs", "rhs")):
        if blob:
            line = "\n%s: %o | %s" % (side, blob.mode, blob.hexsha)
        else:
            line = "\n%s: None" % side
        # END if blob is not None
        line_length = max(len(line), line_length)
        msg += line
    # END for each blob

    # Add headline; the underline is as long as the longest side line.
    headline += "\n" + "=" * line_length

    if self.deleted_file:
        msg += "\nfile deleted in rhs"
    if self.new_file:
        msg += "\nfile added in rhs"
    if self.copied_file:
        msg += "\nfile %r copied from %r" % (self.b_path, self.a_path)
    if self.rename_from:
        msg += "\nfile renamed from %r" % self.rename_from
    if self.rename_to:
        msg += "\nfile renamed to %r" % self.rename_to
    if self.diff:
        msg += "\n---"
        try:
            msg += self.diff.decode(defenc) if isinstance(self.diff, bytes) else self.diff
        except UnicodeDecodeError:
            msg += "OMITTED BINARY DATA"
        # END handle encoding
        msg += "\n---"
    # END diff info

    return headline + msg
@property
def a_path(self) -> Optional[str]:
    """:return: ``a_rawpath`` decoded to text (undecodable bytes are replaced), or
    ``None`` if it is unset or empty"""
    if not self.a_rawpath:
        return None
    return self.a_rawpath.decode(defenc, "replace")


@property
def b_path(self) -> Optional[str]:
    """:return: ``b_rawpath`` decoded to text (undecodable bytes are replaced), or
    ``None`` if it is unset or empty"""
    if not self.b_rawpath:
        return None
    return self.b_rawpath.decode(defenc, "replace")


@property
def rename_from(self) -> Optional[str]:
    """:return: ``raw_rename_from`` decoded to text (undecodable bytes are
    replaced), or ``None`` if it is unset"""
    if not self.raw_rename_from:
        return None
    return self.raw_rename_from.decode(defenc, "replace")


@property
def rename_to(self) -> Optional[str]:
    """:return: ``raw_rename_to`` decoded to text (undecodable bytes are replaced),
    or ``None`` if it is unset"""
    if not self.raw_rename_to:
        return None
    return self.raw_rename_to.decode(defenc, "replace")
@property
def renamed(self) -> bool:
    """Deprecated, use :attr:`renamed_file` instead.

    Accessing this property emits a :exc:`DeprecationWarning`.

    :return:
        ``True`` if the blob of our diff has been renamed

    :note:
        This property is deprecated.
        Please use the :attr:`renamed_file` property instead.
    """
    message = "Diff.renamed is deprecated, use Diff.renamed_file instead"
    # stacklevel=2 attributes the warning to the code accessing the property.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return self.renamed_file


@property
def renamed_file(self) -> bool:
    """:return: ``True`` if the blob of our diff has been renamed, i.e. if
    ``rename_from`` and ``rename_to`` differ"""
    source, target = self.rename_from, self.rename_to
    return source != target
@classmethod
def _pick_best_path(cls, path_match: bytes, rename_match: bytes, path_fallback_match: bytes) -> Optional[bytes]:
    """Decode the first non-empty candidate path.

    The candidates are tried in argument order. `path_match` and
    `path_fallback_match` are decoded as carrying an ``a/`` or ``b/`` prefix,
    `rename_match` as carrying none.

    :return:
        The decoded path, or ``None`` if all candidates are empty.
    """
    candidates = (
        (path_match, True),
        (rename_match, False),
        (path_fallback_match, True),
    )
    for raw, prefixed in candidates:
        if raw:
            return decode_path(raw, has_ab_prefix=prefixed)
    return None
@classmethod
def _index_from_patch_format(cls, repo: "Repo", proc: Union["Popen", "Git.AutoInterrupt"]) -> DiffIndex["Diff"]:
"""Create a new :class:`DiffIndex` from the given process output which must be
in patch format.
The whole output is read into memory first. Every match of ``re_header`` starts a
new :class:`Diff`; the text between the end of one header and the start of the
next (or the end of the output) becomes the ``diff`` attribute of that entry.
:param repo:
The repository we are operating on.
:param proc:
:manpage:`git-diff(1)` process to read from
(supports :class:`Git.AutoInterrupt <git.cmd.Git.AutoInterrupt>` wrapper).
:return:
:class:`DiffIndex`
"""
# FIXME: Here SLURPING raw, need to re-phrase header-regexes linewise.
text_list: List[bytes] = []
handle_process_output(proc, text_list.append, None, finalize_process, decode_streams=False)
# For now, we have to bake the stream.
text = b"".join(text_list)
index: "DiffIndex" = DiffIndex()
previous_header: Union[Match[bytes], None] = None
header: Union[Match[bytes], None] = None
a_path, b_path = None, None # For mypy.
a_mode, b_mode = None, None # For mypy.
for _header in cls.re_header.finditer(text):
# The names below follow the order in which the groups appear in re_header.
(
a_path_fallback,
b_path_fallback,
old_mode,
new_mode,
rename_from,
rename_to,
new_file_mode,
deleted_file_mode,
copied_file_name,
a_blob_id,
b_blob_id,
b_mode,
a_path,
b_path,
) = _header.groups()
new_file, deleted_file, copied_file = (
bool(new_file_mode),
bool(deleted_file_mode),
bool(copied_file_name),
)
# Prefer the ``---``/``+++`` path, then the rename path, then the path from
# the ``diff --git`` line.
a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback)
# Our only means to find the actual text is to see what has not been matched
# by our regex, and then retro-actively assign it to our index.
if previous_header is not None:
index[-1].diff = text[previous_header.end() : _header.start()]
# END assign actual diff
# Make sure the mode is set if the path is set. Otherwise the resulting blob
# is invalid. We just use the one mode we should have parsed.
a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
# Blob ids and modes are decoded to text; the paths stay bytes. The last three
# arguments are the diff text (assigned afterwards), change type and score.
index.append(
Diff(
repo,
a_path,
b_path,
a_blob_id and a_blob_id.decode(defenc),
b_blob_id and b_blob_id.decode(defenc),
a_mode and a_mode.decode(defenc),
b_mode and b_mode.decode(defenc),
new_file,
deleted_file,
copied_file,
rename_from,
rename_to,
None,
None,
None,
)
)
previous_header = _header
header = _header
# END for each header we parse
# The text after the last header belongs to the last entry.
if index and header:
index[-1].diff = text[header.end() :]
# END assign last diff
return index
@staticmethod
def _handle_diff_line(lines_bytes: bytes, repo: "Repo", index: DiffIndex["Diff"]) -> None:
"""Parse a chunk of NUL-separated raw diff output and append one :class:`Diff`
per record to `index`.
Records are separated by ``"\\x00:"``. Within a record, the part before the first
NUL holds five whitespace-separated fields (old mode, new mode, the two blob ids
and the change type with an optional numeric score); the rest is the path, or two
NUL-separated paths for copies and renames.
:param lines_bytes:
Raw output to parse; it is decoded with ``defenc``.
:param repo:
The repository passed on to each created :class:`Diff`.
:param index:
The :class:`DiffIndex` that receives the parsed entries.
"""
lines = lines_bytes.decode(defenc)
# Discard everything before the first colon, and the colon itself.
_, _, lines = lines.partition(":")
for line in lines.split("\x00:"):
if not line:
# The line data is empty, skip.
continue
meta, _, path = line.partition("\x00")
path = path.rstrip("\x00")
a_blob_id: Optional[str]
b_blob_id: Optional[str]
old_mode, new_mode, a_blob_id, b_blob_id, _change_type = meta.split(None, 4)
# Change type can be R100
# R: status letter
# 100: score (in case of copy and rename)
change_type: Lit_change_type = cast(Lit_change_type, _change_type[0])
score_str = "".join(_change_type[1:])
score = int(score_str) if score_str.isdigit() else None
path = path.strip("\n")
# By default both sides use the same path; copies and renames override this below.
a_path = path.encode(defenc)
b_path = path.encode(defenc)
deleted_file = False
new_file = False
copied_file = False
rename_from = None
rename_to = None
# NOTE: We cannot conclude from the existence of a blob to change type,
# as diffs with the working do not have blobs yet.
if change_type == "D":
b_blob_id = None # Optional[str]
deleted_file = True
elif change_type == "A":
a_blob_id = None
new_file = True
elif change_type == "C":
copied_file = True
# For copies the path field holds two NUL-separated paths.
a_path_str, b_path_str = path.split("\x00", 1)
a_path = a_path_str.encode(defenc)
b_path = b_path_str.encode(defenc)
elif change_type == "R":
# For renames the path field holds two NUL-separated paths.
a_path_str, b_path_str = path.split("\x00", 1)
a_path = a_path_str.encode(defenc)
b_path = b_path_str.encode(defenc)
rename_from, rename_to = a_path, b_path
elif change_type == "T":
# Nothing to do.
pass
# END add/remove handling
# Raw output carries no patch text, hence the empty string for the diff.
diff = Diff(
repo,
a_path,
b_path,
a_blob_id,
b_blob_id,
old_mode,
new_mode,
new_file,
deleted_file,
copied_file,
rename_from,
rename_to,
"",
change_type,
score,
)
index.append(diff)
@classmethod
def _index_from_raw_format(cls, repo: "Repo", proc: "Popen") -> "DiffIndex[Diff]":
    """Create a new :class:`DiffIndex` from the given process output which must be
    in raw format.

    Each chunk of output delivered by ``handle_process_output`` is passed to
    :meth:`_handle_diff_line`, which appends the parsed entries to the index.

    :param repo:
        The repository we are operating on.

    :param proc:
        Process to read output from.

    :return:
        :class:`DiffIndex`
    """
    # handles
    # :100644 100644 687099101... 37c5e30c8... M .gitignore

    index: "DiffIndex" = DiffIndex()

    def consume(chunk: bytes) -> None:
        cls._handle_diff_line(chunk, repo, index)

    handle_process_output(proc, consume, None, finalize_process, decode_streams=False)
    return index

View File

@@ -0,0 +1,228 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Exceptions thrown throughout the git package."""
__all__ = [
# Defined in gitdb.exc:
"AmbiguousObjectName",
"BadName",
"BadObject",
"BadObjectType",
"InvalidDBRoot",
"ODBError",
"ParseError",
"UnsupportedOperation",
# Introduced in this module:
"GitError",
"InvalidGitRepositoryError",
"WorkTreeRepositoryUnsupported",
"NoSuchPathError",
"UnsafeProtocolError",
"UnsafeOptionError",
"CommandError",
"GitCommandNotFound",
"GitCommandError",
"CheckoutError",
"CacheError",
"UnmergedEntriesError",
"HookExecutionError",
"RepositoryDirtyError",
]
from gitdb.exc import (
AmbiguousObjectName,
BadName,
BadObject,
BadObjectType,
InvalidDBRoot,
ODBError,
ParseError,
UnsupportedOperation,
)
from git.compat import safe_decode
from git.util import remove_password_if_present
# typing ----------------------------------------------------
from typing import List, Sequence, Tuple, TYPE_CHECKING, Union
from git.types import PathLike
if TYPE_CHECKING:
from git.repo.base import Repo
# ------------------------------------------------------------------
class GitError(Exception):
"""Base class for all package exceptions."""
class InvalidGitRepositoryError(GitError):
"""Thrown if the given repository appears to have an invalid format."""
class WorkTreeRepositoryUnsupported(InvalidGitRepositoryError):
    """Thrown to indicate we can't handle work tree repositories.

    As it derives from :class:`InvalidGitRepositoryError`, handlers for that
    exception catch this one as well.
    """
class NoSuchPathError(GitError, OSError):
    """Thrown if a path could not be accessed by the system.

    This is both a :class:`GitError` and an :class:`OSError`, so it is caught by
    handlers for either of them.
    """
class UnsafeProtocolError(GitError):
    """Thrown if unsafe protocols are passed without being explicitly allowed.

    Derives directly from :class:`GitError`.
    """
class UnsafeOptionError(GitError):
    """Thrown if unsafe options are passed without being explicitly allowed.

    Derives directly from :class:`GitError`.
    """
class CommandError(GitError):
    """Base class for exceptions thrown at every stage of :class:`~subprocess.Popen`
    execution.

    :param command:
        A non-empty list of argv comprising the command-line. Anything that is
        neither a list nor a tuple is split on whitespace first.

    :param status:
        Exit status, status text or causing exception. It is stored unmodified as
        :attr:`status` and additionally rendered into the error message.

    :param stderr:
        Standard error output of the command, if any.

    :param stdout:
        Standard output of the command, if any.
    """

    _msg = "Cmd('%s') failed%s"
    """Format string with 2 ``%s`` for ``<cmdline>`` and the rest.

    For example: ``"'%s' failed%s"``

    Subclasses may override this attribute, provided it is still in this form.
    """

    def __init__(
        self,
        command: Union[List[str], Tuple[str, ...], str],
        status: Union[str, int, None, Exception] = None,
        stderr: Union[bytes, str, None] = None,
        stdout: Union[bytes, str, None] = None,
    ) -> None:
        argv = command if isinstance(command, (tuple, list)) else command.split()
        self.command = remove_password_if_present(argv)
        self.status = status

        # Render a truthy status as text; a falsy one is left as it is and yields
        # the plain "!" suffix below.
        cause = status
        if status:
            if isinstance(status, Exception):
                cause = "%s('%s')" % (type(status).__name__, safe_decode(str(status)))
            else:
                try:
                    cause = "exit code(%s)" % int(status)
                except (ValueError, TypeError):
                    text = safe_decode(str(status))
                    cause = "'%s'" % text if isinstance(status, str) else text

        self._cmd = safe_decode(self.command[0])
        self._cmdline = " ".join(safe_decode(arg) for arg in self.command)
        self._cause = " due to: %s" % cause if cause else "!"

        decoded_stdout = safe_decode(stdout)
        decoded_stderr = safe_decode(stderr)
        self.stdout = "\n stdout: '%s'" % decoded_stdout if decoded_stdout else ""
        self.stderr = "\n stderr: '%s'" % decoded_stderr if decoded_stderr else ""

    def __str__(self) -> str:
        template = self._msg + "\n cmdline: %s%s%s"
        fields = (
            self._cmd,
            self._cause,
            self._cmdline,
            self.stdout,
            self.stderr,
        )
        return template % fields
class GitCommandNotFound(CommandError):
    """Thrown if we cannot find the ``git`` executable in the :envvar:`PATH` or at the
    path given by the :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable.

    :param command:
        The command-line that could not be run, in any form accepted by
        :class:`CommandError`.

    :param cause:
        Text or exception describing why the command was not found. It is passed to
        :class:`CommandError` as its ``status``.
    """

    def __init__(
        self,
        command: Union[List[str], Tuple[str, ...], str],
        cause: Union[str, Exception],
    ) -> None:
        # `Tuple[str, ...]` (any length) matches the base class signature; the
        # previous `Tuple[str]` only described one-element tuples.
        super().__init__(command, cause)
        self._msg = "Cmd('%s') not found%s"
class GitCommandError(CommandError):
    """Thrown if execution of the git command fails with non-zero status code.

    All arguments are forwarded unchanged to :class:`CommandError`.
    """

    def __init__(
        self,
        command: Union[List[str], Tuple[str, ...], str],
        status: Union[str, int, None, Exception] = None,
        stderr: Union[bytes, str, None] = None,
        stdout: Union[bytes, str, None] = None,
    ) -> None:
        super().__init__(command, status=status, stderr=stderr, stdout=stdout)
class CheckoutError(GitError):
    """Thrown if a file could not be checked out from the index as it contained
    changes.

    The :attr:`failed_files` attribute contains a list of relative paths that failed to
    be checked out as they contained changes that did not exist in the index.

    The :attr:`failed_reasons` attribute contains a string informing about the actual
    cause of the issue.

    The :attr:`valid_files` attribute contains a list of relative paths to files that
    were checked out successfully and hence match the version stored in the index.

    :param message:
        Text passed on to :class:`Exception`; it forms the first part of ``str()``.

    :param failed_files:
        Stored as :attr:`failed_files`.

    :param valid_files:
        Stored as :attr:`valid_files`.

    :param failed_reasons:
        Stored as :attr:`failed_reasons`.
    """

    def __init__(
        self,
        message: str,
        failed_files: Sequence[PathLike],
        valid_files: Sequence[PathLike],
        failed_reasons: List[str],
    ) -> None:
        Exception.__init__(self, message)
        self.failed_files = failed_files
        self.failed_reasons = failed_reasons
        self.valid_files = valid_files

    def __str__(self) -> str:
        # Format through an explicit argument tuple: `":%s" % self.failed_files`
        # would unpack a tuple of paths as separate format arguments and either
        # raise TypeError or print only its single element.
        return "%s:%s" % (Exception.__str__(self), self.failed_files)
class CacheError(GitError):
    """Base for all errors related to the git index, which is called "cache"
    internally.

    :class:`UnmergedEntriesError` derives from it.
    """
class UnmergedEntriesError(CacheError):
    """Thrown if an operation cannot proceed as there are still unmerged
    entries in the cache.

    As a :class:`CacheError`, it is also a :class:`GitError`.
    """
class HookExecutionError(CommandError):
    """Thrown if a hook exits with a non-zero exit code.

    This provides access to the exit code and the string returned via standard output.

    Unlike in :class:`CommandError`, `status` has no default here and must be given.
    """

    def __init__(
        self,
        command: Union[List[str], Tuple[str, ...], str],
        status: Union[str, int, None, Exception],
        stderr: Union[bytes, str, None] = None,
        stdout: Union[bytes, str, None] = None,
    ) -> None:
        super().__init__(command, status, stderr=stderr, stdout=stdout)
        # Override the message template on the instance so the text names a hook.
        self._msg = "Hook('%s') failed%s"
class RepositoryDirtyError(GitError):
    """Thrown whenever an operation on a repository fails as it has uncommitted changes
    that would be overwritten.

    :param repo:
        Repository the operation was attempted on; stored as :attr:`repo`.

    :param message:
        Explanation of the failure; stored as :attr:`message`.
    """

    def __init__(self, repo: "Repo", message: str) -> None:
        self.repo = repo
        self.message = message

    def __str__(self) -> str:
        template = "Operation cannot be performed on %r: %s"
        return template % (self.repo, self.message)

View File

@@ -0,0 +1,16 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Initialize the index package."""
__all__ = [
"BaseIndexEntry",
"BlobFilter",
"CheckoutError",
"IndexEntry",
"IndexFile",
"StageType",
]
from .base import CheckoutError, IndexFile
from .typ import BaseIndexEntry, BlobFilter, IndexEntry, StageType

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,465 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Standalone functions to accompany the index implementation and make it more
versatile."""
__all__ = [
"write_cache",
"read_cache",
"write_tree_from_cache",
"entry_key",
"stat_mode_to_index_mode",
"S_IFGITLINK",
"run_commit_hook",
"hook_path",
]
from io import BytesIO
import os
import os.path as osp
from pathlib import Path
from stat import S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, S_ISDIR, S_ISLNK, S_IXUSR
import subprocess
import sys
from gitdb.base import IStream
from gitdb.typ import str_tree_type
from git.cmd import handle_process_output, safer_popen
from git.compat import defenc, force_bytes, force_text, safe_decode
from git.exc import HookExecutionError, UnmergedEntriesError
from git.objects.fun import (
traverse_tree_recursive,
traverse_trees_recursive,
tree_to_stream,
)
from git.util import IndexFileSHA1Writer, finalize_process
from .typ import BaseIndexEntry, IndexEntry, CE_NAMEMASK, CE_STAGESHIFT
from .util import pack, unpack
# typing -----------------------------------------------------------------------------
from typing import Dict, IO, List, Sequence, TYPE_CHECKING, Tuple, Type, Union, cast
from git.types import PathLike
if TYPE_CHECKING:
from git.db import GitCmdObjectDB
from git.objects.tree import TreeCacheTup
from .base import IndexFile
# ------------------------------------------------------------------------------------
# File type bits of a submodule entry: the symlink and directory type bits combined.
S_IFGITLINK = S_IFLNK | S_IFDIR
"""Flags for a submodule."""

# Bitwise complement of CE_NAMEMASK. write_cache() ANDs an entry's flags with it to
# clear the previously stored path length before the current one is put in.
CE_NAMEMASK_INV = ~CE_NAMEMASK
def hook_path(name: str, git_dir: PathLike) -> str:
    """Locate a hook inside a git repository directory.

    :param name:
        Name of the hook, which is the last path component.

    :param git_dir:
        The git repository directory whose ``hooks`` subdirectory is used.

    :return:
        Path to the given named hook in the given git repository directory
    """
    hooks_dir = osp.join(git_dir, "hooks")
    return osp.join(hooks_dir, name)
def _has_file_extension(path: str) -> str:
    """Return the extension of `path` as found by :func:`os.path.splitext`.

    The result is the extension itself (including the leading dot) or an empty
    string, so it is meant to be used for its truth value.
    """
    _root, extension = osp.splitext(path)
    return extension
def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None:
    """Run the commit hook of the given name. Silently ignore hooks that do not exist.

    A hook is also ignored if :func:`os.access` does not report it as executable.
    The hook is started with the repository's working directory as current
    directory and with ``GIT_INDEX_FILE`` and ``GIT_EDITOR`` set in its environment.

    :param name:
        Name of hook, like ``pre-commit``.

    :param index:
        :class:`~git.index.base.IndexFile` instance.

    :param args:
        Arguments passed to hook file.

    :raise git.exc.HookExecutionError:
        If starting the hook fails with any exception, or if the hook process
        finishes with a non-zero return code.
    """
    hp = hook_path(name, index.repo.git_dir)
    if not os.access(hp, os.X_OK):
        return

    # The hook gets a copy of our environment, pointed at this index file.
    env = os.environ.copy()
    env["GIT_INDEX_FILE"] = safe_decode(str(index.path))
    env["GIT_EDITOR"] = ":"
    cmd = [hp]
    try:
        if sys.platform == "win32" and not _has_file_extension(hp):
            # Windows only uses extensions to determine how to open files
            # (doesn't understand shebangs). Try using bash to run the hook.
            # The path is made relative to the working directory, which is also
            # the directory the process is started in.
            relative_hp = Path(hp).relative_to(index.repo.working_dir).as_posix()
            cmd = ["bash.exe", relative_hp]

        process = safer_popen(
            cmd + list(args),
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=index.repo.working_dir,
        )
    except Exception as ex:
        # Any failure to set up or start the hook is reported with the hook path
        # as command and the original exception as status.
        raise HookExecutionError(hp, ex) from ex
    else:
        stdout_list: List[str] = []
        stderr_list: List[str] = []
        handle_process_output(process, stdout_list.append, stderr_list.append, finalize_process)
        stdout = "".join(stdout_list)
        stderr = "".join(stderr_list)
        if process.returncode != 0:
            stdout = force_text(stdout, defenc)
            stderr = force_text(stderr, defenc)
            raise HookExecutionError(hp, process.returncode, stderr, stdout)
        # END handle return code
def stat_mode_to_index_mode(mode: int) -> int:
    """Convert the given mode from a stat call to the corresponding index mode and
    return it.

    :param mode:
        Mode bits as obtained from a stat call.

    :return:
        ``S_IFLNK`` for symlinks, ``S_IFGITLINK`` for directories and gitlinks, and
        otherwise ``S_IFREG`` combined with ``0o755`` or ``0o644``, depending on
        whether the owner's executable bit is set.
    """
    if S_ISLNK(mode):
        # Symlinks
        return S_IFLNK

    is_submodule = S_ISDIR(mode) or S_IFMT(mode) == S_IFGITLINK
    if is_submodule:
        return S_IFGITLINK

    # Blobs with or without executable bit
    permissions = 0o755 if mode & S_IXUSR else 0o644
    return S_IFREG | permissions
def write_cache(
    entries: Sequence[Union[BaseIndexEntry, "IndexEntry"]],
    stream: IO[bytes],
    extension_data: Union[None, bytes] = None,
    ShaStreamCls: Type[IndexFileSHA1Writer] = IndexFileSHA1Writer,
) -> None:
    """Write the cache represented by entries to a stream.

    The output always carries version number 2 in its header.

    :param entries:
        **Sorted** list of entries.

    :param stream:
        Stream to wrap into the `ShaStreamCls` - it is used for final output.

    :param ShaStreamCls:
        Type to use when writing to the stream. It produces a sha while writing to it,
        before the data is passed on to the wrapped stream.

    :param extension_data:
        Any kind of data to write as a trailer, it must begin with a 4 byte
        identifier, followed by its size (4 bytes).

    :raise AssertionError:
        If the encoded path of an entry is too long to be stored in the bits
        selected by ``CE_NAMEMASK`` (only checked while assertions are enabled).
    """
    # Wrap the stream into a compatible writer.
    stream_sha = ShaStreamCls(stream)

    tell = stream_sha.tell
    write = stream_sha.write

    # Header: signature, followed by version and number of entries as two
    # big-endian 32 bit unsigned integers.
    version = 2
    write(b"DIRC")
    write(pack(">LL", version, len(entries)))

    # Body
    for entry in entries:
        beginoffset = tell()
        write(entry.ctime_bytes)  # ctime
        write(entry.mtime_bytes)  # mtime
        path_str = str(entry.path)
        path: bytes = force_bytes(path_str, encoding=defenc)
        plen = len(path) & CE_NAMEMASK  # Path length
        # Masking must not have cut anything off, or the length would not fit.
        assert plen == len(path), "Path %s too long to fit into index" % entry.path
        flags = plen | (entry.flags & CE_NAMEMASK_INV)  # Clear possible previous values.
        write(
            pack(
                ">LLLLLL20sH",
                entry.dev,
                entry.inode,
                entry.mode,
                entry.uid,
                entry.gid,
                entry.size,
                entry.binsha,
                flags,
            )
        )
        write(path)
        # Round the entry size up to the next multiple of 8 that is strictly larger
        # than what was written, so the path is followed by 1 to 8 NUL bytes.
        real_size = (tell() - beginoffset + 8) & ~7
        write(b"\0" * ((beginoffset + real_size) - tell()))
    # END for each entry

    # Write previously cached extensions data.
    if extension_data is not None:
        stream_sha.write(extension_data)

    # Write the sha over the content.
    stream_sha.write_sha()
def read_header(stream: IO[bytes]) -> Tuple[int, int]:
    """Return tuple(version_long, num_entries) from the given stream.

    :param stream:
        Binary stream positioned at the beginning of the index data. The 4 byte
        signature is read first, followed by two big-endian 32 bit unsigned
        integers.

    :return:
        tuple(version, num_entries)

    :raise AssertionError:
        If the signature is not ``DIRC``, or if the version is neither 1 nor 2.
    """
    type_id = stream.read(4)
    if type_id != b"DIRC":
        raise AssertionError("Invalid index file header: %r" % type_id)
    unpacked = cast(Tuple[int, int], unpack(">LL", stream.read(4 * 2)))
    version, num_entries = unpacked

    # TODO: Handle version 3: extended data, see read-cache.c.
    # Raised explicitly, like the signature check above: a bare `assert` is removed
    # when Python runs with -O, which would let unsupported versions be parsed.
    if version not in (1, 2):
        raise AssertionError("Unsupported index file version: %r" % version)
    return version, num_entries
def entry_key(*entry: Union[BaseIndexEntry, PathLike, int]) -> Tuple[PathLike, int]:
    """
    :return:
        Key suitable to be used for the
        :attr:`index.entries <git.index.base.IndexFile.entries>` dictionary.

    :param entry:
        One instance of type BaseIndexEntry or the path and the stage.
    """
    if len(entry) != 1:
        # Path and stage were given separately: the arguments already are the key.
        return cast("Tuple[PathLike, int]", entry)

    (index_entry,) = entry
    assert isinstance(index_entry, BaseIndexEntry)
    return (index_entry.path, index_entry.stage)
def read_cache(
    stream: IO[bytes],
) -> Tuple[int, Dict[Tuple[PathLike, int], "IndexEntry"], bytes, bytes]:
    """Read a cache file from the given stream.

    :param stream:
        Binary stream positioned at the beginning of the index data. It is read
        until its end and must support ``tell()``.

    :return:
        tuple(version, entries_dict, extension_data, content_sha)

        * *version* is the integer version number.
        * *entries_dict* is a dictionary which maps a ``(path, stage)`` tuple to the
          IndexEntry instance read for it.
        * *extension_data* is ``b""`` or 4 bytes of type + 4 bytes of size + size bytes.
        * *content_sha* is a 20 byte sha on all cache file contents.

    :raise AssertionError:
        If the header is invalid (see :func:`read_header`), or if fewer than 20 bytes
        follow the entries (the latter only while assertions are enabled).
    """
    version, num_entries = read_header(stream)
    count = 0
    entries: Dict[Tuple[PathLike, int], "IndexEntry"] = {}

    read = stream.read
    tell = stream.tell
    while count < num_entries:
        beginoffset = tell()
        # Both timestamps are kept as their raw 8 bytes.
        ctime = unpack(">8s", read(8))[0]
        mtime = unpack(">8s", read(8))[0]
        (dev, ino, mode, uid, gid, size, sha, flags) = unpack(">LLLLLL20sH", read(20 + 4 * 6 + 2))
        # The path length is stored in the flag bits selected by CE_NAMEMASK.
        path_size = flags & CE_NAMEMASK
        path = read(path_size).decode(defenc)

        # Skip the 1 to 8 padding bytes that round the entry up to a multiple of 8.
        real_size = (tell() - beginoffset + 8) & ~7
        read((beginoffset + real_size) - tell())
        entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size))
        # entry_key would be the method to use, but we save the effort.
        entries[(path, entry.stage)] = entry
        count += 1
    # END for each entry

    # The footer contains extension data and a sha on the content so far.
    # Keep the extension footer, and verify we have a sha in the end.
    # Extension data format is:
    # 4 bytes ID
    # 4 bytes length of chunk
    # Repeated 0 - N times
    # ~0 equals -1, i.e. everything that is left in the stream is read.
    extension_data = stream.read(~0)
    assert len(extension_data) > 19, (
        "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data)
    )

    content_sha = extension_data[-20:]

    # Truncate the sha in the end as we will dynamically create it anyway.
    extension_data = extension_data[:-20]

    return (version, entries, extension_data, content_sha)
def write_tree_from_cache(
    entries: List[IndexEntry], odb: "GitCmdObjectDB", sl: slice, si: int = 0
) -> Tuple[bytes, List["TreeCacheTup"]]:
    R"""Create a tree from the given sorted list of entries and put the respective
    trees into the given object database.

    Entries that share the next path component are grouped and written as a subtree
    by a recursive call.

    :param entries:
        **Sorted** list of :class:`~git.index.typ.IndexEntry`\s.

    :param odb:
        Object database to store the trees in.

    :param si:
        Start index at which we should start creating subtrees. It is the offset
        into each entry's path at which the part relevant for this tree begins.

    :param sl:
        Slice indicating the range we should process on the entries list.

    :return:
        tuple(binsha, list(tree_entry, ...))

        A tuple of a sha and a list of tree entries being a tuple of binsha, mode, name.

    :raise git.exc.UnmergedEntriesError:
        If an entry in the processed range is not at stage 0.
    """
    tree_items: List["TreeCacheTup"] = []

    ci = sl.start
    end = sl.stop
    while ci < end:
        entry = entries[ci]
        if entry.stage != 0:
            raise UnmergedEntriesError(entry)
        # END abort on unmerged
        ci += 1
        rbound = entry.path.find("/", si)
        if rbound == -1:
            # It's not a tree.
            tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
        else:
            # Find common base range.
            base = entry.path[si:rbound]
            xi = ci
            while xi < end:
                oentry = entries[xi]
                orbound = oentry.path.find("/", si)
                if orbound == -1 or oentry.path[si:orbound] != base:
                    break
                # END abort on base mismatch
                xi += 1
            # END find common base

            # Enter recursion.
            # ci - 1 as we want to count our current item as well.
            # The subtree's paths start right behind the separator, hence rbound + 1.
            sha, _tree_entry_list = write_tree_from_cache(entries, odb, slice(ci - 1, xi), rbound + 1)
            tree_items.append((sha, S_IFDIR, base))

            # Skip ahead.
            ci = xi
        # END handle bounds
    # END for each entry

    # Finally create the tree.
    sio = BytesIO()
    tree_to_stream(tree_items, sio.write)  # Writes to stream as bytes, but doesn't change tree_items.
    sio.seek(0)

    istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
    return (istream.binsha, tree_items)
def _tree_entry_to_baseindexentry(tree_entry: "TreeCacheTup", stage: int) -> BaseIndexEntry:
    """Convert a tree entry tuple into a :class:`BaseIndexEntry` at the given stage.

    The first two tuple items swap places, as the index entry takes the second one
    first. The stage is shifted to its position within the flags.
    """
    first = tree_entry[0]
    second = tree_entry[1]
    third = tree_entry[2]
    flags = stage << CE_STAGESHIFT
    return BaseIndexEntry((second, first, flags, third))
def aggressive_tree_merge(odb: "GitCmdObjectDB", tree_shas: Sequence[bytes]) -> List[BaseIndexEntry]:
    R"""
    :return:
        List of :class:`~git.index.typ.BaseIndexEntry`\s representing the aggressive
        merge of the given trees. All valid entries are on stage 0, whereas the
        conflicting ones are left on stage 1, 2 or 3, whereas stage 1 corresponds to the
        common ancestor tree, 2 to our tree and 3 to 'their' tree.

    :param odb:
        Object database that is passed on to the tree traversal functions.

    :param tree_shas:
        1, 2 or 3 trees as identified by their binary 20 byte shas. If 1 or two, the
        entries will effectively correspond to the last given tree. If 3 are given, a 3
        way merge is performed.

    :raise ValueError:
        If more than three trees are given.
    """
    out: List[BaseIndexEntry] = []

    # One and two way is the same for us, as we don't have to handle an existing
    # index. Instead, the entries of the last given tree are taken as they are.
    if len(tree_shas) in (1, 2):
        for entry in traverse_tree_recursive(odb, tree_shas[-1], ""):
            out.append(_tree_entry_to_baseindexentry(entry, 0))
        # END for each entry
        return out
    # END handle single tree

    if len(tree_shas) > 3:
        raise ValueError("Cannot handle %i trees at once" % len(tree_shas))

    # Three trees.
    # Items 0 and 1 of each tree entry are compared below; they become the binsha
    # and the mode of the resulting index entry, respectively.
    for base, ours, theirs in traverse_trees_recursive(odb, tree_shas, ""):
        if base is not None:
            # Base version exists.
            if ours is not None:
                # Ours exists.
                if theirs is not None:
                    # It exists in all branches. If it was changed in both
                    # it's a conflict. Otherwise, we take the changed version.
                    # This should be the most common branch, so it comes first.
                    if (base[0] != ours[0] and base[0] != theirs[0] and ours[0] != theirs[0]) or (
                        base[1] != ours[1] and base[1] != theirs[1] and ours[1] != theirs[1]
                    ):
                        # Changed by both.
                        out.append(_tree_entry_to_baseindexentry(base, 1))
                        out.append(_tree_entry_to_baseindexentry(ours, 2))
                        out.append(_tree_entry_to_baseindexentry(theirs, 3))
                    elif base[0] != ours[0] or base[1] != ours[1]:
                        # Only we changed it.
                        out.append(_tree_entry_to_baseindexentry(ours, 0))
                    else:
                        # Either nobody changed it, or they did. In either
                        # case, use theirs.
                        out.append(_tree_entry_to_baseindexentry(theirs, 0))
                    # END handle modification
                else:
                    if ours[0] != base[0] or ours[1] != base[1]:
                        # They deleted it, we changed it, conflict.
                        out.append(_tree_entry_to_baseindexentry(base, 1))
                        out.append(_tree_entry_to_baseindexentry(ours, 2))
                    # Otherwise we didn't change it, so their deletion wins and
                    # nothing is emitted.
                    # END handle our change
                # END handle theirs
            else:
                if theirs is None:
                    # Deleted in both, it's fine - it's out.
                    pass
                else:
                    if theirs[0] != base[0] or theirs[1] != base[1]:
                        # Deleted in ours, changed theirs, conflict.
                        out.append(_tree_entry_to_baseindexentry(base, 1))
                        out.append(_tree_entry_to_baseindexentry(theirs, 3))
                    # END theirs changed
                    # Otherwise theirs didn't change, so our deletion wins and
                    # nothing is emitted.
                # END handle theirs
            # END handle ours
        else:
            # All three can't be None.
            if ours is None:
                # Added in their branch.
                assert theirs is not None
                out.append(_tree_entry_to_baseindexentry(theirs, 0))
            elif theirs is None:
                # Added in our branch.
                out.append(_tree_entry_to_baseindexentry(ours, 0))
            else:
                # Both have it, except for the base, see whether it changed.
                if ours[0] != theirs[0] or ours[1] != theirs[1]:
                    # Added differently in both: a conflict without a stage 1 entry.
                    out.append(_tree_entry_to_baseindexentry(ours, 2))
                    out.append(_tree_entry_to_baseindexentry(theirs, 3))
                else:
                    # It was added the same in both.
                    out.append(_tree_entry_to_baseindexentry(ours, 0))
                # END handle two items
            # END handle heads
        # END handle base exists
    # END for each entries tuple

    return out

View File

@@ -0,0 +1,202 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Additional types used by the index."""
__all__ = ["BlobFilter", "BaseIndexEntry", "IndexEntry", "StageType"]
from binascii import b2a_hex
from pathlib import Path
from git.objects import Blob
from .util import pack, unpack
# typing ----------------------------------------------------------------------
from typing import NamedTuple, Sequence, TYPE_CHECKING, Tuple, Union, cast
from git.types import PathLike
if TYPE_CHECKING:
from git.repo import Repo
StageType = int  # Alias used to annotate stage numbers, see BaseIndexEntry.stage.
# ---------------------------------------------------------------------------------
# { Invariants
# Masks and shift applied to the ``flags`` value of an index entry.
CE_NAMEMASK = 0x0FFF  # Low 12 bits.
CE_STAGEMASK = 0x3000  # Bits 12 and 13: the stage, as extracted by BaseIndexEntry.stage.
CE_EXTENDED = 0x4000  # Bit 14. NOTE(review): not used in this module - confirm meaning against git.
CE_VALID = 0x8000  # Bit 15. NOTE(review): not used in this module - confirm meaning against git.
CE_STAGESHIFT = 12  # Number of bits the stage is shifted by within the flags.
# } END invariants
class BlobFilter:
    """Predicate to be used by
    :meth:`IndexFile.iter_blobs <git.index.base.IndexFile.iter_blobs>` allowing to
    filter only return blobs which match the given list of directories or files.

    The given paths are given relative to the repository.
    """

    __slots__ = ("paths",)

    def __init__(self, paths: Sequence[PathLike]) -> None:
        """
        :param paths:
            Tuple or list of paths which are either pointing to directories or to files
            relative to the current repository.
        """
        self.paths = paths

    def __call__(self, stage_blob: Tuple[StageType, Blob]) -> bool:
        """
        :param stage_blob:
            Tuple of a stage and a blob, of which only the blob's path is inspected.

        :return:
            ``True`` if one of the filter paths equals the blob's path or is one of
            its parent directories, compared component by component.
        """

        def components(pathlike):
            as_path = pathlike if isinstance(pathlike, Path) else Path(pathlike)
            return as_path.parts

        blob_parts = components(stage_blob[1].path)
        for pathlike in self.paths:
            # TODO: Change to use `PosixPath.is_relative_to` once Python 3.8 is no
            # longer supported.
            filter_parts = components(pathlike)
            # A filter with more components than the blob path can never equal the
            # (then shorter) slice, so it is skipped implicitly.
            if blob_parts[: len(filter_parts)] == filter_parts:
                return True
        return False
class BaseIndexEntryHelper(NamedTuple):
    """Typed named tuple to provide named attribute access for :class:`BaseIndexEntry`.

    This is needed to allow overriding ``__new__`` in child class to preserve backwards
    compatibility.

    Only the first four fields are required; all others default to zero values.
    """

    mode: int
    binsha: bytes  # Binary sha; BaseIndexEntry.hexsha is its hexadecimal form.
    flags: int  # Contains the stage, see BaseIndexEntry.stage.
    path: PathLike
    # Both timestamps are two packed big-endian 32 bit unsigned integers, which
    # IndexEntry.ctime and IndexEntry.mtime unpack again.
    ctime_bytes: bytes = pack(">LL", 0, 0)
    mtime_bytes: bytes = pack(">LL", 0, 0)
    dev: int = 0
    inode: int = 0
    uid: int = 0
    gid: int = 0
    size: int = 0
class BaseIndexEntry(BaseIndexEntryHelper):
    """Small brother of an index entry which can be created to describe changes
    done to the index in which case plenty of additional information is not required.

    As the first 4 data members match exactly to the :class:`IndexEntry` type, methods
    expecting a :class:`BaseIndexEntry` can also handle full :class:`IndexEntry`
    instances even if they use numeric indices for performance reasons.
    """

    def __new__(
        cls,
        inp_tuple: Union[
            Tuple[int, bytes, int, PathLike],
            Tuple[int, bytes, int, PathLike, bytes, bytes, int, int, int, int, int],
        ],
    ) -> "BaseIndexEntry":
        """Override ``__new__`` to allow construction from a tuple for backwards
        compatibility.

        :param inp_tuple:
            The field values in declaration order, either the first four or all
            eleven of them.
        """
        return super().__new__(cls, *inp_tuple)

    def __str__(self) -> str:
        fields = (self.mode, self.hexsha, self.stage, self.path)
        return "%o %s %i\t%s" % fields

    def __repr__(self) -> str:
        fields = (self.mode, self.hexsha, self.stage, self.path)
        return "(%o, %s, %i, %s)" % fields

    @property
    def hexsha(self) -> str:
        """hex version of our sha"""
        hex_bytes = b2a_hex(self.binsha)
        return hex_bytes.decode("ascii")

    @property
    def stage(self) -> int:
        """Stage of the entry, either:

        * 0 = default stage
        * 1 = stage before a merge or common ancestor entry in case of a 3 way merge
        * 2 = stage of entries from the 'left' side of the merge
        * 3 = stage of entries from the 'right' side of the merge

        :note:
            For more information, see :manpage:`git-read-tree(1)`.
        """
        stage_bits = self.flags & CE_STAGEMASK
        return stage_bits >> CE_STAGESHIFT

    @classmethod
    def from_blob(cls, blob: Blob, stage: int = 0) -> "BaseIndexEntry":
        """:return: Fully equipped BaseIndexEntry at the given stage"""
        flags = stage << CE_STAGESHIFT
        return cls((blob.mode, blob.binsha, flags, blob.path))

    def to_blob(self, repo: "Repo") -> Blob:
        """:return: Blob using the information of this index entry"""
        return Blob(repo, self.binsha, self.mode, self.path)
class IndexEntry(BaseIndexEntry):
    """Allows convenient access to index entry data as defined in
    :class:`BaseIndexEntry` without completely unpacking it.

    Attributes usually accessed often are cached in the tuple whereas others are
    unpacked on demand.

    See the properties for a mapping between names and tuple indices.
    """

    @property
    def ctime(self) -> Tuple[int, int]:
        """
        :return:
            Tuple(int_time_seconds_since_epoch, int_nano_seconds) of the
            file's creation time
        """
        unpacked = unpack(">LL", self.ctime_bytes)
        return cast(Tuple[int, int], unpacked)

    @property
    def mtime(self) -> Tuple[int, int]:
        """See :attr:`ctime` property, but returns modification time."""
        unpacked = unpack(">LL", self.mtime_bytes)
        return cast(Tuple[int, int], unpacked)

    @classmethod
    def from_base(cls, base: "BaseIndexEntry") -> "IndexEntry":
        """
        :return:
            Minimal entry as created from the given :class:`BaseIndexEntry` instance.
            Missing values will be set to null-like values.

        :param base:
            Instance of type :class:`BaseIndexEntry`.
        """
        null_time = pack(">LL", 0, 0)
        fields = (base.mode, base.binsha, base.flags, base.path, null_time, null_time, 0, 0, 0, 0, 0)
        return IndexEntry(fields)

    @classmethod
    def from_blob(cls, blob: Blob, stage: int = 0) -> "IndexEntry":
        """:return: Minimal entry resembling the given blob object"""
        null_time = pack(">LL", 0, 0)
        # Apart from the blob's size, everything the blob cannot provide is zeroed.
        fields = (
            blob.mode,
            blob.binsha,
            stage << CE_STAGESHIFT,
            blob.path,
            null_time,
            null_time,
            0,
            0,
            0,
            0,
            blob.size,
        )
        return IndexEntry(fields)

View File

@@ -0,0 +1,121 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Index utilities."""
__all__ = ["TemporaryFileSwap", "post_clear_cache", "default_index", "git_working_dir"]
import contextlib
from functools import wraps
import os
import os.path as osp
import struct
import tempfile
from types import TracebackType
# typing ----------------------------------------------------------------------
from typing import Any, Callable, TYPE_CHECKING, Optional, Type
from git.types import Literal, PathLike, _T
if TYPE_CHECKING:
from git.index import IndexFile
# ---------------------------------------------------------------------------------
# { Aliases
# Module level names for the struct functions, so that other modules can import
# them from here.
pack = struct.pack
unpack = struct.unpack
# } END aliases
class TemporaryFileSwap:
    """Utility class moving a file to a temporary location within the same directory
    and moving it back to its original place when the context is exited.

    The temporary file is created next to the original one, using the original file
    name as prefix.
    """

    __slots__ = ("file_path", "tmp_file_path")

    def __init__(self, file_path: PathLike) -> None:
        self.file_path = file_path
        directory, filename = osp.split(file_path)
        handle, self.tmp_file_path = tempfile.mkstemp(prefix=filename, dir=directory)
        # Only the reserved name is needed, not the open descriptor.
        os.close(handle)
        try:
            os.replace(self.file_path, self.tmp_file_path)
        except OSError:
            # It may be that the source does not exist.
            pass

    def __enter__(self) -> "TemporaryFileSwap":
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> Literal[False]:
        temporary_exists = osp.isfile(self.tmp_file_path)
        if temporary_exists:
            os.replace(self.tmp_file_path, self.file_path)
        # Exceptions raised within the context are never suppressed.
        return False
# { Decorators
def post_clear_cache(func: Callable[..., _T]) -> Callable[..., _T]:
    """Decorator for functions that alter the index using the git command.

    When a git command alters the index, this invalidates our possibly existing entries
    dictionary, which is why it must be deleted to allow it to be lazily reread later.

    The cache is only deleted if the wrapped function returns; if it raises, the
    exception propagates and the cache is left untouched.
    """

    @wraps(func)
    def post_clear_cache_if_not_raised(self: "IndexFile", *args: Any, **kwargs: Any) -> _T:
        result = func(self, *args, **kwargs)
        # Only reached if `func` did not raise.
        self._delete_entries_cache()
        return result

    # END wrapper method

    return post_clear_cache_if_not_raised
def default_index(func: Callable[..., _T]) -> Callable[..., _T]:
    """Decorator ensuring the wrapped method may only run if we are the default
    repository index.

    This is as we rely on git commands that operate on that index only.

    :raise AssertionError:
        Raised by the wrapper if the index's file path differs from the path of the
        default index.
    """

    @wraps(func)
    def check_default_index(self: "IndexFile", *args: Any, **kwargs: Any) -> _T:
        is_other_index = self._file_path != self._index_path()
        if is_other_index:
            message = "Cannot call %r on indices that do not represent the default git index" % func.__name__
            raise AssertionError(message)
        return func(self, *args, **kwargs)

    # END wrapper method

    return check_default_index
def git_working_dir(func: Callable[..., _T]) -> Callable[..., _T]:
    """Decorator which changes the current working dir to the one of the git
    repository in order to ensure relative paths are handled correctly.

    The previous working directory is restored afterwards, whether the wrapped
    function returns or raises.
    """

    @wraps(func)
    def set_git_working_dir(self: "IndexFile", *args: Any, **kwargs: Any) -> _T:
        previous_dir = os.getcwd()
        target_dir = str(self.repo.working_tree_dir)
        os.chdir(target_dir)
        try:
            return func(self, *args, **kwargs)
        finally:
            os.chdir(previous_dir)
        # END handle working dir

    # END wrapper

    return set_git_working_dir
# } END decorators

View File

@@ -0,0 +1,25 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Import all submodules' main classes into the package space."""
__all__ = [
"IndexObject",
"Object",
"Blob",
"Commit",
"Submodule",
"UpdateProgress",
"RootModule",
"RootUpdateProgress",
"TagObject",
"Tree",
"TreeModifier",
]
from .base import IndexObject, Object
from .blob import Blob
from .commit import Commit
from .submodule import RootModule, RootUpdateProgress, Submodule, UpdateProgress
from .tag import TagObject
from .tree import Tree, TreeModifier

View File

@@ -0,0 +1,301 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["Object", "IndexObject"]
import os.path as osp
import gitdb.typ as dbtyp
from git.exc import WorkTreeRepositoryUnsupported
from git.util import LazyMixin, bin_to_hex, join_path_native, stream_copy
from .util import get_object_type_by_name
# typing ------------------------------------------------------------------
from typing import Any, TYPE_CHECKING, Union
from git.types import AnyGitObject, GitObjectTypeString, PathLike
if TYPE_CHECKING:
from gitdb.base import OStream
from git.refs.reference import Reference
from git.repo import Repo
from .blob import Blob
from .submodule.base import Submodule
from .tree import Tree
# Type alias covering Tree, Blob and Submodule. The names are given as strings, as
# these classes are only imported for type checking.
IndexObjUnion = Union["Tree", "Blob", "Submodule"]
# --------------------------------------------------------------------------
class Object(LazyMixin):
"""Base class for classes representing git object types.
The following four leaf classes represent specific kinds of git objects:
* :class:`Blob <git.objects.blob.Blob>`
* :class:`Tree <git.objects.tree.Tree>`
* :class:`Commit <git.objects.commit.Commit>`
* :class:`TagObject <git.objects.tag.TagObject>`
See :manpage:`gitglossary(7)` on:
* "object": https://git-scm.com/docs/gitglossary#def_object
* "object type": https://git-scm.com/docs/gitglossary#def_object_type
* "blob": https://git-scm.com/docs/gitglossary#def_blob_object
* "tree object": https://git-scm.com/docs/gitglossary#def_tree_object
* "commit object": https://git-scm.com/docs/gitglossary#def_commit_object
* "tag object": https://git-scm.com/docs/gitglossary#def_tag_object
:note:
See the :class:`~git.types.AnyGitObject` union type of the four leaf subclasses
that represent actual git object types.
:note:
:class:`~git.objects.submodule.base.Submodule` is defined under the hierarchy
rooted at this :class:`Object` class, even though submodules are not really a
type of git object. (This also applies to its
:class:`~git.objects.submodule.root.RootModule` subclass.)
:note:
This :class:`Object` class should not be confused with :class:`object` (the root
of the class hierarchy in Python).
"""
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
TYPES = (
dbtyp.str_blob_type,
dbtyp.str_tree_type,
dbtyp.str_commit_type,
dbtyp.str_tag_type,
)
__slots__ = ("repo", "binsha", "size")
type: Union[GitObjectTypeString, None] = None
"""String identifying (a concrete :class:`Object` subtype for) a git object type.
The subtypes that this may name correspond to the kinds of git objects that exist,
i.e., the objects that may be present in a git repository.
:note:
Most subclasses represent specific types of git objects and override this class
attribute accordingly. This attribute is ``None`` in the :class:`Object` base
class, as well as the :class:`IndexObject` intermediate subclass, but never
``None`` in concrete leaf subclasses representing specific git object types.
:note:
See also :class:`~git.types.GitObjectTypeString`.
"""
def __init__(self, repo: "Repo", binsha: bytes) -> None:
    """Initialize an object by identifying it by its binary sha.

    Only ``repo`` and ``binsha`` are assigned here; ``size`` is not.

    :param repo:
        Repository this object is located in.

    :param binsha:
        20 byte SHA1
    """
    super().__init__()
    self.repo = repo
    self.binsha = binsha
    sha_length = len(binsha)
    assert sha_length == 20, "Require 20 byte binary sha, got %r, len = %i" % (
        binsha,
        sha_length,
    )
@classmethod
def new(cls, repo: "Repo", id: Union[str, "Reference"]) -> AnyGitObject:
    """
    :return:
        New :class:`Object` instance of a type appropriate to the object type behind
        `id`. The id of the newly created object will be a binsha even though the
        input id may have been a `~git.refs.reference.Reference` or rev-spec.

    :param id:
        :class:`~git.refs.reference.Reference`, rev-spec, or hexsha. It is converted
        to a string and resolved by the repository's ``rev_parse``.

    :note:
        This cannot be a ``__new__`` method as it would always call :meth:`__init__`
        with the input id which is not necessarily a binsha.
    """
    rev_spec = str(id)
    return repo.rev_parse(rev_spec)
@classmethod
def new_from_sha(cls, repo: "Repo", sha1: bytes) -> AnyGitObject:
    """Build an object of the type that the object database reports for `sha1`.

    :param repo:
        Repository whose object database (``repo.odb``) is queried.

    :param sha1:
        20 byte binary sha1.

    :return:
        New object instance of a type appropriate to represent the given binary
        sha1. Its ``size`` is set from the database info, except for the NULL sha.
    """
    if sha1 != cls.NULL_BIN_SHA:
        info = repo.odb.info(sha1)
        obj = get_object_type_by_name(info.type)(repo, info.binsha)
        obj.size = info.size
        return obj

    # Special case: the NULL binsha is always the root commit; no database lookup.
    return get_object_type_by_name(b"commit")(repo, sha1)
def _set_cache_(self, attr: str) -> None:
    """Fill in the ``size`` attribute on demand; delegate anything else upward.

    :param attr:
        Name of the attribute that is to be populated.
    """
    if attr != "size":
        super()._set_cache_(attr)
        return

    # The size comes from the object database entry for our binsha.
    self.size = self.repo.odb.info(self.binsha).size  # type: int
def __eq__(self, other: Any) -> bool:
    """:return: ``True`` if `other` has a ``binsha`` equal to ours, else ``False``"""
    return hasattr(other, "binsha") and self.binsha == other.binsha
def __ne__(self, other: Any) -> bool:
    """:return: ``True`` if `other` has no ``binsha`` or one that differs from ours"""
    return not hasattr(other, "binsha") or self.binsha != other.binsha
def __hash__(self) -> int:
    """:return: Hash of our binary sha, so objects can be used in dicts and sets"""
    identity = self.binsha
    return hash(identity)
def __str__(self) -> str:
    """:return: Our hexadecimal SHA1, the form understood by all git commands"""
    hex_id = self.hexsha
    return hex_id
def __repr__(self) -> str:
    """:return: Pythonic representation made of our class name and hex sha"""
    class_name = self.__class__.__name__
    return '<git.%s "%s">' % (class_name, self.hexsha)
@property
def hexsha(self) -> str:
    """:return: 40 byte hex version of our 20 byte binary sha"""
    # The hex conversion yields bytes; callers expect text.
    hex_bytes = bin_to_hex(self.binsha)
    return hex_bytes.decode("ascii")
@property
def data_stream(self) -> "OStream":
    """Open a stream on this object's data in the repository's object database.

    :return:
        File-object compatible stream to the uncompressed raw data of the object

    :note:
        Returned streams must be read in order.
    """
    object_db = self.repo.odb
    return object_db.stream(self.binsha)
def stream_data(self, ostream: "OStream") -> "Object":
    """Copy our data from the object database into the given output stream.

    :param ostream:
        File-object compatible stream object.

    :return:
        self
    """
    source = self.repo.odb.stream(self.binsha)
    stream_copy(source, ostream)
    return self
class IndexObject(Object):
    """Base for all objects that can be part of the index file.

    The classes representing git object types that can be part of the index file are
    :class:`~git.objects.tree.Tree` and :class:`~git.objects.blob.Blob`. In addition,
    :class:`~git.objects.submodule.base.Submodule`, which is not really a git object
    type but can be part of an index file, is also a subclass.
    """

    __slots__ = ("path", "mode")

    # For compatibility with iterable lists.
    _id_attribute_ = "path"

    def __init__(
        self,
        repo: "Repo",
        binsha: bytes,
        mode: Union[None, int] = None,
        path: Union[None, PathLike] = None,
    ) -> None:
        """Initialize a newly instanced :class:`IndexObject`.

        :param repo:
            The :class:`~git.repo.base.Repo` we are located in.

        :param binsha:
            20 byte sha1.

        :param mode:
            The stat-compatible file mode as :class:`int`.
            Use the :mod:`stat` module to evaluate the information.
            Left unset on the instance when ``None``.

        :param path:
            The path to the file in the file system, relative to the git repository
            root, like ``file.ext`` or ``folder/other.ext``.
            Left unset on the instance when ``None``.

        :note:
            Path may not be set if the index object has been created directly, as it
            cannot be retrieved without knowing the parent tree.
        """
        super().__init__(repo, binsha)
        # Only store what was actually supplied; unset slots stay unset.
        for slot_name, supplied in (("mode", mode), ("path", path)):
            if supplied is not None:
                setattr(self, slot_name, supplied)

    def __hash__(self) -> int:
        """
        :return:
            Hash of our path as index items are uniquely identifiable by path, not by
            their data!
        """
        location = self.path
        return hash(location)

    def _set_cache_(self, attr: str) -> None:
        """Refuse to lazily compute ``path``/``mode``; delegate other attributes.

        :raise AttributeError:
            If `attr` is one of this class's own slots, because those values cannot
            be retrieved later on (not without searching for them).
        """
        if attr not in IndexObject.__slots__:
            super()._set_cache_(attr)
            return

        raise AttributeError(
            "Attribute '%s' unset: path and mode attributes must have been set during %s object creation"
            % (attr, type(self).__name__)
        )

    @property
    def name(self) -> str:
        """:return: Name portion of the path, effectively being the basename"""
        relative_path = self.path
        return osp.basename(relative_path)

    @property
    def abspath(self) -> PathLike:
        R"""
        :return:
            Absolute path to this index object in the file system (as opposed to the
            :attr:`path` field which is a path relative to the git repository).

            The returned path will be native to the system and contains ``\`` on
            Windows.

        :raise WorkTreeRepositoryUnsupported:
            If the repository's ``working_tree_dir`` is ``None``.
        """
        worktree = self.repo.working_tree_dir
        if worktree is None:
            raise WorkTreeRepositoryUnsupported("working_tree_dir was None or empty")
        return join_path_native(worktree, self.path)

View File

@@ -0,0 +1,48 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["Blob"]
from mimetypes import guess_type
import sys
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
from . import base
class Blob(base.IndexObject):
    """A Blob encapsulates a git blob object.

    See :manpage:`gitglossary(7)` on "blob":
    https://git-scm.com/docs/gitglossary#def_blob_object
    """

    # Fallback reported by :attr:`mime_type` when nothing could be guessed.
    DEFAULT_MIME_TYPE = "text/plain"
    type: Literal["blob"] = "blob"

    # Valid blob modes
    executable_mode = 0o100755
    file_mode = 0o100644
    link_mode = 0o120000

    __slots__ = ()

    @property
    def mime_type(self) -> str:
        """
        :return:
            String describing the mime type of this file (based on the filename)

        :note:
            Defaults to ``text/plain`` in case the actual file type is unknown, or
            when this blob has no (non-empty) path.
        """
        if self.path:
            # guess_type returns (type, encoding); only the type is of interest.
            guessed_type = guess_type(str(self.path))[0]
            if guessed_type:
                return guessed_type
        return self.DEFAULT_MIME_TYPE

View File

@@ -0,0 +1,909 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["Commit"]
from collections import defaultdict
import datetime
from io import BytesIO
import logging
import os
import re
from subprocess import Popen, PIPE
import sys
from time import altzone, daylight, localtime, time, timezone
import warnings
from gitdb import IStream
from git.cmd import Git
from git.diff import Diffable
from git.util import Actor, Stats, finalize_process, hex_to_bin
from . import base
from .tree import Tree
from .util import (
Serializable,
TraversableIterableObj,
altz_to_utctz_str,
from_timestamp,
parse_actor_and_date,
parse_date,
)
# typing ------------------------------------------------------------------
from typing import (
Any,
Dict,
IO,
Iterator,
List,
Sequence,
Tuple,
TYPE_CHECKING,
Union,
cast,
)
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
from git.types import PathLike
if TYPE_CHECKING:
from git.refs import SymbolicReference
from git.repo import Repo
# ------------------------------------------------------------------------
_logger = logging.getLogger(__name__)
class Commit(base.Object, TraversableIterableObj, Diffable, Serializable):
"""Wraps a git commit object.
See :manpage:`gitglossary(7)` on "commit object":
https://git-scm.com/docs/gitglossary#def_commit_object
:note:
This class will act lazily on some of its attributes and will query the value on
demand only if it involves calling the git binary.
"""
# ENVIRONMENT VARIABLES
# Read when creating new commits.
env_author_date = "GIT_AUTHOR_DATE"
env_committer_date = "GIT_COMMITTER_DATE"
# CONFIGURATION KEYS
conf_encoding = "i18n.commitencoding"
# INVARIANTS
default_encoding = "UTF-8"
type: Literal["commit"] = "commit"
__slots__ = (
"tree",
"author",
"authored_date",
"author_tz_offset",
"committer",
"committed_date",
"committer_tz_offset",
"message",
"parents",
"encoding",
"gpgsig",
)
_id_attribute_ = "hexsha"
parents: Sequence["Commit"]
def __init__(
    self,
    repo: "Repo",
    binsha: bytes,
    tree: Union[Tree, None] = None,
    author: Union[Actor, None] = None,
    authored_date: Union[int, None] = None,
    author_tz_offset: Union[None, float] = None,
    committer: Union[Actor, None] = None,
    committed_date: Union[int, None] = None,
    committer_tz_offset: Union[None, float] = None,
    message: Union[str, bytes, None] = None,
    parents: Union[Sequence["Commit"], None] = None,
    encoding: Union[str, None] = None,
    gpgsig: Union[str, None] = None,
) -> None:
    """Instantiate a new :class:`Commit`. All keyword arguments taking ``None`` as
    default will be implicitly set on first query.

    :param binsha:
        20 byte sha1.

    :param tree:
        A :class:`~git.objects.tree.Tree` object.

    :param author:
        The author :class:`~git.util.Actor` object.

    :param authored_date: int_seconds_since_epoch
        The authored DateTime - use :func:`time.gmtime` to convert it into a
        different format.

    :param author_tz_offset: int_seconds_west_of_utc
        The timezone that the `authored_date` is in.

    :param committer:
        The committer string, as an :class:`~git.util.Actor` object.

    :param committed_date: int_seconds_since_epoch
        The committed DateTime - use :func:`time.gmtime` to convert it into a
        different format.

    :param committer_tz_offset: int_seconds_west_of_utc
        The timezone that the `committed_date` is in.

    :param message: string
        The commit message.

    :param encoding: string
        Encoding of the message, defaults to UTF-8.

    :param parents:
        List or tuple of :class:`Commit` objects which are our parent(s) in the
        commit dependency graph.

    :param gpgsig:
        Signature text stored on the commit, if any.

    :return:
        :class:`Commit`

    :note:
        Timezone information is in the same format and in the same sign as what
        :func:`time.altzone` returns. The sign is inverted compared to git's UTC
        timezone.
    """
    super().__init__(repo, binsha)
    self.binsha = binsha

    if tree is not None:
        assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)

    # Store only the values that were supplied, in declaration order; anything
    # left out stays unset on the instance.
    supplied_values = (
        ("tree", tree),
        ("author", author),
        ("authored_date", authored_date),
        ("author_tz_offset", author_tz_offset),
        ("committer", committer),
        ("committed_date", committed_date),
        ("committer_tz_offset", committer_tz_offset),
        ("message", message),
        ("parents", parents),
        ("encoding", encoding),
        ("gpgsig", gpgsig),
    )
    for attr_name, value in supplied_values:
        if value is not None:
            setattr(self, attr_name, value)
@classmethod
def _get_intermediate_items(cls, commit: "Commit") -> Tuple["Commit", ...]:
    """:return: The parents of `commit`, as a tuple"""
    parent_commits = commit.parents
    return tuple(parent_commits)
@classmethod
def _calculate_sha_(cls, repo: "Repo", commit: "Commit") -> bytes:
    """Serialize a commit, store it in the object database and return its sha.

    :param repo:
        :class:`~git.repo.base.Repo` object the commit should be part of.

    :param commit:
        :class:`Commit` object for which to generate the sha.

    :return:
        The ``binsha`` of the stream returned by ``repo.odb.store``.
    """
    buffer = BytesIO()
    commit._serialize(buffer)

    # The write position after serializing is the payload length; rewind so the
    # database reads the payload from its start.
    payload_size = buffer.tell()
    buffer.seek(0)

    stored = repo.odb.store(IStream(cls.type, payload_size, buffer))
    return stored.binsha
def replace(self, **kwargs: Any) -> "Commit":
    """Create new commit object from an existing commit object.

    Any values provided as keyword arguments will replace the corresponding
    attribute in the new object.

    :param kwargs:
        Replacement values, keyed by names from ``__slots__``.

    :return:
        A new instance of our class whose ``binsha`` has been recalculated.

    :raise ValueError:
        If a keyword is not one of the names in ``__slots__``.
    """
    slot_names = self.__slots__

    # Snapshot the current values first, then validate the overrides.
    attrs = {}
    for slot_name in slot_names:
        attrs[slot_name] = getattr(self, slot_name)

    if any(requested not in slot_names for requested in kwargs):
        raise ValueError("invalid attribute name")

    attrs.update(kwargs)
    replacement = self.__class__(self.repo, self.NULL_BIN_SHA, **attrs)
    replacement.binsha = self._calculate_sha_(self.repo, replacement)
    return replacement
def _set_cache_(self, attr: str) -> None:
    """Populate all commit attributes at once when any one of them is requested.

    :param attr:
        Name of the attribute being requested. Names outside ``Commit.__slots__``
        are delegated to the parent implementation.
    """
    if attr not in Commit.__slots__:
        super()._set_cache_(attr)
        return

    # Read the data in one chunk, it's faster - then provide a file wrapper.
    _binsha, _typename, self.size, stream = self.repo.odb.stream(self.binsha)
    raw_commit = stream.read()
    self._deserialize(BytesIO(raw_commit))
@property
def authored_datetime(self) -> datetime.datetime:
    """:return: ``from_timestamp`` applied to our authored date and author offset"""
    timestamp = self.authored_date
    tz_offset = self.author_tz_offset
    return from_timestamp(timestamp, tz_offset)
@property
def committed_datetime(self) -> datetime.datetime:
    """:return: ``from_timestamp`` applied to our committed date and committer offset"""
    timestamp = self.committed_date
    tz_offset = self.committer_tz_offset
    return from_timestamp(timestamp, tz_offset)
@property
def summary(self) -> Union[str, bytes]:
    """:return: First line of the commit message, in the same type as the message"""
    message = self.message
    # Pick the separator matching the message type (text or bytes).
    line_break = "\n" if isinstance(message, str) else b"\n"
    return message.split(line_break, 1)[0]
def count(self, paths: Union[PathLike, Sequence[PathLike]] = "", **kwargs: Any) -> int:
    """Count the number of commits reachable from this commit.

    :param paths:
        An optional path or a list of paths restricting the return value to commits
        actually containing the paths.

    :param kwargs:
        Additional options to be passed to :manpage:`git-rev-list(1)`. They must not
        alter the output style of the command, or parsing will yield incorrect
        results.

    :return:
        An int defining the number of reachable commits
    """
    rev_list_args = [self.hexsha]
    # Only add the path separator when paths were given: it makes a difference
    # whether empty paths are passed, as that form ignores merge commits.
    if paths:
        rev_list_args += ["--", paths]
    listing = self.repo.git.rev_list(*rev_list_args, **kwargs)
    return len(listing.splitlines())
@property
def name_rev(self) -> str:
    """
    :return:
        String describing the commits hex sha based on the closest
        `~git.refs.reference.Reference`, as produced by ``git name-rev``.

    :note:
        Mostly useful for UI purposes.
    """
    git_cmd = self.repo.git
    return git_cmd.name_rev(self)
@classmethod
def iter_items(
    cls,
    repo: "Repo",
    rev: Union[str, "Commit", "SymbolicReference"],
    paths: Union[PathLike, Sequence[PathLike]] = "",
    **kwargs: Any,
) -> Iterator["Commit"]:
    R"""Find all commits matching the given criteria.

    :param repo:
        The :class:`~git.repo.base.Repo`.

    :param rev:
        Revision specifier. See :manpage:`git-rev-parse(1)` for viable options.

    :param paths:
        An optional path or list of paths. If set only :class:`Commit`\s that
        include the path or paths will be considered.

    :param kwargs:
        Optional keyword arguments to :manpage:`git-rev-list(1)` where:

        * ``max_count`` is the maximum number of commits to fetch.
        * ``skip`` is the number of commits to skip.
        * ``since`` selects all commits since some date, e.g. ``"1970-01-01"``.

    :return:
        Iterator yielding :class:`Commit` items.

    :raise ValueError:
        If ``pretty`` is passed, as the parser expects one sha per line.
    """
    if "pretty" in kwargs:
        raise ValueError("--pretty cannot be used as parsing expects single sha's only")

    # Use -- in all cases, to prevent possibility of ambiguous arguments.
    # See https://github.com/gitpython-developers/GitPython/issues/264.
    args_list: List[PathLike] = ["--"]
    if paths:
        if isinstance(paths, (str, os.PathLike)):
            # A single path: pass it on as one argument.
            args_list.append(paths)
        else:
            args_list.extend(paths)

    rev_list_proc = repo.git.rev_list(rev, args_list, as_process=True, **kwargs)
    return cls._iter_from_process_or_stream(repo, rev_list_proc)
def iter_parents(self, paths: Union[PathLike, Sequence[PathLike]] = "", **kwargs: Any) -> Iterator["Commit"]:
    R"""Iterate _all_ parents of this commit.

    :param paths:
        Optional path or list of paths limiting the :class:`Commit`\s to those that
        contain at least one of the paths.

    :param kwargs:
        All arguments allowed by :manpage:`git-rev-list(1)`. A missing ``skip``, or
        a ``skip`` equal to 0, is replaced by 1.

    :return:
        Iterator yielding :class:`Commit` objects which are parents of ``self``
    """
    # The listing starts at ourselves, so at least one entry must be skipped.
    requested_skip = kwargs.get("skip", 1)
    kwargs["skip"] = 1 if requested_skip == 0 else requested_skip
    return self.iter_items(self.repo, self, paths, **kwargs)
@property
def stats(self) -> Stats:
    """Create a git stat from changes between this commit and its first parent
    or from all changes done if this is the very first commit.

    :return:
        :class:`Stats`
    """

    def process_lines(lines: List[str]) -> str:
        # The output holds the raw entries followed by the same number of numstat
        # entries; pair entry i of the first half with entry i of the second half.
        midpoint = len(lines) // 2
        rows = []
        for raw_entry, numstat_entry in zip(lines, lines[midpoint:]):
            # Last character of the first tab-separated field is the change type.
            change_type = raw_entry.split("\t")[0][-1]
            insertions, deletions, filename = numstat_entry.split("\t")
            rows.append("%s\t%s\t%s\t%s\n" % (change_type, insertions, deletions, filename))
        return "".join(rows)

    git_cmd = self.repo.git
    if self.parents:
        lines = git_cmd.diff(
            self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True, raw=True
        ).splitlines()
    else:
        # Root commit: diff against nothing; the first output line is dropped.
        lines = git_cmd.diff_tree(
            self.hexsha, "--", numstat=True, no_renames=True, root=True, raw=True
        ).splitlines()[1:]

    return Stats._list_from_string(self.repo, process_lines(lines))
@property
def trailers(self) -> Dict[str, str]:
    """Deprecated. Get the trailers of the message as a dictionary.

    :note:
        This property is deprecated, please use either :attr:`trailers_list` or
        :attr:`trailers_dict`.

    :return:
        Dictionary containing whitespace stripped trailer information.
        Only contains the latest instance of each trailer key.
    """
    warnings.warn(
        "Commit.trailers is deprecated, use Commit.trailers_list or Commit.trailers_dict instead",
        DeprecationWarning,
        stacklevel=2,
    )
    # trailers_dict keeps the values of a key in message order, so the last list
    # element is the latest instance, which is what this property documents.
    return {key: values[-1] for key, values in self.trailers_dict.items()}
@property
def trailers_list(self) -> List[Tuple[str, str]]:
    """Get the trailers of the message as a list.

    Git messages can contain trailer information that are similar to :rfc:`822`
    e-mail headers. See :manpage:`git-interpret-trailers(1)`.

    This function calls ``git interpret-trailers --parse`` onto the message to
    extract the trailer information, returns the raw trailer data as a list.

    Valid message with trailer::

        Subject line

        some body information

        another information

        key1: value1.1
        key1: value1.2
        key2 :    value 2 with inner spaces

    Returned list will look like this::

        [
            ("key1", "value1.1"),
            ("key1", "value1.2"),
            ("key2", "value 2 with inner spaces"),
        ]

    :return:
        List containing key-value tuples of whitespace stripped trailer information.
    """
    cmd = ["git", "interpret-trailers", "--parse"]
    proc: Git.AutoInterrupt = self.repo.git.execute(  # type: ignore[call-overload]
        cmd,
        as_process=True,
        istream=PIPE,
    )

    # A bytes message is passed through unchanged: str() on bytes would send its
    # repr (``b'...'``) to git instead of the message itself.
    message = self.message
    if isinstance(message, bytes):
        payload = message
    else:
        payload = str(message).encode()

    trailer: str = proc.communicate(payload)[0].decode("utf8")
    trailer = trailer.strip()
    if not trailer:
        return []

    trailer_list = []
    for line in trailer.split("\n"):
        # Split on the first colon only, so values may contain colons.
        key, val = line.split(":", 1)
        trailer_list.append((key.strip(), val.strip()))
    return trailer_list
@property
def trailers_dict(self) -> Dict[str, List[str]]:
    """Get the trailers of the message as a dictionary.

    Git messages can contain trailer information that are similar to :rfc:`822`
    e-mail headers. See :manpage:`git-interpret-trailers(1)`.

    The key-value pairs come from :attr:`trailers_list` and are grouped by key,
    keeping the values of each key in the order in which they were listed.

    Valid message with trailer::

        Subject line

        some body information

        another information

        key1: value1.1
        key1: value1.2
        key2 :    value 2 with inner spaces

    Returned dictionary will look like this::

        {
            "key1": ["value1.1", "value1.2"],
            "key2": ["value 2 with inner spaces"],
        }

    :return:
        Dictionary containing whitespace stripped trailer information, mapping
        trailer keys to a list of their corresponding values.
    """
    grouped: Dict[str, List[str]] = {}
    for trailer_key, trailer_value in self.trailers_list:
        grouped.setdefault(trailer_key, []).append(trailer_value)
    return grouped
@classmethod
def _iter_from_process_or_stream(cls, repo: "Repo", proc_or_stream: Union[Popen, IO]) -> Iterator["Commit"]:
    """Parse out commit information into a list of :class:`Commit` objects.

    We expect one line per commit, and parse the actual commit information directly
    from our lightning fast object database.

    :param repo:
        Repository passed on to every created :class:`Commit`.

    :param proc_or_stream:
        :manpage:`git-rev-list(1)` process instance, or a stream with a
        ``readline`` method - one sha per line. Anything with a ``wait`` attribute
        is treated as a process and read through its ``stdout``.

    :return:
        Iterator supplying :class:`Commit` objects

    :raise ValueError:
        If a process is given whose ``stdout`` is ``None``.

    :raise TypeError:
        If the argument is neither a process nor a readable stream.

    :note:
        This is a generator, so the errors above surface on the first iteration.
    """
    if hasattr(proc_or_stream, "wait"):
        proc_or_stream = cast(Popen, proc_or_stream)
        if proc_or_stream.stdout is None:
            # Without this check the loop below would fail on an unbound name.
            raise ValueError("Process has no stdout to read commit shas from")
        stream = proc_or_stream.stdout
    elif hasattr(proc_or_stream, "readline"):
        proc_or_stream = cast(IO, proc_or_stream)  # type: ignore[redundant-cast]
        stream = proc_or_stream
    else:
        raise TypeError("Expected a process or a stream, got %r" % (proc_or_stream,))

    readline = stream.readline
    while True:
        line = readline()
        if not line:
            break
        hexsha = line.strip()
        if len(hexsha) > 40:
            # Split additional information, as returned by bisect for instance.
            hexsha, _ = line.split(None, 1)
        # END handle extra info

        assert len(hexsha) == 40, "Invalid line: %s" % hexsha
        yield cls(repo, hex_to_bin(hexsha))
    # END for each line in stream

    # TODO: Review this - it seems process handling got a bit out of control due to
    # many developers trying to fix the open file handles issue.
    if hasattr(proc_or_stream, "wait"):
        proc_or_stream = cast(Popen, proc_or_stream)
        finalize_process(proc_or_stream)
@classmethod
def create_from_tree(
    cls,
    repo: "Repo",
    tree: Union[Tree, str],
    message: str,
    parent_commits: Union[None, List["Commit"]] = None,
    head: bool = False,
    author: Union[None, Actor] = None,
    committer: Union[None, Actor] = None,
    author_date: Union[None, str, datetime.datetime] = None,
    commit_date: Union[None, str, datetime.datetime] = None,
) -> "Commit":
    """Commit the given tree, creating a :class:`Commit` object.

    :param repo:
        :class:`~git.repo.base.Repo` object the commit should be part of.

    :param tree:
        :class:`~git.objects.tree.Tree` object, or a string that is resolved with
        ``repo.tree``. The tree of the new commit.

    :param message:
        Commit message. It may be an empty string if no message is provided.

    :param parent_commits:
        Optional :class:`Commit` objects to use as parents for the new commit. If
        empty list, the commit will have no parents at all and become a root commit.
        If ``None``, the current head commit will be the parent of the new commit
        object.

    :param head:
        If ``True``, the HEAD will be advanced to the new commit automatically.
        Otherwise the HEAD will remain pointing on the previous commit. This could
        lead to undesired results when diffing files.

    :param author:
        The author as an :class:`~git.util.Actor`, optional.
        If unset, the repository configuration is used to obtain this value.

    :param committer:
        The committer as an :class:`~git.util.Actor`, optional.
        If unset, the repository configuration is used to obtain this value.

    :param author_date:
        The timestamp for the author field. Falls back to the
        ``GIT_AUTHOR_DATE`` environment variable, then to the current time.

    :param commit_date:
        The timestamp for the committer field. Falls back to the
        ``GIT_COMMITTER_DATE`` environment variable, then to the current time.

    :return:
        :class:`Commit` object representing the new commit.

    :raise ValueError:
        If an entry of `parent_commits` is not an instance of this class.

    :raise TypeError:
        If the configured commit encoding is not a string.

    :note:
        Additional information about the committer and author are taken from the
        environment or from the git configuration. See :manpage:`git-commit-tree(1)`
        for more information.
    """
    # PARENTS
    if parent_commits is not None:
        for candidate in parent_commits:
            if not isinstance(candidate, cls):
                raise ValueError(f"Parent commit '{candidate!r}' must be of type {cls}")
    else:
        try:
            parent_commits = [repo.head.commit]
        except ValueError:
            # Empty repositories have no head commit.
            parent_commits = []

    # Retrieve all additional information, create a commit object, and serialize it.
    # Generally:
    # * Environment variables override configuration values.
    # * Sensible defaults are set according to the git documentation.

    # COMMITTER AND AUTHOR INFO
    cr = repo.config_reader()
    env = os.environ

    committer = committer or Actor.committer(cr)
    author = author or Actor.author(cr)

    # DATES
    unix_time = int(time())
    is_dst = daylight and localtime().tm_isdst > 0
    offset = altzone if is_dst else timezone

    def resolve_date(explicit: Any, env_key: str) -> Any:
        # Precedence: explicit argument, then environment variable, then "now".
        from_environment = env.get(env_key, "")
        if explicit:
            return parse_date(explicit)
        if from_environment:
            return parse_date(from_environment)
        return unix_time, offset

    author_time, author_offset = resolve_date(author_date, cls.env_author_date)
    committer_time, committer_offset = resolve_date(commit_date, cls.env_committer_date)

    # ENCODING - taken from the configuration, UTF-8 unless configured otherwise.
    enc_section, enc_option = cls.conf_encoding.split(".")
    conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
    if not isinstance(conf_encoding, str):
        raise TypeError("conf_encoding could not be coerced to str")

    # If the tree is no object, make sure we create one - otherwise the created
    # commit object is invalid.
    if isinstance(tree, str):
        tree = repo.tree(tree)

    # CREATE NEW COMMIT
    new_commit = cls(
        repo,
        cls.NULL_BIN_SHA,
        tree,
        author,
        author_time,
        author_offset,
        committer,
        committer_time,
        committer_offset,
        message,
        parent_commits,
        conf_encoding,
    )
    new_commit.binsha = cls._calculate_sha_(repo, new_commit)

    if not head:
        return new_commit

    # ADVANCE HEAD
    # Need late import here, importing git at the very beginning throws as well...
    import git.refs

    try:
        repo.head.set_commit(new_commit, logmsg=message)
    except ValueError:
        # head is not yet set to the ref our HEAD points to.
        # Happens on first commit.
        master = git.refs.Head.create(
            repo,
            repo.head.ref,
            new_commit,
            logmsg="commit (initial): %s" % message,
        )
        repo.head.set_reference(master, logmsg="commit: Switching to %s" % master)

    return new_commit
# { Serializable Implementation
def _serialize(self, stream: BytesIO) -> "Commit":
    """Write this commit in git's commit object format to `stream`.

    The headers are written in this order: ``tree``, one ``parent`` line per
    parent, ``author``, ``committer``, ``encoding`` (only if it differs from the
    default) and ``gpgsig`` (only if set and non-empty). An empty line and the
    message follow.

    :param stream:
        Writable stream receiving the serialized bytes.

    :return:
        self
    """
    emit = stream.write

    emit(("tree %s\n" % self.tree).encode("ascii"))
    for parent in self.parents:
        emit(("parent %s\n" % parent).encode("ascii"))

    author = self.author
    committer = self.committer

    def identity_line(role: str, actor: Actor, timestamp: Any, tz_offset: Any) -> bytes:
        # "<role> <name> <email> <timestamp> <utc offset>", in the commit's encoding.
        text = "%s %s <%s> %s %s\n" % (
            role,
            actor.name,
            actor.email,
            timestamp,
            altz_to_utctz_str(tz_offset),
        )
        return text.encode(self.encoding)

    emit(identity_line("author", author, self.authored_date, self.author_tz_offset))
    emit(identity_line("committer", committer, self.committed_date, self.committer_tz_offset))

    if self.encoding != self.default_encoding:
        emit(("encoding %s\n" % self.encoding).encode("ascii"))

    try:
        # NOTE(review): __getattribute__ is called explicitly, presumably so that an
        # unset gpgsig raises AttributeError here instead of being loaded on demand.
        if self.__getattribute__("gpgsig"):
            emit(b"gpgsig")
            # Every signature line is a continuation line, marked by a leading space.
            for signature_line in self.gpgsig.rstrip("\n").split("\n"):
                emit((" " + signature_line + "\n").encode("ascii"))
    except AttributeError:
        pass

    emit(b"\n")

    # Write plain bytes, be sure it's encoded according to our encoding.
    body = self.message
    emit(body.encode(self.encoding) if isinstance(body, str) else body)
    return self
def _deserialize(self, stream: BytesIO) -> "Commit":
    """Populate this commit's attributes from its serialized form.

    Lines are read in this order: the ``tree`` line, any ``parent`` lines, the
    author line, the committer line, optional ``mergetag`` blocks (skipped), then
    the remaining headers (``encoding`` and ``gpgsig`` are understood) up to the
    first empty line. Everything after that is the message.

    :param stream:
        Readable stream positioned at the start of the commit data.

    :return:
        self

    :note:
        An ``encoding`` header naming a codec unknown to Python is logged and
        ignored, so the default encoding stays in effect for such commits.
    """
    # Local import: only needed to validate the declared encoding.
    import codecs

    readline = stream.readline
    self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, "")

    self.parents = []
    next_line = None
    while True:
        parent_line = readline()
        if not parent_line.startswith(b"parent"):
            next_line = parent_line
            break
        # END abort reading parents
        self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1].decode("ascii"))))
    # END for each parent line
    self.parents = tuple(self.parents)

    # We don't know actual author encoding before we have parsed it, so keep the
    # lines around.
    author_line = next_line
    committer_line = readline()

    # We might run into one or more mergetag blocks, skip those for now.
    next_line = readline()
    while next_line.startswith(b"mergetag "):
        next_line = readline()
        while next_line.startswith(b" "):
            next_line = readline()
    # END skip mergetags

    # Now we can have the encoding line, or an empty line followed by the optional
    # message.
    self.encoding = self.default_encoding
    self.gpgsig = ""

    # Read headers.
    enc = next_line
    buf = enc.strip()
    while buf:
        # startswith() is used on purpose: comparing a fixed-width slice with the
        # 9-byte prefix can never match a header line that carries a value.
        if buf.startswith(b"encoding "):
            declared = buf[buf.find(b" ") + 1 :].decode(self.encoding, "ignore")
            try:
                codecs.lookup(declared)
            except LookupError:
                # Keep the default rather than fail on every later decode.
                _logger.warning(
                    "Ignoring unknown commit encoding '%s', using %s instead",
                    declared,
                    self.encoding,
                )
            else:
                self.encoding = declared
        elif buf[0:7] == b"gpgsig ":
            sig = buf[buf.find(b" ") + 1 :] + b"\n"
            is_next_header = False
            while True:
                sigbuf = readline()
                if not sigbuf:
                    break
                if sigbuf[0:1] != b" ":
                    # Not a continuation line: it is the next header (or the
                    # empty line ending the headers), handled by the outer loop.
                    buf = sigbuf.strip()
                    is_next_header = True
                    break
                sig += sigbuf[1:]
            # END read all signature
            self.gpgsig = sig.rstrip(b"\n").decode(self.encoding, "ignore")
            if is_next_header:
                continue
        buf = readline().strip()

    # Decode the author's name.
    try:
        (
            self.author,
            self.authored_date,
            self.author_tz_offset,
        ) = parse_actor_and_date(author_line.decode(self.encoding, "replace"))
    except UnicodeDecodeError:
        _logger.error(
            "Failed to decode author line '%s' using encoding %s",
            author_line,
            self.encoding,
            exc_info=True,
        )

    try:
        (
            self.committer,
            self.committed_date,
            self.committer_tz_offset,
        ) = parse_actor_and_date(committer_line.decode(self.encoding, "replace"))
    except UnicodeDecodeError:
        _logger.error(
            "Failed to decode committer line '%s' using encoding %s",
            committer_line,
            self.encoding,
            exc_info=True,
        )
    # END handle author's encoding

    # Whatever is left in the stream is the plain message; it is taken as is.
    self.message = stream.read()
    try:
        self.message = self.message.decode(self.encoding, "replace")
    except UnicodeDecodeError:
        _logger.error(
            "Failed to decode message '%s' using encoding %s",
            self.message,
            self.encoding,
            exc_info=True,
        )
    # END exception handling

    return self
# } END serializable implementation
@property
def co_authors(self) -> List[Actor]:
    """Search the commit message for any co-authors of this commit.

    Details on co-authors:
    https://github.blog/2018-01-29-commit-together-with-co-authors/

    :return:
        List of co-authors for this commit (as :class:`~git.util.Actor` objects).
        Empty if the message is empty or names no co-authors.
    """
    message = self.message
    if not message:
        return []

    # The pattern is a text pattern, so a bytes message is decoded first instead
    # of letting the search fail with a TypeError.
    if isinstance(message, bytes):
        message = message.decode(self.encoding, "replace")

    matches = re.findall(
        r"^Co-authored-by: (.*) <(.*?)>$",
        message,
        re.MULTILINE,
    )
    # Each match is a (name, email) pair.
    return [Actor(*name_and_email) for name_and_email in matches]

View File

@@ -0,0 +1,281 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Functions that are supposed to be as fast as possible."""
__all__ = [
"tree_to_stream",
"tree_entries_from_data",
"traverse_trees_recursive",
"traverse_tree_recursive",
]
from stat import S_ISDIR
from git.compat import safe_decode, defenc
# typing ----------------------------------------------
from typing import (
Callable,
List,
MutableSequence,
Sequence,
Tuple,
TYPE_CHECKING,
Union,
overload,
)
if TYPE_CHECKING:
from _typeshed import ReadableBuffer
from git import GitCmdObjectDB
EntryTup = Tuple[bytes, int, str] # Same as TreeCacheTup in tree.py.
EntryTupOrNone = Union[EntryTup, None]
# ---------------------------------------------------
def tree_to_stream(entries: Sequence[EntryTup], write: Callable[["ReadableBuffer"], Union[int, None]]) -> None:
    """Write the given list of entries into a stream using its ``write`` method.

    Each entry is written as ``<octal mode> <name>\\0<binsha>``.

    :param entries:
        **Sorted** list of tuples with (binsha, mode, name).

    :param write:
        A ``write`` method which takes a data string.
    """
    ascii_zero = ord("0")
    octal_digit_mask = 7  # 3 bits set.

    for binsha, mode, name in entries:
        # Six octal digits, most significant first (shifts 15, 12, ..., 0).
        mode_str = bytes(((mode >> shift) & octal_digit_mask) + ascii_zero for shift in range(15, -1, -3))

        # git slices away the first octal if it's zero.
        if mode_str[0] == ascii_zero:
            mode_str = mode_str[1:]

        # Here it comes: If the name is actually unicode, the replacement below will
        # not work as the binsha is not part of the ascii unicode encoding - hence
        # we must convert to an UTF-8 string for it to work properly. According to
        # my tests, this is exactly what git does, that is it just takes the input
        # literally, which appears to be UTF-8 on linux.
        # Anything that is not str is passed through untouched.
        name_bytes = name.encode(defenc) if isinstance(name, str) else name

        write(b"".join((mode_str, b" ", name_bytes, b"\0", binsha)))
def tree_entries_from_data(data: bytes) -> List[EntryTup]:
    """Read the binary representation of a tree and returns tuples of
    :class:`~git.objects.tree.Tree` items.

    Each record is laid out as ``<octal mode> <name>\\0<20 byte sha>``.

    :param data:
        Data block with tree data (as bytes).

    :return:
        list(tuple(binsha, mode, tree_relative_path), ...)
    """
    ascii_zero = ord("0")
    ascii_space = ord(" ")
    total = len(data)

    entries = []
    pos = 0
    while pos < total:
        # Mode: octal digits up to the space. Some git versions truncate the
        # leading 0, some don't. The type is extracted from the mode later.
        mode = 0
        while True:
            byte = data[pos]
            if byte == ascii_space:
                break
            # Shift by one octal digit (3 bits) and add the digit just read.
            mode = (mode << 3) + (byte - ascii_zero)
            pos += 1
        pos += 1  # Skip the space.

        # Name: everything up to the NULL byte.
        name_start = pos
        while data[pos] != 0:
            pos += 1
        # Default encoding for strings in git is UTF-8.
        name = safe_decode(data[name_start:pos])
        pos += 1  # Skip the NULL byte.

        # Sha: the next 20 bytes.
        sha_end = pos + 20
        entries.append((data[pos:sha_end], mode, name))
        pos = sha_end

    return entries
def _find_by_name(tree_data: MutableSequence[EntryTupOrNone], name: str, is_dir: bool, start_at: int) -> EntryTupOrNone:
    """Return data entry matching the given name and tree mode or ``None``.

    The slot at `start_at` is tried first; if it does not match (or does not
    exist), the whole sequence is scanned from the beginning.

    Before the item is returned, the respective data item is set None in the `tree_data`
    list to mark it done.
    """

    def is_match(entry: EntryTupOrNone) -> bool:
        # An entry matches when it is still pending (not None), carries the
        # requested name and agrees on being a directory or not.
        return bool(entry and entry[2] == name and S_ISDIR(entry[1]) == is_dir)

    # Fast path: the entry often sits at the same position in every tree.
    try:
        candidate = tree_data[start_at]
        if is_match(candidate):
            tree_data[start_at] = None
            return candidate
    except IndexError:
        pass
    # END exception handling

    # Slow path: linear scan over all entries.
    for position, candidate in enumerate(tree_data):
        if is_match(candidate):
            tree_data[position] = None
            return candidate
    # END for each item
    return None
@overload
def _to_full_path(item: None, path_prefix: str) -> None: ...


@overload
def _to_full_path(item: EntryTup, path_prefix: str) -> EntryTup: ...


def _to_full_path(item: EntryTupOrNone, path_prefix: str) -> EntryTupOrNone:
    """Rebuild entry with given path prefix.

    A falsy `item` (e.g. ``None``) is handed back unchanged; otherwise a new
    tuple is returned whose path element is `path_prefix` followed by the
    entry's own path.
    """
    if item:
        return (item[0], item[1], path_prefix + item[2])
    return item
def traverse_trees_recursive(
    odb: "GitCmdObjectDB", tree_shas: Sequence[Union[bytes, None]], path_prefix: str
) -> List[Tuple[EntryTupOrNone, ...]]:
    """
    :return:
        List of tuples with entries according to the given binary tree-shas.

        Each tuple holds n tuple|None items per blob/commit
        (n == len(tree_shas)), where:

        * [0] == 20 byte sha
        * [1] == mode as int
        * [2] == path relative to working tree root

        The entry tuple is ``None`` if the respective blob/commit did not exist in the
        given tree.

    :param tree_shas:
        Iterable of shas pointing to trees. All trees must be on the same level.
        A tree-sha may be ``None``, in which case that tree is treated as empty
        and its slot in every result tuple is ``None``.

    :param path_prefix:
        A prefix to be added to the returned paths on this level.
        Set it ``""`` for the first iteration.

    :note:
        The ordering of the returned items will be partially lost.
    """
    nt = len(tree_shas)

    # One mutable entry list per tree; a missing tree contributes no entries.
    # A fresh list is built for each tree because entries get set to None below.
    trees_data: List[List[EntryTupOrNone]] = [
        [] if tree_sha is None else list(tree_entries_from_data(odb.stream(tree_sha).read()))
        for tree_sha in tree_shas
    ]

    out: List[Tuple[EntryTupOrNone, ...]] = []

    # Find all matching entries and recursively process them together if the match is a
    # tree. If the match is a non-tree item, put it into the result.
    # Processed items will be set None.
    for ti, tree_data in enumerate(trees_data):
        for ii, item in enumerate(tree_data):
            if not item:
                # Already consumed as a match for an earlier tree.
                continue

            entries: List[EntryTupOrNone] = [None] * nt
            entries[ti] = item
            _sha, mode, name = item
            is_dir = S_ISDIR(mode)  # Type mode bits

            # Look the entry up in every other tree, wrapping around and
            # stopping just before we would reach our own index again.
            for offset in range(1, nt):
                other = (ti + offset) % nt
                entries[other] = _find_by_name(trees_data[other], name, is_dir, ii)
            # END for each other tree

            if is_dir:
                # Descend into the matching sub-trees of all inputs together.
                sub_shas = [(entry[0] or None) if entry else None for entry in entries]
                out.extend(traverse_trees_recursive(odb, sub_shas, path_prefix + name + "/"))
            else:
                out.append(tuple(_to_full_path(entry, path_prefix) for entry in entries))
            # END handle recursion

            # Finally mark it done.
            tree_data[ii] = None
        # END for each item

        # We are done with one tree, set all its data empty.
        del tree_data[:]
    # END for each tree_data chunk
    return out
def traverse_tree_recursive(odb: "GitCmdObjectDB", tree_sha: bytes, path_prefix: str) -> List[EntryTup]:
    """
    :return:
        List of entries of the tree pointed to by the binary `tree_sha`.
        Sub-trees are expanded recursively, so only non-directory entries appear.

        An entry has the following format:

        * [0] 20 byte sha
        * [1] mode as int
        * [2] path relative to the repository

    :param path_prefix:
        Prefix to prepend to the front of all returned paths.
    """
    collected = []
    # Unpacking/packing is faster than accessing individual items.
    for sha, mode, name in tree_entries_from_data(odb.stream(tree_sha).read()):
        full_path = path_prefix + name
        if S_ISDIR(mode):
            collected.extend(traverse_tree_recursive(odb, sha, full_path + "/"))
        else:
            collected.append((sha, mode, full_path))
    # END for each item
    return collected

View File

@@ -0,0 +1,7 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["Submodule", "UpdateProgress", "RootModule", "RootUpdateProgress"]
from .base import Submodule, UpdateProgress
from .root import RootModule, RootUpdateProgress

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,467 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["RootModule", "RootUpdateProgress"]
import logging
import git
from git.exc import InvalidGitRepositoryError
from .base import Submodule, UpdateProgress
from .util import find_first_remote_branch
# typing -------------------------------------------------------------------
from typing import TYPE_CHECKING, Union
from git.types import Commit_ish
if TYPE_CHECKING:
from git.repo import Repo
from git.util import IterableList
# ----------------------------------------------------------------------------
_logger = logging.getLogger(__name__)
class RootUpdateProgress(UpdateProgress):
    """Progress handler that extends
    :class:`~git.objects.submodule.base.UpdateProgress` with four more opcodes
    used by :meth:`RootModule.update`."""

    # The new flags take the next four free bit positions after the ones
    # already counted by the parent class.
    REMOVE, PATHCHANGE, BRANCHCHANGE, URLCHANGE = (
        1 << (UpdateProgress._num_op_codes + offset) for offset in range(4)
    )
    _num_op_codes = UpdateProgress._num_op_codes + 4

    __slots__ = ()
# Module-level aliases for the op codes, so that the code below can refer to
# them by their bare names (e.g. ``BEGIN | PATHCHANGE``).
BEGIN = RootUpdateProgress.BEGIN
END = RootUpdateProgress.END
REMOVE = RootUpdateProgress.REMOVE
BRANCHCHANGE = RootUpdateProgress.BRANCHCHANGE
URLCHANGE = RootUpdateProgress.URLCHANGE
PATHCHANGE = RootUpdateProgress.PATHCHANGE
class RootModule(Submodule):
    """A (virtual) root of all submodules in the given repository.

    This can be used to more easily traverse all submodules of the
    superproject (master repository).
    """

    __slots__ = ()

    k_root_name = "__ROOT__"

    def __init__(self, repo: "Repo") -> None:
        """Create the virtual root for `repo`.

        The root uses the null sha, an empty path and url, the repository's
        current HEAD commit as parent commit, and the default head as branch.
        """
        # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None)
        super().__init__(
            repo,
            binsha=self.NULL_BIN_SHA,
            mode=self.k_default_mode,
            path="",
            name=self.k_root_name,
            parent_commit=repo.head.commit,
            url="",
            branch_path=git.Head.to_full_path(self.k_head_default),
        )

    def _clear_cache(self) -> None:
        """May not do anything."""
        pass

    # { Interface

    def update(  # type: ignore[override]
        self,
        previous_commit: Union[Commit_ish, str, None] = None,
        recursive: bool = True,
        force_remove: bool = False,
        init: bool = True,
        to_latest_revision: bool = False,
        progress: Union[None, "RootUpdateProgress"] = None,
        dry_run: bool = False,
        force_reset: bool = False,
        keep_going: bool = False,
    ) -> "RootModule":
        """Update the submodules of this repository to the current HEAD commit.

        This method behaves smartly by determining changes of the path of a submodule's
        repository, next to changes to the to-be-checked-out commit or the branch to be
        checked out. This works if the submodule's ID does not change.

        Additionally it will detect addition and removal of submodules, which will be
        handled gracefully.

        :param previous_commit:
            If set to a commit-ish, the commit we should use as the previous commit the
            HEAD pointed to before it was set to the commit it points to now.
            If ``None``, the old sha of the last HEAD reflog entry is used; if that
            is unavailable, the current commit is used.

        :param recursive:
            If ``True``, the children of submodules will be updated as well using the
            same technique.

        :param force_remove:
            If submodules have been deleted, they will be forcibly removed. Otherwise
            the update may fail if a submodule's repository cannot be deleted as changes
            have been made to it.
            (See :meth:`Submodule.update <git.objects.submodule.base.Submodule.update>`
            for more information.)

        :param init:
            If we encounter a new module which would need to be initialized, then do it.

        :param to_latest_revision:
            If ``True``, instead of checking out the revision pointed to by this
            submodule's sha, the checked out tracking branch will be merged with the
            latest remote branch fetched from the repository's origin.

            Unless `force_reset` is specified, a local tracking branch will never be
            reset into its past, therefore the remote branch must be in the future for
            this to have an effect.

        :param force_reset:
            If ``True``, submodules may checkout or reset their branch even if the
            repository has pending changes that would be overwritten, or if the local
            tracking branch is in the future of the remote tracking branch and would be
            reset into its past.

        :param progress:
            :class:`RootUpdateProgress` instance, or ``None`` if no progress should be
            sent.

        :param dry_run:
            If ``True``, operations will not actually be performed. Progress messages
            will change accordingly to indicate the WOULD DO state of the operation.

        :param keep_going:
            If ``True``, we will ignore but log all errors, and keep going recursively.
            Unless `dry_run` is set as well, `keep_going` could cause
            subsequent/inherited errors you wouldn't see otherwise.
            In conjunction with `dry_run`, this can be useful to anticipate all errors
            when updating submodules.

        :return:
            self

        :raise git.exc.InvalidGitRepositoryError:
            If the repository is bare.
        """
        if self.repo.bare:
            raise InvalidGitRepositoryError("Cannot update submodules in bare repositories")
        # END handle bare

        if progress is None:
            progress = RootUpdateProgress()
        # END ensure progress is set

        prefix = ""
        if dry_run:
            prefix = "DRY-RUN: "

        repo = self.repo

        try:
            # SETUP BASE COMMIT
            ###################
            cur_commit = repo.head.commit
            if previous_commit is None:
                try:
                    previous_commit = repo.commit(repo.head.log_entry(-1).oldhexsha)
                    if previous_commit.binsha == previous_commit.NULL_BIN_SHA:
                        raise IndexError
                    # END handle initial commit
                except IndexError:
                    # In new repositories, there is no previous commit.
                    previous_commit = cur_commit
                # END exception handling
            else:
                previous_commit = repo.commit(previous_commit)  # Obtain commit object.
            # END handle previous commit

            psms: "IterableList[Submodule]" = self.list_items(repo, parent_commit=previous_commit)
            sms: "IterableList[Submodule]" = self.list_items(repo)
            spsms = set(psms)
            ssms = set(sms)

            # HANDLE REMOVALS
            ###################
            rrsm = spsms - ssms
            len_rrsm = len(rrsm)

            for i, rsm in enumerate(rrsm):
                op = REMOVE
                if i == 0:
                    op |= BEGIN
                # END handle begin

                # Fake it into thinking its at the current commit to allow deletion
                # of previous module. Trigger the cache to be updated before that.
                progress.update(
                    op,
                    i,
                    len_rrsm,
                    prefix + "Removing submodule %r at %s" % (rsm.name, rsm.abspath),
                )
                rsm._parent_commit = repo.head.commit
                rsm.remove(
                    configuration=False,
                    module=True,
                    force=force_remove,
                    dry_run=dry_run,
                )

                if i == len_rrsm - 1:
                    op |= END
                # END handle end
                progress.update(op, i, len_rrsm, prefix + "Done removing submodule %r" % rsm.name)
            # END for each removed submodule

            # HANDLE PATH RENAMES
            #####################
            # URL changes + branch changes.
            csms = spsms & ssms
            len_csms = len(csms)
            for i, csm in enumerate(csms):
                psm: "Submodule" = psms[csm.name]
                sm: "Submodule" = sms[csm.name]

                # PATH CHANGES
                ##############
                if sm.path != psm.path and psm.module_exists():
                    progress.update(
                        BEGIN | PATHCHANGE,
                        i,
                        len_csms,
                        prefix + "Moving repository of submodule %r from %s to %s" % (sm.name, psm.abspath, sm.abspath),
                    )
                    # Move the module to the new path.
                    if not dry_run:
                        psm.move(sm.path, module=True, configuration=False)
                    # END handle dry_run
                    progress.update(
                        END | PATHCHANGE,
                        i,
                        len_csms,
                        prefix + "Done moving repository of submodule %r" % sm.name,
                    )
                # END handle path changes

                if sm.module_exists():
                    # HANDLE URL CHANGE
                    ###################
                    if sm.url != psm.url:
                        # Add the new remote, remove the old one.
                        # This way, if the url just changes, the commits will not have
                        # to be re-retrieved.
                        nn = "__new_origin__"
                        smm = sm.module()
                        rmts = smm.remotes

                        # Don't do anything if we already have the url we search in
                        # place.
                        if len([r for r in rmts if r.url == sm.url]) == 0:
                            progress.update(
                                BEGIN | URLCHANGE,
                                i,
                                len_csms,
                                prefix + "Changing url of submodule %r from %s to %s" % (sm.name, psm.url, sm.url),
                            )

                            if not dry_run:
                                assert nn not in [r.name for r in rmts]
                                smr = smm.create_remote(nn, sm.url)
                                smr.fetch(progress=progress)

                                # If we have a tracking branch, it should be available
                                # in the new remote as well.
                                if len([r for r in smr.refs if r.remote_head == sm.branch_name]) == 0:
                                    raise ValueError(
                                        "Submodule branch named %r was not available in new submodule remote at %r"
                                        % (sm.branch_name, sm.url)
                                    )
                                # END head is not detached

                                # Now delete the changed one.
                                rmt_for_deletion = None
                                for remote in rmts:
                                    if remote.url == psm.url:
                                        rmt_for_deletion = remote
                                        break
                                    # END if urls match
                                # END for each remote

                                # If we didn't find a matching remote, but have exactly
                                # one, we can safely use this one.
                                if rmt_for_deletion is None:
                                    if len(rmts) == 1:
                                        rmt_for_deletion = rmts[0]
                                    else:
                                        # If we have not found any remote with the
                                        # original URL we may not have a name. This is a
                                        # special case, and its okay to fail here.
                                        # Alternatively we could just generate a unique
                                        # name and leave all existing ones in place.
                                        raise InvalidGitRepositoryError(
                                            "Couldn't find original remote-repo at url %r" % psm.url
                                        )
                                    # END handle one single remote
                                # END handle check we found a remote

                                orig_name = rmt_for_deletion.name
                                smm.delete_remote(rmt_for_deletion)
                                # NOTE: Currently we leave tags from the deleted remotes
                                # as well as separate tracking branches in the possibly
                                # totally changed repository (someone could have changed
                                # the url to another project). At some point, one might
                                # want to clean it up, but the danger is high to remove
                                # stuff the user has added explicitly.

                                # Rename the new remote back to what it was.
                                smr.rename(orig_name)

                                # Early on, we verified that the submodule's tracking
                                # branch exists in the remote. Now we have to ensure
                                # that the sha we point to is still contained in the new
                                # remote tracking branch.
                                smsha = sm.binsha
                                found = False
                                # Look up the *submodule's* branch (the one verified
                                # above), not the branch of the virtual root.
                                rref = smr.refs[sm.branch_name]
                                for c in rref.commit.traverse():
                                    if c.binsha == smsha:
                                        found = True
                                        break
                                    # END traverse all commits in search for sha
                                # END for each commit

                                if not found:
                                    # Adjust our internal binsha to use the one of the
                                    # remote this way, it will be checked out in the
                                    # next step. This will change the submodule relative
                                    # to us, so the user will be able to commit the
                                    # change easily.
                                    _logger.warning(
                                        "Current sha %s was not contained in the tracking branch at the new remote, "
                                        "setting it to the remote's tracking branch",
                                        sm.hexsha,
                                    )
                                    sm.binsha = rref.commit.binsha
                                # END reset binsha

                                # NOTE: All checkout is performed by the base
                                # implementation of update.
                            # END handle dry_run
                            progress.update(
                                END | URLCHANGE,
                                i,
                                len_csms,
                                prefix + "Done adjusting url of submodule %r" % (sm.name),
                            )
                        # END skip remote handling if new url already exists in module
                    # END handle url

                    # HANDLE BRANCH CHANGES
                    #######################
                    if sm.branch_path != psm.branch_path:
                        # Finally, create a new tracking branch which tracks the new
                        # remote branch.
                        progress.update(
                            BEGIN | BRANCHCHANGE,
                            i,
                            len_csms,
                            prefix
                            + "Changing branch of submodule %r from %s to %s"
                            % (sm.name, psm.branch_path, sm.branch_path),
                        )
                        if not dry_run:
                            smm = sm.module()
                            smmr = smm.remotes
                            # As the branch might not exist yet, we will have to fetch
                            # all remotes to be sure...
                            for remote in smmr:
                                remote.fetch(progress=progress)
                            # END for each remote

                            try:
                                tbr = git.Head.create(
                                    smm,
                                    sm.branch_name,
                                    logmsg="branch: Created from HEAD",
                                )
                            except OSError:
                                # ...or reuse the existing one.
                                tbr = git.Head(smm, sm.branch_path)
                            # END ensure tracking branch exists

                            tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch_name))
                            # NOTE: All head-resetting is done in the base
                            # implementation of update but we will have to checkout the
                            # new branch here. As it still points to the currently
                            # checked out commit, we don't do any harm.
                            # As we don't want to update working-tree or index, changing
                            # the ref is all there is to do.
                            smm.head.reference = tbr
                        # END handle dry_run

                        progress.update(
                            END | BRANCHCHANGE,
                            i,
                            len_csms,
                            prefix + "Done changing branch of submodule %r" % sm.name,
                        )
                    # END handle branch
                # END handle
            # END for each common submodule
        except Exception as err:
            if not keep_going:
                raise
            _logger.error(str(err))
        # END handle keep_going

        # FINALLY UPDATE ALL ACTUAL SUBMODULES
        ######################################
        # NOTE(review): `sms` is assigned inside the try block above; if an error
        # is swallowed via `keep_going` before that assignment, it is unbound here.
        for sm in sms:
            # Update the submodule using the default method.
            sm.update(
                recursive=False,
                init=init,
                to_latest_revision=to_latest_revision,
                progress=progress,
                dry_run=dry_run,
                force=force_reset,
                keep_going=keep_going,
            )

            # Update recursively depth first - question is which inconsistent state will
            # be better in case it fails somewhere. Defective branch or defective depth.
            # The RootSubmodule type will never process itself, which was done in the
            # previous expression.
            if recursive:
                # The module would exist by now if we are not in dry_run mode.
                if sm.module_exists():
                    type(self)(sm.module()).update(
                        recursive=True,
                        force_remove=force_remove,
                        init=init,
                        to_latest_revision=to_latest_revision,
                        progress=progress,
                        dry_run=dry_run,
                        force_reset=force_reset,
                        keep_going=keep_going,
                    )
                # END handle dry_run
            # END handle recursive
        # END for each submodule to update

        return self

    def module(self) -> "Repo":
        """:return: The actual repository containing the submodules"""
        return self.repo

    # } END interface
# } END classes

View File

@@ -0,0 +1,121 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = [
"sm_section",
"sm_name",
"mkhead",
"find_first_remote_branch",
"SubmoduleConfigParser",
]
from io import BytesIO
import weakref
import git
from git.config import GitConfigParser
from git.exc import InvalidGitRepositoryError
# typing -----------------------------------------------------------------------
from typing import Any, Sequence, TYPE_CHECKING, Union
from git.types import PathLike
if TYPE_CHECKING:
from weakref import ReferenceType
from git.refs import Head, RemoteReference
from git.remote import Remote
from git.repo import Repo
from .base import Submodule
# { Utilities
def sm_section(name: str) -> str:
    """:return: Section title used in ``.gitmodules`` configuration file"""
    return 'submodule "{}"'.format(name)
def sm_name(section: str) -> str:
    """:return: Name of the submodule as parsed from the section name"""
    # After trimming whitespace, cut off the leading ``submodule "`` prefix
    # and the single trailing quote character.
    trimmed = section.strip()
    prefix_len = len('submodule "')
    return trimmed[prefix_len:-1]
def mkhead(repo: "Repo", path: PathLike) -> "Head":
    """:return: New branch/head instance for `path`, expanded to a full ref path"""
    full_ref_path = git.Head.to_full_path(path)
    return git.Head(repo, full_ref_path)
def find_first_remote_branch(remotes: Sequence["Remote"], branch_name: str) -> "RemoteReference":
    """Return the first reference named `branch_name` found in `remotes`.

    The remotes are tried in order; a remote whose ``refs`` lookup raises
    :exc:`IndexError` is skipped.

    :raise git.exc.InvalidGitRepositoryError:
        If none of the given remotes has such a branch.
    """
    for remote in remotes:
        try:
            branch = remote.refs[branch_name]
        except IndexError:
            # This remote does not have the branch, try the next one.
            continue
        return branch
    # END for remote
    raise InvalidGitRepositoryError("Didn't find remote branch '%r' in any of the given remotes" % branch_name)
# } END utilities
# { Classes
class SubmoduleConfigParser(GitConfigParser):
    """Catches calls to :meth:`~git.config.GitConfigParser.write`, and updates the
    ``.gitmodules`` blob in the index with the new data, if we have written into a
    stream.

    Otherwise it would add the local file to the index to make it correspond with the
    working tree. Additionally, the cache must be cleared.

    Please note that no mutating method will work in bare mode.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Weak reference to the owning submodule; set via set_submodule().
        self._smref: Union["ReferenceType[Submodule]", None] = None
        # Index to add the modules file to; None means the submodule's repo index.
        self._index = None
        # Passed as ``write=`` when adding to the index in flush_to_index().
        self._auto_write = True
        super().__init__(*args, **kwargs)

    # { Interface
    def set_submodule(self, submodule: "Submodule") -> None:
        """Set this instance's submodule. It must be called before the first write
        operation begins.

        Only a weak reference to `submodule` is kept.
        """
        self._smref = weakref.ref(submodule)

    def flush_to_index(self) -> None:
        """Flush changes in our configuration file to the index.

        Does nothing if the referenced submodule is no longer alive.
        """
        assert self._smref is not None
        # Should always have a file here.
        assert not isinstance(self._file_or_files, BytesIO)

        submodule = self._smref()
        if submodule is None:
            return
        # END handle weakref

        target_index = self._index if self._index is not None else submodule.repo.index
        target_index.add([submodule.k_modules_file], write=self._auto_write)
        submodule._clear_cache()

    # } END interface

    # { Overridden Methods
    def write(self) -> None:  # type: ignore[override]
        """Write the configuration, then flush the result to the index."""
        result: None = super().write()
        self.flush_to_index()
        return result

    # END overridden methods
# } END classes

View File

@@ -0,0 +1,140 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Provides an :class:`~git.objects.base.Object`-based type for annotated tags.
This defines the :class:`TagObject` class, which represents annotated tags.
For lightweight tags, see the :mod:`git.refs.tag` module.
"""
__all__ = ["TagObject"]
import sys
from git.compat import defenc
from git.util import Actor, hex_to_bin
from . import base
from .util import get_object_type_by_name, parse_actor_and_date
# typing ----------------------------------------------
from typing import List, TYPE_CHECKING, Union
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
if TYPE_CHECKING:
from git.repo import Repo
from .blob import Blob
from .commit import Commit
from .tree import Tree
# ---------------------------------------------------
class TagObject(base.Object):
    """Annotated (i.e. non-lightweight) tag carrying additional information about an
    object we are pointing to.

    See :manpage:`gitglossary(7)` on "tag object":
    https://git-scm.com/docs/gitglossary#def_tag_object
    """

    type: Literal["tag"] = "tag"

    __slots__ = (
        "object",
        "tag",
        "tagger",
        "tagged_date",
        "tagger_tz_offset",
        "message",
    )

    def __init__(
        self,
        repo: "Repo",
        binsha: bytes,
        object: Union[None, base.Object] = None,
        tag: Union[None, str] = None,
        tagger: Union[None, Actor] = None,
        tagged_date: Union[int, None] = None,
        tagger_tz_offset: Union[int, None] = None,
        message: Union[str, None] = None,
    ) -> None:  # @ReservedAssignment
        """Initialize a tag object with additional data.

        Only the arguments that are not ``None`` are stored on the instance.

        :param repo:
            Repository this object is located in.

        :param binsha:
            20 byte SHA1.

        :param object:
            :class:`~git.objects.base.Object` instance of object we are pointing to.

        :param tag:
            Name of this tag.

        :param tagger:
            :class:`~git.util.Actor` identifying the tagger.

        :param tagged_date: int_seconds_since_epoch
            The DateTime of the tag creation.
            Use :func:`time.gmtime` to convert it into a different format.

        :param tagger_tz_offset: int_seconds_west_of_utc
            The timezone that the `tagged_date` is in, in a format similar to
            :attr:`time.altzone`.

        :param message:
            Message text of this tag.
        """
        super().__init__(repo, binsha)

        if object is not None:
            self.object: Union["Commit", "Blob", "Tree", "TagObject"] = object
        if tag is not None:
            self.tag = tag
        if tagger is not None:
            self.tagger = tagger
        if tagged_date is not None:
            self.tagged_date = tagged_date
        if tagger_tz_offset is not None:
            self.tagger_tz_offset = tagger_tz_offset
        if message is not None:
            self.message = message

    def _set_cache_(self, attr: str) -> None:
        """Cache all our attributes at once.

        Requests for attributes that are not one of this class's slots are
        forwarded to the base implementation.
        """
        if attr not in TagObject.__slots__:
            super()._set_cache_(attr)
            return
        # END delegate foreign attributes

        ostream = self.repo.odb.stream(self.binsha)
        lines: List[str] = ostream.read().decode(defenc, "replace").splitlines()

        # First two lines: "<token> <hexsha>" and "<token> <type name>".
        _obj, hexsha = lines[0].split(" ")
        _type_token, type_name = lines[1].split(" ")
        target_type = get_object_type_by_name(type_name.encode("ascii"))
        self.object = target_type(self.repo, hex_to_bin(hexsha))

        self.tag = lines[2][4:]  # tag <tag name>

        if len(lines) > 3:
            # tagger <actor> <date>
            self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(lines[3])

        # Line 4 empty - it could mark the beginning of the next header.
        # In case there really is no message, it would not exist.
        # Otherwise a newline separates header from message.
        self.message = "\n".join(lines[5:]) if len(lines) > 5 else ""

View File

@@ -0,0 +1,414 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["TreeModifier", "Tree"]
import sys
import git.diff as git_diff
from git.util import IterableList, join_path, to_bin_sha
from . import util
from .base import IndexObjUnion, IndexObject
from .blob import Blob
from .fun import tree_entries_from_data, tree_to_stream
from .submodule.base import Submodule
# typing -------------------------------------------------
from typing import (
Any,
Callable,
Dict,
Iterable,
Iterator,
List,
Tuple,
TYPE_CHECKING,
Type,
Union,
cast,
)
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
from git.types import PathLike
if TYPE_CHECKING:
from io import BytesIO
from git.repo import Repo
# One tree cache entry: (binsha, mode, name).
TreeCacheTup = Tuple[bytes, int, str]
# NOTE(review): presumably the tuple shape yielded by edge-style traversal — confirm
# against the Traversable implementation, which is not visible here.
TraversedTreeTup = Union[Tuple[Union["Tree", None], IndexObjUnion, Tuple["Submodule", "Submodule"]]]
# --------------------------------------------------------
# Three-way string comparison: negative, zero or positive (-1, 0 or 1).
cmp: Callable[[str, str], int] = lambda a, b: (a > b) - (a < b)
class TreeModifier:
    """A utility class providing methods to alter the underlying cache in a list-like
    fashion.

    Once all adjustments are complete, the :attr:`_cache`, which really is a reference
    to the cache of a tree, will be sorted. This ensures it will be in a serializable
    state.
    """

    __slots__ = ("_cache",)

    def __init__(self, cache: List[TreeCacheTup]) -> None:
        # The list is kept by reference, so changes are visible to its owner.
        self._cache = cache

    def _index_by_name(self, name: str) -> int:
        """:return: index of an item with name, or -1 if not found"""
        return next((pos for pos, entry in enumerate(self._cache) if entry[2] == name), -1)

    # { Interface
    def set_done(self) -> "TreeModifier":
        """Call this method once you are done modifying the tree information.

        This may be called several times, but be aware that each call will cause a sort
        operation.

        :return:
            self
        """

        def sort_key(entry: TreeCacheTup) -> str:
            # Entries whose mode is exactly the tree mode sort as if their
            # name ended in "/"; everything else sorts by plain name.
            if entry[1] == Tree.tree_id << 12:
                return entry[2] + "/"
            return entry[2]

        self._cache.sort(key=sort_key)
        return self

    # } END interface

    # { Mutators
    def add(self, sha: bytes, mode: int, name: str, force: bool = False) -> "TreeModifier":
        """Add the given item to the tree.

        If an item with the given name already exists, nothing will be done, but a
        :exc:`ValueError` will be raised if the sha and mode of the existing item do not
        match the one you add, unless `force` is ``True``.

        :param sha:
            The 20 or 40 byte sha of the item to add.

        :param mode:
            :class:`int` representing the stat-compatible mode of the item.

        :param name:
            Name of the item within this tree. It must not contain ``/``.

        :param force:
            If ``True``, an item with your name and information will overwrite any
            existing item with the same name, no matter which information it has.

        :return:
            self
        """
        if "/" in name:
            raise ValueError("Name must not contain '/' characters")
        if (mode >> 12) not in Tree._map_id_to_type:
            raise ValueError("Invalid object type according to mode %o" % mode)

        sha = to_bin_sha(sha)
        position = self._index_by_name(name)
        new_entry = (sha, mode, name)

        if position == -1:
            # No entry of that name yet.
            self._cache.append(new_entry)
            return self
        # END handle new name

        if force:
            self._cache[position] = new_entry
            return self
        # END handle force

        existing = self._cache[position]
        if existing[0] != sha or existing[1] != mode:
            raise ValueError("Item %r existed with different properties" % name)
        # END handle mismatch
        return self

    def add_unchecked(self, binsha: bytes, mode: int, name: str) -> None:
        """Add the given item to the tree. Its correctness is assumed, so it is the
        caller's responsibility to ensure that the input is correct.

        For more information on the parameters, see :meth:`add`.

        :param binsha:
            20 byte binary sha.
        """
        assert isinstance(binsha, bytes) and isinstance(mode, int) and isinstance(name, str)
        self._cache.append((binsha, mode, name))

    def __delitem__(self, name: str) -> None:
        """Delete an item with the given name if it exists."""
        position = self._index_by_name(name)
        if position > -1:
            del self._cache[position]

    # } END mutators
class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable):
R"""Tree objects represent an ordered list of :class:`~git.objects.blob.Blob`\s and
other :class:`Tree`\s.
See :manpage:`gitglossary(7)` on "tree object":
https://git-scm.com/docs/gitglossary#def_tree_object
Subscripting is supported, as with a list or dict:
* Access a specific blob using the ``tree["filename"]`` notation.
* You may likewise access by index, like ``blob = tree[0]``.
"""
type: Literal["tree"] = "tree"
__slots__ = ("_cache",)
# Actual integer IDs for comparison.
commit_id = 0o16 # Equals stat.S_IFDIR | stat.S_IFLNK - a directory link.
blob_id = 0o10
symlink_id = 0o12
tree_id = 0o04
_map_id_to_type: Dict[int, Type[IndexObjUnion]] = {
commit_id: Submodule,
blob_id: Blob,
symlink_id: Blob,
# Tree ID added once Tree is defined.
}
def __init__(
self,
repo: "Repo",
binsha: bytes,
mode: int = tree_id << 12,
path: Union[PathLike, None] = None,
):
super().__init__(repo, binsha, mode, path)
@classmethod
def _get_intermediate_items(
cls,
index_object: IndexObjUnion,
) -> Union[Tuple["Tree", ...], Tuple[()]]:
if index_object.type == "tree":
return tuple(index_object._iter_convert_to_object(index_object._cache))
return ()
def _set_cache_(self, attr: str) -> None:
if attr == "_cache":
# Set the data when we need it.
ostream = self.repo.odb.stream(self.binsha)
self._cache: List[TreeCacheTup] = tree_entries_from_data(ostream.read())
else:
super()._set_cache_(attr)
# END handle attribute
def _iter_convert_to_object(self, iterable: Iterable[TreeCacheTup]) -> Iterator[IndexObjUnion]:
"""Iterable yields tuples of (binsha, mode, name), which will be converted to
the respective object representation.
"""
for binsha, mode, name in iterable:
path = join_path(self.path, name)
try:
yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path)
except KeyError as e:
raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) from e
# END for each item
def join(self, file: str) -> IndexObjUnion:
"""Find the named object in this tree's contents.
:return:
:class:`~git.objects.blob.Blob`, :class:`Tree`, or
:class:`~git.objects.submodule.base.Submodule`
:raise KeyError:
If the given file or tree does not exist in this tree.
"""
msg = "Blob or Tree named %r not found"
if "/" in file:
tree = self
item = self
tokens = file.split("/")
for i, token in enumerate(tokens):
item = tree[token]
if item.type == "tree":
tree = item
else:
# Safety assertion - blobs are at the end of the path.
if i != len(tokens) - 1:
raise KeyError(msg % file)
return item
# END handle item type
# END for each token of split path
if item == self:
raise KeyError(msg % file)
return item
else:
for info in self._cache:
if info[2] == file: # [2] == name
return self._map_id_to_type[info[1] >> 12](
self.repo, info[0], info[1], join_path(self.path, info[2])
)
# END for each obj
raise KeyError(msg % file)
# END handle long paths
def __truediv__(self, file: str) -> IndexObjUnion:
"""The ``/`` operator is another syntax for joining.
See :meth:`join` for details.
"""
return self.join(file)
@property
def trees(self) -> List["Tree"]:
""":return: list(Tree, ...) List of trees directly below this tree"""
return [i for i in self if i.type == "tree"]
@property
def blobs(self) -> List[Blob]:
""":return: list(Blob, ...) List of blobs directly below this tree"""
return [i for i in self if i.type == "blob"]
@property
def cache(self) -> TreeModifier:
"""
:return:
An object allowing modification of the internal cache. This can be used to
change the tree's contents. When done, make sure you call
:meth:`~TreeModifier.set_done` on the tree modifier, or serialization
behaviour will be incorrect.
:note:
See :class:`TreeModifier` for more information on how to alter the cache.
"""
return TreeModifier(self._cache)
def traverse(
    self,
    predicate: Callable[[Union[IndexObjUnion, TraversedTreeTup], int], bool] = lambda i, d: True,
    prune: Callable[[Union[IndexObjUnion, TraversedTreeTup], int], bool] = lambda i, d: False,
    depth: int = -1,
    branch_first: bool = True,
    visit_once: bool = False,
    ignore_self: int = 1,
    as_edge: bool = False,
) -> Union[Iterator[IndexObjUnion], Iterator[TraversedTreeTup]]:
    """For documentation, see
    `Traversable._traverse() <git.objects.util.Traversable._traverse>`.

    Trees are set to ``visit_once = False`` to gain more performance in the
    traversal.

    :note:
        `as_edge` is forwarded to the underlying traversal, so ``as_edge=True``
        yields ``(source, item)`` tuples as the return annotation promises.
        Previously the argument was accepted but silently dropped.
    """
    # The cast stands in for a runtime type guard over the yielded items; the
    # underlying iterator is returned untouched.
    return cast(
        Union[Iterator[IndexObjUnion], Iterator[TraversedTreeTup]],
        super()._traverse(
            predicate,  # type: ignore[arg-type]
            prune,  # type: ignore[arg-type]
            depth,
            branch_first,
            visit_once,
            ignore_self,
            as_edge,
        ),
    )
def list_traverse(self, *args: Any, **kwargs: Any) -> IterableList[IndexObjUnion]:
    """Collect the results of a traversal into a list.

    All arguments are passed on unchanged to the inherited ``_list_traverse()``.

    :return:
        :class:`~git.util.IterableList` with the results of the traversal as
        produced by :meth:`traverse`

            Tree -> IterableList[Union[Submodule, Tree, Blob]]
    """
    collected = super()._list_traverse(*args, **kwargs)
    return collected
# List protocol
def __getslice__(self, i: int, j: int) -> List[IndexObjUnion]:
    """Return the objects for cache entries ``i`` up to (not including) ``j`` as a list."""
    selected = self._cache[i:j]
    return list(self._iter_convert_to_object(selected))
def __iter__(self) -> Iterator[IndexObjUnion]:
    """Iterate over the objects built from every entry of the internal cache."""
    entries = self._cache
    return self._iter_convert_to_object(entries)
def __len__(self) -> int:
    """Return the number of entries held in the internal cache."""
    entries = self._cache
    return len(entries)
def __getitem__(self, item: Union[str, int, slice]) -> "Union[IndexObjUnion, List[IndexObjUnion]]":
    """Fetch entries by position, by name/path, or by slice.

    :param item:
        * ``int``: position in the internal cache; the matching object is returned.
        * ``str``: entry name or ``/``-separated path, resolved through :meth:`join`.
        * ``slice``: range of cache positions; a list of objects is returned.

    :raise TypeError:
        If `item` is of any other type.
    """
    if isinstance(item, int):
        info = self._cache[item]
        return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))

    if isinstance(item, str):
        # compatibility
        return self.join(item)

    if isinstance(item, slice):
        # The signature has always advertised slices; __getslice__ is never
        # invoked by Python 3, so slicing has to be handled here.
        return list(self._iter_convert_to_object(self._cache[item]))

    # Wrap in a 1-tuple so that a tuple-valued `item` is shown via %r instead of
    # being unpacked as the format arguments.
    raise TypeError("Invalid index type: %r" % (item,))
def __contains__(self, item: Union[IndexObjUnion, PathLike]) -> bool:
    """Report whether `item` is a direct entry of this tree.

    :param item:
        An :class:`IndexObject`, matched by ``binsha`` against the cached entries,
        or otherwise a value compared for equality against each entry's name
        joined onto this tree's own path.
    """
    if isinstance(item, IndexObject):
        return any(item.binsha == info[0] for info in self._cache)

    # compatibility
    # Treat item as repo-relative path.
    path = self.path
    return any(item == join_path(path, info[2]) for info in self._cache)
def __reversed__(self) -> "Iterator[IndexObjUnion]":
    """Iterate over this tree's objects in reverse cache order.

    :return:
        Iterator over the same objects :meth:`__iter__` produces, last entry first.
    """
    # reversed() only accepts sequences (or objects defining __reversed__). The
    # conversion helper hands back a one-shot iterator, which made the previous
    # implementation raise TypeError, so materialize the objects first.
    objects = list(self._iter_convert_to_object(self._cache))
    return reversed(objects)
def _serialize(self, stream: "BytesIO") -> "Tree":
    """Write this tree's cached entries to `stream`.

    :note:
        The entries are assumed to already be sorted. If they are not, the
        generated tree representation will not be correct, as algorithms working
        on it rely on that order.

    :return:
        self
    """
    write = stream.write
    tree_to_stream(self._cache, write)
    return self
def _deserialize(self, stream: "BytesIO") -> "Tree":
    """Replace the internal cache with the entries parsed from all of `stream`.

    :return:
        self
    """
    raw = stream.read()
    self._cache = tree_entries_from_data(raw)
    return self
# END tree
# Finalize map definition.
# The entry for trees is added here, after the class statement, because it
# refers to the Tree class itself. The map is what turns ``mode >> 12`` into
# the class used to build each entry object.
Tree._map_id_to_type[Tree.tree_id] = Tree

View File

@@ -0,0 +1,700 @@
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Utility functions for working with git objects."""
__all__ = [
"get_object_type_by_name",
"parse_date",
"parse_actor_and_date",
"ProcessStreamAdapter",
"Traversable",
"altz_to_utctz_str",
"utctz_to_altz",
"verify_utctz",
"Actor",
"tzoffset",
"utc",
]
from abc import ABC, abstractmethod
import calendar
from collections import deque
from datetime import datetime, timedelta, tzinfo
import re
from string import digits
import time
import warnings
from git.util import Actor, IterableList, IterableObj
# typing ------------------------------------------------------------
from typing import (
Any,
Callable,
Deque,
Iterator,
NamedTuple,
Sequence,
TYPE_CHECKING,
Tuple,
Type,
TypeVar,
Union,
cast,
overload,
)
from git.types import Has_id_attribute, Literal
if TYPE_CHECKING:
    # Names needed only by annotations; importing them solely for the type
    # checker keeps them out of the runtime import graph.
    from io import BytesIO, StringIO
    from subprocess import Popen

    from git.types import Protocol, runtime_checkable

    from .blob import Blob
    from .commit import Commit
    from .submodule.base import Submodule
    from .tag import TagObject
    from .tree import TraversedTreeTup, Tree
else:
    # At runtime the classes below derive from a plain ABC instead of a
    # typing Protocol.
    Protocol = ABC

    def runtime_checkable(f):
        # Runtime stand-in for the decorator: returns its argument unchanged.
        return f
class TraverseNT(NamedTuple):
    """One pending entry on the work stack used by ``Traversable._traverse()``."""

    # Depth of `item` relative to the traversal root (the root itself is 0).
    depth: int
    # The object to visit.
    item: Union["Traversable", "Blob"]
    # The object `item` was reached from; None for the traversal root.
    src: Union["Traversable", None]
T_TIobj = TypeVar("T_TIobj", bound="TraversableIterableObj") # For TraversableIterableObj.traverse()
TraversedTup = Union[
Tuple[Union["Traversable", None], "Traversable"], # For Commit, Submodule.
"TraversedTreeTup", # For Tree.traverse().
]
# --------------------------------------------------------------------
ZERO = timedelta(0)
# { Functions
def mode_str_to_int(modestr: Union[bytes, str]) -> int:
    """Convert mode bits from an octal mode string to an integer mode for git.

    :param modestr:
        String like ``755`` or ``644`` or ``100644`` - only the last 6 chars will be
        used. ``bytes`` input is decoded as ASCII first.

    :return:
        Integer identifying a mode compatible to the mode methods ids of the
        :mod:`stat` module regarding the rwx permissions for user, group and other,
        special flags and file system flags, such as whether it is a symlink.
    """
    if isinstance(modestr, (bytes, bytearray)):
        # Iterating bytes yields code points (b"7" -> 55), not digit characters,
        # which would silently produce a wrong mode.
        modestr = modestr.decode("ascii")

    mode = 0
    # Walk the digits from least to most significant; each octal digit occupies
    # three bits.
    for position, char in enumerate(reversed(modestr[-6:])):
        mode += int(char) << position * 3
    # END for each char
    return mode
def get_object_type_by_name(
    object_type_name: bytes,
) -> Union[Type["Commit"], Type["TagObject"], Type["Tree"], Type["Blob"]]:
    """Map a Git object type name to the class GitPython represents it with.

    :param object_type_name:
        Member of :attr:`Object.TYPES <git.objects.base.Object.TYPES>`.

    :return:
        The class handling objects of the kind named by `object_type_name`.
        It can be called to create new instances.

    :raise ValueError:
        If `object_type_name` is unknown.
    """
    # The sibling modules are imported only once a name has matched.
    if object_type_name == b"commit":
        from . import commit

        return commit.Commit

    if object_type_name == b"tag":
        from . import tag

        return tag.TagObject

    if object_type_name == b"blob":
        from . import blob

        return blob.Blob

    if object_type_name == b"tree":
        from . import tree

        return tree.Tree

    raise ValueError("Cannot handle unknown object type: %s" % object_type_name.decode())
def utctz_to_altz(utctz: str) -> int:
    """Turn a git timezone offset into seconds west of UTC (the convention of
    :attr:`time.altzone`).

    :param utctz:
        git utc timezone string, e.g. +0200

    :return:
        Offset in seconds; positive for zones west of UTC, negative for zones east.
    """
    int_utctz = int(utctz)
    # The magnitude reads as HHMM.
    hours, minutes = divmod(abs(int_utctz), 100)
    seconds = hours * 3600 + minutes * 60
    if int_utctz < 0:
        return seconds
    return -seconds
def altz_to_utctz_str(altz: float) -> str:
    """Convert a timezone offset west of UTC in seconds into a Git timezone offset
    string.

    :param altz:
        Timezone offset in seconds west of UTC. Fractional seconds are discarded.

    :return:
        Offset formatted as ``+HHMM`` or ``-HHMM``.
    """
    # Work on whole numbers: formatting a float with ``{:02}`` yields text such
    # as "1.0", which corrupted the result for float input.
    total_minutes = int(abs(altz)) // 60
    hours, minutes = divmod(total_minutes, 60)
    # Offsets under one minute print as "+0000", so "-0000" is never produced.
    sign = "-" if altz >= 60 else "+"
    return "{}{:02}{:02}".format(sign, hours, minutes)
def verify_utctz(offset: str) -> str:
    """Check that `offset` is a sign followed by exactly four ASCII digits.

    :raise ValueError:
        If `offset` is incorrect.

    :return:
        `offset`
    """
    fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
    well_formed = len(offset) == 5 and offset[0] in "+-" and all(char in digits for char in offset[1:])
    if not well_formed:
        raise fmt_exc
    return offset
class tzoffset(tzinfo):
    """Fixed-offset :class:`~datetime.tzinfo` built from seconds west of UTC."""

    def __init__(self, secs_west_of_utc: float, name: Union[None, str] = None) -> None:
        # Stored as a UTC offset, hence the sign flip.
        delta = timedelta(seconds=-secs_west_of_utc)
        self._offset = delta
        self._name = name or "fixed"

    def __reduce__(self) -> Tuple[Type["tzoffset"], Tuple[float, str]]:
        # Pickle support: rebuild from the constructor arguments.
        secs_west_of_utc = -self._offset.total_seconds()
        return tzoffset, (secs_west_of_utc, self._name)

    def utcoffset(self, dt: Union[datetime, None]) -> timedelta:
        """Return the stored offset; `dt` is not consulted."""
        return self._offset

    def tzname(self, dt: Union[datetime, None]) -> str:
        """Return the stored name; `dt` is not consulted."""
        return self._name

    def dst(self, dt: Union[datetime, None]) -> timedelta:
        """Return a zero adjustment; `dt` is not consulted."""
        return ZERO
utc = tzoffset(0, "UTC")
def from_timestamp(timestamp: float, tz_offset: float) -> datetime:
    """Build an aware :class:`~datetime.datetime` from `timestamp` and `tz_offset`.

    :param timestamp:
        Seconds since the epoch.

    :param tz_offset:
        Offset handed to :class:`tzoffset`, i.e. seconds west of UTC.

    :return:
        The moment expressed in the requested offset, or in UTC if converting to
        that offset raises :exc:`ValueError`.
    """
    utc_dt = datetime.fromtimestamp(timestamp, utc)
    try:
        return utc_dt.astimezone(tzoffset(tz_offset))
    except ValueError:
        return utc_dt
def parse_date(string_date: Union[str, datetime]) -> Tuple[int, int]:
    """Parse the given date as one of the following:

    * Aware datetime instance
    * Git internal format: timestamp offset
    * :rfc:`2822`: ``Thu, 07 Apr 2005 22:13:13 +0200``
    * ISO 8601: ``2005-04-07T22:13:13`` - The ``T`` can be a space as well.

    :return:
        Tuple(int(timestamp), int(offset)): the timestamp in seconds since epoch,
        and the timezone offset in seconds west of UTC (the convention of
        :attr:`time.altzone`). For RFC 2822 and ISO 8601 input the timestamp is
        computed from the date and time fields exactly as written; the parsed
        offset is returned alongside and is not applied to it.

    :raise ValueError:
        If the format could not be understood, or if a datetime without tzinfo
        was passed.

    :note:
        Date can also be ``YYYY.MM.DD``, ``MM/DD/YYYY`` and ``DD.MM.YYYY``.
    """
    if isinstance(string_date, datetime):
        if string_date.tzinfo:
            utcoffset = cast(timedelta, string_date.utcoffset())  # typeguard: tzinfo is set
            offset = -int(utcoffset.total_seconds())
            return int(string_date.astimezone(utc).timestamp()), offset
        else:
            raise ValueError(f"string_date datetime object without tzinfo, {string_date}")

    # Git time
    try:
        if string_date.count(" ") == 1 and string_date.rfind(":") == -1:
            timestamp, offset_str = string_date.split()
            if timestamp.startswith("@"):
                timestamp = timestamp[1:]
            timestamp_int = int(timestamp)
            return timestamp_int, utctz_to_altz(verify_utctz(offset_str))
        else:
            offset_str = "+0000"  # Without an explicit offset, a zero offset is reported.
            if string_date[-5] in "-+":
                offset_str = verify_utctz(string_date[-5:])
                string_date = string_date[:-6]  # skip space as well
            # END split timezone info
            offset = utctz_to_altz(offset_str)

            # Now figure out the date and time portion - split time.
            date_formats = []
            splitter = -1
            if "," in string_date:
                date_formats.append("%a, %d %b %Y")
                splitter = string_date.rfind(" ")
            else:
                # ISO plus additional
                date_formats.append("%Y-%m-%d")
                date_formats.append("%Y.%m.%d")
                date_formats.append("%m/%d/%Y")
                date_formats.append("%d.%m.%Y")

                splitter = string_date.rfind("T")
                if splitter == -1:
                    splitter = string_date.rfind(" ")
                # END handle 'T' and ' '
            # END handle RFC or ISO

            # Raised explicitly rather than asserted, so the check also runs
            # under ``python -O``. The outer handler turns it into the usual
            # "Unsupported date format" error.
            if splitter <= -1:
                raise ValueError("no separator between date and time found")

            # Split date and time.
            time_part = string_date[splitter + 1 :]  # Skip space.
            date_part = string_date[:splitter]

            # Parse time.
            tstruct = time.strptime(time_part, "%H:%M:%S")

            for fmt in date_formats:
                try:
                    dtstruct = time.strptime(date_part, fmt)
                    utctime = calendar.timegm(
                        (
                            dtstruct.tm_year,
                            dtstruct.tm_mon,
                            dtstruct.tm_mday,
                            tstruct.tm_hour,
                            tstruct.tm_min,
                            tstruct.tm_sec,
                            dtstruct.tm_wday,
                            dtstruct.tm_yday,
                            tstruct.tm_isdst,
                        )
                    )
                    return int(utctime), offset
                except ValueError:
                    continue
                # END exception handling
            # END for each fmt

            # Still here ? fail.
            raise ValueError("no format matched")
        # END handle format
    except Exception as e:
        raise ValueError(f"Unsupported date format or type: {string_date}, type={type(string_date)}") from e
    # END handle exceptions
# Precompiled regexes
_re_actor_epoch = re.compile(r"^.+? (.*) (\d+) ([+-]\d+).*$")
_re_only_actor = re.compile(r"^.+? (.*)$")
def parse_actor_and_date(line: str) -> Tuple[Actor, int, int]:
    """Extract the actor (author or committer), time and offset from a line like::

        author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700

    When the line carries no epoch/offset pair, both are reported as 0.

    :return:
        [Actor, int_seconds_since_epoch, int_timezone_offset]
    """
    epoch, offset = "0", "0"

    full_match = _re_actor_epoch.search(line)
    if full_match:
        actor, epoch, offset = full_match.groups()
    else:
        actor_match = _re_only_actor.search(line)
        if actor_match:
            actor = actor_match.group(1)
        else:
            actor = line or ""

    return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
# } END functions
# { Classes
class ProcessStreamAdapter:
    """Proxy that exposes a single stream of a process object.

    Attribute lookups that are not satisfied by the adapter itself are answered
    by the chosen stream, so the adapter can stand in for that stream while also
    holding a reference to the process. The process is usually wrapped into an
    :class:`~git.cmd.Git.AutoInterrupt` class to kill it if the instance goes out
    of scope.
    """

    __slots__ = ("_proc", "_stream")

    def __init__(self, process: "Popen", stream_name: str) -> None:
        stream: "StringIO" = getattr(process, stream_name)  # guessed type
        self._proc = process
        self._stream = stream

    def __getattr__(self, attr: str) -> Any:
        # Reached only when normal lookup on the adapter fails.
        target = self._stream
        return getattr(target, attr)
@runtime_checkable
class Traversable(Protocol):
    """Simple interface to perform depth-first or breadth-first traversals in one
    direction.

    Subclasses only need to implement one function.

    Instances of the subclass must be hashable.

    Defined subclasses:

    * :class:`Commit <git.objects.Commit>`
    * :class:`Tree <git.objects.tree.Tree>`
    * :class:`Submodule <git.objects.submodule.base.Submodule>`
    """

    __slots__ = ()

    @classmethod
    @abstractmethod
    def _get_intermediate_items(cls, item: Any) -> Sequence["Traversable"]:
        """
        :return:
            Tuple of items connected to the given item.
            Must be implemented in subclass.

            class Commit::     (cls, Commit) -> Tuple[Commit, ...]
            class Submodule::  (cls, Submodule) -> Iterablelist[Submodule]
            class Tree::       (cls, Tree) -> Tuple[Tree, ...]
        """
        raise NotImplementedError("To be implemented in subclass")

    @abstractmethod
    def list_traverse(self, *args: Any, **kwargs: Any) -> Any:
        """Traverse self and collect all items found.

        Calling this directly on the abstract base class, including via a ``super()``
        proxy, is deprecated. Only overridden implementations should be called.
        """
        warnings.warn(
            "list_traverse() method should only be called from subclasses."
            " Calling from Traversable abstract class will raise NotImplementedError in 4.0.0."
            " The concrete subclasses in GitPython itself are 'Commit', 'RootModule', 'Submodule', and 'Tree'.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._list_traverse(*args, **kwargs)

    def _list_traverse(
        self, as_edge: bool = False, *args: Any, **kwargs: Any
    ) -> IterableList[Union["Commit", "Submodule", "Tree", "Blob"]]:
        """Traverse self and collect all items found.

        :param as_edge:
            Forwarded to :meth:`traverse` when ``False``. See the note in the body
            for the ``True`` case.

        :return:
            :class:`~git.util.IterableList` with the results of the traversal as
            produced by :meth:`traverse`::

                Commit -> IterableList[Commit]
                Submodule ->  IterableList[Submodule]
                Tree -> IterableList[Union[Submodule, Tree, Blob]]
        """
        # Commit and Submodule have id.__attribute__ as IterableObj.
        # Tree has id.__attribute__ inherited from IndexObject.
        if isinstance(self, Has_id_attribute):
            id_attr = self._id_attribute_
        else:
            # Shouldn't reach here, unless Traversable subclass created with no
            # _id_attribute_.
            id_attr = ""
            # Could add _id_attribute_ to Traversable, or make all Traversable also
            # Iterable?

        if not as_edge:
            out: IterableList[Union["Commit", "Submodule", "Tree", "Blob"]] = IterableList(id_attr)
            out.extend(self.traverse(as_edge=as_edge, *args, **kwargs))  # noqa: B026
            return out
            # Overloads in subclasses (mypy doesn't allow typing self: subclass).
            # Union[IterableList['Commit'], IterableList['Submodule'], IterableList[Union['Submodule', 'Tree', 'Blob']]]
        else:
            # Raise DeprecationWarning, it doesn't make sense to use this.
            # NOTE(review): here the traversal iterator is passed in the position
            # where the other branch passes the id attribute name, and `as_edge`
            # is not forwarded to traverse(). Kept as is - confirm the intent
            # before relying on this branch.
            out_list: IterableList = IterableList(self.traverse(*args, **kwargs))
            return out_list

    @abstractmethod
    def traverse(self, *args: Any, **kwargs: Any) -> Any:
        """Iterator yielding items found when traversing self.

        Calling this directly on the abstract base class, including via a ``super()``
        proxy, is deprecated. Only overridden implementations should be called.
        """
        warnings.warn(
            "traverse() method should only be called from subclasses."
            " Calling from Traversable abstract class will raise NotImplementedError in 4.0.0."
            " The concrete subclasses in GitPython itself are 'Commit', 'RootModule', 'Submodule', and 'Tree'.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._traverse(*args, **kwargs)

    def _traverse(
        self,
        predicate: Callable[[Union["Traversable", "Blob", TraversedTup], int], bool] = lambda i, d: True,
        prune: Callable[[Union["Traversable", "Blob", TraversedTup], int], bool] = lambda i, d: False,
        depth: int = -1,
        branch_first: bool = True,
        visit_once: bool = True,
        ignore_self: int = 1,
        as_edge: bool = False,
    ) -> Union[Iterator[Union["Traversable", "Blob"]], Iterator[TraversedTup]]:
        """Iterator yielding items found when traversing `self`.

        :param predicate:
            A function ``f(i,d)`` that returns ``False`` if item i at depth ``d`` should
            not be included in the result.

        :param prune:
            A function ``f(i,d)`` that returns ``True`` if the search should stop at
            item ``i`` at depth ``d``. Item ``i`` will not be returned.

        :param depth:
            Defines at which level the iteration should not go deeper. If -1, there
            is no limit. If 0, you would effectively only get `self`, the root of the
            iteration. If 1, you would only get the first level of
            predecessors/successors.

        :param branch_first:
            If ``True``, items will be returned branch first, otherwise depth first.

        :param visit_once:
            If ``True``, items will only be returned once, although they might be
            encountered several times. Loops are prevented that way.

        :param ignore_self:
            If ``True``, `self` will be ignored and automatically pruned from the
            result. Otherwise it will be the first item to be returned. If `as_edge` is
            ``True``, the source of the first edge is ``None``.

        :param as_edge:
            If ``True``, return a pair of items, first being the source, second the
            destination, i.e. tuple(src, dest) with the edge spanning from source to
            destination.

        :return:
            Iterator yielding items found when traversing `self`::

                as_edge=False -> Iterator[item]
                as_edge=True  -> Iterator[Tuple[src, item]]

            where the item types per subclass are::

                Commit    -> Commit
                Submodule -> Submodule
                Tree      -> Union[Blob, Tree, Submodule]
        """

        visited = set()
        stack: Deque[TraverseNT] = deque()
        stack.append(TraverseNT(0, self, None))  # self is always depth level 0.

        def addToStack(
            stack: Deque[TraverseNT],
            src_item: "Traversable",
            branch_first: bool,
            depth: int,
        ) -> None:
            # Expand the item that was passed in, rather than reaching for the
            # enclosing loop's variable, which only happened to hold the same
            # object at call time.
            lst = self._get_intermediate_items(src_item)
            if not lst:  # Empty list
                return
            if branch_first:
                stack.extendleft(TraverseNT(depth, i, src_item) for i in lst)
            else:
                # Pushed in reverse so that the first child is popped first.
                reviter = (TraverseNT(depth, lst[i], src_item) for i in range(len(lst) - 1, -1, -1))
                stack.extend(reviter)

        # END addToStack local method

        while stack:
            d, item, src = stack.pop()  # Depth of item, item, item_source

            if visit_once and item in visited:
                continue

            if visit_once:
                visited.add(item)

            rval: Union[TraversedTup, "Traversable", "Blob"]
            if as_edge:
                # If as_edge return (src, item); src is None for the first item.
                rval = (src, item)
            else:
                rval = item

            if prune(rval, d):
                continue

            skipStartItem = ignore_self and (item is self)
            if not skipStartItem and predicate(rval, d):
                yield rval

            # Only continue to next level if this is appropriate!
            next_d = d + 1
            if depth > -1 and next_d > depth:
                continue

            addToStack(stack, item, branch_first, next_d)
        # END for each item on work stack
@runtime_checkable
class Serializable(Protocol):
    """Interface for objects that can write themselves to, and restore themselves
    from, a data stream."""

    __slots__ = ()

    # @abstractmethod
    def _serialize(self, stream: "BytesIO") -> "Serializable":
        """Write this object's data to `stream`.

        :note:
            A serialized object would :meth:`_deserialize` into the same object.

        :param stream:
            A file-like object.

        :return:
            self

        :raise NotImplementedError:
            Always, in this base implementation.
        """
        raise NotImplementedError("To be implemented in subclass")

    # @abstractmethod
    def _deserialize(self, stream: "BytesIO") -> "Serializable":
        """Restore this object's data from `stream`.

        :param stream:
            A file-like object.

        :return:
            self

        :raise NotImplementedError:
            Always, in this base implementation.
        """
        raise NotImplementedError("To be implemented in subclass")
class TraversableIterableObj(IterableObj, Traversable):
    """Combination of :class:`~git.util.IterableObj` and :class:`Traversable` that
    supplies the public ``traverse()`` and ``list_traverse()`` entry points on top
    of the inherited ``_traverse()`` and ``_list_traverse()``."""

    __slots__ = ()

    # An edge: (source or None, destination).
    TIobj_tuple = Tuple[Union[T_TIobj, None], T_TIobj]

    def list_traverse(self: T_TIobj, *args: Any, **kwargs: Any) -> IterableList[T_TIobj]:
        """Collect a traversal into a list; arguments go to ``_list_traverse()``."""
        collected = super()._list_traverse(*args, **kwargs)
        return collected

    @overload
    def traverse(self: T_TIobj) -> Iterator[T_TIobj]: ...

    @overload
    def traverse(
        self: T_TIobj,
        predicate: Callable[[Union[T_TIobj, Tuple[Union[T_TIobj, None], T_TIobj]], int], bool],
        prune: Callable[[Union[T_TIobj, Tuple[Union[T_TIobj, None], T_TIobj]], int], bool],
        depth: int,
        branch_first: bool,
        visit_once: bool,
        ignore_self: Literal[True],
        as_edge: Literal[False],
    ) -> Iterator[T_TIobj]: ...

    @overload
    def traverse(
        self: T_TIobj,
        predicate: Callable[[Union[T_TIobj, Tuple[Union[T_TIobj, None], T_TIobj]], int], bool],
        prune: Callable[[Union[T_TIobj, Tuple[Union[T_TIobj, None], T_TIobj]], int], bool],
        depth: int,
        branch_first: bool,
        visit_once: bool,
        ignore_self: Literal[False],
        as_edge: Literal[True],
    ) -> Iterator[Tuple[Union[T_TIobj, None], T_TIobj]]: ...

    @overload
    def traverse(
        self: T_TIobj,
        predicate: Callable[[Union[T_TIobj, TIobj_tuple], int], bool],
        prune: Callable[[Union[T_TIobj, TIobj_tuple], int], bool],
        depth: int,
        branch_first: bool,
        visit_once: bool,
        ignore_self: Literal[True],
        as_edge: Literal[True],
    ) -> Iterator[Tuple[T_TIobj, T_TIobj]]: ...

    def traverse(
        self: T_TIobj,
        predicate: Callable[[Union[T_TIobj, TIobj_tuple], int], bool] = lambda i, d: True,
        prune: Callable[[Union[T_TIobj, TIobj_tuple], int], bool] = lambda i, d: False,
        depth: int = -1,
        branch_first: bool = True,
        visit_once: bool = True,
        ignore_self: int = 1,
        as_edge: bool = False,
    ) -> Union[Iterator[T_TIobj], Iterator[Tuple[T_TIobj, T_TIobj]], Iterator[TIobj_tuple]]:
        """For documentation, see :meth:`Traversable._traverse`."""
        # The result is narrowed with cast() rather than checked at runtime; a
        # TypeGuard over a tee'd copy of the iterator would be the checking
        # alternative.
        walker = super()._traverse(
            predicate,  # type: ignore[arg-type]
            prune,  # type: ignore[arg-type]
            depth,
            branch_first,
            visit_once,
            ignore_self,
            as_edge,
        )
        return cast(
            Union[Iterator[T_TIobj], Iterator[Tuple[Union[None, T_TIobj], T_TIobj]]],
            walker,
        )

View File

@@ -0,0 +1,21 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = [
"HEAD",
"Head",
"RefLog",
"RefLogEntry",
"Reference",
"RemoteReference",
"SymbolicReference",
"Tag",
"TagReference",
]
from .head import HEAD, Head
from .log import RefLog, RefLogEntry
from .reference import Reference
from .remote import RemoteReference
from .symbolic import SymbolicReference
from .tag import Tag, TagReference

View File

@@ -0,0 +1,304 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Some ref-based objects.
Note the distinction between the :class:`HEAD` and :class:`Head` classes.
"""
__all__ = ["HEAD", "Head"]
from git.config import GitConfigParser, SectionConstraint
from git.exc import GitCommandError
from git.util import join_path
from .reference import Reference
from .symbolic import SymbolicReference
# typing ---------------------------------------------------
from typing import Any, Sequence, TYPE_CHECKING, Union
from git.types import Commit_ish, PathLike
if TYPE_CHECKING:
from git.objects import Commit
from git.refs import RemoteReference
from git.repo import Repo
# -------------------------------------------------------------------
def strip_quotes(string: str) -> str:
    """Return `string` without its first and last character when it both starts and
    ends with a double quote; otherwise return it unchanged."""
    is_quoted = string.startswith('"') and string.endswith('"')
    return string[1:-1] if is_quoted else string
class HEAD(SymbolicReference):
    """The repository's ``HEAD``, as a specialised
    :class:`~git.refs.symbolic.SymbolicReference`."""

    _HEAD_NAME = "HEAD"
    _ORIG_HEAD_NAME = "ORIG_HEAD"

    __slots__ = ()

    # TODO: This can be removed once SymbolicReference.commit has static type hints.
    commit: "Commit"

    def __init__(self, repo: "Repo", path: PathLike = _HEAD_NAME) -> None:
        """
        :raise ValueError:
            If `path` is anything other than ``HEAD``.
        """
        if path != self._HEAD_NAME:
            raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path))
        super().__init__(repo, path)

    def orig_head(self) -> SymbolicReference:
        """
        :return:
            :class:`~git.refs.symbolic.SymbolicReference` pointing at the ORIG_HEAD,
            which is maintained to contain the previous value of HEAD.
        """
        orig = SymbolicReference(self.repo, self._ORIG_HEAD_NAME)
        return orig

    def reset(
        self,
        commit: Union[Commit_ish, SymbolicReference, str] = "HEAD",
        index: bool = True,
        working_tree: bool = False,
        paths: Union[PathLike, Sequence[PathLike], None] = None,
        **kwargs: Any,
    ) -> "HEAD":
        """Reset our HEAD to the given commit optionally synchronizing the index and
        working tree. The reference we refer to will be set to commit as well.

        :param commit:
            :class:`~git.objects.commit.Commit`, :class:`~git.refs.reference.Reference`,
            or string identifying a revision we should reset HEAD to.

        :param index:
            If ``True``, the index will be set to match the given commit.
            Otherwise it will not be touched.

        :param working_tree:
            If ``True``, the working tree will be forcefully adjusted to match the given
            commit, possibly overwriting uncommitted changes without warning.
            If `working_tree` is ``True``, `index` must be ``True`` as well.

        :param paths:
            Single path or list of paths relative to the git root directory
            that are to be reset. This allows to partially reset individual files.

        :param kwargs:
            Additional arguments passed to :manpage:`git-reset(1)`.

        :raise ValueError:
            If `working_tree` is requested without `index`.

        :return:
            self
        """
        mode: Union[str, None]
        if working_tree:
            if not index:
                raise ValueError("Cannot reset the working tree if the index is not reset as well")
            mode = "--hard"
        elif index:
            # Explicit "--mixed" when passing paths is deprecated since git 1.5.4.
            # See https://github.com/gitpython-developers/GitPython/discussions/1876.
            mode = None if paths else "--mixed"
        else:
            mode = "--soft"

        try:
            self.repo.git.reset(mode, commit, "--", paths, **kwargs)
        except GitCommandError as e:
            # git nowadays may use 1 as status to indicate there are still unstaged
            # modifications after the reset.
            if e.status != 1:
                raise
        # END handle exception

        return self
class Head(Reference):
    """A named reference to a :class:`~git.objects.commit.Commit`: every Head has a
    name and a :class:`~git.objects.commit.Commit` object.

    Examples::

        >>> repo = Repo("/path/to/repo")
        >>> head = repo.heads[0]

        >>> head.name
        'master'

        >>> head.commit
        <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455">

        >>> head.commit.hexsha
        '1c09f116cbc2cb4100fb6935bb162daa4723f455'
    """

    _common_path_default = "refs/heads"
    k_config_remote = "remote"
    k_config_remote_ref = "merge"  # Branch to merge from remote.

    @classmethod
    def delete(cls, repo: "Repo", *heads: "Union[Head, str]", force: bool = False, **kwargs: Any) -> None:
        """Delete the given heads.

        :param force:
            If ``True``, the heads will be deleted even if they are not yet merged into
            the main development stream. Default ``False``.

        :note:
            `kwargs` is accepted but not passed on to ``git branch`` by this method.
        """
        flag = "-D" if force else "-d"
        repo.git.branch(flag, *heads)

    def set_tracking_branch(self, remote_reference: Union["RemoteReference", None]) -> "Head":
        """Configure this branch to track the given remote reference. This will
        alter this branch's configuration accordingly.

        :param remote_reference:
            The remote reference to track or None to untrack any references.

        :raise ValueError:
            If `remote_reference` is neither ``None`` nor a ``RemoteReference``.

        :return:
            self
        """
        from .remote import RemoteReference

        if remote_reference is not None and not isinstance(remote_reference, RemoteReference):
            raise ValueError("Incorrect parameter type: %r" % remote_reference)
        # END handle type

        with self.config_writer() as writer:
            if remote_reference is not None:
                writer.set_value(self.k_config_remote, remote_reference.remote_name)
                writer.set_value(
                    self.k_config_remote_ref,
                    Head.to_full_path(remote_reference.remote_head),
                )
            else:
                writer.remove_option(self.k_config_remote)
                writer.remove_option(self.k_config_remote_ref)
                # Drop the section as well once nothing is left in it.
                if len(writer.options()) == 0:
                    writer.remove_section()

        return self

    def tracking_branch(self) -> Union["RemoteReference", None]:
        """
        :return:
            The remote reference we are tracking, or ``None`` if we are not a tracking
            branch.
        """
        from .remote import RemoteReference

        reader = self.config_reader()
        is_tracking = reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref)
        if not is_tracking:
            # We are not a tracking branch.
            return None

        merge_target = strip_quotes(reader.get_value(self.k_config_remote_ref))
        ref = Head(self.repo, Head.to_full_path(merge_target))
        remote_refpath = RemoteReference.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name))
        return RemoteReference(self.repo, remote_refpath)

    def rename(self, new_path: PathLike, force: bool = False) -> "Head":
        """Rename self to a new path.

        :param new_path:
            Either a simple name or a path, e.g. ``new_name`` or ``features/new_name``.
            The prefix ``refs/heads`` is implied.

        :param force:
            If ``True``, the rename will succeed even if a head with the target name
            already exists.

        :return:
            self

        :note:
            Respects the ref log, as git commands are used.
        """
        flag = "-M" if force else "-m"
        self.repo.git.branch(flag, self, new_path)
        self.path = "%s/%s" % (self._common_path_default, new_path)
        return self

    def checkout(self, force: bool = False, **kwargs: Any) -> Union["HEAD", "Head"]:
        """Check out this head by setting the HEAD to this reference, by updating the
        index to reflect the tree we point to and by updating the working tree to
        reflect the latest index.

        The command will fail if changed working tree files would be overwritten.

        :param force:
            If ``True``, changes to the index and the working tree will be discarded.
            If ``False``, :exc:`~git.exc.GitCommandError` will be raised in that
            situation.

        :param kwargs:
            Additional keyword arguments to be passed to git checkout, e.g.
            ``b="new_branch"`` to create a new branch at the given spot.

        :return:
            The active branch after the checkout operation, usually self unless a new
            branch has been created.
            If there is no active branch, as the HEAD is now detached, the HEAD
            reference will be returned instead.

        :note:
            By default it is only allowed to checkout heads - everything else will leave
            the HEAD detached which is allowed and possible, but remains a special state
            that some tools might not be able to handle.
        """
        # `force` always decides the "f" option, overriding any "f" in kwargs.
        if force is False:
            kwargs.pop("f", None)
        else:
            kwargs["f"] = force

        self.repo.git.checkout(self, **kwargs)

        head = self.repo.head
        if head.is_detached:
            return self.repo.head
        return self.repo.active_branch

    # { Configuration
    def _config_parser(self, read_only: bool) -> SectionConstraint[GitConfigParser]:
        # Repository-level parser, narrowed to this branch's section.
        parser = self.repo.config_reader() if read_only else self.repo.config_writer()
        return SectionConstraint(parser, 'branch "%s"' % self.name)

    def config_reader(self) -> SectionConstraint[GitConfigParser]:
        """
        :return:
            A configuration parser instance constrained to only read this instance's
            values.
        """
        return self._config_parser(read_only=True)

    def config_writer(self) -> SectionConstraint[GitConfigParser]:
        """
        :return:
            A configuration writer instance with read-and write access to options of
            this head.
        """
        return self._config_parser(read_only=False)

    # } END configuration

View File

@@ -0,0 +1,399 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["RefLog", "RefLogEntry"]
from mmap import mmap
import os.path as osp
import re
import time as _time
from git.compat import defenc
from git.objects.util import (
Serializable,
altz_to_utctz_str,
parse_date,
)
from git.util import (
Actor,
LockedFD,
LockFile,
assure_directory_exists,
bin_to_hex,
file_contents_ro_filepath,
to_native_path,
)
# typing ------------------------------------------------------------------
from typing import Iterator, List, Tuple, TYPE_CHECKING, Union
from git.types import PathLike
if TYPE_CHECKING:
from io import BytesIO
from git.config import GitConfigParser, SectionConstraint
from git.refs import SymbolicReference
# ------------------------------------------------------------------------------
class RefLogEntry(Tuple[str, str, Actor, Tuple[int, int], str]):
    """Named tuple allowing easy access to the revlog data fields.

    The tuple layout is ``(oldhexsha, newhexsha, actor, (time, tz_offset), message)``.
    """

    _re_hexsha_only = re.compile(r"^[0-9A-Fa-f]{40}$")

    __slots__ = ()

    def __repr__(self) -> str:
        """Representation of ourselves in git reflog format."""
        return self.format()

    def format(self) -> str:
        """:return: A string suitable to be placed in a reflog file."""
        act = self.actor
        time = self.time
        # Entries parsed by from_line() from a line without a TAB carry ``None`` as
        # their message. Write those back with an empty message instead of the
        # literal text "None".
        message = self.message
        if message is None:
            message = ""
        return "{} {} {} <{}> {!s} {}\t{}\n".format(
            self.oldhexsha,
            self.newhexsha,
            act.name,
            act.email,
            time[0],
            altz_to_utctz_str(time[1]),
            message,
        )

    @property
    def oldhexsha(self) -> str:
        """The hexsha to the commit the ref pointed to before the change."""
        return self[0]

    @property
    def newhexsha(self) -> str:
        """The hexsha to the commit the ref now points to, after the change."""
        return self[1]

    @property
    def actor(self) -> Actor:
        """Actor instance, providing access."""
        return self[2]

    @property
    def time(self) -> Tuple[int, int]:
        """Time as tuple:

        * [0] = ``int(time)``
        * [1] = ``int(timezone_offset)`` in :attr:`time.altzone` format
        """
        return self[3]

    @property
    def message(self) -> str:
        """Message describing the operation that acted on the reference.

        :note:
            Entries created by :meth:`from_line` from a line without a TAB separator
            hold ``None`` here.
        """
        return self[4]

    @classmethod
    def new(
        cls,
        oldhexsha: str,
        newhexsha: str,
        actor: Actor,
        time: int,
        tz_offset: int,
        message: str,
    ) -> "RefLogEntry":  # skipcq: PYL-W0621
        """:return: New instance of a :class:`RefLogEntry`

        :raise ValueError:
            If `actor` is not an ``Actor`` instance.
        """
        if not isinstance(actor, Actor):
            raise ValueError("Need actor instance, got %s" % actor)
        # END check types
        return cls((oldhexsha, newhexsha, actor, (time, tz_offset), message))

    @classmethod
    def from_line(cls, line: bytes) -> "RefLogEntry":
        """:return: New :class:`RefLogEntry` instance from the given revlog line.

        :param line:
            Line bytes without trailing newline

        :raise ValueError:
            If `line` could not be parsed.
        """
        line_str = line.decode(defenc)
        # Everything before the first TAB is the fixed-layout info part, everything
        # after it is the message. Without a TAB there is no message (``None``).
        info, tab, rest = line_str.partition("\t")
        msg = rest if tab else None

        # Fixed layout: 40 hex chars, one separator, 40 hex chars, one separator.
        oldhexsha = info[:40]
        newhexsha = info[41:81]
        for hexsha in (oldhexsha, newhexsha):
            if not cls._re_hexsha_only.match(hexsha):
                raise ValueError("Invalid hexsha: %r" % (hexsha,))
            # END if hexsha re doesn't match
        # END for each hexsha

        email_end = info.find(">", 82)
        if email_end == -1:
            raise ValueError("Missing token: >")
        # END handle missing end brace

        actor = Actor._from_string(info[82 : email_end + 1])
        time, tz_offset = parse_date(info[email_end + 2 :])  # skipcq: PYL-W0621

        return cls((oldhexsha, newhexsha, actor, (time, tz_offset), msg))
class RefLog(List[RefLogEntry], Serializable):
    R"""A reflog contains :class:`RefLogEntry`\s, each of which defines a certain state
    of the head in question. Custom query methods allow to retrieve log entries by date
    or by other criteria.

    Reflog entries are ordered. The first added entry is first in the list. The last
    entry, i.e. the last change of the head or reference, is last in the list.
    """

    __slots__ = ("_path",)

    def __new__(cls, filepath: Union[PathLike, None] = None) -> "RefLog":
        # `filepath` is accepted so the signature matches __init__; it is not used
        # for constructing the list itself.
        inst = super().__new__(cls)
        return inst

    def __init__(self, filepath: Union[PathLike, None] = None) -> None:
        """Initialize this instance with an optional filepath, from which we will
        initialize our data. The path is also used to write changes back using the
        :meth:`write` method."""
        self._path = filepath
        if filepath is not None:
            self._read_from_file()
        # END handle filepath

    def _read_from_file(self) -> None:
        """Fill this list from the file at ``self._path``; a file that cannot be
        opened is treated as an empty log."""
        try:
            fmap = file_contents_ro_filepath(self._path, stream=True, allow_mmap=True)
        except OSError:
            # It is possible and allowed that the file doesn't exist!
            return
        # END handle invalid log

        try:
            self._deserialize(fmap)
        finally:
            fmap.close()
        # END handle closing of handle

    # { Interface

    @classmethod
    def from_file(cls, filepath: PathLike) -> "RefLog":
        """
        :return:
            A new :class:`RefLog` instance containing all entries from the reflog at the
            given `filepath`.

        :param filepath:
            Path to reflog.

        :raise ValueError:
            If the file could not be read or was corrupted in some way.
        """
        return cls(filepath)

    @classmethod
    def path(cls, ref: "SymbolicReference") -> str:
        """
        :return:
            String to absolute path at which the reflog of the given ref instance would
            be found. The path is not guaranteed to point to a valid file though.

        :param ref:
            :class:`~git.refs.symbolic.SymbolicReference` instance
        """
        return osp.join(ref.repo.git_dir, "logs", to_native_path(ref.path))

    @classmethod
    def iter_entries(cls, stream: Union[str, "BytesIO", mmap]) -> Iterator[RefLogEntry]:
        """
        :return:
            Iterator yielding :class:`RefLogEntry` instances, one for each line read
            from the given stream.

        :param stream:
            File-like object containing the revlog in its native format or string
            instance pointing to a file to read.

        :note:
            A stream passed in by the caller is never closed here. A stream opened by
            this method (string argument) is closed once iteration ends.
        """
        new_entry = RefLogEntry.from_line
        opened_here = isinstance(stream, str)
        if opened_here:
            # Default args return mmap since Python 3.
            _stream = file_contents_ro_filepath(stream)
            assert isinstance(_stream, mmap)
        else:
            _stream = stream
        # END handle stream type

        try:
            while True:
                line = _stream.readline()
                if not line:
                    return
                yield new_entry(line.strip())
            # END endless loop
        finally:
            # Only release what we acquired ourselves.
            if opened_here:
                _stream.close()

    @classmethod
    def entry_at(cls, filepath: PathLike, index: int) -> "RefLogEntry":
        """
        :return:
            :class:`RefLogEntry` at the given index.

        :param filepath:
            Full path to the reflog file from which to read the entry.

        :param index:
            Python list compatible index, i.e. it may be negative to specify an entry
            counted from the end of the list.

        :raise IndexError:
            If the entry didn't exist.

        :note:
            This method is faster as it only parses the entry at index, skipping all
            other lines. Nonetheless, the whole file has to be read if the index is
            negative.
        """
        with open(filepath, "rb") as fp:
            if index < 0:
                return RefLogEntry.from_line(fp.readlines()[index].strip())
            # Read until index is reached.

            for i in range(index + 1):
                line = fp.readline()
                if not line:
                    raise IndexError(f"Index file ended at line {i + 1}, before given index was reached")
                # END abort on eof
            # END handle runup

            return RefLogEntry.from_line(line.strip())
        # END handle index

    def to_file(self, filepath: PathLike) -> None:
        """Write the contents of the reflog instance to a file at the given filepath.

        :param filepath:
            Path to file. ``assure_directory_exists`` is called for it before the
            file is opened for writing.
        """
        lfd = LockedFD(filepath)
        assure_directory_exists(filepath, is_file=True)

        fp = lfd.open(write=True, stream=True)
        try:
            self._serialize(fp)
            lfd.commit()
        except BaseException:
            # Undo the locked write on any failure, then let the error propagate.
            lfd.rollback()
            raise
        # END handle change

    @classmethod
    def append_entry(
        cls,
        config_reader: Union[Actor, "GitConfigParser", "SectionConstraint", None],
        filepath: PathLike,
        oldbinsha: bytes,
        newbinsha: bytes,
        message: str,
        write: bool = True,
    ) -> "RefLogEntry":
        """Append a new log entry to the revlog at filepath.

        :param config_reader:
            Configuration reader of the repository - used to obtain user information.
            May also be an :class:`~git.util.Actor` instance identifying the committer
            directly or ``None``.

        :param filepath:
            Full path to the log file.

        :param oldbinsha:
            Binary sha of the previous commit.

        :param newbinsha:
            Binary sha of the current commit.

        :param message:
            Message describing the change to the reference. Only its first line is
            recorded.

        :param write:
            If ``True``, the changes will be written right away.
            Otherwise the change will not be written.

        :return:
            :class:`RefLogEntry` objects which was appended to the log.

        :raise ValueError:
            If either sha is not exactly 20 bytes long.

        :note:
            As we are append-only, concurrent access is not a problem as we do not
            interfere with readers.
        """
        if len(oldbinsha) != 20 or len(newbinsha) != 20:
            raise ValueError("Shas need to be given in binary format")
        # END handle sha type

        assure_directory_exists(filepath, is_file=True)
        first_line = message.split("\n")[0]
        if isinstance(config_reader, Actor):
            committer = config_reader  # mypy thinks this is Actor | Gitconfigparser, but why?
        else:
            committer = Actor.committer(config_reader)
        entry = RefLogEntry(
            (
                bin_to_hex(oldbinsha).decode("ascii"),
                bin_to_hex(newbinsha).decode("ascii"),
                committer,
                (int(_time.time()), _time.altzone),
                first_line,
            )
        )

        if write:
            lf = LockFile(filepath)
            lf._obtain_lock_or_raise()
            # The lock must be released even if opening the log file fails, hence
            # the open() call lives inside the try block as well.
            try:
                with open(filepath, "ab") as fd:
                    fd.write(entry.format().encode(defenc))
            finally:
                lf._release_lock()
        # END handle write operation
        return entry

    def write(self) -> "RefLog":
        """Write this instance's data to the file we are originating from.

        :return:
            self

        :raise ValueError:
            If this instance was created without a path.
        """
        if self._path is None:
            raise ValueError("Instance was not initialized with a path, use to_file(...) instead")
        # END assert path
        self.to_file(self._path)
        return self

    # } END interface

    # { Serializable Interface

    def _serialize(self, stream: "BytesIO") -> "RefLog":
        """Write every entry, formatted and encoded with ``defenc``, to `stream`."""
        write = stream.write

        # Write all entries.
        for e in self:
            write(e.format().encode(defenc))
        # END for each entry
        return self

    def _deserialize(self, stream: "BytesIO") -> "RefLog":
        """Append all entries parsed from `stream` to this list."""
        self.extend(self.iter_entries(stream))
        return self

    # } END serializable interface

View File

@@ -0,0 +1,176 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["Reference"]
from git.util import IterableObj, LazyMixin
from .symbolic import SymbolicReference, T_References
# typing ------------------------------------------------------------------
from typing import Any, Callable, Iterator, TYPE_CHECKING, Type, Union
from git.types import AnyGitObject, PathLike, _T
if TYPE_CHECKING:
from git.repo import Repo
# ------------------------------------------------------------------------------
# { Utilities
def require_remote_ref_path(func: Callable[..., _T]) -> Callable[..., _T]:
    """A decorator raising :exc:`ValueError` if we are not a valid remote, based on the
    path.

    :param func:
        Method to guard. Its first argument must offer ``is_remote()`` and ``path``.

    :return:
        A wrapper that calls `func` only if ``self.is_remote()`` is true. The wrapper
        carries over the metadata of `func` (name, docstring, ...), so properties
        built on top of it keep their documentation.
    """
    # Local import so this block does not depend on the module's import section.
    import functools

    @functools.wraps(func)
    def wrapper(self: T_References, *args: Any, **kwargs: Any) -> _T:
        if not self.is_remote():
            raise ValueError("ref path does not point to a remote reference: %s" % self.path)
        return func(self, *args, **kwargs)

    # END wrapper
    return wrapper
# } END utilities
class Reference(SymbolicReference, LazyMixin, IterableObj):
    """A named reference to any object.

    Subclasses may narrow this down, e.g. a :class:`~git.refs.head.Head` can only
    point to commits.
    """

    __slots__ = ()

    _points_to_commits_only = False
    _resolve_ref_on_create = True
    _common_path_default = "refs"

    def __init__(self, repo: "Repo", path: PathLike, check_path: bool = True) -> None:
        """Initialize this instance.

        :param repo:
            Our parent repository.

        :param path:
            Path relative to the ``.git/`` directory pointing to the ref in question,
            e.g. ``refs/heads/master``.

        :param check_path:
            If ``False``, any path is accepted.
            Otherwise the path must start with the default path prefix of this type.

        :raise ValueError:
            If `check_path` is set and `path` lacks the required prefix.
        """
        if check_path:
            required_prefix = self._common_path_default + "/"
            if not str(path).startswith(required_prefix):
                raise ValueError(f"Cannot instantiate {self.__class__.__name__!r} from path {path}")

        self.path: str  # SymbolicReference converts to string at the moment.
        super().__init__(repo, path)

    def __str__(self) -> str:
        return self.name

    # { Interface

    # @ReservedAssignment
    def set_object(
        self,
        object: Union[AnyGitObject, "SymbolicReference", str],
        logmsg: Union[str, None] = None,
    ) -> "Reference":
        """Special version which checks if the head-log needs an update as well.

        :return:
            self
        """
        # Remember our old commit only if a log message was given and HEAD
        # currently follows this very reference.
        previous_binsha = None
        if logmsg is not None:
            current_head = self.repo.head
            head_follows_us = (not current_head.is_detached) and current_head.ref == self
            if head_follows_us:
                previous_binsha = self.commit.binsha

        super().set_object(object, logmsg)

        if previous_binsha is not None:
            # From refs/files-backend.c in git-source:
            # /*
            #  * Special hack: If a branch is updated directly and HEAD
            #  * points to it (may happen on the remote side of a push
            #  * for example) then logically the HEAD reflog should be
            #  * updated too.
            #  * A generic solution implies reverse symref information,
            #  * but finding all symrefs pointing to the given branch
            #  * would be rather costly for this rare event (the direct
            #  * update of a branch) to be worth it. So let's cheat and
            #  * check with HEAD only which should cover 99% of all usage
            #  * scenarios (even 100% of the default ones).
            #  */
            self.repo.head.log_append(previous_binsha, logmsg)

        return self

    # NOTE: No need to overwrite properties, as they will only work without the log.

    @property
    def name(self) -> str:
        """
        :return:
            (shortest) Name of this reference - it may contain path components
        """
        # The first two path components (refs/heads, refs/tags or refs/remotes)
        # are dropped; shorter paths such as refs/HEAD are returned unchanged.
        parts = self.path.split("/")
        return "/".join(parts[2:]) if len(parts) >= 3 else self.path

    @classmethod
    def iter_items(
        cls: Type[T_References],
        repo: "Repo",
        common_path: Union[PathLike, None] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Iterator[T_References]:
        """Equivalent to
        :meth:`SymbolicReference.iter_items <git.refs.symbolic.SymbolicReference.iter_items>`,
        but will return non-detached references as well.

        :note:
            Extra positional and keyword arguments are accepted but not forwarded.
        """
        return cls._iter_items(repo, common_path)

    # } END interface

    # { Remote Interface

    @property
    @require_remote_ref_path
    def remote_name(self) -> str:
        """
        :return:
            Name of the remote we are a reference of, such as ``origin`` for a reference
            named ``origin/master``.
        """
        # Layout: refs/remotes/<remote name>/<branch_name>
        return self.path.split("/")[2]

    @property
    @require_remote_ref_path
    def remote_head(self) -> str:
        """
        :return:
            Name of the remote head itself, e.g. ``master``.

        :note:
            The returned name is usually not qualified enough to uniquely identify a
            branch.
        """
        branch_parts = self.path.split("/")[3:]
        return "/".join(branch_parts)

    # } END remote interface

View File

@@ -0,0 +1,79 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Module implementing a remote object allowing easy access to git remotes."""
__all__ = ["RemoteReference"]
import os
from git.util import join_path
from .head import Head
# typing ------------------------------------------------------------------
from typing import Any, Iterator, NoReturn, TYPE_CHECKING, Union
from git.types import PathLike
if TYPE_CHECKING:
from git.remote import Remote
from git.repo import Repo
# ------------------------------------------------------------------------------
class RemoteReference(Head):
    """A reference pointing to a remote head."""

    _common_path_default = Head._remote_common_path_default

    @classmethod
    def iter_items(
        cls,
        repo: "Repo",
        common_path: Union[PathLike, None] = None,
        remote: Union["Remote", None] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Iterator["RemoteReference"]:
        """Iterate remote references, and if given, constrain them to the given remote."""
        search_root = common_path or cls._common_path_default
        if remote is not None:
            # Narrow the search to the sub-path named after the remote.
            search_root = join_path(search_root, str(remote))
        # super is Reference
        return super().iter_items(repo, search_root)

    # The Head implementation of delete also accepts strs, but this implementation does
    # not. mypy doesn't have a way of representing tightening the types of arguments in
    # subclasses and recommends Any or "type: ignore".
    # (See: https://github.com/python/typing/issues/241)
    @classmethod
    def delete(cls, repo: "Repo", *refs: "RemoteReference", **kwargs: Any) -> None:  # type: ignore[override]
        """Delete the given remote references.

        :note:
            `kwargs` are given for compatibility with the base class method as we
            should not narrow the signature. They are not used.
        """
        repo.git.branch("-d", "-r", *refs)
        # The official deletion method will ignore remote symbolic refs - these are
        # generally ignored in the refs/ folder. We don't though and delete remainders
        # manually, first below the common dir, then below the git dir.
        for ref in refs:
            for dir_attr in ("common_dir", "git_dir"):
                try:
                    os.remove(os.path.join(getattr(repo, dir_attr), ref.path))
                except OSError:
                    # Best effort: a missing or unremovable file is not an error here.
                    pass
        # END for each ref

    @classmethod
    def create(cls, *args: Any, **kwargs: Any) -> NoReturn:
        """Raise :exc:`TypeError`. Defined so the ``create`` method is disabled."""
        raise TypeError("Cannot explicitly create remote references")

View File

@@ -0,0 +1,926 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
__all__ = ["SymbolicReference"]
import os
from gitdb.exc import BadName, BadObject
from git.compat import defenc
from git.objects.base import Object
from git.objects.commit import Commit
from git.refs.log import RefLog
from git.util import (
LockedFD,
assure_directory_exists,
hex_to_bin,
join_path,
join_path_native,
to_native_path_linux,
)
# typing ------------------------------------------------------------------
from typing import (
Any,
Iterator,
List,
TYPE_CHECKING,
Tuple,
Type,
TypeVar,
Union,
cast,
)
from git.types import AnyGitObject, PathLike
if TYPE_CHECKING:
from git.config import GitConfigParser
from git.objects.commit import Actor
from git.refs import Head, TagReference, RemoteReference, Reference
from git.refs.log import RefLogEntry
from git.repo import Repo
T_References = TypeVar("T_References", bound="SymbolicReference")
# ------------------------------------------------------------------------------
def _git_dir(repo: "Repo", path: Union[PathLike, None]) -> PathLike:
    """Find the git dir that is appropriate for the path.

    :return:
        ``repo.git_dir`` for the names ``HEAD``, ``ORIG_HEAD``, ``FETCH_HEAD``,
        ``index`` and ``logs``; ``repo.common_dir`` for everything else.
    """
    git_dir_names = ("HEAD", "ORIG_HEAD", "FETCH_HEAD", "index", "logs")
    if format(path) in git_dir_names:
        return repo.git_dir
    return repo.common_dir
class SymbolicReference:
"""Special case of a reference that is symbolic.
This does not point to a specific commit, but to another
:class:`~git.refs.head.Head`, which itself specifies a commit.
A typical example for a symbolic reference is :class:`~git.refs.head.HEAD`.
"""
# Instances carry exactly the two attributes assigned in __init__.
__slots__ = ("repo", "path")
# NOTE(review): not read anywhere in the visible code; Reference overrides it
# with True - presumably consulted when creating refs, confirm against create().
_resolve_ref_on_create = False
# When true, set_reference() raises TypeError for objects that are not commits.
_points_to_commits_only = True
# Prefix used by to_full_path(); an empty value means paths are returned as given.
_common_path_default = ""
# Reused by RemoteReference as its own _common_path_default.
_remote_common_path_default = "refs/remotes"
# NOTE(review): presumably names the attribute used to identify items in
# iterable collections - confirm against the IterableObj machinery.
_id_attribute_ = "name"
def __init__(self, repo: "Repo", path: PathLike, check_path: bool = False) -> None:
    """Bind this reference to its repository and path.

    :param repo:
        Repository the reference belongs to.

    :param path:
        Path of the reference; stored exactly as given.

    :param check_path:
        Accepted for signature compatibility; not used here.
    """
    self.repo, self.path = repo, path
def __str__(self) -> str:
    """:return: The path of this reference as a string."""
    own_path = self.path
    return str(own_path)
def __repr__(self) -> str:
    """:return: ``<git.<ClassName> "<path>">``"""
    class_name = self.__class__.__name__
    return '<git.%s "%s">' % (class_name, self.path)
def __eq__(self, other: object) -> bool:
    """Two references are equal if `other` has a ``path`` equal to ours; objects
    without a ``path`` attribute never compare equal."""
    if not hasattr(other, "path"):
        return False
    other = cast(SymbolicReference, other)
    return self.path == other.path
def __ne__(self, other: object) -> bool:
    """:return: The negation of ``self == other``."""
    is_equal = self == other
    return not is_equal
def __hash__(self) -> int:
    """:return: Hash of our path, matching the path-based equality."""
    own_path = self.path
    return hash(own_path)
@property
def name(self) -> str:
    """
    :return:
        The path itself, converted to a string - for symbolic references there is
        no shorter name to assume.
    """
    own_path = self.path
    return str(own_path)
@property
def abspath(self) -> PathLike:
    """:return: Our path joined onto the git directory chosen by ``_git_dir``."""
    base_dir = _git_dir(self.repo, self.path)
    return join_path_native(base_dir, self.path)
@classmethod
def _get_packed_refs_path(cls, repo: "Repo") -> str:
    """:return: Path of the ``packed-refs`` file below ``repo.common_dir``."""
    common_dir = repo.common_dir
    return os.path.join(common_dir, "packed-refs")
@classmethod
def _iter_packed_refs(cls, repo: "Repo") -> Iterator[Tuple[str, str]]:
    """Yield ``(sha, path)`` string pairs for the refs listed in the packed-refs
    file. Nothing is yielded if the file cannot be opened.

    :raise TypeError:
        If the ``# pack-refs with:`` header does not mention ``peeled``.

    :note:
        The packed refs file will be kept open as long as we iterate.
    """
    try:
        with open(cls._get_packed_refs_path(repo), "rt", encoding="UTF-8") as packed_file:
            for raw_line in packed_file:
                entry = raw_line.strip()

                # Blank lines carry nothing. Lines starting with "^" are
                # dereferenced tag objects - the previous line was the actual
                # tag reference for them.
                if not entry or entry.startswith("^"):
                    continue

                if entry.startswith("#"):
                    # "# pack-refs with: peeled fully-peeled sorted"
                    # the git source code shows "peeled",
                    # "fully-peeled" and "sorted" as the keywords
                    # that can go on this line, as per comments in git file
                    # refs/packed-backend.c
                    # I looked at master on 2017-10-11,
                    # commit 111ef79afe, after tag v2.15.0-rc1
                    # from repo https://github.com/git/git.git
                    if entry.startswith("# pack-refs with:") and "peeled" not in entry:
                        raise TypeError("PackingType of packed-Refs not understood: %r" % entry)
                    # Any other comment is simply skipped.
                    continue

                yield cast(Tuple[str, str], tuple(entry.split(" ", 1)))
            # END for each line
    except OSError:
        return None
    # END no packed-refs file handling
@classmethod
def dereference_recursive(cls, repo: "Repo", ref_path: Union[PathLike, None]) -> str:
    """
    :return:
        hexsha stored in the reference at the given `ref_path`, recursively
        dereferencing all intermediate references as required

    :param repo:
        The repository containing the reference at `ref_path`.

    :param ref_path:
        Path of the reference to start from.

    :raise ValueError:
        If the chain of symbolic references leads back to a reference that was
        already visited, which would otherwise never terminate.
    """
    visited = set()
    while True:
        # Track every path we pass through, compared by its string form.
        key = str(ref_path)
        if key in visited:
            raise ValueError("Reference at %r is part of a symbolic reference cycle" % ref_path)
        visited.add(key)

        hexsha, ref_path = cls._get_ref_info(repo, ref_path)
        if hexsha is not None:
            return hexsha
    # END recursive dereferencing
@staticmethod
def _check_ref_name_valid(ref_path: PathLike) -> None:
    """Check a ref name for validity.

    This is based on the rules described in :manpage:`git-check-ref-format(1)`.

    :raise ValueError:
        If the string form of `ref_path` breaks one of the rules checked below.
    """
    name = str(ref_path)
    prev = None  # Character before the current one.
    prev_prev = None  # Character before `prev`.

    for char in name:
        if char in " ~^:?*[\\":
            raise ValueError(
                f"Invalid reference '{ref_path}': references cannot contain spaces, tildes (~), carets (^),"
                f" colons (:), question marks (?), asterisks (*), open brackets ([) or backslashes (\\)"
            )

        if char == ".":
            if prev is None or prev == "/":
                raise ValueError(
                    f"Invalid reference '{ref_path}': references cannot start with a period (.) or contain '/.'"
                )
            if prev == ".":
                raise ValueError(f"Invalid reference '{ref_path}': references cannot contain '..'")
        elif char == "/":
            if prev == "/":
                raise ValueError(f"Invalid reference '{ref_path}': references cannot contain '//'")
            if prev is None:
                raise ValueError(
                    f"Invalid reference '{ref_path}': references cannot start with forward slashes '/'"
                )
        elif char == "{" and prev == "@":
            raise ValueError(f"Invalid reference '{ref_path}': references cannot contain '@{{'")
        elif ord(char) < 32 or ord(char) == 127:
            raise ValueError(f"Invalid reference '{ref_path}': references cannot contain ASCII control characters")

        prev_prev, prev = prev, char

    # Checks that depend on how the name ends, or on the name as a whole.
    if prev == ".":
        raise ValueError(f"Invalid reference '{ref_path}': references cannot end with a period (.)")
    if prev == "/":
        raise ValueError(f"Invalid reference '{ref_path}': references cannot end with a forward slash (/)")
    if prev == "@" and prev_prev is None:
        raise ValueError(f"Invalid reference '{ref_path}': references cannot be '@'")
    if any(component.endswith(".lock") for component in name.split("/")):
        raise ValueError(
            f"Invalid reference '{ref_path}': references cannot have slash-separated components that end with"
            " '.lock'"
        )
@classmethod
def _get_ref_info_helper(
    cls, repo: "Repo", ref_path: Union[PathLike, None]
) -> Union[Tuple[str, None], Tuple[None, str]]:
    """
    :return:
        *(str(sha), str(target_ref_path))*, where:

        * *sha* is of the file at rela_path points to if available, or ``None``.
        * *target_ref_path* is the reference we point to, or ``None``.

    :raise ValueError:
        If `ref_path` is not a valid ref name, if the reference exists neither as a
        file nor in the packed refs, or if its content is empty or cannot be
        parsed.
    """
    if ref_path:
        cls._check_ref_name_valid(ref_path)

    tokens: Union[None, List[str], Tuple[str, str]] = None
    repodir = _git_dir(repo, ref_path)
    try:
        with open(os.path.join(repodir, str(ref_path)), "rt", encoding="UTF-8") as fp:
            value = fp.read().rstrip()
        # Don't only split on spaces, but on whitespace, which allows to parse lines like:
        # 60b64ef992065e2600bfef6187a97f92398a9144                branch 'master' of git-server:/path/to/repo
        tokens = value.split()
    except OSError:
        # Probably we are just packed. Find our entry in the packed refs file.
        # NOTE: We are not a symbolic ref if we are in a packed file, as these
        # are excluded explicitly.
        # Packed paths are plain strings, so compare against the string form of
        # `ref_path`; a path-like object would never compare equal to a str.
        wanted_path = str(ref_path)
        for sha, path in cls._iter_packed_refs(repo):
            if path != wanted_path:
                continue
            # sha will be used.
            tokens = sha, path
            break
        # END for each packed ref
    # END handle packed refs

    if tokens is None:
        raise ValueError("Reference at %r does not exist" % ref_path)

    # An empty ref file has no usable content. Raise explicitly instead of relying
    # on an assert, which is stripped when running with -O.
    if not tokens:
        raise ValueError("Reference at %r is empty" % ref_path)

    # Is it a reference? A lone "ref:" without a target falls through to the
    # parse error below.
    if tokens[0] == "ref:" and len(tokens) > 1:
        return (None, tokens[1])

    # It's a commit.
    if repo.re_hexsha_only.match(tokens[0]):
        return (tokens[0], None)

    raise ValueError("Failed to parse reference information from %r" % ref_path)
@classmethod
def _get_ref_info(cls, repo: "Repo", ref_path: Union[PathLike, None]) -> Union[Tuple[str, None], Tuple[None, str]]:
    """Delegate to :meth:`_get_ref_info_helper`.

    :return:
        *(str(sha), str(target_ref_path))*, where:

        * *sha* is of the file at rela_path points to if available, or ``None``.
        * *target_ref_path* is the reference we point to, or ``None``.
    """
    ref_info = cls._get_ref_info_helper(repo, ref_path)
    return ref_info
def _get_object(self) -> AnyGitObject:
    """
    :return:
        The object our ref currently refers to. Refs can be cached, they will always
        point to the actual object as it gets re-created on each query.
    """
    # We have to be dynamic here as we may be a tag which can point to anything.
    # Resolve our path to a hexsha first, then build the object from it.
    hexsha = self.dereference_recursive(self.repo, self.path)
    return Object.new_from_sha(self.repo, hex_to_bin(hexsha))
def _get_commit(self) -> "Commit":
    """
    :return:
        :class:`~git.objects.commit.Commit` object we point to. This works for
        detached and non-detached :class:`SymbolicReference` instances. The symbolic
        reference will be dereferenced recursively.

    :raise TypeError:
        If the object we point to (after unwrapping one tag) is not a commit.
    """
    target = self._get_object()
    if target.type == "tag":
        # Step from the tag to the object it tags.
        target = target.object

    if target.type == Commit.type:
        return target
    raise TypeError("Symbolic Reference pointed to object %r, commit was required" % target)
def set_commit(
    self,
    commit: Union[Commit, "SymbolicReference", str],
    logmsg: Union[str, None] = None,
) -> "SymbolicReference":
    """Like :meth:`set_object`, but restricts the type of object to be a
    :class:`~git.objects.commit.Commit`.

    :raise ValueError:
        If `commit` is not a :class:`~git.objects.commit.Commit` object, nor does it
        point to a commit.

    :return:
        self
    """
    # Determine the type of whatever `commit` stands for. Anything that is neither
    # an object nor a reference is handed to rev_parse.
    if isinstance(commit, Object):
        found_type = commit.type
    elif isinstance(commit, SymbolicReference):
        found_type = commit.object.type
    else:
        try:
            found_type = self.repo.rev_parse(commit).type
        except (BadObject, BadName) as e:
            raise ValueError("Invalid object: %s" % commit) from e

    if found_type != Commit.type:
        raise ValueError("Need commit, got %r" % commit)

    # We leave strings to the rev-parse method below.
    self.set_object(commit, logmsg)

    return self
def set_object(
    self,
    object: Union[AnyGitObject, "SymbolicReference", str],
    logmsg: Union[str, None] = None,
) -> "SymbolicReference":
    """Set the object we point to, possibly dereference our symbolic reference
    first. If the reference does not exist, it will be created.

    :param object:
        A refspec, a :class:`SymbolicReference` or an
        :class:`~git.objects.base.Object` instance.

        * :class:`SymbolicReference` instances will be dereferenced beforehand to
          obtain the git object they point to.
        * :class:`~git.objects.base.Object` instances must represent git objects
          (:class:`~git.types.AnyGitObject`).

    :param logmsg:
        If not ``None``, the message will be used in the reflog entry to be written.
        Otherwise the reflog is not altered.

    :note:
        Plain :class:`SymbolicReference` instances may not actually point to objects
        by convention.

    :return:
        self
    """
    if isinstance(object, SymbolicReference):
        object = object.object  # @ReservedAssignment

    # A reference that cannot be read yet is handled like a detached one.
    try:
        detached = self.is_detached
    except ValueError:
        detached = True

    if detached:
        return self.set_reference(object, logmsg)

    # Not detached: set the object on the reference we point to.
    return self._get_reference().set_object(object, logmsg)
# Read via _get_commit(); assignment goes through set_commit() without a log message.
commit = property(
_get_commit,
set_commit, # type: ignore[arg-type]
doc="Query or set commits directly",
)
# Read via _get_object(); assignment goes through set_object() without a log message.
# NOTE: this class attribute shadows the builtin `object` inside the class body.
object = property(
_get_object,
set_object, # type: ignore[arg-type]
doc="Return the object our ref currently refers to",
)
def _get_reference(self) -> "SymbolicReference":
    """
    :return:
        :class:`~git.refs.reference.Reference` object we point to

    :raise TypeError:
        If this symbolic reference is detached, hence it doesn't point to a
        reference, but to a commit.
    """
    sha, target_ref_path = self._get_ref_info(self.repo, self.path)
    if target_ref_path is not None:
        return self.from_path(self.repo, target_ref_path)
    raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha))
def set_reference(
    self,
    ref: Union[AnyGitObject, "SymbolicReference", str],
    logmsg: Union[str, None] = None,
) -> "SymbolicReference":
    """Set ourselves to the given `ref`.

    It will stay a symbol if the `ref` is a :class:`~git.refs.reference.Reference`.

    Otherwise a git object, specified as a :class:`~git.objects.base.Object`
    instance or refspec, is assumed. If it is valid, this reference will be set to
    it, which effectively detaches the reference if it was a purely symbolic one.

    :param ref:
        A :class:`SymbolicReference` instance, an :class:`~git.objects.base.Object`
        instance (specifically an :class:`~git.types.AnyGitObject`), or a refspec
        string. Only if the ref is a :class:`SymbolicReference` instance, we will
        point to it. Everything else is dereferenced to obtain the actual object.

    :param logmsg:
        If set to a string, the message will be used in the reflog.
        Otherwise, a reflog entry is not written for the changed reference.
        The previous commit of the entry will be the commit we point to now.

        See also: :meth:`log_append`

    :raise ValueError:
        If `ref` is a string that cannot be resolved, or of an unsupported type.

    :raise TypeError:
        If this reference type only accepts commits and `ref` resolves to
        something else.

    :return:
        self

    :note:
        This symbolic reference will not be dereferenced. For that, see
        :meth:`set_object`.
    """
    # Work out what gets written to the ref file, and which object (if any) has
    # to pass the type check.
    target_obj = None
    if isinstance(ref, SymbolicReference):
        content = "ref: %s" % ref.path
    elif isinstance(ref, Object):
        target_obj = ref
        content = ref.hexsha
    elif isinstance(ref, str):
        try:
            target_obj = self.repo.rev_parse(ref + "^{}")  # Optionally dereference tags.
            content = target_obj.hexsha
        except (BadObject, BadName) as e:
            raise ValueError("Could not extract object from %s" % ref) from e
    else:
        raise ValueError("Unrecognized Value: %r" % ref)

    # typecheck
    if target_obj is not None and self._points_to_commits_only:
        if target_obj.type != Commit.type:
            raise TypeError("Require commit, got %r" % target_obj)

    # The old sha must be read before the file is overwritten below.
    want_log = logmsg is not None
    previous_binsha: bytes = b""
    if want_log:
        try:
            previous_binsha = self.commit.binsha
        except ValueError:
            # Nothing readable there yet.
            previous_binsha = Commit.NULL_BIN_SHA

    ref_file = self.abspath
    assure_directory_exists(ref_file, is_file=True)

    locked = LockedFD(ref_file)
    stream = locked.open(write=True, stream=True)
    try:
        stream.write(content.encode("utf-8") + b"\n")
        locked.commit()
    except BaseException:
        locked.rollback()
        raise

    # Adjust the reflog
    if want_log:
        self.log_append(previous_binsha, logmsg)

    return self
# Aliased reference
# Read via _get_reference() (raises TypeError when detached); assignment goes
# through set_reference() without a log message.
reference: Union["Head", "TagReference", "RemoteReference", "Reference"]
reference = property( # type: ignore[assignment]
_get_reference,
set_reference, # type: ignore[arg-type]
doc="Returns the Reference we point to",
)
# Short alias for the very same property object.
ref = reference
def is_valid(self) -> bool:
    """
    :return:
        ``True`` if the reference is valid, hence it can be read and points to a
        valid object or reference. ``False`` if reading ``self.object`` raises
        :exc:`OSError` or :exc:`ValueError`.
    """
    try:
        self.object  # noqa: B018
    except (OSError, ValueError):
        return False
    return True
@property
def is_detached(self) -> bool:
    """
    :return:
        ``True`` if we are a detached reference, hence we point to a specific commit
        instead to another reference. Detected by ``self.ref`` raising
        :exc:`TypeError`.
    """
    try:
        self.ref  # noqa: B018
    except TypeError:
        return True
    return False
def log(self) -> "RefLog":
    """
    :return:
        :class:`~git.refs.log.RefLog` for this reference.
        Its last entry reflects the latest change applied to this reference.

    :note:
        The log is parsed on every call, so keep the result around instead of
        calling this method repeatedly. It should be considered read-only.
    """
    reflog_path = RefLog.path(self)
    return RefLog.from_file(reflog_path)
def log_append(
    self,
    oldbinsha: bytes,
    message: Union[str, None],
    newbinsha: Union[bytes, None] = None,
) -> "RefLogEntry":
    """Append a logentry to the logfile of this ref.

    :param oldbinsha:
        Binary sha this ref used to point to.

    :param message:
        A message describing the change. ``None`` is recorded as an empty message.

    :param newbinsha:
        The sha the ref points to now. If None, our current commit sha will be used.

    :return:
        The added :class:`~git.refs.log.RefLogEntry` instance.
    """
    # NOTE: We use the committer of the currently active commit - this should be
    # correct to allow overriding the committer on a per-commit level.
    # See https://github.com/gitpython-developers/GitPython/pull/146.
    try:
        committer_or_reader: Union["Actor", "GitConfigParser"] = self.commit.committer
    except ValueError:
        # Handles newly cloned repositories: fall back to the configuration.
        committer_or_reader = self.repo.config_reader()

    new_sha = self.commit.binsha if newbinsha is None else newbinsha
    text = "" if message is None else message

    return RefLog.append_entry(committer_or_reader, RefLog.path(self), oldbinsha, new_sha, text)
def log_entry(self, index: int) -> "RefLogEntry":
    """Fetch a single entry of this reference's reflog.

    :param index:
        Python list compatible positive or negative index.

    :return:
        :class:`~git.refs.log.RefLogEntry` at the given index.

    :note:
        Part of the reflog has to be read on every call, so use this sparingly or
        when only one entry is needed. In that case it is faster than :meth:`log`.
    """
    reflog_file = RefLog.path(self)
    return RefLog.entry_at(reflog_file, index)
@classmethod
def to_full_path(cls, path: Union[PathLike, "SymbolicReference"]) -> PathLike:
    """Expand a possibly partial reference path using the class' common path.

    :param path:
        A path, or a :class:`SymbolicReference` whose ``path`` is used.

    :return:
        String with a full repository-relative path which can be used to initialize
        a :class:`~git.refs.reference.Reference` instance, for instance by using
        :meth:`Reference.from_path <git.refs.reference.Reference.from_path>`.
    """
    if isinstance(path, SymbolicReference):
        path = path.path

    # Classes without a common path have nothing to prepend.
    if not cls._common_path_default:
        return path

    # Paths already located below the common path are returned unchanged.
    if str(path).startswith(cls._common_path_default + "/"):
        return path

    return "%s/%s" % (cls._common_path_default, path)
@classmethod
def delete(cls, repo: "Repo", path: PathLike) -> None:
    """Delete the reference at the given path, together with its reflog.

    A loose reference file is removed directly. If no such file exists, the entry
    is removed from the packed-refs file instead, if it is listed there.

    :param repo:
        Repository to delete the reference from.

    :param path:
        Short or full path pointing to the reference, e.g. ``refs/myreference`` or
        just ``myreference``, hence ``refs/`` is implied.
        Alternatively the symbolic reference to be deleted.
    """
    full_ref_path = cls.to_full_path(path)
    loose_ref_file = os.path.join(repo.common_dir, full_ref_path)

    if os.path.exists(loose_ref_file):
        os.remove(loose_ref_file)
    else:
        # No loose file: look for the reference in the packed refs.
        packed_refs_file = cls._get_packed_refs_path(repo)
        try:
            with open(packed_refs_file, "rb") as reader:
                kept_lines = []
                rewrite_needed = False
                previous_dropped = False
                for raw_line in reader:
                    line = raw_line.decode(defenc)
                    _, _, line_ref = line.partition(" ")
                    line_ref = line_ref.strip()

                    # Comments and lines naming another ref are kept...
                    keep = line.startswith("#") or full_ref_path != line_ref
                    # ...unless the line is a "^" line directly following a
                    # dropped line: it belongs to the deleted tag reference.
                    if keep and previous_dropped and line.startswith("^"):
                        keep = False

                    if keep:
                        kept_lines.append(line)
                        previous_dropped = False
                    else:
                        rewrite_needed = True
                        previous_dropped = True

            if rewrite_needed:
                # Binary writing is required, otherwise Windows will open the file
                # in text mode and change LF to CRLF!
                with open(packed_refs_file, "wb") as fd:
                    fd.writelines(kept.encode(defenc) for kept in kept_lines)
        except OSError:
            pass  # It didn't exist at all.

    # Remove the reflog as well, if there is one.
    reflog_path = RefLog.path(cls(repo, full_ref_path))
    if os.path.isfile(reflog_path):
        os.remove(reflog_path)
@classmethod
def _create(
    cls: Type[T_References],
    repo: "Repo",
    path: PathLike,
    resolve: bool,
    reference: Union["SymbolicReference", str],
    force: bool,
    logmsg: Union[str, None] = None,
) -> T_References:
    """Internal method used to create a new symbolic reference.

    With `resolve` being ``False`` the reference is taken as is, which creates a
    proper symbolic reference. Otherwise it is resolved to the corresponding object
    first and a detached symbolic reference is created instead.

    :raise OSError:
        If `force` is ``False`` and a reference file with different content already
        exists at the target path.
    """
    git_dir = _git_dir(repo, path)
    full_ref_path = cls.to_full_path(path)
    abs_ref_path = os.path.join(git_dir, full_ref_path)

    # Work out what the new reference is going to point to.
    target = repo.rev_parse(str(reference)) if resolve else reference

    if not force and os.path.isfile(abs_ref_path):
        # Build the content the file would get, to compare it with what is there.
        if isinstance(target, SymbolicReference):
            wanted_content = str(target.path)
        else:
            wanted_content = str(target)
        if not resolve:
            wanted_content = "ref: " + wanted_content

        with open(abs_ref_path, "rb") as fd:
            current_content = fd.read().decode(defenc).strip()

        if current_content != wanted_content:
            raise OSError(
                "Reference at %r does already exist, pointing to %r, requested was %r"
                % (full_ref_path, current_content, wanted_content)
            )

    new_ref = cls(repo, full_ref_path)
    new_ref.set_reference(target, logmsg)
    return new_ref
@classmethod
def create(
    cls: Type[T_References],
    repo: "Repo",
    path: PathLike,
    reference: Union["SymbolicReference", str] = "HEAD",
    logmsg: Union[str, None] = None,
    force: bool = False,
    **kwargs: Any,
) -> T_References:
    """Create a new symbolic reference: a reference pointing to another reference.

    :param repo:
        Repository to create the reference in.

    :param path:
        Full path at which the new symbolic reference is supposed to be created at,
        e.g. ``NEW_HEAD`` or ``symrefs/my_new_symref``.

    :param reference:
        The reference which the new symbolic reference should point to.
        If it is a commit-ish, the symbolic ref will be detached.

    :param logmsg:
        If not ``None``, the message to append to the reflog.
        If ``None``, no reflog entry is written.

    :param force:
        If ``True``, force creation even if a symbolic reference with that name
        already exists. Raise :exc:`OSError` otherwise.

    :param kwargs:
        Accepted but not used by this implementation.

    :return:
        Newly created symbolic reference

    :raise OSError:
        If a (Symbolic)Reference with the same name but different contents already
        exists.

    :note:
        This does not alter the current HEAD, index or working tree.
    """
    # Whether the target is resolved to an object first is a per-class setting.
    resolve = cls._resolve_ref_on_create
    return cls._create(repo, path, resolve, reference, force, logmsg)
def rename(self, new_path: PathLike, force: bool = False) -> "SymbolicReference":
    """Move this reference's file to a new path and update :attr:`path`.

    :param new_path:
        Either a simple name or a full path, e.g. ``new_name`` or
        ``features/new_name``.
        The prefix ``refs/`` is implied for references and will be set as needed.
        In case this is a symbolic ref, there is no implied prefix.

    :param force:
        If ``True``, the rename will succeed even if a head with the target name
        already exists. It will be overwritten in that case.

    :return:
        self

    :raise OSError:
        If a file at path but with different contents already exists.
    """
    new_path = self.to_full_path(new_path)
    if self.path == new_path:
        return self

    target_file = os.path.join(_git_dir(self.repo, new_path), new_path)
    source_file = os.path.join(_git_dir(self.repo, self.path), self.path)

    if os.path.isfile(target_file):
        if not force:
            # Identical content on both sides is not treated as a conflict.
            with open(target_file, "rb") as target_fd:
                target_content = target_fd.read().strip()
            with open(source_file, "rb") as source_fd:
                source_content = source_fd.read().strip()
            if target_content != source_content:
                raise OSError("File at path %r already exists" % target_file)
        # The existing target is removed in either case before moving.
        os.remove(target_file)

    parent_dir = os.path.dirname(target_file)
    if not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)

    os.rename(source_file, target_file)
    self.path = new_path

    return self
@classmethod
def _iter_items(
    cls: Type[T_References], repo: "Repo", common_path: Union[PathLike, None] = None
) -> Iterator[T_References]:
    """Yield reference objects for all loose and packed refs below `common_path`.

    :param repo:
        Repository whose ``common_dir`` is searched.

    :param common_path:
        Path the yielded references have in common. ``None`` selects the class'
        ``_common_path_default``.

    :return:
        Iterator over the references, in the sorted order of their relative paths.
        Paths for which :meth:`from_path` raises :exc:`ValueError` are skipped.
    """
    if common_path is None:
        common_path = cls._common_path_default
    found_paths = set()

    # Loose refs first. Links are currently not followed.
    for root, dirs, files in os.walk(join_path_native(repo.common_dir, common_path)):
        # Above the "refs" folder, restrict the walk to "refs" when it is present.
        if "refs" not in root.split(os.sep) and "refs" in dirs:
            dirs[:] = ["refs"]

        for file_name in files:
            if file_name != "packed-refs":
                abs_path = to_native_path_linux(join_path(root, file_name))
                found_paths.add(abs_path.replace(to_native_path_linux(repo.common_dir) + "/", ""))

    # Then packed refs located below the common path.
    for _sha, rela_path in cls._iter_packed_refs(repo):
        if rela_path.startswith(str(common_path)):
            found_paths.add(rela_path)

    for ref_path in sorted(found_paths):
        try:
            yield cls.from_path(repo, ref_path)
        except ValueError:
            continue
@classmethod
def iter_items(
    cls: Type[T_References],
    repo: "Repo",
    common_path: Union[PathLike, None] = None,
    *args: Any,
    **kwargs: Any,
) -> Iterator[T_References]:
    """Find all refs in the repository.

    :param repo:
        The :class:`~git.repo.base.Repo`.

    :param common_path:
        Optional keyword argument to the path which is to be shared by all returned
        Ref objects.
        Defaults to class specific portion if ``None``, ensuring that only refs
        suitable for the actual class are returned.

    :return:
        A lazy iterator over what :meth:`_iter_items` produces, keeping items whose
        class is exactly :class:`SymbolicReference` as well as all items that are
        not detached. Items come in lexicographical order and are instances of
        concrete subclasses, such as :class:`~git.refs.head.Head` or
        :class:`~git.refs.tag.TagReference`.
    """
    candidates = cls._iter_items(repo, common_path)
    return (ref for ref in candidates if ref.__class__ is SymbolicReference or not ref.is_detached)
@classmethod
def from_path(cls: Type[T_References], repo: "Repo", path: PathLike) -> T_References:
    """Make a symbolic reference from a path.

    The reference types are tried in a fixed order and the first one that accepts
    the path wins.

    :param path:
        Full ``.git``-directory-relative path name to the Reference to instantiate.

    :note:
        Use :meth:`to_full_path` if you only have a partial path of a known
        Reference type.

    :return:
        Instance of type :class:`~git.refs.reference.Reference`,
        :class:`~git.refs.head.Head`, or :class:`~git.refs.tag.Tag`, depending on
        the given path.

    :raise ValueError:
        If `path` is empty, or if none of the types can handle it.
    """
    if not path:
        raise ValueError("Cannot create Reference from %r" % path)

    # Names like HEAD are inserted after the refs module is imported - we have an
    # import dependency cycle and don't want to import these names in-function.
    from . import HEAD, Head, RemoteReference, TagReference, Reference

    candidate_types = (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference)
    for ref_type in candidate_types:
        try:
            instance: T_References = ref_type(repo, path)
            # A plain SymbolicReference is only accepted while it is not detached.
            if instance.__class__ is SymbolicReference and instance.is_detached:
                raise ValueError("SymbolicRef was detached, we drop it")
            return instance
        except ValueError:
            continue

    raise ValueError("Could not find reference type suitable to handle path %r" % path)
def is_remote(self) -> bool:
    """:return: True if this symbolic reference points to a remote branch"""
    own_path = str(self.path)
    remote_prefix = self._remote_common_path_default + "/"
    return own_path.startswith(remote_prefix)

View File

@@ -0,0 +1,155 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Provides a :class:`~git.refs.reference.Reference`-based type for lightweight tags.
This defines the :class:`TagReference` class (and its alias :class:`Tag`), which
represents lightweight tags. For annotated tags (which are git objects), see the
:mod:`git.objects.tag` module.
"""
__all__ = ["TagReference", "Tag"]
from .reference import Reference
# typing ------------------------------------------------------------------
from typing import Any, TYPE_CHECKING, Type, Union
from git.types import AnyGitObject, PathLike
if TYPE_CHECKING:
from git.objects import Commit, TagObject
from git.refs import SymbolicReference
from git.repo import Repo
# ------------------------------------------------------------------------------
class TagReference(Reference):
    """A tag reference which points either directly to a commit (or another object),
    or to a tag object. In the tag-object case additional information, like the
    signature or the tag-creator, is available through :attr:`tag`.

    Accessing the tagged commit and the optional tag object::

        tagref = TagReference.list_items(repo)[0]
        print(tagref.commit.message)
        if tagref.tag is not None:
            print(tagref.tag.message)
    """

    __slots__ = ()

    _common_default = "tags"
    _common_path_default = Reference._common_path_default + "/" + _common_default

    @property
    def commit(self) -> "Commit":  # type: ignore[override]  # LazyMixin has unrelated commit method
        """:return: Commit object the tag ref points to

        :raise ValueError:
            If the tag points to a tree or blob.
        """
        obj = self.object
        while obj.type != "commit":
            if obj.type == "tag":
                # It is a tag object which carries the commit as an object - we can point to anything.
                obj = obj.object
            else:
                raise ValueError(
                    (
                        "Cannot resolve commit as tag %s points to a %s object - "
                        + "use the `.object` property instead to access it"
                    )
                    % (self, obj.type)
                )
        return obj

    @property
    def tag(self) -> Union["TagObject", None]:
        """
        :return:
            Tag object this tag ref points to, or ``None`` in case we are a lightweight
            tag
        """
        obj = self.object
        if obj.type == "tag":
            return obj
        return None

    # Make object read-only. It should be reasonably hard to adjust an existing tag.
    @property
    def object(self) -> AnyGitObject:  # type: ignore[override]
        """:return: The object this tag reference points to, as read by
        ``Reference._get_object``. No setter is provided."""
        return Reference._get_object(self)

    @classmethod
    def create(
        cls: Type["TagReference"],
        repo: "Repo",
        path: PathLike,
        reference: Union[str, "SymbolicReference"] = "HEAD",
        logmsg: Union[str, None] = None,
        force: bool = False,
        **kwargs: Any,
    ) -> "TagReference":
        """Create a new tag reference.

        :param repo:
            The :class:`~git.repo.base.Repo` to create the tag in.

        :param path:
            The name of the tag, e.g. ``1.0`` or ``releases/1.0``.
            The prefix ``refs/tags`` is implied.

        :param reference:
            A reference to the :class:`~git.objects.base.Object` you want to tag.
            The referenced object can be a commit, tree, or blob.

        :param logmsg:
            If not ``None``, the message will be used in your tag object. This will also
            create an additional tag object that allows to obtain that information,
            e.g.::

                tagref.tag.message

        :param message:
            Synonym for the `logmsg` parameter. Included for backwards compatibility.
            `logmsg` takes precedence if both are passed.

        :param ref:
            Keyword-only synonym for `reference`. A truthy value overrides
            `reference`.

        :param force:
            If ``True``, force creation of a tag even though that tag already exists.

        :param kwargs:
            Additional keyword arguments to be passed to :manpage:`git-tag(1)`.

        :return:
            A new :class:`TagReference`.
        """
        # `ref` is consumed here. Leaving it in kwargs would forward it to git-tag
        # together with the remaining keyword arguments, and git-tag has no such
        # option.
        ref = kwargs.pop("ref", None)
        if ref:
            reference = ref

        # A truthy `message` becomes git-tag's `m` option...
        if "message" in kwargs and kwargs["message"]:
            kwargs["m"] = kwargs["message"]
            del kwargs["message"]

        # ...which `logmsg` overrides, so it takes precedence when both are given.
        if logmsg:
            kwargs["m"] = logmsg

        if force:
            kwargs["f"] = True

        args = (path, reference)

        repo.git.tag(*args, **kwargs)
        return TagReference(repo, "%s/%s" % (cls._common_path_default, path))

    @classmethod
    def delete(cls, repo: "Repo", *tags: "TagReference") -> None:  # type: ignore[override]
        """Delete the given existing tag or tags.

        :param repo:
            Repository whose ``git.tag`` command is invoked with ``-d``.

        :param tags:
            The tags to delete; they are passed on as command arguments.
        """
        repo.git.tag("-d", *tags)


# Provide an alias.
Tag = TagReference

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,8 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""Initialize the repo package."""
__all__ = ["Repo"]
from .base import Repo

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,419 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
"""General repository-related functions."""
from __future__ import annotations
__all__ = [
"rev_parse",
"is_git_dir",
"touch",
"find_submodule_git_dir",
"name_to_object",
"short_to_long",
"deref_tag",
"to_commit",
"find_worktree_git_dir",
]
import os
import os.path as osp
from pathlib import Path
import stat
from string import digits
from gitdb.exc import BadName, BadObject
from git.cmd import Git
from git.exc import WorkTreeRepositoryUnsupported
from git.objects import Object
from git.refs import SymbolicReference
from git.util import cygpath, bin_to_hex, hex_to_bin
# Typing ----------------------------------------------------------------------
from typing import Optional, TYPE_CHECKING, Union, cast, overload
from git.types import AnyGitObject, Literal, PathLike
if TYPE_CHECKING:
from git.db import GitCmdObjectDB
from git.objects import Commit, TagObject
from git.refs.reference import Reference
from git.refs.tag import Tag
from .base import Repo
# ----------------------------------------------------------------------------
def touch(filename: str) -> str:
    """Make sure a file exists at `filename` by opening it in binary append mode.

    Nothing is written, so existing content is left as it is.

    :param filename:
        Path of the file to open.

    :return:
        The `filename` that was passed in.
    """
    open(filename, "ab").close()
    return filename
def is_git_dir(d: PathLike) -> bool:
    """Check whether `d` looks like a git directory.

    This is taken from the git setup.c:is_git_directory function.

    :param d:
        Path of the directory to inspect.

    :return:
        ``True`` if `d` is a directory containing a ``refs`` directory, an
        ``objects`` directory (or ``GIT_OBJECT_DIRECTORY`` is set in the
        environment), and a ``HEAD`` that is either a file or a link whose target
        starts with ``refs``. ``False`` otherwise.

    :raise git.exc.WorkTreeRepositoryUnsupported:
        If it sees a worktree directory. It's quite hacky to do that here, but at least
        clearly indicates that we don't support it. There is the unlikely danger to
        throw if we see directories which just look like a worktree dir, but are none.
    """
    if not osp.isdir(d):
        return False

    has_object_store = osp.isdir(osp.join(d, "objects")) or "GIT_OBJECT_DIRECTORY" in os.environ
    if has_object_store and osp.isdir(osp.join(d, "refs")):
        headref = osp.join(d, "HEAD")
        if osp.isfile(headref):
            return True
        return osp.islink(headref) and os.readlink(headref).startswith("refs")

    # These three files together are taken as the signature of a worktree directory.
    if all(osp.isfile(osp.join(d, marker)) for marker in ("gitdir", "commondir", "gitfile")):
        raise WorkTreeRepositoryUnsupported(d)

    return False
def find_worktree_git_dir(dotgit: PathLike) -> Optional[str]:
    """Search for a gitdir for this worktree.

    `dotgit` is expected to be a regular file containing ``key: value`` lines. The
    value of the first ``gitdir`` line is returned.

    :param dotgit:
        Path to the ``.git`` file of the worktree.

    :return:
        The text following ``gitdir: ``, or ``None`` if `dotgit` cannot be stat'ed,
        is not a regular file, cannot be decoded, or has no ``gitdir`` line.
    """
    try:
        statbuf = os.stat(dotgit)
    except OSError:
        return None
    if not stat.S_ISREG(statbuf.st_mode):
        return None

    try:
        lines = Path(dotgit).read_text().splitlines()
    except ValueError:
        # Covers content that cannot be decoded (UnicodeDecodeError is a ValueError).
        return None

    for line in lines:
        # Split on the first ": " only, so a value that itself contains ": " is kept
        # whole, and lines without a separator are skipped instead of aborting the
        # whole lookup.
        key, separator, value = line.strip().partition(": ")
        if separator and key == "gitdir":
            return value
    return None
def find_submodule_git_dir(d: PathLike) -> Optional[PathLike]:
    """Search for a submodule repo.

    If `d` is not a git directory itself, it is read as a file. When its content
    starts with ``gitdir: ``, the path that follows is resolved (relative to the
    directory of `d` if it is not absolute) and searched in the same way.

    :return:
        The git directory that was found, or ``None``.
    """
    if is_git_dir(d):
        return d

    try:
        with open(d) as fp:
            content = fp.read().rstrip()
    except OSError:
        # It's probably not a file.
        return None

    marker = "gitdir: "
    if not content.startswith(marker):
        return None

    path = content[len(marker):]
    if Git.is_cygwin():
        # Cygwin creates submodules prefixed with `/cygdrive/...`.
        # Cygwin git understands Cygwin paths much better than Windows ones.
        # Also the Cygwin tests are assuming Cygwin paths.
        path = cygpath(path)
    if not osp.isabs(path):
        path = osp.normpath(osp.join(osp.dirname(d), path))
    return find_submodule_git_dir(path)
def short_to_long(odb: "GitCmdObjectDB", hexsha: str) -> Optional[bytes]:
    """Expand an abbreviated hexsha using the given object database.

    :param odb:
        Object database whose ``partial_to_complete_sha_hex`` performs the lookup.

    :param hexsha:
        hexsha with less than 40 bytes.

    :return:
        Long hexadecimal sha1 from the given less than 40 byte hexsha, or ``None`` if
        the lookup raised :exc:`~gitdb.exc.BadObject`.
    """
    try:
        full_binsha = odb.partial_to_complete_sha_hex(hexsha)
        return bin_to_hex(full_binsha)
    except BadObject:
        return None
@overload
def name_to_object(repo: "Repo", name: str, return_ref: Literal[False] = ...) -> AnyGitObject: ...


@overload
def name_to_object(repo: "Repo", name: str, return_ref: Literal[True]) -> Union[AnyGitObject, SymbolicReference]: ...


def name_to_object(repo: "Repo", name: str, return_ref: bool = False) -> Union[AnyGitObject, SymbolicReference]:
    """Resolve a name to a git object, or to the reference carrying that name.

    :param repo:
        Repository to look the name up in.

    :param name:
        A hexsha (short or long) or the name of a reference.

    :param return_ref:
        If ``True``, and name specifies a reference, we will return the reference
        instead of the object. If no reference of that name is found,
        :exc:`~gitdb.exc.BadObject` is raised.

    :return:
        Object specified by the given name - hexshas (short and long) as well as
        references are supported.

    :raise gitdb.exc.BadName:
        If `return_ref` is ``False`` and the name could not be resolved at all.
    """
    hexsha: Union[None, str, bytes] = None

    # Names that look like a hexsha: 40 characters are taken as is, shorter ones are
    # expanded through the object database.
    if repo.re_hexsha_shortened.match(name):
        hexsha = name if len(name) == 40 else short_to_long(repo.odb, name)

    # Nothing found yet - what seemed to be a short hexsha may still be a reference,
    # it could be named 'aaa' for instance.
    if hexsha is None:
        ref_patterns = (
            "%s",
            "refs/%s",
            "refs/tags/%s",
            "refs/heads/%s",
            "refs/remotes/%s",
            "refs/remotes/%s/HEAD",
        )
        for pattern in ref_patterns:
            try:
                hexsha = SymbolicReference.dereference_recursive(repo, pattern % name)
                if return_ref:
                    return SymbolicReference(repo, pattern % name)
                break
            except ValueError:
                continue

    # Reaching this point with return_ref set means no reference was returned above.
    if return_ref:
        raise BadObject("Couldn't find reference named %r" % name)

    if hexsha is None:
        raise BadName(name)

    return Object.new_from_sha(repo, hex_to_bin(hexsha))
def deref_tag(tag: "Tag") -> AnyGitObject:
    """Follow ``.object`` attributes until an object without one is reached.

    :param tag:
        The tag to start from.

    :return:
        The first object in the chain whose ``object`` lookup raises
        :exc:`AttributeError`.
    """
    current = tag
    while True:
        try:
            current = current.object
        except AttributeError:
            return current
def to_commit(obj: Object) -> "Commit":
    """Convert the given object to a commit if possible and return it.

    Objects of type ``tag`` are dereferenced with :func:`deref_tag` first.

    :raise ValueError:
        If the resulting object is not of type ``commit``.
    """
    candidate = deref_tag(obj) if obj.type == "tag" else obj

    if candidate.type != "commit":
        raise ValueError("Cannot convert object %r to type commit" % candidate)
    return candidate
def rev_parse(repo: "Repo", rev: str) -> AnyGitObject:
    """Parse a revision string. Like :manpage:`git-rev-parse(1)`.

    The string is scanned from left to right for the operator characters ``^``,
    ``~``, ``:`` and ``@``. The text in front of the first operator is resolved as a
    name; each operator, optionally followed by a ``{...}`` group or a number, is then
    applied to the object found so far.

    :return:
        `~git.objects.base.Object` at the given revision.

        This may be any type of git object:

        * :class:`Commit <git.objects.commit.Commit>`
        * :class:`TagObject <git.objects.tag.TagObject>`
        * :class:`Tree <git.objects.tree.Tree>`
        * :class:`Blob <git.objects.blob.Blob>`

    :param rev:
        :manpage:`git-rev-parse(1)`-compatible revision specification as string.
        Please see :manpage:`git-rev-parse(1)` for details.

    :raise gitdb.exc.BadObject:
        If the given revision could not be found.

    :raise gitdb.exc.BadName:
        If walking parents or a tree path fails with an :exc:`IndexError` or
        :exc:`AttributeError`.

    :raise ValueError:
        If `rev` couldn't be parsed.

    :raise IndexError:
        If an invalid reflog index is specified.

    :raise NotImplementedError:
        For ``:/`` message searches and for ``@{...}`` contents that are not an
        integer.
    """
    # Are we in colon search mode?
    if rev.startswith(":/"):
        # Colon search mode
        raise NotImplementedError("commit by message search (regex)")
    # END handle search

    # Parser state:
    #   obj         - object resolved so far (None until a name was resolved)
    #   ref         - reference captured for reflog access via "@"
    #   output_type - content of the most recent {...} group
    #   start       - current scan position in rev
    #   parsed_to   - end of the part of rev that was consumed successfully
    obj: Optional[AnyGitObject] = None
    ref = None
    output_type = "commit"
    start = 0
    parsed_to = 0
    lr = len(rev)
    while start < lr:
        # Skip ahead to the next operator character.
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name.
            if start == 0:
                # An operator right at the beginning applies to the head's reference.
                ref = repo.head.ref
            else:
                if token == "@":
                    ref = cast("Reference", name_to_object(repo, rev[:start], return_ref=True))
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname
        else:
            # NOTE(review): once an object exists and a reference was captured
            # earlier, every further operator resets obj to the reference's current
            # commit. Confirm this is intended for specs that chain operators after
            # an "@{n}" reflog lookup.
            if ref is not None:
                obj = cast("Commit", ref.commit)
            # END handle ref
        # END initialize obj on first token

        start += 1

        # Try to parse {type}.
        if start < lr and rev[start] == "{":
            end = rev.find("}", start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" % rev)
            output_type = rev[start + 1 : end]  # Exclude brace.

            # Handle type.
            if output_type == "commit":
                pass  # Default.
            elif output_type == "tree":
                try:
                    obj = cast(AnyGitObject, obj)
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass  # Error raised later.
                # END exception handling
            elif output_type in ("", "blob"):
                obj = cast("TagObject", obj)
                if obj and obj.type == "tag":
                    obj = deref_tag(obj)
                else:
                    # Cannot do anything for non-tags.
                    pass
                # END handle tag
            elif token == "@":
                # try single int
                assert ref is not None, "Require Reference to access reflog"
                revlog_index = None
                try:
                    # Transform reversed index into the format of our revlog.
                    # (@{0} becomes -1, i.e. the last entry, @{1} becomes -2, ...)
                    revlog_index = -(int(output_type) + 1)
                except ValueError as e:
                    # TODO: Try to parse the other date options, using parse_date maybe.
                    raise NotImplementedError("Support for additional @{...} modes not implemented") from e
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError as e:
                    raise IndexError("Invalid revlog index: %i" % revlog_index) from e
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # Make it pass the following checks.
                output_type = ""
            else:
                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
            # END handle output type

            # Empty output types don't require any specific type, its just about
            # dereferencing tags.
            if output_type and obj and obj.type != output_type:
                raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type))
            # END verify output type

            start = end + 1  # Skip brace.
            parsed_to = start
            continue
        # END parse type

        # Try to parse a number.
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # No explicit number given, 1 is the default. It could be 0 though.
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # Handle hierarchy walk.
        try:
            obj = cast(AnyGitObject, obj)
            if token == "~":
                # Follow the first parent num times.
                obj = to_commit(obj)
                for _ in range(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # Must be n'th parent. With num == 0 the commit itself is kept.
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                # Everything after the colon is used as the key into the tree, which
                # consumes the rest of rev.
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError) as e:
            # NOTE(review): the message speaks of missing parent commits although the
            # same handler also covers AttributeError, e.g. from attribute access on
            # an obj that is still None.
            raise BadName(
                f"Invalid revision spec '{rev}' - not enough " f"parent commits to reach '{token}{int(num)}'"
            ) from e
        # END exception handling
    # END parse loop

    # Still no obj? It's probably a simple name.
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    if parsed_to != lr:
        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))

    return obj

View File

@@ -0,0 +1,285 @@
# This module is part of GitPython and is released under the
# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
import os
import sys
from typing import (
Any,
Callable,
Dict,
List,
NoReturn,
Optional,
Sequence as Sequence,
Tuple,
TYPE_CHECKING,
Type,
TypeVar,
Union,
)
import warnings
if sys.version_info >= (3, 8):
from typing import (
Literal,
Protocol,
SupportsIndex as SupportsIndex,
TypedDict,
runtime_checkable,
)
else:
from typing_extensions import (
Literal,
Protocol,
SupportsIndex as SupportsIndex,
TypedDict,
runtime_checkable,
)
if TYPE_CHECKING:
from git.objects import Commit, Tree, TagObject, Blob
from git.repo import Repo
# The second union member is written as a string, so ``os.PathLike[str]`` is kept as a
# forward reference instead of being subscripted when this module is imported.
PathLike = Union[str, "os.PathLike[str]"]
"""A :class:`str` (Unicode) based file or directory path."""
# Placeholder annotation: currently identical to ``Any``.
TBD = Any
"""Alias of :class:`~typing.Any`, when a type hint is meant to become more specific."""
_T = TypeVar("_T")
"""Type variable used internally in GitPython."""
# The members of the three unions below are string forward references: Commit, Tree,
# TagObject and Blob are imported by this module under ``TYPE_CHECKING`` only.
AnyGitObject = Union["Commit", "Tree", "TagObject", "Blob"]
"""Union of the :class:`~git.objects.base.Object`-based types that represent actual git
object types.
As noted in :class:`~git.objects.base.Object`, which has further details, these are:
* :class:`Blob <git.objects.blob.Blob>`
* :class:`Tree <git.objects.tree.Tree>`
* :class:`Commit <git.objects.commit.Commit>`
* :class:`TagObject <git.objects.tag.TagObject>`
Those GitPython classes represent the four git object types, per
:manpage:`gitglossary(7)`:
* "blob": https://git-scm.com/docs/gitglossary#def_blob_object
* "tree object": https://git-scm.com/docs/gitglossary#def_tree_object
* "commit object": https://git-scm.com/docs/gitglossary#def_commit_object
* "tag object": https://git-scm.com/docs/gitglossary#def_tag_object
For more general information on git objects and their types as git understands them:
* "object": https://git-scm.com/docs/gitglossary#def_object
* "object type": https://git-scm.com/docs/gitglossary#def_object_type
:note:
See also the :class:`Tree_ish` and :class:`Commit_ish` unions.
"""
# AnyGitObject without Blob.
Tree_ish = Union["Commit", "Tree", "TagObject"]
"""Union of :class:`~git.objects.base.Object`-based types that are typically tree-ish.
See :manpage:`gitglossary(7)` on "tree-ish":
https://git-scm.com/docs/gitglossary#def_tree-ish
:note:
:class:`~git.objects.tree.Tree` and :class:`~git.objects.commit.Commit` are the
classes whose instances are all tree-ish. This union includes them, but also
:class:`~git.objects.tag.TagObject`, only **most** of whose instances are tree-ish.
Whether a particular :class:`~git.objects.tag.TagObject` peels (recursively
dereferences) to a tree or commit, rather than a blob, can in general only be known
at runtime. In practice, git tag objects are nearly always used for tagging commits,
and such tags are tree-ish because commits are tree-ish.
:note:
See also the :class:`AnyGitObject` union of all four classes corresponding to git
object types.
"""
# Tree_ish without Tree.
Commit_ish = Union["Commit", "TagObject"]
"""Union of :class:`~git.objects.base.Object`-based types that are typically commit-ish.
See :manpage:`gitglossary(7)` on "commit-ish":
https://git-scm.com/docs/gitglossary#def_commit-ish
:note:
:class:`~git.objects.commit.Commit` is the only class whose instances are all
commit-ish. This union type includes :class:`~git.objects.commit.Commit`, but also
:class:`~git.objects.tag.TagObject`, only **most** of whose instances are
commit-ish. Whether a particular :class:`~git.objects.tag.TagObject` peels
(recursively dereferences) to a commit, rather than a tree or blob, can in general
only be known at runtime. In practice, git tag objects are nearly always used for
tagging commits, and such tags are of course commit-ish.
:note:
See also the :class:`AnyGitObject` union of all four classes corresponding to git
object types.
"""
# One literal per member of AnyGitObject.
GitObjectTypeString = Literal["commit", "tag", "blob", "tree"]
"""Literal strings identifying git object types and the
:class:`~git.objects.base.Object`-based types that represent them.
See the :attr:`Object.type <git.objects.base.Object.type>` attribute. These are its
values in :class:`~git.objects.base.Object` subclasses that represent git objects. These
literals therefore correspond to the types in the :class:`AnyGitObject` union.
These are the same strings git itself uses to identify its four object types.
See :manpage:`gitglossary(7)` on "object type":
https://git-scm.com/docs/gitglossary#def_object_type
"""
# Annotation only: no value is assigned here, so the name is not in the module
# namespace at runtime. Runtime access is answered by the module-level ``__getattr__``
# defined further down in this module, which emits a DeprecationWarning.
Lit_commit_ish: Type[Literal["commit", "tag"]]
"""Deprecated. Type of literal strings identifying typically-commitish git object types.
Prior to a bugfix, this type had been defined more broadly. Any usage is in practice
ambiguous and likely to be incorrect. This type has therefore been made a static type
error to appear in annotations. It is preserved, with a deprecated status, to avoid
introducing runtime errors in code that refers to it, but it should not be used.
Instead of this type:
* For the type of the string literals associated with :class:`Commit_ish`, use
``Literal["commit", "tag"]`` or create a new type alias for it. That is equivalent to
this type as currently defined (but usable in statically checked type annotations).
* For the type of all four string literals associated with :class:`AnyGitObject`, use
:class:`GitObjectTypeString`. That is equivalent to the old definition of this type
prior to the bugfix (and is also usable in statically checked type annotations).
"""
def _getattr(name: str) -> Any:
    """Resolve the deprecated ``Lit_commit_ish`` attribute of this module.

    :param name:
        Name of the attribute that was requested.

    :return:
        ``Literal["commit", "tag"]`` for ``Lit_commit_ish``, after emitting a
        :exc:`DeprecationWarning`.

    :raise AttributeError:
        For every other name.
    """
    if name == "Lit_commit_ish":
        deprecation_text = (
            "Lit_commit_ish is deprecated. It is currently defined as "
            '`Literal["commit", "tag"]`, which should be used in its place if desired. It '
            'had previously been defined as `Literal["commit", "tag", "blob", "tree"]`, '
            "covering all four git object type strings including those that are never "
            "commit-ish. For that, use the GitObjectTypeString type instead."
        )
        # stacklevel=2 attributes the warning to the caller of this function.
        warnings.warn(deprecation_text, DeprecationWarning, stacklevel=2)
        return Literal["commit", "tag"]

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


if not TYPE_CHECKING:  # Preserve static checking for undefined/misspelled attributes.
    __getattr__ = _getattr
def __dir__() -> List[str]:
    """List this module's attribute names, including the deprecated ``Lit_commit_ish``.

    ``Lit_commit_ish`` is only annotated at module level and never assigned, so it does
    not appear in :func:`globals` and is appended here explicitly.

    :return:
        The names of all module globals, followed by ``"Lit_commit_ish"``.
    """
    names = list(globals())
    names.append("Lit_commit_ish")
    return names
# Config_levels ---------------------------------------------------------
# The alias below is a union of four literal strings; the order in which the members
# are listed carries no meaning for a Literal type.
Lit_config_levels = Literal["system", "global", "user", "repository"]
"""Type of literal strings naming git configuration levels.
These strings relate to which file a git configuration variable is in.
"""
# Tuple element types are positional, so this describes exactly the ordering
# ("system", "user", "global", "repository"). Note that this differs from the order in
# which Lit_config_levels lists its members above.
# NOTE(review): presumably mirrors a tuple of level names kept on Repo - confirm.
ConfigLevels_Tup = Tuple[Literal["system"], Literal["user"], Literal["global"], Literal["repository"]]
"""Static type of a tuple of the four strings representing configuration levels."""
# Progress parameter type alias -----------------------------------------

CallableProgress = Optional[Callable[[int, Union[str, float], Union[str, float, None], str], None]]
"""General type of a function or other callable used as a progress reporter for cloning.

This is the type of a function or other callable that reports the progress of a clone,
when passed as a ``progress`` argument to :meth:`Repo.clone <git.repo.base.Repo.clone>`
or :meth:`Repo.clone_from <git.repo.base.Repo.clone_from>`.

:note:
    Those :meth:`~git.repo.base.Repo.clone` and :meth:`~git.repo.base.Repo.clone_from`
    methods also accept :class:`~git.util.RemoteProgress` instances, including instances
    of its :class:`~git.util.CallableRemoteProgress` subclass.

:note:
    Unlike objects that match this type, :class:`~git.util.RemoteProgress` instances are
    not directly callable, not even when they are instances of
    :class:`~git.util.CallableRemoteProgress`, which wraps a callable and forwards
    information to it but is not itself callable.

:note:
    This type also allows ``None``, for cloning without reporting progress.
"""
# -----------------------------------------------------------------------------------
def assert_never(inp: NoReturn, raise_error: bool = True, exc: Union[Exception, None] = None) -> None:
    """For use in exhaustive checking of a literal or enum in if/else chains.

    A call to this function should only be reached if not all members are handled, or if
    an attempt is made to pass non-members through the chain.

    :param inp:
        If all members are handled, the argument for `inp` will have the
        :class:`~typing.Never`/:class:`~typing.NoReturn` type.
        Otherwise, the type will mismatch and cause a mypy error.

    :param raise_error:
        If ``True``, will also raise :exc:`ValueError` with a general
        "unhandled literal" message, or the exception object passed as `exc`.
        If ``False``, the call does nothing at runtime.

    :param exc:
        If not ``None``, this should be an already-constructed exception object, to be
        raised if `raise_error` is ``True``.

    :raise ValueError:
        If `raise_error` is ``True`` and `exc` is ``None``.
    """
    if not raise_error:
        return None

    if exc is not None:
        raise exc

    raise ValueError(f"An unhandled literal ({inp!r}) in an if/else chain was found")
class Files_TD(TypedDict):
    """Dictionary with stat counts for the diff of a particular file.

    For the :attr:`~git.util.Stats.files` attribute of :class:`~git.util.Stats`
    objects.
    """

    # All four keys are required (the TypedDict is total by default).
    insertions: int
    deletions: int
    # NOTE(review): presumably insertions + deletions - confirm against the producer.
    lines: int
    # NOTE(review): presumably a git change-status code for the file - confirm.
    change_type: str
class Total_TD(TypedDict):
    """Dictionary with total stats from any number of files.

    For the :attr:`~git.util.Stats.total` attribute of :class:`~git.util.Stats`
    objects.
    """

    # All four keys are required (the TypedDict is total by default).
    insertions: int
    deletions: int
    # NOTE(review): presumably insertions + deletions - confirm against the producer.
    lines: int
    # NOTE(review): presumably the number of files covered by the totals - confirm.
    files: int
class HSH_TD(TypedDict):
    """Dictionary carrying the same information as a :class:`~git.util.Stats` object."""

    # Aggregate counts across all files.
    total: Total_TD
    # Per-file counts, keyed by path.
    files: Dict[PathLike, Files_TD]
@runtime_checkable
class Has_Repo(Protocol):
    """Protocol for having a :attr:`repo` attribute, the repository to operate on.

    Being runtime-checkable, this protocol can be used with :func:`isinstance`; such a
    check only verifies that a ``repo`` attribute is present, not what type it has.
    """

    # Quoted forward reference: ``Repo`` need not be defined when this class is created.
    repo: "Repo"
@runtime_checkable
class Has_id_attribute(Protocol):
    """Protocol for having :attr:`_id_attribute_` used in iteration and traversal.

    Being runtime-checkable, this protocol can be used with :func:`isinstance`; such a
    check only verifies that an ``_id_attribute_`` attribute is present.
    """

    _id_attribute_: str

File diff suppressed because it is too large Load Diff