Mise à jour de Monitor.py et autres scripts

This commit is contained in:
Debian
2025-07-23 10:46:27 +02:00
parent 7081418ce0
commit 7de3e0fb50
8604 changed files with 2789953 additions and 295 deletions

View File

@@ -0,0 +1,28 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from streamlit.watcher.local_sources_watcher import LocalSourcesWatcher
from streamlit.watcher.path_watcher import (
report_watchdog_availability,
watch_dir,
watch_file,
)
# Names re-exported from the watcher submodules imported above; this list is
# what `from <package> import *` exposes.
__all__ = [
    "LocalSourcesWatcher",
    "report_watchdog_availability",
    "watch_dir",
    "watch_file",
]

View File

@@ -0,0 +1,406 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Declares the EventBasedPathWatcher class, which watches given paths in the file system.
How these classes work together
-------------------------------
- EventBasedPathWatcher : each instance of this is able to watch a single
file or directory at a given path so long as there's a browser interested in
it. This uses _MultiPathWatcher to watch paths.
- _MultiPathWatcher : singleton that watches multiple paths. It does this by
holding a watchdog.observer.Observer object, and manages several
_FolderEventHandler instances. This creates _FolderEventHandlers as needed,
if the required folder is not already being watched. And it also tells
existing _FolderEventHandlers which paths it should be watching for.
- _FolderEventHandler : event handler for when a folder is modified. You can
register paths in that folder that you're interested in. Then this object
listens to folder events, sees if registered paths changed, and fires
callbacks if so.
This module is lazy-loaded and used only if watchdog is installed.
"""
from __future__ import annotations
import os
import threading
from typing import TYPE_CHECKING, Callable, Final, cast
from blinker import ANY, Signal
from typing_extensions import Self
from watchdog import events
from watchdog.observers import Observer
from streamlit.logger import get_logger
from streamlit.util import repr_
from streamlit.watcher import util
if TYPE_CHECKING:
from watchdog.observers.api import ObservedWatch
# Module-level logger, obtained from get_logger() with this module's __name__.
_LOGGER: Final = get_logger(__name__)
def _get_abs_folder_path(path: str) -> str:
"""Get the absolute folder path for a given path.
If the path is a directory, return the absolute path.
Otherwise, return the absolute path of the parent directory.
"""
return os.path.abspath(path if os.path.isdir(path) else os.path.dirname(path))
class EventBasedPathWatcher:
    """Watch one path on disk by delegating to the _MultiPathWatcher singleton."""

    @staticmethod
    def close_all() -> None:
        """Close the _MultiPathWatcher singleton."""
        _MultiPathWatcher.get_singleton().close()
        _LOGGER.debug("Watcher closed")

    def __init__(
        self,
        path: str,
        on_changed: Callable[[str], None],
        *,  # keyword-only arguments:
        glob_pattern: str | None = None,
        allow_nonexistent: bool = False,
    ) -> None:
        """Register ``on_changed`` for ``path`` with the shared watcher.

        Parameters
        ----------
        path : str
            The path to watch. It is stored and registered in absolute form.
        on_changed : Callable[[str], None]
            Callback to call when the path changes.
        glob_pattern : str or None
            A glob pattern to filter the files in a directory that should be
            watched. Only relevant when watching a directory.
        allow_nonexistent : bool
            If True, the watcher will not raise an exception if the path does
            not exist. This can be used to watch for the creation of a file or
            directory at a given path.
        """
        absolute_path = os.path.abspath(path)
        self._path = absolute_path
        self._on_changed = on_changed

        _MultiPathWatcher.get_singleton().watch_path(
            absolute_path,
            on_changed,
            glob_pattern=glob_pattern,
            allow_nonexistent=allow_nonexistent,
        )
        _LOGGER.debug("Watcher created for %s", absolute_path)

    def __repr__(self) -> str:
        return repr_(self)

    def close(self) -> None:
        """Unregister this watcher's callback for its path."""
        shared_watcher = _MultiPathWatcher.get_singleton()
        shared_watcher.stop_watching_path(self._path, self._on_changed)
class _MultiPathWatcher:
"""Watches multiple paths."""
_singleton: _MultiPathWatcher | None = None
@classmethod
def get_singleton(cls) -> _MultiPathWatcher:
"""Return the singleton _MultiPathWatcher object.
Instantiates one if necessary.
"""
if cls._singleton is None:
_LOGGER.debug("No singleton. Registering one.")
_MultiPathWatcher()
return cast("_MultiPathWatcher", _MultiPathWatcher._singleton)
# Don't allow constructor to be called more than once.
def __new__(cls) -> Self:
"""Constructor."""
if _MultiPathWatcher._singleton is not None:
raise RuntimeError("Use .get_singleton() instead")
return super().__new__(cls)
def __init__(self) -> None:
"""Constructor."""
_MultiPathWatcher._singleton = self
# Map of folder_to_watch -> _FolderEventHandler.
self._folder_handlers: dict[str, _FolderEventHandler] = {}
# Used for mutation of _folder_handlers dict
self._lock = threading.Lock()
# The Observer object from the Watchdog module. Since this class is
# only instantiated once, we only have a single Observer in Streamlit,
# and it's in charge of watching all paths we're interested in.
self._observer = Observer()
self._observer.start() # Start observer thread.
def __repr__(self) -> str:
return repr_(self)
def watch_path(
self,
path: str,
callback: Callable[[str], None],
*, # keyword-only arguments:
glob_pattern: str | None = None,
allow_nonexistent: bool = False,
) -> None:
"""Start watching a path."""
folder_path = _get_abs_folder_path(path)
with self._lock:
folder_handler = self._folder_handlers.get(folder_path)
if folder_handler is None:
folder_handler = _FolderEventHandler()
self._folder_handlers[folder_path] = folder_handler
folder_handler.watch = self._observer.schedule(
folder_handler, folder_path, recursive=True
)
folder_handler.add_path_change_listener(
path,
callback,
glob_pattern=glob_pattern,
allow_nonexistent=allow_nonexistent,
)
def stop_watching_path(self, path: str, callback: Callable[[str], None]) -> None:
"""Stop watching a path."""
folder_path = _get_abs_folder_path(path)
with self._lock:
folder_handler = self._folder_handlers.get(folder_path)
if folder_handler is None:
_LOGGER.debug(
"Cannot stop watching path, because it is already not being "
"watched. %s",
folder_path,
)
return
folder_handler.remove_path_change_listener(path, callback)
if (
not folder_handler.is_watching_paths()
and folder_handler.watch is not None
):
self._observer.unschedule(folder_handler.watch)
del self._folder_handlers[folder_path]
def close(self) -> None:
with self._lock:
"""Close this _MultiPathWatcher object forever."""
if len(self._folder_handlers) != 0:
self._folder_handlers = {}
_LOGGER.debug(
"Stopping observer thread even though there is a non-zero "
"number of event observers!"
)
else:
_LOGGER.debug("Stopping observer thread")
self._observer.stop()
self._observer.join(timeout=5)
class WatchedPath:
    """Bookkeeping for one watched path, plus the signal fired when it changes."""

    def __init__(
        self,
        md5: str,
        modification_time: float,
        *,  # keyword-only arguments:
        glob_pattern: str | None = None,
        allow_nonexistent: bool = False,
    ):
        # Last known fingerprint of the path: content hash and mtime.
        self.md5, self.modification_time = md5, modification_time
        # Options passed again whenever the fingerprint is recomputed.
        self.glob_pattern, self.allow_nonexistent = glob_pattern, allow_nonexistent
        # Listeners connect their callbacks to this signal.
        self.on_changed = Signal()

    def __repr__(self) -> str:
        return repr_(self)
class _FolderEventHandler(events.FileSystemEventHandler):
    """Listen to folder events and fire callbacks for registered paths.

    The super class, FileSystemEventHandler, listens to changes to *folders*,
    but we need to listen to changes to *both* folders and files. I believe
    this is a limitation of the Mac FSEvents system API, and the watchdog
    library takes the lowest common denominator.

    So in this class we watch for folder events and then filter them based
    on whether or not we care for the path the event is about.
    """

    def __init__(self) -> None:
        super().__init__()
        self._watched_paths: dict[str, WatchedPath] = {}
        self._lock = threading.Lock()  # for watched_paths mutations
        self.watch: ObservedWatch | None = None

    def __repr__(self) -> str:
        return repr_(self)

    def add_path_change_listener(
        self,
        path: str,
        callback: Callable[[str], None],
        *,  # keyword-only arguments:
        glob_pattern: str | None = None,
        allow_nonexistent: bool = False,
    ) -> None:
        """Add a path to this object's event filter.

        The first listener for a path records the path's current MD5 and
        modification time; later listeners only connect to the existing signal.
        """
        with self._lock:
            entry = self._watched_paths.get(path)
            if entry is None:
                entry = WatchedPath(
                    md5=util.calc_md5_with_blocking_retries(
                        path,
                        glob_pattern=glob_pattern,
                        allow_nonexistent=allow_nonexistent,
                    ),
                    modification_time=util.path_modification_time(
                        path, allow_nonexistent
                    ),
                    glob_pattern=glob_pattern,
                    allow_nonexistent=allow_nonexistent,
                )
                self._watched_paths[path] = entry

            entry.on_changed.connect(callback, weak=False)

    def remove_path_change_listener(
        self, path: str, callback: Callable[[str], None]
    ) -> None:
        """Remove a path from this object's event filter.

        The path entry itself is dropped once its signal has no receivers left.
        """
        with self._lock:
            entry = self._watched_paths.get(path)
            if entry is None:
                return

            entry.on_changed.disconnect(callback)
            if not entry.on_changed.has_receivers_for(ANY):
                del self._watched_paths[path]

    def is_watching_paths(self) -> bool:
        """Return true if this object has 1+ paths in its event filter."""
        return bool(self._watched_paths)

    def handle_path_change_event(self, event: events.FileSystemEvent) -> None:
        """Handle when a path (corresponding to a file or dir) is changed.

        The events that can call this are modification, creation or moved
        events. Listeners are notified only when the path is registered and
        both its modification time and its MD5 differ from the recorded ones.
        """
        event_type = event.event_type

        # Modified and created events both report the affected path as
        # src_path. Creation matters because on OSX with VI, on save, the file
        # is deleted, the swap file is modified and then the original file is
        # created.
        if event_type in (events.EVENT_TYPE_MODIFIED, events.EVENT_TYPE_CREATED):
            changed_path = event.src_path
        elif event_type == events.EVENT_TYPE_MOVED:
            # Many programs write to a backup file then rename (i.e. move) it,
            # so the destination of a move is what changed.
            # Teach mypy that this event has a dest_path, because it can't infer
            # the desired subtype from the event_type check
            event = cast("events.FileSystemMovedEvent", event)
            _LOGGER.debug(
                "Move event: src %s; dest %s", event.src_path, event.dest_path
            )
            changed_path = event.dest_path
        else:
            _LOGGER.debug("Don't care about event type %s", event.event_type)
            return

        # Watchdog 5.X supports Python >=3.9, so watchdog 4.X is used for Python 3.8.
        # In Watchdog 5.X, the path can be bytes or str, but in Watchdog 4.X, the path
        # is always str, so we convert the path to str and ignore the unreachable
        # code warning for Python 3.8.
        if isinstance(changed_path, bytes):  # type: ignore[unreachable, unused-ignore]
            changed_path = changed_path.decode("utf-8")  # type: ignore[unreachable, unused-ignore]

        abs_changed_path = os.path.abspath(changed_path)

        entry = self._watched_paths.get(abs_changed_path)
        if entry is None:
            _LOGGER.debug(
                "Ignoring changed path %s.\nWatched_paths: %s",
                abs_changed_path,
                self._watched_paths,
            )
            return

        current_mtime = util.path_modification_time(
            abs_changed_path, entry.allow_nonexistent
        )

        # We add modification_time != 0.0 check since on some file systems (s3fs/fuse)
        # modification_time is always 0.0 because of file system limitations.
        if current_mtime != 0.0 and current_mtime == entry.modification_time:
            _LOGGER.debug("File/dir timestamp did not change: %s", abs_changed_path)
            return

        entry.modification_time = current_mtime

        current_md5 = util.calc_md5_with_blocking_retries(
            abs_changed_path,
            glob_pattern=entry.glob_pattern,
            allow_nonexistent=entry.allow_nonexistent,
        )
        if current_md5 == entry.md5:
            _LOGGER.debug("File/dir MD5 did not change: %s", abs_changed_path)
            return

        _LOGGER.debug("File/dir MD5 changed: %s", abs_changed_path)
        entry.md5 = current_md5
        entry.on_changed.send(abs_changed_path)

    def on_created(self, event: events.FileSystemEvent) -> None:
        self.handle_path_change_event(event)

    def on_modified(self, event: events.FileSystemEvent) -> None:
        self.handle_path_change_event(event)

    def on_moved(self, event: events.FileSystemEvent) -> None:
        self.handle_path_change_event(event)

View File

@@ -0,0 +1,82 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import os
from streamlit import config, file_util, util
# Glob patterns for folders whose files should always be blacklisted.
# FolderBlackList appends these to whatever list it is constructed with.
DEFAULT_FOLDER_BLACKLIST = [
    "**/.*",
    "**/anaconda",
    "**/anaconda2",
    "**/anaconda3",
    "**/dist-packages",
    "**/miniconda",
    "**/miniconda2",
    "**/miniconda3",
    "**/node_modules",
    "**/pyenv",
    "**/site-packages",
    "**/venv",
    "**/virtualenv",
    "**/htmlcov",
]
class FolderBlackList:
    """Blacklist of folders, expressed as glob patterns.

    Note
    ----
    Any path that matches a glob in `DEFAULT_FOLDER_BLACKLIST` is blacklisted
    in addition to the patterns given to the constructor.
    """

    def __init__(self, folder_blacklist):
        """Build the combined blacklist.

        Parameters
        ----------
        folder_blacklist : list of str
            list of folder names with globbing to blacklist.
        """
        self._folder_blacklist = [*folder_blacklist, *DEFAULT_FOLDER_BLACKLIST]

        # Add the Streamlit lib folder when in dev mode, since otherwise we end
        # up with weird situations where the ID of a class in one run is not
        # the same as in another run.
        if config.get_option("global.developmentMode"):
            self._folder_blacklist.append(os.path.dirname(__file__))

    def __repr__(self) -> str:
        return util.repr_(self)

    def is_blacklisted(self, filepath):
        """Test if filepath is in the blacklist.

        Parameters
        ----------
        filepath : str
            File path that we intend to test.
        """
        for blacklisted_folder in self._folder_blacklist:
            if file_util.file_is_in_folder_glob(filepath, blacklisted_folder):
                return True
        return False

View File

@@ -0,0 +1,233 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import os
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Final, NamedTuple
from streamlit import config, file_util
from streamlit.logger import get_logger
from streamlit.watcher.folder_black_list import FolderBlackList
from streamlit.watcher.path_watcher import (
NoOpPathWatcher,
get_default_path_watcher_class,
)
if TYPE_CHECKING:
from types import ModuleType
from streamlit.runtime.pages_manager import PagesManager
# Module-level logger, obtained from get_logger() with this module's __name__.
_LOGGER: Final = get_logger(__name__)
class WatchedModule(NamedTuple):
    """Pairs a path watcher with the name of the module it was created for."""

    # Watcher object; LocalSourcesWatcher calls its close() method.
    watcher: Any
    # Module name, or None when the entry was registered without one.
    module_name: Any
# Cached path watcher class. It is initialized lazily (by
# LocalSourcesWatcher._register_watcher) to avoid calling config.get_option()
# and thus initializing config options when this file is first imported.
# None means "not looked up yet".
PathWatcher = None
class LocalSourcesWatcher:
    """Watch page scripts and imported module files, notifying callbacks on change."""

    def __init__(self, pages_manager: PagesManager):
        self._pages_manager = pages_manager
        self._main_script_path = os.path.abspath(self._pages_manager.main_script_path)
        self._script_folder = os.path.dirname(self._main_script_path)
        self._on_file_changed: list[Callable[[str], None]] = []
        self._is_closed = False
        self._cached_sys_modules: set[str] = set()

        # Blacklist for folders that should not be watched
        self._folder_black_list = FolderBlackList(
            config.get_option("server.folderWatchBlacklist")
        )

        self._watched_modules: dict[str, WatchedModule] = {}
        self._watched_pages: set[str] = set()

        self.update_watched_pages()

    def update_watched_pages(self) -> None:
        """Sync the watched page scripts with the pages manager's current pages."""
        previously_watched = self._watched_pages.copy()
        current_pages: set[str] = set()

        for page_info in self._pages_manager.get_pages().values():
            script_path = page_info["script_path"]
            if not script_path:
                continue

            current_pages.add(script_path)
            if script_path not in self._watched_pages:
                self._register_watcher(script_path, module_name=None)

        for stale_path in previously_watched:
            # Only remove pages that are no longer valid files
            if stale_path in current_pages or os.path.isfile(stale_path):
                continue
            self._deregister_watcher(stale_path)
            self._watched_pages.remove(stale_path)

        self._watched_pages = self._watched_pages.union(current_pages)

    def register_file_change_callback(self, cb: Callable[[str], None]) -> None:
        """Add a callback to invoke with the path of every changed file."""
        self._on_file_changed.append(cb)

    def on_file_changed(self, filepath):
        """Unload all watched modules, then notify the registered callbacks."""
        if filepath not in self._watched_modules:
            _LOGGER.error("Received event for non-watched file: %s", filepath)
            return

        # Workaround:
        # Delete all watched modules so we can guarantee changes to the
        # updated module are reflected on reload.
        #
        # In principle, for reloading a given module, we only need to unload
        # the module itself and all of the modules which import it (directly
        # or indirectly) such that when we exec the application code, the
        # changes are reloaded and reflected in the running application.
        #
        # However, determining all import paths for a given loaded module is
        # non-trivial, and so as a workaround we simply unload all watched
        # modules.
        for watched in self._watched_modules.values():
            name = watched.module_name
            if name is not None and name in sys.modules:
                del sys.modules[name]

        for callback in self._on_file_changed:
            callback(filepath)

    def close(self):
        """Close every watcher and mark this object as closed."""
        for watched in self._watched_modules.values():
            watched.watcher.close()
        self._watched_modules = {}
        self._watched_pages = set()
        self._is_closed = True

    def _register_watcher(self, filepath, module_name):
        """Create a watcher for ``filepath`` and record it under that path."""
        global PathWatcher
        if PathWatcher is None:
            PathWatcher = get_default_path_watcher_class()

        if PathWatcher is NoOpPathWatcher:
            return

        try:
            watcher = PathWatcher(filepath, self.on_file_changed)
        except PermissionError:
            # If you don't have permission to read this file, don't even add it
            # to watchers.
            return

        self._watched_modules[filepath] = WatchedModule(
            watcher=watcher, module_name=module_name
        )

    def _deregister_watcher(self, filepath):
        """Close and forget the watcher for ``filepath``, except the main script's."""
        if filepath not in self._watched_modules or filepath == self._main_script_path:
            return

        watched = self._watched_modules[filepath]
        watched.watcher.close()
        del self._watched_modules[filepath]

    def _file_is_new(self, filepath):
        """Return True when ``filepath`` has no registered watcher yet."""
        return filepath not in self._watched_modules

    def _file_should_be_watched(self, filepath):
        """Return whether ``filepath`` is unwatched and in the script folder or PYTHONPATH."""
        # Check the cheap condition first for performance.
        if not self._file_is_new(filepath):
            return False
        return file_util.file_is_in_folder_glob(
            filepath, self._script_folder
        ) or file_util.file_in_pythonpath(filepath)

    def update_watched_modules(self):
        """Register watchers for newly imported modules, if sys.modules changed."""
        if self._is_closed:
            return

        if set(sys.modules) == self._cached_sys_modules:
            return

        # Iterate over a copy of sys.modules.
        modules_paths = {
            name: self._exclude_blacklisted_paths(get_module_paths(module))
            for name, module in dict(sys.modules).items()
        }
        self._cached_sys_modules = set(sys.modules)
        self._register_necessary_watchers(modules_paths)

    def _register_necessary_watchers(self, module_paths: dict[str, set[str]]) -> None:
        """Register a watcher for each module path that should be watched."""
        for name, paths in module_paths.items():
            for path in paths:
                if not self._file_should_be_watched(path):
                    continue
                self._register_watcher(str(Path(path).resolve()), name)

    def _exclude_blacklisted_paths(self, paths: set[str]) -> set[str]:
        """Return the subset of ``paths`` that the folder blacklist does not match."""
        allowed = set()
        for candidate in paths:
            if not self._folder_black_list.is_blacklisted(candidate):
                allowed.add(candidate)
        return allowed
def get_module_paths(module: ModuleType) -> set[str]:
    """Collect the absolute file and directory paths associated with ``module``.

    Several extraction strategies are tried in turn. A strategy that fails
    with AttributeError is skipped silently; any other failure is logged as a
    warning. Only candidates accepted by ``_is_valid_path`` are returned.
    """
    extractors = [
        # https://docs.python.org/3/reference/datamodel.html
        # __file__ is the pathname of the file from which the module was loaded
        # if it was loaded from a file.
        # The __file__ attribute may be missing for certain types of modules
        lambda m: [m.__file__],
        # https://docs.python.org/3/reference/import.html#__spec__
        # The __spec__ attribute is set to the module spec that was used
        # when importing the module. one exception is __main__,
        # where __spec__ is set to None in some cases.
        # https://www.python.org/dev/peps/pep-0451/#id16
        # "origin" in an import context means the system
        # (or resource within a system) from which a module originates
        # ... It is up to the loader to decide on how to interpret
        # and use a module's origin, if at all.
        lambda m: [m.__spec__.origin],
        # https://www.python.org/dev/peps/pep-0420/
        # Handling of "namespace packages" in which the __path__ attribute
        # is a _NamespacePath object with a _path attribute containing
        # the various paths of the package.
        lambda m: list(m.__path__._path),
    ]

    found = set()
    for extractor in extractors:
        try:
            candidates = extractor(module)
        except AttributeError:
            # Some modules might not have __file__ or __spec__ attributes.
            candidates = []
        except Exception:
            _LOGGER.warning(
                f"Examining the path of {module.__name__} raised:", exc_info=True
            )
            candidates = []

        for candidate in candidates:
            if _is_valid_path(candidate):
                found.add(os.path.abspath(str(candidate)))

    return found
def _is_valid_path(path: str | None) -> bool:
return isinstance(path, str) and (os.path.isfile(path) or os.path.isdir(path))

View File

@@ -0,0 +1,185 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import os
from typing import Callable, Union
import streamlit.watcher
from streamlit import cli_util, config, env_util
from streamlit.watcher.polling_path_watcher import PollingPathWatcher
# local_sources_watcher.py caches the return value of
# get_default_path_watcher_class(), so it needs to differentiate between the
# cases where it:
# 1. has yet to call get_default_path_watcher_class()
# 2. has called get_default_path_watcher_class(), which returned that no
# path watcher should be installed.
# This forces us to define this stub class since the cached value equaling
# None corresponds to case 1 above.
class NoOpPathWatcher:
    """Stub watcher class meaning "no path watcher should be installed"."""

    def __init__(
        self,
        _path_str: str,
        _on_changed: Callable[[str], None],
        *,  # keyword-only arguments:
        glob_pattern: str | None = None,
        allow_nonexistent: bool = False,
    ):
        """Accept the same arguments as the real watchers and do nothing."""
# Type alias covering every watcher class that get_path_watcher_class() can
# return.
# EventBasedPathWatcher will be a stub and have no functional
# implementation if its import failed (due to missing watchdog module),
# so we can't reference it directly in this type; it is named by string
# instead.
PathWatcherType = Union[
    type["streamlit.watcher.event_based_path_watcher.EventBasedPathWatcher"],
    type[PollingPathWatcher],
    type[NoOpPathWatcher],
]
def _is_watchdog_available() -> bool:
"""Check if the watchdog module is installed."""
try:
import watchdog # noqa: F401
return True
except ImportError:
return False
def report_watchdog_availability() -> None:
    """Print a CLI hint to install watchdog when it would be used but is missing.

    Nothing is printed when the configured `server.fileWatcherType` is "poll"
    or "none", or when watchdog can be imported.
    """
    if (
        config.get_option("server.fileWatcherType") not in ["poll", "none"]
        and not _is_watchdog_available()
    ):
        # Extra command shown only when env_util.IS_DARWIN is set
        # (presumably macOS; verify against env_util).
        msg = "\n $ xcode-select --install" if env_util.IS_DARWIN else ""
        cli_util.print_to_cli(
            " %s" % "For better performance, install the Watchdog module:",
            fg="blue",
            bold=True,
        )
        # The literal below is user-facing output; its line layout is part of
        # the printed text.
        cli_util.print_to_cli(
            """%s
$ pip install watchdog
"""
            % msg
        )
def _watch_path(
    path: str,
    on_path_changed: Callable[[str], None],
    watcher_type: str | None = None,
    *,  # keyword-only arguments:
    glob_pattern: str | None = None,
    allow_nonexistent: bool = False,
) -> bool:
    """Create a PathWatcher for the given path if a viable class exists.

    Parameters
    ----------
    path
        Path to watch.
    on_path_changed
        Function that's called when the path changes.
    watcher_type
        Optional watcher_type string. If None, it will default to the
        `server.fileWatcherType` config option.
    glob_pattern
        Optional glob pattern to use when watching a directory. If set, only
        files matching the pattern will be counted as being created/deleted
        within the watched directory.
    allow_nonexistent
        If True, allow the file or directory at the given path to be
        nonexistent.

    Returns
    -------
    bool
        True if the path is being watched, or False if we have no
        PathWatcher class.
    """
    effective_type = watcher_type
    if effective_type is None:
        effective_type = config.get_option("server.fileWatcherType")

    watcher_class = get_path_watcher_class(effective_type)
    if watcher_class is NoOpPathWatcher:
        return False

    # The watcher instance is intentionally not kept by this function.
    watcher_class(
        path,
        on_path_changed,
        glob_pattern=glob_pattern,
        allow_nonexistent=allow_nonexistent,
    )
    return True
def watch_file(
    path: str,
    on_file_changed: Callable[[str], None],
    watcher_type: str | None = None,
) -> bool:
    """Watch a single file; see ``_watch_path`` for the meaning of the result."""
    is_watched = _watch_path(path, on_file_changed, watcher_type)
    return is_watched
def watch_dir(
    path: str,
    on_dir_changed: Callable[[str], None],
    watcher_type: str | None = None,
    *,  # keyword-only arguments:
    glob_pattern: str | None = None,
    allow_nonexistent: bool = False,
) -> bool:
    """Watch a directory; see ``_watch_path`` for parameters and the result."""
    # Add a trailing slash to the path to ensure
    # that it's interpreted as a directory.
    dir_path = os.path.join(path, "")

    return _watch_path(
        dir_path,
        on_dir_changed,
        watcher_type,
        glob_pattern=glob_pattern,
        allow_nonexistent=allow_nonexistent,
    )
def get_default_path_watcher_class() -> PathWatcherType:
    """Return the watcher class selected by the `server.fileWatcherType` config
    option.
    """
    configured_type = config.get_option("server.fileWatcherType")
    return get_path_watcher_class(configured_type)
def get_path_watcher_class(watcher_type: str) -> PathWatcherType:
    """Map a watcher_type string to the PathWatcher class implementing it.

    Acceptable values are 'auto', 'watchdog', 'poll' and 'none'. 'auto' uses
    the event-based watcher when watchdog is available and polling otherwise.
    Any other case yields NoOpPathWatcher.
    """
    if watcher_type in {"watchdog", "auto"} and _is_watchdog_available():
        # Lazy-import this module to prevent unnecessary imports of the watchdog package.
        from streamlit.watcher.event_based_path_watcher import EventBasedPathWatcher

        return EventBasedPathWatcher

    if watcher_type in {"auto", "poll"}:
        return PollingPathWatcher

    return NoOpPathWatcher

View File

@@ -0,0 +1,124 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A class that watches a given path via polling."""
from __future__ import annotations
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Callable, Final
from streamlit.logger import get_logger
from streamlit.util import repr_
from streamlit.watcher import util
# Module-level logger, obtained from get_logger() with this module's __name__.
_LOGGER: Final = get_logger(__name__)
# Size of the thread pool shared by all PollingPathWatcher instances.
_MAX_WORKERS: Final = 4
# Seconds each polling task sleeps before checking its path.
_POLLING_PERIOD_SECS: Final = 0.2
class PollingPathWatcher:
    """Watches a path on disk via a polling loop."""

    # One executor shared by every instance; polling tasks are submitted to it.
    _executor = ThreadPoolExecutor(max_workers=_MAX_WORKERS)

    @staticmethod
    def close_all() -> None:
        """Close top-level watcher object.

        This is a no-op, and exists for interface parity with
        EventBasedPathWatcher.
        """
        _LOGGER.debug("Watcher closed")

    def __init__(
        self,
        path: str,
        on_changed: Callable[[str], None],
        *,  # keyword-only arguments:
        glob_pattern: str | None = None,
        allow_nonexistent: bool = False,
    ) -> None:
        """Record the path's current state and schedule the first poll.

        You do not need to retain a reference to a PollingPathWatcher to
        prevent it from being garbage collected. (The global _executor object
        retains references to all active instances.)
        """
        # TODO(vdonato): Modernize this by switching to pathlib.
        self._path = path
        self._on_changed = on_changed

        self._glob_pattern = glob_pattern
        self._allow_nonexistent = allow_nonexistent

        self._active = True

        # Baseline that later polls are compared against.
        self._modification_time = util.path_modification_time(
            self._path, self._allow_nonexistent
        )
        self._md5 = util.calc_md5_with_blocking_retries(
            self._path,
            glob_pattern=self._glob_pattern,
            allow_nonexistent=self._allow_nonexistent,
        )
        self._schedule()

    def __repr__(self) -> str:
        return repr_(self)

    def _schedule(self) -> None:
        """Submit one delayed poll of the path to the shared executor."""

        def poll_after_delay():
            time.sleep(_POLLING_PERIOD_SECS)
            self._check_if_path_changed()

        PollingPathWatcher._executor.submit(poll_after_delay)

    def _check_if_path_changed(self) -> None:
        """Poll once, notify on a content change, then reschedule if active."""
        if not self._active:
            # Don't call self._schedule()
            return

        latest_mtime = util.path_modification_time(
            self._path, self._allow_nonexistent
        )
        # We add modification_time != 0.0 check since on some file systems (s3fs/fuse)
        # modification_time is always 0.0 because of file system limitations.
        mtime_not_newer = (
            latest_mtime != 0.0 and latest_mtime <= self._modification_time
        )
        if mtime_not_newer:
            self._schedule()
            return

        self._modification_time = latest_mtime

        latest_md5 = util.calc_md5_with_blocking_retries(
            self._path,
            glob_pattern=self._glob_pattern,
            allow_nonexistent=self._allow_nonexistent,
        )
        if latest_md5 != self._md5:
            self._md5 = latest_md5
            _LOGGER.debug("Change detected: %s", self._path)
            self._on_changed(self._path)

        self._schedule()

    def close(self) -> None:
        """Stop watching the file system."""
        self._active = False

View File

@@ -0,0 +1,207 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A bunch of useful utilities for the watcher.
These are functions that only make sense within the watcher. In particular,
functions that use streamlit.config can go here to avoid a dependency cycle.
"""
from __future__ import annotations
import os
import time
from pathlib import Path
from typing import Callable, TypeVar
from streamlit.errors import Error
from streamlit.util import calc_md5
# How many times to try to grab the MD5 hash.
# NOTE(review): presumably consumed by _do_with_retries, which is not visible
# in this excerpt; confirm there.
_MAX_RETRIES = 5
# How long to wait between retries, in seconds.
_RETRY_WAIT_SECS = 0.1
def calc_md5_with_blocking_retries(
    path: str,
    *,  # keyword-only arguments:
    glob_pattern: str | None = None,
    allow_nonexistent: bool = False,
) -> str:
    """Return the MD5 checksum describing the current state of ``path``.

    What gets hashed depends on what ``path`` refers to:

    - nothing, and ``allow_nonexistent`` is True: the UTF-8 encoded path
      string itself;
    - a directory: the directory's path combined with the names of the files
      in it that match ``glob_pattern`` (``"*"`` when no pattern is given);
    - otherwise: the contents of the file.

    IMPORTANT: This function can end up calling time.sleep(), which blocks
    execution, so it should only be used outside the main thread.
    """
    path_is_missing = allow_nonexistent and not os.path.exists(path)
    if path_is_missing:
        return calc_md5(path.encode("UTF-8"))

    if os.path.isdir(path):
        pattern = glob_pattern if glob_pattern else "*"
        identifier = _stable_dir_identifier(path, pattern)
        return calc_md5(identifier.encode("UTF-8"))

    # The file can briefly disappear while it is in the process of being
    # written, so reading it is a race. Retry a few times instead of failing
    # on the first FileNotFoundError. See issue #186.
    def read_file() -> bytes:
        return _get_file_content(path)

    file_bytes = _do_with_retries(read_file, FileNotFoundError, path)
    return calc_md5(file_bytes)
def path_modification_time(path: str, allow_nonexistent: bool = False) -> float:
    """Return the modification time of the file or directory at ``path``.

    When ``allow_nonexistent`` is True and nothing exists at ``path``, 0.0 is
    returned, so that whatever is later created there is guaranteed to have a
    modification time later than the value reported now.

    When ``allow_nonexistent`` is False and nothing exists at ``path``, a
    FileNotFoundError is raised (by os.stat).

    For a path that does exist, its ``st_mtime`` is returned.
    """
    treat_as_missing = allow_nonexistent and not os.path.exists(path)
    if treat_as_missing:
        return 0.0

    # The path may be in the middle of being modified (and so momentarily
    # absent); go through the retry helper rather than stat-ing it only once.
    def stat_mtime() -> float:
        return os.stat(path).st_mtime

    return _do_with_retries(stat_mtime, FileNotFoundError, path)
def _get_file_content(file_path: str) -> bytes:
with open(file_path, "rb") as f:
return f.read()
def _dirfiles(dir_path: str, glob_pattern: str) -> str:
p = Path(dir_path)
filenames = sorted(
[f.name for f in p.glob(glob_pattern) if not f.name.startswith(".")]
)
return "+".join(filenames)
def _stable_dir_identifier(dir_path: str, glob_pattern: str) -> str:
    """Return an id for a directory once its file listing looks stable-ish.

    The listing is re-read until two consecutive reads agree, or until the
    retry loop runs out of attempts, in which case the most recent listing is
    used.

    Why poll instead of trusting file system events: many tools (e.g. git) and
    editors (e.g. vim) carry out what the user perceives as a simple "edit" by
    deleting, creating, and moving files under the hood. The FileSystemEvents
    received from watchdog therefore can't be relied upon to tell when a file
    has really been added to or removed from a directory.

    This is a heuristic, but most likely an acceptable one:

    - The worst outcome is a false positive page added/removed notification,
      which isn't too disastrous and can just be ignored.
    - Telling a legitimate file creation/deletion/move apart from one step of
      an "edit" sequence is believed to be impossible in general (the problem
      is probably undecidable).

    The returned id is ``dir_path`` and the listing joined by "+".
    """
    last_seen = _dirfiles(dir_path, glob_pattern)

    for _ in _retry_dance():
        latest = _dirfiles(dir_path, glob_pattern)
        settled = latest == last_seen
        last_seen = latest
        if settled:
            break

    return f"{dir_path}+{last_seen}"
# Generic placeholder for the return type of the callable handed to
# _do_with_retries, which returns whatever that callable returns.
T = TypeVar("T")
def _do_with_retries(
    orig_fn: Callable[[], T],
    exception: type[Exception],
    path: str | Path,
) -> T:
    """Call ``orig_fn``, retrying whenever it raises ``exception``.

    The result of the first successful call is returned. If the final allowed
    attempt still raises ``exception``, that exception is re-raised unchanged.
    Any other exception type propagates immediately.

    To use this, replace things like this...

        result = thing_to_do(file_path, a, b, c)

    ...with this:

        result = _do_with_retries(
            lambda: thing_to_do(file_path, a, b, c),
            ExceptionThatWillCauseARetry,
            file_path,  # For pretty error message.
        )
    """
    for attempt in _retry_dance():
        try:
            return orig_fn()
        except exception:
            out_of_attempts = attempt >= _MAX_RETRIES - 1
            if out_of_attempts:
                raise
            # Otherwise fall through and let the loop try again.

    # Only reached when the loop finishes without returning or re-raising.
    raise MaxRetriesError(f"Unable to access file or folder: {path}")
def _retry_dance():
    """Helper for writing a retry loop.

    Yields the attempt index (0, 1, ..., _MAX_RETRIES - 1) and sleeps for
    _RETRY_WAIT_SECS between consecutive attempts. This makes all retry loops
    behave the same way. There is no sleep before the first attempt and none
    after the last one, since either would just slow things down for no
    reason.

    Usage:

        for i in _retry_dance():
            # Do the thing you want to retry automatically.
            the_thing_worked = do_thing()

            # Don't forget to include a break/return when the thing you're
            # trying to do works.
            if the_thing_worked:
                break
    """
    for i in range(_MAX_RETRIES):
        if i > 0:
            # Wait only *between* attempts: a consumer that uses up every
            # attempt is not delayed once more after the final one.
            time.sleep(_RETRY_WAIT_SECS)
        yield i
class MaxRetriesError(Error):
    """Raised by the retry helper when its retry loop ends without a result."""