Mise à jour de Monitor.py et autres scripts

2025-07-23 10:46:27 +02:00
parent 7081418ce0
commit 7de3e0fb50
8604 changed files with 2789953 additions and 295 deletions
--- a/myenv/lib/python3.11/site-packages/pyarrow/ipc.py
+++ b/myenv/lib/python3.11/site-packages/pyarrow/ipc.py
@@ -0,0 +1,280 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Arrow file and stream reader/writer classes, and other messaging tools
+
+import os
+
+import pyarrow as pa
+
+from pyarrow.lib import (IpcReadOptions, IpcWriteOptions, ReadStats, WriteStats,  # noqa
+                         Message, MessageReader,
+                         RecordBatchReader, _ReadPandasMixin,
+                         MetadataVersion, Alignment,
+                         read_message, read_record_batch, read_schema,
+                         read_tensor, write_tensor,
+                         get_record_batch_size, get_tensor_size)
+import pyarrow.lib as lib
+
+
+class RecordBatchStreamReader(lib._RecordBatchStreamReader):
+    """
+    Read record batches from data in the Arrow streaming binary format.
+
+    Parameters
+    ----------
+    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
+        An in-memory buffer or a readable file object holding the stream.
+        To read through a memory map, pass a MemoryMappedFile as the source.
+    options : pyarrow.ipc.IpcReadOptions
+        Options for IPC deserialization.
+        Default values are used when None is given.
+    memory_pool : MemoryPool, default None
+        Memory pool to use; the default memory pool is used when None.
+    """
+
+    def __init__(self, source, *, options=None, memory_pool=None):
+        # Substitute default read options for None and reject values of the
+        # wrong type before the stream is opened.
+        read_options = _ensure_default_ipc_read_options(options)
+        self._open(source, options=read_options, memory_pool=memory_pool)
+
+
+# Parameter documentation shared by the writer docstrings in this module
+# (interpolated into RecordBatchStreamWriter.__doc__ and new_stream.__doc__).
+_ipc_writer_class_doc = """\
+Parameters
+----------
+sink : str, pyarrow.NativeFile, or file-like Python object
+    Either a file path, or a writable file object.
+schema : pyarrow.Schema
+    The Arrow schema for data to be written to the file.
+options : pyarrow.ipc.IpcWriteOptions
+    Options for IPC serialization.
+
+    If None, default values will be used: the legacy format will not
+    be used unless overridden by setting the environment variable
+    ARROW_PRE_0_15_IPC_FORMAT=1, and the V5 metadata version will be
+    used unless overridden by setting the environment variable
+    ARROW_PRE_1_0_METADATA_VERSION=1."""
+
+
+# File-writer variant: the shared parameter section followed, after a single
+# newline, by the entry describing the footer ``metadata`` argument.
+_ipc_file_writer_class_doc = "\n".join((
+    _ipc_writer_class_doc,
+    """\
+metadata : dict | pyarrow.KeyValueMetadata, optional
+    Key/value pairs (both must be bytes-like) that will be stored
+    in the file footer and are retrievable via
+    pyarrow.ipc.open_file(...).metadata.""",
+))
+
+
+class RecordBatchStreamWriter(lib._RecordBatchStreamWriter):
+    # The class docstring is assembled at import time so that it can embed
+    # the parameter documentation shared with the other writer docstrings.
+    __doc__ = f"""Writer for the Arrow streaming binary format
+
+{_ipc_writer_class_doc}"""
+
+    def __init__(self, sink, schema, *, options=None):
+        # Turn None into the environment-driven default write options (and
+        # type-check explicit ones) before opening the sink.
+        write_options = _get_legacy_format_default(options)
+        self._open(sink, schema, options=write_options)
+
+
+class RecordBatchFileReader(lib._RecordBatchFileReader):
+    """
+    Reader for record batch data stored in the Arrow binary file format.
+
+    Parameters
+    ----------
+    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
+        An in-memory buffer or a readable file object holding the file data.
+        To read through a memory map, pass a MemoryMappedFile as the source.
+    footer_offset : int, default None
+        When the Arrow file is embedded in some larger file, the byte offset
+        of the very end of the Arrow file data.
+    options : pyarrow.ipc.IpcReadOptions
+        Options for IPC deserialization.
+        Default values are used when None is given.
+    memory_pool : MemoryPool, default None
+        Memory pool to use; the default memory pool is used when None.
+    """
+
+    def __init__(self, source, footer_offset=None, *, options=None,
+                 memory_pool=None):
+        # Substitute default read options for None and reject values of the
+        # wrong type before the file is opened.
+        read_options = _ensure_default_ipc_read_options(options)
+        self._open(
+            source,
+            footer_offset=footer_offset,
+            options=read_options,
+            memory_pool=memory_pool,
+        )
+
+
+class RecordBatchFileWriter(lib._RecordBatchFileWriter):
+    # The class docstring is assembled at import time so that it can embed
+    # the shared writer parameters plus the file-only ``metadata`` entry.
+    __doc__ = f"""Writer to create the Arrow binary file format
+
+{_ipc_file_writer_class_doc}"""
+
+    def __init__(self, sink, schema, *, options=None, metadata=None):
+        # Turn None into the environment-driven default write options (and
+        # type-check explicit ones) before opening the sink.
+        write_options = _get_legacy_format_default(options)
+        self._open(sink, schema, options=write_options, metadata=metadata)
+
+
+def _get_legacy_format_default(options):
+    """
+    Return the IPC write options a writer should use.
+
+    Parameters
+    ----------
+    options : pyarrow.ipc.IpcWriteOptions or None
+        Explicit options, returned unchanged after a type check. If None,
+        default options are built from the environment.
+
+    Returns
+    -------
+    options : IpcWriteOptions
+        The given options, or defaults that use the non-legacy format and
+        metadata version V5 unless overridden by the environment variables
+        ARROW_PRE_0_15_IPC_FORMAT=1 (legacy format) and
+        ARROW_PRE_1_0_METADATA_VERSION=1 (metadata version V4).
+
+    Raises
+    ------
+    TypeError
+        If options is neither None nor an IpcWriteOptions instance.
+    ValueError
+        If one of the environment variables holds a non-integer string.
+    """
+    # Compare against None instead of relying on truthiness, so that falsy
+    # values of the wrong type (e.g. {} or 0) are rejected rather than
+    # silently replaced by the defaults.
+    if options is not None:
+        if not isinstance(options, IpcWriteOptions):
+            raise TypeError(f"expected IpcWriteOptions, got {type(options)}")
+        return options
+
+    # Each variable is parsed as an integer flag: unset or "0" means off.
+    use_legacy_format = bool(
+        int(os.environ.get('ARROW_PRE_0_15_IPC_FORMAT', '0')))
+    use_v4_metadata = bool(
+        int(os.environ.get('ARROW_PRE_1_0_METADATA_VERSION', '0')))
+    metadata_version = (MetadataVersion.V4 if use_v4_metadata
+                        else MetadataVersion.V5)
+    return IpcWriteOptions(use_legacy_format=use_legacy_format,
+                           metadata_version=metadata_version)
+
+
+def _ensure_default_ipc_read_options(options):
+    """
+    Return the IPC read options a reader should use.
+
+    Parameters
+    ----------
+    options : pyarrow.ipc.IpcReadOptions or None
+        Explicit options, returned unchanged after a type check. If None,
+        a default-constructed IpcReadOptions is returned.
+
+    Returns
+    -------
+    options : IpcReadOptions
+
+    Raises
+    ------
+    TypeError
+        If options is neither None nor an IpcReadOptions instance.
+    """
+    # Compare against None instead of relying on truthiness, so that falsy
+    # values of the wrong type (e.g. {} or 0) are rejected rather than
+    # silently replaced by the defaults.
+    if options is None:
+        return IpcReadOptions()
+    if not isinstance(options, IpcReadOptions):
+        raise TypeError(f"expected IpcReadOptions, got {type(options)}")
+    return options
+
+
+def new_stream(sink, schema, *, options=None):
+    # The docstring is attached right after the definition so that it can
+    # embed the shared writer parameter documentation.
+    stream_writer = RecordBatchStreamWriter(sink, schema, options=options)
+    return stream_writer
+
+
+new_stream.__doc__ = f"""\
+Create an Arrow columnar IPC stream writer instance
+
+{_ipc_writer_class_doc}
+
+Returns
+-------
+writer : RecordBatchStreamWriter
+    A writer for the given sink
+"""
+
+
+def open_stream(source, *, options=None, memory_pool=None):
+    """
+    Open a reader over data in the Arrow streaming format.
+
+    Parameters
+    ----------
+    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
+        An in-memory buffer or a readable file object holding the stream.
+    options : pyarrow.ipc.IpcReadOptions
+        Options for IPC deserialization.
+        Default values are used when None is given.
+    memory_pool : MemoryPool, default None
+        Memory pool to use; the default memory pool is used when None.
+
+    Returns
+    -------
+    reader : RecordBatchStreamReader
+        A reader for the given source
+    """
+    stream_reader = RecordBatchStreamReader(
+        source, options=options, memory_pool=memory_pool)
+    return stream_reader
+
+
+def new_file(sink, schema, *, options=None, metadata=None):
+    # The docstring is attached right after the definition so that it can
+    # embed the shared file-writer parameter documentation.
+    file_writer = RecordBatchFileWriter(
+        sink, schema, options=options, metadata=metadata)
+    return file_writer
+
+
+new_file.__doc__ = f"""\
+Create an Arrow columnar IPC file writer instance
+
+{_ipc_file_writer_class_doc}
+
+Returns
+-------
+writer : RecordBatchFileWriter
+    A writer for the given sink
+"""
+
+
+def open_file(source, footer_offset=None, *, options=None, memory_pool=None):
+    """
+    Open a reader over data in the Arrow file format.
+
+    Parameters
+    ----------
+    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
+        An in-memory buffer or a readable file object holding the file data.
+    footer_offset : int, default None
+        When the Arrow file is embedded in some larger file, the byte offset
+        of the very end of the Arrow file data.
+    options : pyarrow.ipc.IpcReadOptions
+        Options for IPC deserialization.
+        Default values are used when None is given.
+    memory_pool : MemoryPool, default None
+        Memory pool to use; the default memory pool is used when None.
+
+    Returns
+    -------
+    reader : RecordBatchFileReader
+        A reader for the given source
+    """
+    file_reader = RecordBatchFileReader(
+        source,
+        footer_offset=footer_offset,
+        options=options,
+        memory_pool=memory_pool,
+    )
+    return file_reader
+
+
+def serialize_pandas(df, *, nthreads=None, preserve_index=None):
+    """
+    Write a pandas DataFrame as an Arrow IPC stream into an in-memory buffer.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+    nthreads : int, default None
+        Number of threads to use for conversion to Arrow, default all CPUs.
+    preserve_index : bool, default None
+        With the default of None the index is stored as a column, except
+        for a RangeIndex, which is stored as metadata only. True always
+        stores the pandas index data as a column. False saves no index
+        information, so the result will have a default RangeIndex.
+
+    Returns
+    -------
+    buf : buffer
+        An object compatible with the buffer protocol.
+    """
+    record_batch = pa.RecordBatch.from_pandas(
+        df, nthreads=nthreads, preserve_index=preserve_index)
+    out_stream = pa.BufferOutputStream()
+    # The writer is used as a context manager; the buffer contents are only
+    # fetched after that block has exited.
+    with pa.RecordBatchStreamWriter(out_stream, record_batch.schema) as writer:
+        writer.write_batch(record_batch)
+    return out_stream.getvalue()
+
+
+def deserialize_pandas(buf, *, use_threads=True):
+    """Rebuild a pandas DataFrame from a buffer protocol compatible object.
+
+    The buffer is read as an Arrow IPC stream, all of its record batches are
+    collected into one table, and that table is converted to pandas.
+
+    Parameters
+    ----------
+    buf : buffer
+        An object compatible with the buffer protocol.
+    use_threads : bool, default True
+        Whether to parallelize the conversion using multiple threads.
+
+    Returns
+    -------
+    df : pandas.DataFrame
+        The buffer deserialized as pandas DataFrame
+    """
+    with pa.RecordBatchStreamReader(pa.BufferReader(buf)) as stream_reader:
+        arrow_table = stream_reader.read_all()
+    return arrow_table.to_pandas(use_threads=use_threads)