Mise à jour de Monitor.py et autres scripts

2025-07-23 10:46:27 +02:00
parent 7081418ce0
commit 7de3e0fb50
8604 changed files with 2789953 additions and 295 deletions
--- a/myenv/lib/python3.11/site-packages/pyarrow/tests/pandas_examples.py
+++ b/myenv/lib/python3.11/site-packages/pyarrow/tests/pandas_examples.py
@@ -0,0 +1,172 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections import OrderedDict
+from datetime import date, time
+
+import numpy as np
+import pandas as pd
+import pyarrow as pa
+
+
+def dataframe_with_arrays(include_index=False):
+    """
+    Dataframe with numpy array columns for a range of primitive types.
+
+    Returns
+    -------
+    df: pandas.DataFrame
+    schema: pyarrow.Schema
+        Matches df; gains '__index_level_0__' (int64) if include_index.
+    """
+    dtypes = [('i1', pa.int8()), ('i2', pa.int16()),  # numpy code, arrow type
+              ('i4', pa.int32()), ('i8', pa.int64()),
+              ('u1', pa.uint8()), ('u2', pa.uint16()),
+              ('u4', pa.uint32()), ('u8', pa.uint64()),
+              ('f4', pa.float32()), ('f8', pa.float64())]
+
+    arrays = OrderedDict()
+    fields = []
+    for dtype, arrow_dtype in dtypes:  # dtype code doubles as column name
+        fields.append(pa.field(dtype, pa.list_(arrow_dtype)))
+        arrays[dtype] = [
+            np.arange(10, dtype=dtype),
+            np.arange(5, dtype=dtype),
+            None,  # the whole cell is missing, not an empty array
+            np.arange(1, dtype=dtype)
+        ]
+
+    fields.append(pa.field('str', pa.list_(pa.string())))
+    arrays['str'] = [
+        np.array(["1", "ä"], dtype="object"),  # includes a non-ASCII string
+        None,
+        np.array(["1"], dtype="object"),
+        np.array(["1", "2", "3"], dtype="object")
+    ]
+
+    fields.append(pa.field('datetime64', pa.list_(pa.timestamp('ms'))))
+    arrays['datetime64'] = [
+        np.array(['2007-07-13T01:23:34.123456789',  # ns digits, ms dtype
+                  None,  # missing element inside the array
+                  '2010-08-13T05:46:57.437699912'],
+                 dtype='datetime64[ms]'),
+        None,
+        None,
+        np.array(['2007-07-13T02',  # hour-precision literal, ms dtype
+                  None,
+                  '2010-08-13T05:46:57.437699912'],
+                 dtype='datetime64[ms]'),
+    ]
+
+    if include_index:  # schema only; df gets no such column here
+        fields.append(pa.field('__index_level_0__', pa.int64()))
+    df = pd.DataFrame(arrays)
+    schema = pa.schema(fields)
+
+    return df, schema
+
+
+def dataframe_with_lists(include_index=False, parquet_compatible=False):
+    """
+    Dataframe with list columns of several primitive and temporal types.
+
+    Pass parquet_compatible=True to exclude types not supported by
+    parquet; include_index=True adds an index field to the schema.
+
+    Returns
+    -------
+    df: pandas.DataFrame
+    schema: pyarrow.Schema in line with the constructed df
+    """
+    arrays = OrderedDict()
+    fields = []
+
+    fields.append(pa.field('int64', pa.list_(pa.int64())))
+    arrays['int64'] = [
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+        [0, 1, 2, 3, 4],
+        None,  # missing cell
+        [],  # empty list, distinct from the missing cell above
+        np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9] * 2,  # list repeated twice
+                 dtype=np.int64)[::2]  # keep every second element
+    ]
+    fields.append(pa.field('double', pa.list_(pa.float64())))
+    arrays['double'] = [
+        [0., 1., 2., 3., 4., 5., 6., 7., 8., 9.],
+        [0., 1., 2., 3., 4.],
+        None,
+        [],
+        np.array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.] * 2)[::2],
+    ]
+    fields.append(pa.field('bytes_list', pa.list_(pa.binary())))
+    arrays['bytes_list'] = [
+        [b"1", b"f"],
+        None,
+        [b"1"],
+        [b"1", b"2", b"3"],
+        [],
+    ]
+    fields.append(pa.field('str_list', pa.list_(pa.string())))
+    arrays['str_list'] = [
+        ["1", "ä"],  # includes a non-ASCII string
+        None,
+        ["1"],
+        ["1", "2", "3"],
+        [],
+    ]
+
+    date_data = [  # shared by the date32 and date64 columns
+        [],
+        [date(2018, 1, 1), date(2032, 12, 30)],
+        [date(2000, 6, 7)],
+        None,
+        [date(1969, 6, 9), date(1972, 7, 3)]
+    ]
+    time_data = [  # shared by every time32/time64 column
+        [time(23, 11, 11), time(1, 2, 3), time(23, 59, 59)],
+        [],
+        [time(22, 5, 59)],
+        None,
+        [time(0, 0, 0), time(18, 0, 2), time(12, 7, 3)]
+    ]
+
+    temporal_pairs = [  # (arrow value type, python values)
+        (pa.date32(), date_data),
+        (pa.date64(), date_data),
+        (pa.time32('s'), time_data),
+        (pa.time32('ms'), time_data),
+        (pa.time64('us'), time_data)
+    ]
+    if not parquet_compatible:  # only time64('ns') is gated by the flag
+        temporal_pairs += [
+            (pa.time64('ns'), time_data),
+        ]
+
+    for value_type, data in temporal_pairs:
+        field_name = f'{value_type}_list'  # named after the type's str form
+        field_type = pa.list_(value_type)
+        field = pa.field(field_name, field_type)
+        fields.append(field)
+        arrays[field_name] = data
+
+    if include_index:  # schema only; df gets no such column here
+        fields.append(pa.field('__index_level_0__', pa.int64()))
+
+    df = pd.DataFrame(arrays)
+    schema = pa.schema(fields)
+
+    return df, schema