
    ɯei                        d Z ddlZddlmZ ddlmZmZmZmZm	Z	m
Z
mZmZ ddlZddlmc mc mc mc mZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lm Z  dd
l!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* ddl+m,Z,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6 ejn                  dk  rddlm8Z8 nddl9m8Z8  G d d      Z: G d d      Z;de8e<   ddfdZ=y)as  User-defined table functions (UDTFs) in Snowpark. Please see `Python UDTF <https://docs.snowflake.com/en/developer-guide/snowpark/python/creating-udtfs>`_ for details.
There is also vectorized UDTF. Compared to the default row-by-row processing pattern of a normal UDTF, which sometimes is inefficient, vectorized Python UDTFs (user-defined table functions) enable seamless partition-by-partition processing
by operating on partitions as `pandas DataFrames <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ and returning results as `pandas DataFrames <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ or lists of
`pandas arrays <https://pandas.pydata.org/docs/reference/api/pandas.array.html>`_ or `pandas Series <https://pandas.pydata.org/docs/reference/series.html>`_.

In addition, vectorized Python UDTFs allow for easy integration with libraries that operate on pandas DataFrames or pandas arrays.

A vectorized UDTF handler class:
    - defines an :code:`end_partition` method that takes in a DataFrame argument and returns a :code:`pandas.DataFrame` or a tuple of :code:`pandas.Series` or :code:`pandas.arrays` where each array is a column.
    - does NOT define a :code:`process` method.
    - optionally defines a handler class with an :code:`__init__` method which will be invoked before processing each partition.

Note:
    A vectorized UDTF must be called with :meth:`~snowflake.snowpark.Window.partition_by` to build the partitions.

Refer to :class:`~snowflake.snowpark.udtf.UDTFRegistration` for details and sample code on how to create regular and vectorized UDTFs using Snowpark Python API.
    N)
ModuleType)AnyCallableDictListOptionalTupleTypeUnion)ProgrammingError)
build_udtfbuild_udtf_applywith_src_position)SnowparkClientExceptionMessages)"open_telemetry_udf_context_manager)ColumnOrName)		UDFColumnRegistrationTypecheck_python_runtime_versioncheck_register_args%cleanup_failed_permanent_registrationcreate_python_udf_or_spprocess_file_pathprocess_registration_inputsresolve_imports_and_packages)TempObjectTypecheck_imports_typecheck_output_schema_type	publicapivalidate_object_name)TableFunctionCall)DataTypePandasDataFrameType
StructType)   	   )Iterablec                       e Zd ZdZ	 	 	 ddeeeeef   f   deee	e   df   de
e   dedee
eeef         d	eej                     d
ee   ddfdZedddeee	e   f   dedefd       Zy)UserDefinedTableFunctiona  
    Encapsulates a user defined table function that is returned by
    :func:`~snowflake.snowpark.functions.udtf`, :meth:`UDTFRegistration.register` or
    :meth:`UDTFRegistration.register_from_file`. The constructor of this class is not supposed
    to be called directly.

    Call an instance of :class:`UserDefinedTableFunction` to generate a
    :class:`~snowflake.snowpark.table_function.TableFunctionCall` instance. The input type can be
    a column name as a :class:`str`, or a :class:`~snowflake.snowpark.Column` object.

    See Also:
        - :class:`UDTFRegistration`
        - :func:`~snowflake.snowpark.functions.udtf`
    Nhandleroutput_schemar#   input_typesnamepackages_ast_ast_idreturnc                 f    || _         || _        || _        || _        || _        || _        || _        y N)r*   r-   _output_schema_input_types	_packagesr/   r0   )selfr*   r+   r,   r-   r.   r/   r0   s           Y/var/www/html/glpi_dashboard/venv/lib/python3.12/site-packages/snowflake/snowpark/udtf.py__init__z!UserDefinedTableFunction.__init__V   s:     :A	+'! 	    T)	_emit_ast	argumentsr;   c                *   d }|r`| j                   T| j                   J d       | j                  J d       t        j                         }t	        || j                  g|i | t        | j                  g|i |d|i}|j                  d       |S )Nz7Need to ensure _emit_ast is True when registering UDTF.zNeed to assign UDTF an ID.r/   z!UserDefinedTableFunction.__call__)r/   r0   protoExprr   r!   r-   _set_api_call_source)r7   r;   r<   named_arguments	udtf_exprtable_function_calls         r8   __call__z!UserDefinedTableFunction.__call__n   s     	.		%IHI%<<+I-II+

IYTyTOT/II
!
%4
;D
 	001TU""r:   )NNN)__name__
__module____qualname____doc__r   r   r	   strr$   r'   r   r"   r   r   r>   Udtfintr9   r   r   boolr!   rD    r:   r8   r)   r)   F   s    * <@%)!%xsCx01 Z#8MMN (^	
  4c:o 678 uzz" # 
0  #,(>>?# #
 
# #r:   r)   c            :          e Zd ZdZded   ddfdZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d-ddddd	d
edee	e
e   df   deee      deee      deeee
e   f      dedee   deeeeeeef   f         deeeeef         dedededededeee      deeeef      dedee   dee   dedeeeef      d ee   d!eeeef      d"edef2d#       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d.dddddd$d%ed&edee	e
e   df   deee      deee      deeee
e   f      dedee   deeeeeeef   f         deeeeef         dedededededeee      deeeef      dedee   dedeeeef      d'ed ee   d!eeeef      d"edef4d(       Z	 	 	 	 	 	 	 	 	 	 	 	 	 d/ddddddddd)d
eeeeef   f   dee	e
e   df   deee      deee      dee   dee   deeeeeeef   f         deeeeef         dedededededeee      deeeef      dedee   dee   d*eeeef      deeeef      d+ed'ededed ee   d!eeeef      d"edef8d,Zy)0UDTFRegistrationaV  
    Provides methods to register classes as UDTFs in the Snowflake database.
    For more information about Snowflake Python UDTFs, see `Python UDTFs <https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-tabular-functions.html>`__.

    :attr:`session.udtf <snowflake.snowpark.Session.udtf>` returns an object of this class.
    You can use this object to register UDTFs that you plan to use in the current session or
    permanently. The methods that register a UDTF return a :class:`UserDefinedTableFunction` object,
    which you can also use to call the UDTF.

    Registering a UDTF is like registering a scalar UDF, you can use :meth:`register` or :func:`snowflake.snowpark.functions.udtf`
    to explicitly register it. You can also use the decorator `@udtf`. They all use ``cloudpickle`` to transfer the code from the client to the server.
    Another way is to use :meth:`register_from_file`. Refer to module :class:`snowflake.snowpark.udtf.UDTFRegistration` for when to use them.

    To query a registered UDTF is the same as to query other table functions.
    Refer to :meth:`~snowflake.snowpark.Session.table_function` and :meth:`~snowflake.snowpark.DataFrame.join_table_function`.
    If you want to query a UDTF right after it's created, you can call the created :class:`UserDefinedTableFunction` instance like in Example 1 below.

    Example 1
        Create a temporary UDTF and call it:

            >>> from snowflake.snowpark.types import IntegerType, StructField, StructType
            >>> from snowflake.snowpark.functions import udtf, lit
            >>> class GeneratorUDTF:
            ...     def process(self, n):
            ...         for i in range(n):
            ...             yield (i, )
            >>> generator_udtf = udtf(GeneratorUDTF, output_schema=StructType([StructField("number", IntegerType())]), input_types=[IntegerType()])
            >>> session.table_function(generator_udtf(lit(3))).collect()  # Query it by calling it
            [Row(NUMBER=0), Row(NUMBER=1), Row(NUMBER=2)]
            >>> session.table_function(generator_udtf.name, lit(3)).collect()  # Query it by using the name
            [Row(NUMBER=0), Row(NUMBER=1), Row(NUMBER=2)]
            >>> # Or you can lateral-join a UDTF like any other table functions
            >>> df = session.create_dataframe([2, 3], schema=["c"])
            >>> df.join_table_function(generator_udtf(df["c"])).sort("c", "number").show()
            ------------------
            |"C"  |"NUMBER"  |
            ------------------
            |2    |0         |
            |2    |1         |
            |3    |0         |
            |3    |1         |
            |3    |2         |
            ------------------
            <BLANKLINE>

    Example 2
        Create a UDTF with type hints and ``@udtf`` decorator and query it:

            >>> from snowflake.snowpark.types import IntegerType, StructField, StructType
            >>> from snowflake.snowpark.functions import udtf, lit
            >>> @udtf(output_schema=["number"])
            ... class generator_udtf:
            ...     def process(self, n: int) -> Iterable[Tuple[int]]:
            ...         for i in range(n):
            ...             yield (i, )
            >>> session.table_function(generator_udtf(lit(3))).collect()  # Query it by calling it
            [Row(NUMBER=0), Row(NUMBER=1), Row(NUMBER=2)]
            >>> session.table_function(generator_udtf.name, lit(3)).collect()  # Query it by using the name
            [Row(NUMBER=0), Row(NUMBER=1), Row(NUMBER=2)]

    Example 3
        Create a permanent UDTF with a name and call it in SQL:

            >>> from snowflake.snowpark.types import IntegerType, StructField, StructType
            >>> from snowflake.snowpark.functions import udtf, lit
            >>> _ = session.sql("create or replace temp stage mystage").collect()
            >>> class GeneratorUDTF:
            ...     def process(self, n):
            ...         for i in range(n):
            ...             yield (i, )
            >>> generator_udtf = udtf(
            ...     GeneratorUDTF, output_schema=StructType([StructField("number", IntegerType())]), input_types=[IntegerType()],
            ...     is_permanent=True, name="generator_udtf", replace=True, stage_location="@mystage"
            ... )
            >>> session.sql("select * from table(generator_udtf(3))").collect()
            [Row(NUMBER=0), Row(NUMBER=1), Row(NUMBER=2)]

    Example 4
        Create a UDTF with type hints:

            >>> from snowflake.snowpark.types import IntegerType, StructField, StructType
            >>> from snowflake.snowpark.functions import udtf, lit
            >>> @udtf(output_schema=["n1", "n2"])
            ... class generator_udtf:
            ...     def process(self, n: int) -> Iterable[Tuple[int, int]]:
            ...         for i in range(n):
            ...             yield (i, i+1)
            >>> session.table_function(generator_udtf(lit(3))).collect()
            [Row(N1=0, N2=1), Row(N1=1, N2=2), Row(N1=2, N2=3)]

    Example 5
        Create a UDTF with type hints by using ``...`` for multiple columns of the same type:

            >>> from snowflake.snowpark.types import IntegerType, StructField, StructType
            >>> from snowflake.snowpark.functions import udtf, lit
            >>> @udtf(output_schema=["n1", "n2"])
            ... class generator_udtf:
            ...     def process(self, n: int) -> Iterable[Tuple[int, ...]]:
            ...         for i in range(n):
            ...             yield (i, i+1)
            >>> session.table_function(generator_udtf(lit(3))).collect()
            [Row(N1=0, N2=1), Row(N1=1, N2=2), Row(N1=2, N2=3)]

    Example 6
        Create a UDTF with UDF-level imports and type hints:

            >>> from resources.test_udf_dir.test_udf_file import mod5
            >>> from snowflake.snowpark.types import IntegerType, StructField, StructType
            >>> from snowflake.snowpark.functions import udtf, lit
            >>> @udtf(output_schema=["number"], imports=[("tests/resources/test_udf_dir/test_udf_file.py", "resources.test_udf_dir.test_udf_file")])
            ... class generator_udtf:
            ...     def process(self, n: int) -> Iterable[Tuple[int]]:
            ...         for i in range(n):
            ...             yield (mod5(i), )
            >>> session.table_function(generator_udtf(lit(6))).collect()
            [Row(NUMBER=0), Row(NUMBER=1), Row(NUMBER=2), Row(NUMBER=3), Row(NUMBER=4), Row(NUMBER=0)]

    Example 7
        Create a UDTF with UDF-level packages and type hints:

            >>> from snowflake.snowpark.types import IntegerType, StructField, StructType
            >>> from snowflake.snowpark.functions import udtf, lit
            >>> import numpy as np
            >>> @udtf(output_schema=["number"], packages=["numpy"])
            ... class generator_udtf:
            ...     def process(self, n: int) -> Iterable[Tuple[int]]:
            ...         for i in np.arange(n):
            ...             yield (i, )
            >>> session.table_function(generator_udtf(lit(3))).collect()
            [Row(NUMBER=0), Row(NUMBER=1), Row(NUMBER=2)]

    Example 8
        Creating a UDTF with the constructor and ``end_partition`` method.

            >>> from collections import Counter
            >>> from typing import Iterable, Tuple
            >>> from snowflake.snowpark.functions import lit
            >>> class MyWordCount:
            ...     def __init__(self) -> None:
            ...         self._total_per_partition = 0
            ...
            ...     def process(self, s1: str) -> Iterable[Tuple[str, int]]:
            ...         words = s1.split()
            ...         self._total_per_partition = len(words)
            ...         counter = Counter(words)
            ...         yield from counter.items()
            ...
            ...     def end_partition(self):
            ...         yield ("partition_total", self._total_per_partition)

            >>> udtf_name = "word_count_udtf"
            >>> word_count_udtf = session.udtf.register(
            ...     MyWordCount, ["word", "count"], name=udtf_name, is_permanent=False, replace=True
            ... )
            >>> # Call it by its name
            >>> df1 = session.table_function(udtf_name, lit("w1 w2 w2 w3 w3 w3"))
            >>> df1.show()
            -----------------------------
            |"WORD"           |"COUNT"  |
            -----------------------------
            |w1               |1        |
            |w2               |2        |
            |w3               |3        |
            |partition_total  |6        |
            -----------------------------
            <BLANKLINE>

            >>> # Call it by the returned callable instance
            >>> df2 = session.table_function(word_count_udtf(lit("w1 w2 w2 w3 w3 w3")))
            >>> df2.show()
            -----------------------------
            |"WORD"           |"COUNT"  |
            -----------------------------
            |w1               |1        |
            |w2               |2        |
            |w3               |3        |
            |partition_total  |6        |
            -----------------------------
            <BLANKLINE>

    Example 9
        Creating a UDTF from a local Python file:

            >>> from snowflake.snowpark.types import IntegerType, StructField, StructType
            >>> from snowflake.snowpark.functions import udtf, lit
            >>> generator_udtf = session.udtf.register_from_file(
            ...     file_path="tests/resources/test_udtf_dir/test_udtf_file.py",
            ...     handler_name="GeneratorUDTF",
            ...     output_schema=StructType([StructField("number", IntegerType())]),
            ...     input_types=[IntegerType()]
            ... )
            >>> session.table_function(generator_udtf(lit(3))).collect()
            [Row(NUMBER=0), Row(NUMBER=1), Row(NUMBER=2)]

    Example 10
        Creating a UDTF from a Python file on an internal stage:

            >>> from snowflake.snowpark.types import IntegerType, StructField, StructType
            >>> from snowflake.snowpark.functions import udtf, lit
            >>> _ = session.sql("create or replace temp stage mystage").collect()
            >>> _ = session.file.put("tests/resources/test_udtf_dir/test_udtf_file.py", "@mystage", auto_compress=False)
            >>> generator_udtf = session.udtf.register_from_file(
            ...     file_path="@mystage/test_udtf_file.py",
            ...     handler_name="GeneratorUDTF",
            ...     output_schema=StructType([StructField("number", IntegerType())]),
            ...     input_types=[IntegerType()]
            ... )
            >>> session.table_function(generator_udtf(lit(3))).collect()
            [Row(NUMBER=0), Row(NUMBER=1), Row(NUMBER=2)]

    You can use :func:`~snowflake.snowpark.functions.udtf`, :meth:`register` or
    :func:`~snowflake.snowpark.functions.pandas_udtf` to create a vectorized UDTF by providing
    appropriate return and input types. If you would like to use :meth:`register_from_file` to
    create a vectorized UDTF, you would need to explicitly mark the handler method as vectorized using
    either the decorator ``@vectorized(input=pandas.DataFrame)`` or setting
    ``<class>.end_partition._sf_vectorized_input = pandas.DataFrame``

    Example 11
        Creating a vectorized UDTF by specifying a ``PandasDataFrameType`` as ``input_types`` and a
        ``PandasDataFrameType`` with column names as ``output_schema``.

            >>> from snowflake.snowpark.types import PandasDataFrameType, IntegerType, StringType, FloatType
            >>> class multiply:
            ...     def __init__(self):
            ...         self.multiplier = 10
            ...     def end_partition(self, df):
            ...         df.col1 = df.col1*self.multiplier
            ...         df.col2 = df.col2*self.multiplier
            ...         yield df
            >>> multiply_udtf = session.udtf.register(
            ...     multiply,
            ...     output_schema=PandasDataFrameType([StringType(), IntegerType(), FloatType()], ["id_", "col1_", "col2_"]),
            ...     input_types=[PandasDataFrameType([StringType(), IntegerType(), FloatType()])],
            ...     input_names = ['"id"', '"col1"', '"col2"'],
            ... )
            >>> df = session.create_dataframe([['x', 3, 35.9],['x', 9, 20.5]], schema=["id", "col1", "col2"])
            >>> df.select(multiply_udtf("id", "col1", "col2").over(partition_by=["id"])).sort("col1_").show()
            -----------------------------
            |"ID_"  |"COL1_"  |"COL2_"  |
            -----------------------------
            |x      |30       |359.0    |
            |x      |90       |205.0    |
            -----------------------------
            <BLANKLINE>

    Example 12
        Creating a vectorized UDTF by specifying ``PandasDataFrame`` with nested types as type hints.

            >>> from snowflake.snowpark.types import PandasDataFrame
            >>> class multiply:
            ...     def __init__(self):
            ...         self.multiplier = 10
            ...     def end_partition(self, df: PandasDataFrame[str, int, float]) -> PandasDataFrame[str, int, float]:
            ...         df.col1 = df.col1*self.multiplier
            ...         df.col2 = df.col2*self.multiplier
            ...         yield df
            >>> multiply_udtf = session.udtf.register(
            ...     multiply,
            ...     output_schema=["id_", "col1_", "col2_"],
            ...     input_names = ['"id"', '"col1"', '"col2"'],
            ... )
            >>> df = session.create_dataframe([['x', 3, 35.9],['x', 9, 20.5]], schema=["id", "col1", "col2"])
            >>> df.select(multiply_udtf("id", "col1", "col2").over(partition_by=["id"])).sort("col1_").show()
            -----------------------------
            |"ID_"  |"COL1_"  |"COL2_"  |
            -----------------------------
            |x      |30       |359.0    |
            |x      |90       |205.0    |
            -----------------------------
            <BLANKLINE>

    Example 13
        Creating a vectorized UDTF by specifying a ``pandas.DataFrame`` as type hints and a ``StructType`` with type information and column names as ``output_schema``.

            >>> import pandas as pd
            >>> from snowflake.snowpark.types import IntegerType, StringType, FloatType, StructType, StructField
            >>> class multiply:
            ...     def __init__(self):
            ...         self.multiplier = 10
            ...     def end_partition(self, df: pd.DataFrame) -> pd.DataFrame:
            ...         df.col1 = df.col1*self.multiplier
            ...         df.col2 = df.col2*self.multiplier
            ...         yield df
            >>> multiply_udtf = session.udtf.register(
            ...     multiply,
            ...     output_schema=StructType([StructField("id_", StringType()), StructField("col1_", IntegerType()), StructField("col2_", FloatType())]),
            ...     input_types=[StringType(), IntegerType(), FloatType()],
            ...     input_names = ['"id"', '"col1"', '"col2"'],
            ... )
            >>> df = session.create_dataframe([['x', 3, 35.9],['x', 9, 20.5]], schema=["id", "col1", "col2"])
            >>> df.select(multiply_udtf("id", "col1", "col2").over(partition_by=["id"])).sort("col1_").show()
            -----------------------------
            |"ID_"  |"COL1_"  |"COL2_"  |
            -----------------------------
            |x      |30       |359.0    |
            |x      |90       |205.0    |
            -----------------------------
            <BLANKLINE>

    Example 14
        Same as Example 12, but does not specify `input_names` and instead set the column names in `end_partition`.

            >>> from snowflake.snowpark.types import PandasDataFrameType, IntegerType, StringType, FloatType
            >>> class multiply:
            ...     def __init__(self):
            ...         self.multiplier = 10
            ...     def end_partition(self, df):
            ...         df.columns = ["id", "col1", "col2"]
            ...         df.col1 = df.col1*self.multiplier
            ...         df.col2 = df.col2*self.multiplier
            ...         yield df
            >>> multiply_udtf = session.udtf.register(
            ...     multiply,
            ...     output_schema=PandasDataFrameType([StringType(), IntegerType(), FloatType()], ["id_", "col1_", "col2_"]),
            ...     input_types=[PandasDataFrameType([StringType(), IntegerType(), FloatType()])],
            ... )
            >>> df = session.create_dataframe([['x', 3, 35.9],['x', 9, 20.5]], schema=["id", "col1", "col2"])
            >>> df.select(multiply_udtf("id", "col1", "col2").over(partition_by=["id"])).sort("col1_").show()
            -----------------------------
            |"ID_"  |"COL1_"  |"COL2_"  |
            -----------------------------
            |x      |30       |359.0    |
            |x      |90       |205.0    |
            -----------------------------
            <BLANKLINE>

    The syntax for declaring UDTF with a vectorized process method is similar to above.
    Defining ``__init__`` and ``end_partition`` methods are optional. The ``process`` method only accepts one
    argument which is the pandas Dataframe object, and outputs the same number of rows as is in the given input.
    Both ``__init__`` and ``end_partition`` do not take any additional arguments.

    Example 15
        Vectorized UDTF process method without end_partition

            >>> class multiply:
            ...     def process(self, df: PandasDataFrame[str,int, float]) -> PandasDataFrame[int]:
            ...         return (df['col1'] * 10, )
            >>> multiply_udtf = session.udtf.register(
            ...     multiply,
            ...     output_schema=["col1x10"],
            ...     input_names=['"id"', '"col1"', '"col2"']
            ... )
            >>> df = session.create_dataframe([['x', 3, 35.9],['x', 9, 20.5]], schema=["id", "col1", "col2"])
            >>> df.select("id", "col1", "col2", multiply_udtf("id", "col1", "col2")).order_by("col1").show()
            --------------------------------------
            |"ID"  |"COL1"  |"COL2"  |"COL1X10"  |
            --------------------------------------
            |x     |3       |35.9    |30         |
            |x     |9       |20.5    |90         |
            --------------------------------------
            <BLANKLINE>


    Example 16
        Vectorized UDTF process method with end_partition

            >>> class mean:
            ...     def __init__(self) -> None:
            ...         self.sum = 0
            ...         self.len = 0
            ...     def process(self, df: pd.DataFrame) -> pd.DataFrame:
            ...         self.sum += df['value'].sum()
            ...         self.len += len(df)
            ...         return ([None] * len(df),)
            ...     def end_partition(self):
            ...         return ([self.sum / self.len],)
            >>> mean_udtf = session.udtf.register(mean,
            ...                       output_schema=StructType([StructField("mean", FloatType())]),
            ...                       input_types=[StringType(), IntegerType()],
            ...                       input_names=['"name"', '"value"'])
            >>> df = session.create_dataframe([["x", 10], ["x", 20], ["x", 33], ["y", 10], ["y", 25], ], schema=["name", "value"])
            >>> df.select("name", "value", mean_udtf("name", "value").over(partition_by="name")).order_by("name", "value").show()
            -----------------------------
            |"NAME"  |"VALUE"  |"MEAN"  |
            -----------------------------
            |x       |NULL     |21.0    |
            |x       |10       |NULL    |
            |x       |20       |NULL    |
            |x       |33       |NULL    |
            |y       |NULL     |17.5    |
            |y       |10       |NULL    |
            |y       |25       |NULL    |
            -----------------------------
            <BLANKLINE>

    Example 17
        Vectorized UDTF process method with end_partition and max_batch_size

            >>> class sum:
            ...     def __init__(self):
            ...         self.sum = None
            ...     def process(self, df):
            ...         if self.sum is None:
            ...             self.sum = df
            ...         else:
            ...             self.sum += df
            ...         return df
            ...     def end_partition(self):
            ...         return self.sum
            >>> sum_udtf = session.udtf.register(sum,
            ...         output_schema=PandasDataFrameType([StringType(), IntegerType()], ["id_", "col1_"]),
            ...         input_types=[PandasDataFrameType([StringType(), IntegerType()])],
            ...         max_batch_size=1)
            >>> df = session.create_dataframe([["x", 10], ["x", 20], ["x", 33], ["y", 10], ["y", 25], ], schema=["id", "col1"])
            >>> df.select("id", "col1", sum_udtf("id", "col1").over(partition_by="id")).order_by("id", "col1").show()
            -----------------------------------
            |"ID"  |"COL1"  |"ID_"  |"COL1_"  |
            -----------------------------------
            |x     |NULL    |xxx    |63       |
            |x     |10      |x      |10       |
            |x     |20      |x      |20       |
            |x     |33      |x      |33       |
            |y     |NULL    |yy     |35       |
            |y     |10      |y      |10       |
            |y     |25      |y      |25       |
            -----------------------------------
            <BLANKLINE>

    See Also:
        - :func:`~snowflake.snowpark.functions.udtf`
        - :meth:`register`
        - :meth:`register_from_file`
        - :meth:`~snowflake.snowpark.Session.add_import`
        - :meth:`~snowflake.snowpark.Session.add_packages`
        - :meth:`~snowflake.snowpark.Session.table_function`
        - :meth:`~snowflake.snowpark.DataFrame.join_table_function`
    sessionzsnowflake.snowpark.Sessionr1   Nc                     || _         y r3   )_session)r7   rP   s     r8   r9   zUDTFRegistration.__init__4  s	    r:   FT)statement_paramsartifact_repositoryresource_constraintr;   r*   r+   r#   r,   input_namesr-   is_permanentstage_locationimportsr.   replaceif_not_existsparallelstrictsecureexternal_access_integrationssecrets	immutablemax_batch_sizecommentcopy_grantsrS   rT   rU   r;   c                   t        | j                  ||      5  t        |      s(|j                  d      t	        dt        |             t        t        j                  ||||       |j                  dd      }d|v r|d=  | j                  ||||||||	|
||||f||||||d||||||d|cddd       S # 1 sw Y   yxY w)a  
        Registers a Python class as a Snowflake Python UDTF and returns the UDTF.
        The usage, input arguments, and return value of this method are the same as
        they are for :func:`~snowflake.snowpark.functions.udtf`, but :meth:`register`
        cannot be used as a decorator. See examples in
        :class:`~snowflake.snowpark.udtf.UDTFRegistration`.

        Args:
            handler: A Python class used for creating the UDTF.
            output_schema: A list of column names, or a :class:`~snowflake.snowpark.types.StructType` instance that represents the table function's columns, or a ``PandasDataFrameType`` instance for vectorized UDTF.
             If a list of column names is provided, the ``process`` method of the handler class must have a return type hint to indicate the output schema data types.
            input_types: A list of :class:`~snowflake.snowpark.types.DataType`
                representing the input data types of the UDTF. Optional if
                type hints are provided.
            input_names: A list of `str` representing the input column names of the UDTF, this only applies to vectorized UDTF and is essentially a noop for regular UDTFs. If unspecified, default column names will be
                ARG1, ARG2, etc.
            name: A string or list of strings that specify the name or fully-qualified
                object identifier (database name, schema name, and function name) for
                the UDTF in Snowflake.
                If it is not provided, a name will be automatically generated for the UDTF.
                A name must be specified when ``is_permanent`` is ``True``.
            is_permanent: Whether to create a permanent UDTF. The default is ``False``.
                If it is ``True``, a valid ``stage_location`` must be provided.
            stage_location: The stage location where the Python file for the UDTF
                and its dependencies should be uploaded. The stage location must be specified
                when ``is_permanent`` is ``True``, and it will be ignored when
                ``is_permanent`` is ``False``. It can be any stage other than temporary
                stages and external stages.
            imports: A list of imports that only apply to this UDTF. You can use a string to
                represent a file path (similar to the ``path`` argument in
                :meth:`~snowflake.snowpark.Session.add_import`) in this list, or a tuple of two
                strings to represent a file path and an import path (similar to the ``import_path``
                argument in :meth:`~snowflake.snowpark.Session.add_import`). These UDTF-level imports
                will override the session-level imports added by
                :meth:`~snowflake.snowpark.Session.add_import`.
            packages: A list of packages that only apply to this UDTF. These UDTF-level packages
                will override the session-level packages added by
                :meth:`~snowflake.snowpark.Session.add_packages` and
                :meth:`~snowflake.snowpark.Session.add_requirements`. To use Python packages that are not available
                in Snowflake, refer to :meth:`~snowflake.snowpark.Session.custom_package_usage_config`.
            replace: Whether to replace a UDTF that already was registered. The default is ``False``.
                If it is ``False``, attempting to register a UDTF with a name that already exists
                results in a ``SnowparkSQLException`` exception being thrown. If it is ``True``,
                an existing UDTF with the same name is overwritten.
            if_not_exists: Whether to skip creation of a UDTF when one with the same signature already exists.
                The default is ``False``. ``if_not_exists`` and ``replace`` are mutually exclusive
                and a ``ValueError`` is raised when both are set. If it is ``True`` and a UDTF with
                the same signature exists, the UDTF creation is skipped.
            session: Use this session to register the UDTF. If it's not specified, the session that you created before calling this function will be used.
                You need to specify this parameter if you have created multiple sessions before calling this method.
            parallel: The number of threads to use for uploading UDTF files with the
                `PUT <https://docs.snowflake.com/en/sql-reference/sql/put.html#put>`_
                command. The default value is 4 and supported values are from 1 to 99.
                Increasing the number of threads can improve performance when uploading
                large UDTF files.
            strict: Whether the created UDTF is strict. A strict UDTF will not invoke the UDTF if any input is
                null. Instead, a null value will always be returned for that row. Note that the UDTF might
                still return null for non-null inputs.
            secure: Whether the created UDTF is secure. For more information about secure functions,
                see `Secure UDFs <https://docs.snowflake.com/en/sql-reference/udf-secure.html>`_.
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            external_access_integrations: The names of one or more external access integrations. Each
                integration you specify allows access to the external network locations and secrets
                the integration specifies.
            secrets: The key-value pairs of string types of secrets used to authenticate the external network location.
                The secrets can be accessed from handler code. The secrets specified as values must
                also be specified in the external access integration and the keys are strings used to
                retrieve the secrets using secret API.
            immutable: Whether the UDTF result is deterministic or not for the same input.
            max_batch_size: The maximum number of rows per input pandas DataFrame or pandas Series
                inside a vectorized UDTF. Because a vectorized UDTF will be executed within a time limit,
                which is `60` seconds, this optional argument can be used to reduce the running time of
                every batch by setting a smaller batch size. Note that setting a larger value does not
                guarantee that Snowflake will encode batches with the specified number of rows. It will
                be ignored when registering a non-vectorized UDTF.
            comment: Adds a comment for the created object. See
                `COMMENT <https://docs.snowflake.com/en/sql-reference/sql/comment>`_
            copy_grants: Specifies to retain the access privileges from the original function when a new function is created
                using CREATE OR REPLACE FUNCTION.
            artifact_repository: The name of an artifact_repository that packages are found in. If unspecified, packages are
                pulled from Anaconda.
            resource_constraint: A dictionary containing a resource properties of a warehouse and then
                constraints needed to run this function. Eg ``{"architecture": "x86"}`` requires an x86
                warehouse be used for execution.

        See Also:
            - :func:`~snowflake.snowpark.functions.udtf`
            - :meth:`register_from_file`
        )r*   r-   _registered_object_nameNzHInvalid function: not a function or callable (__call__ is not defined): native_app_paramszUDTFRegistration.register)r_   r`   ra   rb   rc   rS   api_call_sourcerW   rg   rd   rT   rU   r;   )
r   registercallableget	TypeErrortyper   r   TABLE_FUNCTION_do_register_udtf)r7   r*   r+   r,   rV   r-   rW   rX   rY   r.   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   rS   rT   rU   r;   kwargsrg   s                              r8   ri   zUDTFRegistration.register7  s   n 0MM7
 2	 G$4M)N)V226w-B 
  -- !'

+> E"f,./ *4)) .J#-!1 ;)"3'$7$7#56 7-2	 2	 2	s   BB99C)rS   skip_upload_on_content_matchrT   rU   r;   	file_pathhandler_namerq   c                   t        | j                  |||      5  t        |      }t        t        j
                  ||||        | j                  ||f||||||	|
|||||f|||||d||||||d|cddd       S # 1 sw Y   yxY w)a  
        Registers a Python class as a Snowflake Python UDTF from a Python or zip file,
        and returns the UDTF. Apart from ``file_path`` and ``func_name``, the input arguments
        of this method are the same as :meth:`register`. See examples in
        :class:`~snowflake.snowpark.udtf.UDTFRegistration`.

        Args:
            file_path: The path of a local file or a remote file in the stage. See
                more details on ``path`` argument of
                :meth:`session.add_import() <snowflake.snowpark.Session.add_import>`.
                Note that unlike ``path`` argument of
                :meth:`session.add_import() <snowflake.snowpark.Session.add_import>`,
                here the file can only be a Python file or a compressed file
                (e.g., .zip file) containing Python modules.
            handler_name: The Python class name in the file that the UDTF will use as the handler.
            output_schema: A list of column names, or a :class:`~snowflake.snowpark.types.StructType` instance that represents the table function's columns, or a ``PandasDataFrameType`` instance for vectorized UDTF.
            input_types: A list of :class:`~snowflake.snowpark.types.DataType`
                representing the input data types of the UDTF. Optional if
                type hints are provided.
            input_names: A list of `str` representing the input column names of the UDTF, this only applies to vectorized UDTF and is essentially a noop for regular UDTFs. If unspecified, default column names will be
                ARG1, ARG2, etc.
            name: A string or list of strings that specify the name or fully-qualified
                object identifier (database name, schema name, and function name) for
                the UDTF in Snowflake, which allows you to call this UDTF in a SQL
                command or via :func:`~snowflake.snowpark.functions.call_udtf`.
                If it is not provided, a name will be automatically generated for the UDTF.
                A name must be specified when ``is_permanent`` is ``True``.
            is_permanent: Whether to create a permanent UDTF. The default is ``False``.
                If it is ``True``, a valid ``stage_location`` must be provided.
            stage_location: The stage location where the Python file for the UDTF
                and its dependencies should be uploaded. The stage location must be specified
                when ``is_permanent`` is ``True``, and it will be ignored when
                ``is_permanent`` is ``False``. It can be any stage other than temporary
                stages and external stages.
            imports: A list of imports that only apply to this UDTF. You can use a string to
                represent a file path (similar to the ``path`` argument in
                :meth:`~snowflake.snowpark.Session.add_import`) in this list, or a tuple of two
                strings to represent a file path and an import path (similar to the ``import_path``
                argument in :meth:`~snowflake.snowpark.Session.add_import`). These UDTF-level imports
                will override the session-level imports added by
                :meth:`~snowflake.snowpark.Session.add_import`.
            packages: A list of packages that only apply to this UDTF. These UDTF-level packages
                will override the session-level packages added by
                :meth:`~snowflake.snowpark.Session.add_packages` and
                :meth:`~snowflake.snowpark.Session.add_requirements`. To use Python packages that are not
                available in Snowflake, refer to :meth:`~snowflake.snowpark.Session.custom_package_usage_config`.
            replace: Whether to replace a UDTF that already was registered. The default is ``False``.
                If it is ``False``, attempting to register a UDTF with a name that already exists
                results in a ``SnowparkSQLException`` exception being thrown. If it is ``True``,
                an existing UDTF with the same name is overwritten.
            if_not_exists: Whether to skip creation of a UDTF when one with the same signature already exists.
                The default is ``False``. ``if_not_exists`` and ``replace`` are mutually exclusive
                and a ``ValueError`` is raised when both are set. If it is ``True`` and a UDTF with
                the same signature exists, the UDTF creation is skipped.
            session: Use this session to register the UDTF. If it's not specified, the session that you created before calling this function will be used.
                You need to specify this parameter if you have created multiple sessions before calling this method.
            parallel: The number of threads to use for uploading UDTF files with the
                `PUT <https://docs.snowflake.com/en/sql-reference/sql/put.html#put>`_
                command. The default value is 4 and supported values are from 1 to 99.
                Increasing the number of threads can improve performance when uploading
                large UDTF files.
            strict: Whether the created UDTF is strict. A strict UDTF will not invoke the UDTF if any input is
                null. Instead, a null value will always be returned for that row. Note that the UDTF might
                still return null for non-null inputs.
            secure: Whether the created UDTF is secure. For more information about secure functions,
                see `Secure UDFs <https://docs.snowflake.com/en/sql-reference/udf-secure.html>`_.
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            skip_upload_on_content_match: When set to ``True`` and a version of source file already exists on stage, the given source
                file will be uploaded to stage only if the contents of the current file differ from the remote file on stage. Defaults
                to ``False``.
            external_access_integrations: The names of one or more external access integrations. Each
                integration you specify allows access to the external network locations and secrets
                the integration specifies.
            secrets: The key-value pairs of string types of secrets used to authenticate the external network location.
                The secrets can be accessed from handler code. The secrets specified as values must
                also be specified in the external access integration and the keys are strings used to
                retrieve the secrets using secret API.
            immutable: Whether the UDTF result is deterministic or not for the same input.
            comment: Adds a comment for the created object. See
                `COMMENT <https://docs.snowflake.com/en/sql-reference/sql/comment>`_
            copy_grants: Specifies to retain the access privileges from the original function when a new function is created
                using CREATE OR REPLACE FUNCTION.
            artifact_repository: The name of an artifact_repository that packages are found in. If unspecified, packages are
                pulled from Anaconda.
            resource_constraint: A dictionary containing a resource properties of a warehouse and then
                constraints needed to run this function. Eg ``{"architecture": "x86"}`` requires an x86
                warehouse be used for execution.

        Note::
            The type hints can still be extracted from the local source Python file if they
            are provided, but currently are not working for a zip file or a remote file. Therefore,
            you have to provide ``output_schema`` and ``input_types`` when ``path``
            points to a zip file or a remote file.

        See Also:
            - :func:`~snowflake.snowpark.functions.udtf`
            - :meth:`register`
        )rr   rs   r-   z#UDTFRegistration.register_from_file)r_   r`   ra   rc   rS   rh   rq   rW   rd   rT   rU   r;   N)r   register_from_filer   r   r   rn   ro   )r7   rr   rs   r+   r,   rV   r-   rW   rX   rY   r.   rZ   r[   r\   r]   r^   r_   r`   ra   rc   rd   rS   rq   rT   rU   r;   rp   s                              r8   ru   z#UDTFRegistration.register_from_file  s    B 0##%	
 +	 *)4I-- *4))L) .J#!1 E-I)'$7$7#34 5!+	 +	 +	s   AA::B)rg   rS   rq   rW   rd   rT   rU   r;   rg   rh   c       	            d\  }}|j                  d      f|rP| j                  j                  j                         }t	        |j
                  j                  |      }|j                  }t        ||||d   ||      S t        |       t        |d       t        |t              rt        |j                         |} d }nRt        |t              rt        |j                          |} d }n(t        |t"              rt%        |      }t        |       d } t'        | j                  t(        j*                  | |||      \  }!}"}#}}}$d|_        |r| j                  j                  j                         }t	        |j
                  j                  |      }|j                  }t/        ||fi d|d|d	|d
|d|d|d|	d|
d|d|d|d|d|d|d|d|d|d|d| j                  d|!| |xs( t1        t3        |            D %cg c]
  }%d|%dz     c}%}&t5        ||&      D '(cg c]  \  }'}(t7        |'|(       })}'}(t9        | j                  t(        j*                  ||&|!|||||"|#||||||j                  dd            \  }*}+},}-}.}/d }0| j                  | j                  j:                  }0|/st=        |0       d}1	 t?        d3i d| j                  d|d|d |)d!|$d"|*d#t(        j@                  d$|!d%|,d&|-d'|d(tB        jD                  d|d|	d|
d)|+d*|d|d|d|d|d|d|d|d+|d,|d-|0d.|d/| 	 |1rtU        | j                  |.|       	 t        ||||!|||2      S c c}%w c c}(}'w # tF        $ rE}2d0}1tI        jJ                         d1   }3tM        jN                  |2      }4|4jQ                  |3      d d }2~2wtR        $ r d0}1 w xY w# |1rtU        | j                  |.|       w w xY w)4N)NNrf   )r/   r0   z
udtf-level)r+   Fr+   r,   r-   rX   rY   r.   rZ   r[   r\   rb   r]   r^   r_   r`   ra   rc   rS   rW   rP   arg    _suppress_local_package_warnings)rS   rq   rW   rT   ry   funcreturn_type
input_argsopt_arg_defaultsr*   object_typeobject_nameall_importsall_packagesraw_importsregistration_typeinline_python_coderh   rg   rd   runtime_versionrT   rU   T   )r.   r/   r0   rM   )+rk   rR   
_ast_batchbindr   exprudtfuidr)   r   r   
isinstancer$   _validate_output_schema_namesnamesr#   	col_namesr'   tupler   r   rn   
structuredr   rangelenzipr   r   !_runtime_version_from_requirementr   r   FUNCTIONr   UDTFr   sysexc_infor   $SQL_EXCEPTION_FROM_PROGRAMMING_ERRORwith_tracebackBaseExceptionr   )5r7   r*   r+   r,   rV   r-   rX   rY   r.   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rg   rS   rh   rq   rW   rd   rT   rU   r;   rp   astast_idstmtr{   r   is_pandas_udfis_dataframe_inputr}   i	arg_namesdtarg_namer|   rs   coder   r   upload_file_stage_location%custom_python_runtime_version_allowed runtime_version_from_requirementraisedpetbnes5                                                        r8   ro   z"UDTFRegistration._do_register_udtf  s\   @ !V::/0<}}//446'		=+01  	!/7L1mZ0)-*=*=>'K M':;)-*A*AB'K M8
 "-0M)-8K (MM))'
	
 $)  ==++002D#DIINND9CXXF , (	
   .   "   , "  .   .J   !" $#$  %& "2'( *)* +, )4/4  S5[AQ;R#Sac!a%M#S	8;K8S
(4HIb(#

 
 )MM))-)E% 3-3ZZ2E.#
	
&10 ,0(==$?? - 5()IJ2	#  * &	
 "2 % +33 ( ( * $ #3"7"7 *   ,  $(!" !0#$ %& '( .J)*  +, $-. "2/0  12 #434 (56 !A78 %89: %8;\ 5MM#=~ (
 	
A $T
^   	2F"B0UUB ##B'T1 	F	 5MM#=~ s2   M3(M8 BM> >	OA OOO O6)NNNFNNNFF   FFNNFNNF)NNNFNNNFFr   FFNNFNF)NNNFFr   FFNNFNN)rE   rF   rG   rH   r   r9   r   r
   r   r$   r'   rI   r   r"   rL   r	   r   rK   r   r)   ri   ru   r   r   ro   rM   r:   r8   rO   rO      s   jX )E F  4   
 15+/48"(,?C;?#<@,0(,!%!+h. 6:-18<5hh Z#8MMNh d8n-	h
 d3i(h uS(3-/01h h !h $uS%S/%9:;<h 4c:o 678h h h h h h  '/tCy&9!h" $sCx.)#h$ %h& !'h( #)h* +h. #4S>2/h0 &c]1h2 &d38n53h4 5h8 
"9h hT  15+/48"(,?C;?#<@,0!%!+k. 6:-2-18<7kk k Z#8MMN	k
 d8n-k d3i(k uS(3-/01k k !k $uS%S/%9:;<k 4c:o 678k k k k k  !k" '/tCy&9#k$ $sCx.)%k& 'k( #)k* +k. #4S>2/k0 '+1k2 &c]3k4 &d38n55k6 7k: 
";k kh )-?C;?#<@,0(,!%'_
* 7;59-2"!-18<;_
xsCx01_
 Z#8MMN_
 d8n-	_

 d3i(_
 sm_
 !_
 $uS%S/%9:;<_
 4c:o 678_
 _
 _
 _
 _
 _
 '/tCy&9_
  $sCx.)!_
" #_
$ !%_
& #'_
* $DcN3+_
, #4S>2-_
. /_
0 '+1_
2 3_
4 5_
6 &c]7_
8 &d38n59_
: ;_
> 
"?_
r:   rO   r   r1   c                 (    | D ]  }t        |        y r3   )r    )r   r-   s     r8   r   r   r  s     #T"#r:   )>rH   r   typesr   typingr   r   r   r   r   r	   r
   r   snowflake.snowpark	snowflake4snowflake.snowpark._internal.proto.generated.ast_pb2snowpark	_internalr>   	generatedast_pb2snowflake.connectorr   &snowflake.snowpark._internal.ast.utilsr   r   r   *snowflake.snowpark._internal.error_messager   +snowflake.snowpark._internal.open_telemetryr   'snowflake.snowpark._internal.type_utilsr   &snowflake.snowpark._internal.udf_utilsr   r   r   r   r   r   r   r   r   "snowflake.snowpark._internal.utilsr   r   r   r   r    !snowflake.snowpark.table_functionr!   snowflake.snowpark.typesr"   r#   r$   version_infor'   collections.abcr)   rO   rI   r   rM   r:   r8   <module>r      s   "   J J J  D D D 0 
 W A
 
 
  @ N N
 v(># >#Bh
 h
V## #4 #r:   