
    ɯeiZy                    .   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlZd dlmc m Z  d dl!mc m"c m#c m$c m%Z# d dl&m'Z'm(Z(m)Z) d d	l*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8mZ9m:Z:m;Z; d d
l<m=Z= d dl>m?Z?m@Z@mAZAmBZBmCZCmDZD d dlEmFZFmGZGmHZHmIZImJZJmKZKmLZL d dlMmNZN d dlOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZW d dlXmYZYmZZZm[Z[m\Z\ d dl]m^Z^m_Z_m`Z`maZa d dlbmcZcmdZdmeZemfZfmgZgmhZhmiZimjZjmkZkmlZlmmZmmnZn d dlompZpmqZqmrZrmsZsmtZtmuZumvZvmwZwmxZxmyZymzZzm{Z{m|Z|m}Z}m~Z~mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d lmZ d d!lmZ d d"lmZ d d#lmZmZmZmZmZmZmZmZmZmZmZmZmZ d d$lmZ d d%lmZ d d&lmZmZmZmZmZ d d'lmZmZmZmZmZmZmZmZmZmZmZmZmZ ej                  d(k  rd d)lmZ nd d)lmZ er
d dlZd d*lmZ  ee      Zd+Zd,Z ej                  d-e d.      Zd/ed0efd1Z d/ed2ee   fd3Zd4ed0ee   fd5Zd6d7d8ed/ee   d9ee   d:ee   f
d;Zd<d<d=d>d7d?d7d@e2dAee   dBedCed0edD   fdEZ G dF d7      ZddddGddGddHdIedJedKee   dLeee      dMeeeeeeef   f         dNeeeeef         dOedPeeeee   f      dQedRee   fdSZddddGddTdIedJedUeeef   dPeeeee   f      dMeeeeeeef   f         dNeeeeef         dOedRee   fdVZ	e	Z
y)W    N)Counter)cached_property)	getLogger)
ModuleType)TYPE_CHECKINGAnyCallableDictIteratorListOptionalSetTupleUnionoverload)ZoneInfo)installed_pandaspandaspyarrow)AsOfCrossExcept	FullOuterInner	IntersectJoinJoinTypeLeftAnti	LeftOuterLeftSemiNaturalJoinLateralJoin
RightOuterr   	UsingJoincreate_join_type)unquote_if_quoted)	Attribute
ExpressionLiteralNamedExpressionStarUnresolvedAttribute)
SET_EXCEPTSET_INTERSECT	SET_UNIONSET_UNION_ALLSelectSnowflakePlanSelectStatementSelectTableFunction)PlanQueryType)CopyIntoTableNodeDynamicTableCreateModeLimitLogicalPlanSaveModeSnowflakeCreateTableTableCreationSourceReadFileNode)	Ascending
Descending	SortOrderSortByAllOrder)FlattenFunctionLateralTableFunctionExpressionTableFunctionJoin)CreateDynamicTableCommandCreateViewCommandDistinctFilterLocalTempViewPersistedViewProjectRenameSampleSortUnpivotViewType)add_intermediate_stmtbuild_expr_from_dict_str_strbuild_expr_from_python_valbuild_expr_from_snowpark_column+build_expr_from_snowpark_column_or_col_name*build_expr_from_snowpark_column_or_sql_str+build_expr_from_snowpark_column_or_table_fnbuild_indirect_table_fn_applydebug_check_missing_astfill_ast_for_columnfill_save_modewith_src_positionDATAFRAME_AST_PARAMETERbuild_view_namebuild_table_name
build_name)SnowparkClientExceptionMessages)open_telemetry_context_manager)ResourceUsageCollectoradd_api_calladjust_api_subcallsdf_api_usagedf_collect_api_telemetry#df_to_relational_group_df_api_usage)ColumnOrNameColumnOrSqlExprLiteralType$format_day_time_interval_for_display&format_year_month_interval_for_displaysnow_type_to_dtype_strtype_string_to_type_object) add_package_to_existing_packages)SKIP_LEVELS_THREESKIP_LEVELS_TWOTempObjectTypecheck_agg_exprscheck_flatten_modecolumn_to_bool0create_or_update_statement_params_with_query_tag
deprecatedescape_quotesexperimentalgenerate_random_alphanumericget_copy_into_table_options'is_snowflake_quoted_id_case_insensitive-is_snowflake_unquoted_suffix_case_insensitiveis_sql_select_statementparse_positional_args_to_list&parse_positional_args_to_list_variadicparse_table_nameprepare_pivot_arguments	publicapi
quote_namerandom_name_for_temp_objectstr_to_enumvalidate_object_nameglobal_counterstring_half_widthwarning)"track_data_source_statement_params)AsyncJob_AsyncResultType)Column_to_col_if_sql_expr_to_col_if_str)DataFrameAIFunctions)DataFrameAnalyticsFunctions)DataFrameNaFunctions)DataFrameStatFunctions)DataFrameWriter)SnowparkDataframeException)QueryProfiler)abscolcounthashlitmaxmeanminrandom
row_numbersql_exprstddevto_char)MockSelectStatementRow)TableFunctionCall!_create_table_function_expression_ExplodeFunctionCall_get_cols_after_explode_join_get_cols_after_join_table)	ArrayTypeDataTypeDayTimeIntervalTypeMapTypePandasDataFrameType
StringTypeStructField
StructType_NumericType_FractionalTypeTimestampTypeTimestampTimeZoneYearMonthIntervalType)   	   )Iterable)Tablei@B    z._[a-zA-Z0-9]{z}_(.*)prefixreturnc                 ,    |  dt        t               dS )N_)r{   _NUM_PREFIX_DIGITS)r   s    ^/var/www/html/glpi_dashboard/venv/lib/python3.12/site-packages/snowflake/snowpark/dataframe.py_generate_prefixr      s    XQ34FGHJJ    exclude_prefixesc                 0   t        j                         }|  d|dd}t        |D cg c]  }|j                  |       c}      rF|  d|dd}t        j                         }t        |D cg c]  }|j                  |       c}      rF|S c c}w c c}w )z\
    Generate deterministic prefix while ensuring it doesn't exist in the exclude list.
    r   04)r   nextany
startswith)r   r   countercandidate_prefixps        r   _generate_deterministic_prefixr      s     !!#G 72,a0
7GH!q||,-H
I$XQwrl!4 %%' 7GH!q||,-H
I  IHs   B,Bcol_namec                     g }| }t         j                  |      x}r:|j                  d      }|j                  |       t         j                  |      x}r:|S )N   )_UNALIASED_REGEXmatchgroupappend)r   	unaliasedcr   s       r   _get_unaliasedr     s]    IA#))!,
,%
,KKN $))!,
,%
, r   df	DataFramer   suffixcommon_col_namesc           	      z   | j                  |d      }|j                  d      }||v r~|rdt        |      }t        |      } |j                  |r|rd| |j                          d      S d| t        |j                  d             d      S  |j                  d| | d      S  |j                  d| d      S )NF	_emit_ast")r   stripr}   r~   aliasupperry   )	r   r   r   r   r   r   unquoted_col_namecolumn_case_insensitive suffix_unqouted_case_insensitives	            r   _alias_if_neededr     s     &&e&
$C&Ma&P#=fE - 399*/O %&v||~&6a8  ,-mFLL<M.N-OqS 
 syy1VH%6$7q9::syy1./q122r    lsuffixrsuffixlhsrhs	join_typeusing_columnsr   r   )r   r   c                   ||k(  r|rt        d|d      |D ch c]  }t        |       }}| j                  D cg c]  }|j                   }	}|j                  D cg c]  }|j                   }
}|	D cg c]  }|t	        |
      v r||vr| }}|	|
z   D cg c]  }t        |       }}|r.| j                  j                  j                  j                          |xs | j                  }|xs |j                  }|xs |}|st        d|      nd}|st        d|      nd}| j                  |	D cg c])  }t        | |||t        |t        t         f      rg n|      + c}d      }|j                  |
D cg c]  }t        |||||       c}d      }||fS c c}w c c}w c c}w c c}w c c}w c c}w c c}w )NzH'lsuffix' and 'rsuffix' must be different if they're not empty. You set z	 to both.lr   rFr   )
ValueErrorr   _outputnamesetr&   _session_conn_telemetry_clientsend_alias_in_join_telemetry_aliasr   selectr   
isinstancer    r   )r   r   r   r   r   r   r   normalized_using_columnsattr	lhs_names	rhs_namesnr   	all_namessuffix_provided
lhs_prefix
rhs_prefixr   lhs_remappedrhs_remappeds                       r   _disambiguater  +  s    'gVW^Vaajk
 	
 8EE!
1EE
 (+{{3t3I3'*{{3t3I3 I1,D#D 	
 
 099/DE!"1%EIE,,IIK#G#G(O>M&sI6SU  ?N&sI6SU  :: "		
   Xx,@AGW		
   L :: "	
 S$
G=MN	
   L %%c  F
 43
 F"		
	
s(   FF# F(9F-F25.F7;F<c            $       ~   e Zd ZdZe	 	 	 	 	 dded   dee   ded	eej                     d
eddfd       Z
deddfdZedefd       Zedefd       Zedee   fd       Zej(                  dee   ddfd       Zeedddddddeeeef      dededed
edee   fd              Zeedddddddeeeef      dededed
edefd              Zeedddddddeeeef      dededed
edeee   ef   fd              Zeeddddddeeeef      deded
edef
d              Zddej@                  ddddeeeef      dedededed edeee   ef   fd!Z! ee!      Z"edd"deeeef      defd#       Z#eeddddd$deeeef      deded
ede$e   f
d%              Z%eeddddd$deeeef      deded
edef
d&              Z%eeddddd$deeeef      deded
edee$e   ef   f
d'              Z%defd(Z&dd)Z'dd*Z(e)r8d+dl*Z*eedddd,deeeef      ded
ed eeef   de*jV                  f
d-              Z,eedddd,deeeef      ded
ed eeef   def
d.              Z,eedddd,deeeef      ded
ed eeef   ded/ef   f
d0              Z,e)r;d+dl*Z*eedddd,deeeef      ded
ed eeef   de$e*jV                     f
d1              Z-eedddd,deeeef      ded
ed eeef   def
d2              Z-eedddd,deeeef      ded
ed eeef   dee$d/   ef   f
d3              Z- e.d45      eedddd,deeeef      ded
ed eeef   ded6ef   f
d7                     Z/ e.d45      eedddd,deeeef      ded
ed eeef   dee$d6   ef   f
d8                     Z0e1edd9d:eee2e   f   d
edd fd;              Z3ee	 	 	 	 dd<eeeee   f      d=eee      d>ed
edd?f
d@              Z4dAeee5ee6ef   fdBZ7dCefdDZ8edee   fdE       Z9eddFed
ede5fdG       Z:e1edddHdIeee;e<f   e2ee;e<f      f   d	eej                     d
edd fdJ              Z=e1edddHdKeee2e   f   d	ej                  d
edd fdL              Z>e>Z?edd9dIee;e2e;   f   d
edd fdM       Z@e1e	 	 ddNed	ej                  d
edd fdO              ZAe1e	 	 ddPeBd	ej                  d
edd fdQ              ZCe1edddRdIee;e2e;   f   dSeeeeeeeef      f      d
edd fdT              ZD e.dU5      eddCed
efdV              ZEe1edd9dKee5e6e;ef   eeef   f   d
edd fdW              ZFeGedd9dIee;e2e;   f   d
eddXfdY              ZHeGedddHdIee;e2e;   f   d	eej                     d
eddXfdZ              ZIeGedd9d[ed\e2d\   f   d
eddXfd]              ZJeGedd9dIee;e2e;   f   d
eddXfd^              ZKe	 dd	ej                  d
edd fd_       ZLedddHd`eee2e   f   d	ej                  d
edd fda       ZMeGe	 	 	 ddbe;dceee2eN   ddf      deeeN   d
eddXf
df              ZOe1e	 	 ddgedhediee;   djed
edd fdk              ZPe1e	 	 	 ddledmed	ej                  d
edd f
dn              ZQe1eddod d
edd fdp              ZRe1eddod d
edd fdq              ZSe1e	 	 ddod dred
edd fds              ZTe1e	 	 ddod dred
edd fdt              ZU	 	 	 d	dod duedred	ej                  dd f
dvZVe1eddod d
edd fdw              ZWe1eddod d
edd fdx              ZXe1e	 	 ddyd dzee   d
edd fd{              ZYe1e	 d
d|d|dd}dyd d~ee5   deded
edd fd              ZZe1e	 	 dd|d|ddddyd d~eee;e2e   f      dzee   dededee5   d
edd fd              Z[e1edd9deeee   e<f   de;d
ede;dd f
d              Z\e1ed|d|dd}dyd deded
edd f
d              Z]d|d|ddddyd dee5e2e   f   de^dededee5   d	ej                  dd fdZ`d|d|ddddyd de^dee5   dededee5   d	ej                  dd fdZae1edddddFedee5e<f   dedej                  d
edd fd              Zbe1edddddee   dceee5e<f      ded	ej                  d
edd fd              Zceedddd,deeeef      ded
edefd              Zdeedddd,deeeef      ded
edefd              Zdedddd,deeeef      ded
edeeef   fd       Zdedeefd       Zfeedddddddddd	deee2e   f   dee2e      dNee   dee   dee2e      dee2e;      deeeef      deeeef      deeg   d
ededee   fd              Zhee	 	 dddddlededeeeef      d
eddf
d              Zi ejddd      e1e	 	 	 	 	 dde;dee   dededed
edd fd                     Zk	 d
deld	ej                  dd fdZmdled
efdZn	 ddleded
edefdZo	 	 	 	 	 ddedeeef   dedee   dedefdZpdedCeee2e   f   defdZqeeddddddCeee2e   f   dee   deeeef      ded
edee   fd              ZreedddddddddddddĜdCeee2e   f   dededee   dedee   dee   dee2e;      dedee   dee   deeeef      deeg   ded
edee   f d̈́              ZseeddddddCeee2e   f   dee   deeeef      ded
edee   fd΄              ZteeddddϜdCeee2e   f   dee   deeeef      d
edee   f
dЄ              Zu	 	 	 ddedevdee   deded	eej                     fdԄZw	 	 	 	 	 	 	 	 	 ddCedededexdee   dee   dee   dee2e;      dedee   dee   deeg   defdքZyee	 d
dddd,dlee   deeeef      ded
edeee   ee   f   f
dׄ              Zzee	 d
dddd,dlee   deeeef      ded
edef
d؄              Zze	 d
dddd,dlee   deeeef      ded
edeee   ee   ef   f
dل       ZzezZ{e1e	 	 	 ddee|   dlee   d
edd fdۄ              Z}e~ddee|   dlee   fd܄       Zedefd݄       Zedefdބ       Zedd߄       ZeddddIeeee   f   d
edd fd       Ze1e	 	 ddee;egf   ded
efd              Ze1e	 	 dde;ded	eej                     d
edd f
d              Zeeddddeeeef      d
eddfd              Ze	 d
ddddee|   dee   deeeef      d
eded    f
d       Z e.d5      	 ddee   deddfd       Zedeeee   f   fd       ZddZdefdZdFedeeef   fdZedee   fd       Zedefd       Zedee6eef      fd       Zd
ddZdKee;   defdZdedIee;e2e;   f   dee   fdZ	 	 	 ddee   deeg   deeg   defdZd
dee   ddfd ZeCZej:                  xZZej>                  Zej@                  ZejB                  ZejD                  xZZej                  ZejJ                  ZejN                  ZetZerZeuZe]ZeMZeIZeXxZZe3Ze,ZeSZeUZeTZebZeZe%ZeZeDZeZeZy(  r   aU)  Represents a lazily-evaluated relational dataset that contains a collection
    of :class:`Row` objects with columns defined by a schema (column name and type).

    A DataFrame is considered lazy because it encapsulates the computation or query
    required to produce a relational dataset. The computation is not performed until
    you call a method that performs an action (e.g. :func:`collect`).

    **Creating a DataFrame**

    You can create a DataFrame in a number of different ways, as shown in the examples
    below.

    Creating tables and data to run the sample code:
        >>> session.sql("create or replace temp table prices(product_id varchar, amount number(10, 2))").collect()
        [Row(status='Table PRICES successfully created.')]
        >>> session.sql("insert into prices values ('id1', 10.0), ('id2', 20.0)").collect()
        [Row(number of rows inserted=2)]
        >>> # Create a CSV file to demo load
        >>> import tempfile
        >>> with tempfile.NamedTemporaryFile(mode="w+t") as t:
        ...     t.writelines(["id1, Product A", "\n" "id2, Product B"])
        ...     t.flush()
        ...     create_stage_result = session.sql("create temp stage test_stage").collect()
        ...     put_result = session.file.put(t.name, "@test_stage/test_dir")

    Example 1
        Creating a DataFrame by reading a table in Snowflake::

            >>> df_prices = session.table("prices")

    Example 2
        Creating a DataFrame by reading files from a stage::

            >>> from snowflake.snowpark.types import StructType, StructField, IntegerType, StringType
            >>> df_catalog = session.read.schema(StructType([StructField("id", StringType()), StructField("name", StringType())])).csv("@test_stage/test_dir")
            >>> df_catalog.show()
            ---------------------
            |"ID"  |"NAME"      |
            ---------------------
            |id1   | Product A  |
            |id2   | Product B  |
            ---------------------
            <BLANKLINE>

    Example 3
        Creating a DataFrame by specifying a sequence or a range::

            >>> session.create_dataframe([(1, "one"), (2, "two")], schema=["col_a", "col_b"]).show()
            ---------------------
            |"COL_A"  |"COL_B"  |
            ---------------------
            |1        |one      |
            |2        |two      |
            ---------------------
            <BLANKLINE>
            >>> session.range(1, 10, 2).to_df("col1").show()
            ----------
            |"COL1"  |
            ----------
            |1       |
            |3       |
            |5       |
            |7       |
            |9       |
            ----------
            <BLANKLINE>

    Example 4
        Create a new DataFrame by applying transformations to other existing DataFrames::

            >>> df_merged_data = df_catalog.join(df_prices, df_catalog["id"] == df_prices["product_id"])

    **Performing operations on a DataFrame**

    Broadly, the operations on DataFrame can be divided into two types:

    - **Transformations** produce a new DataFrame from one or more existing DataFrames. Note that transformations are lazy and don't cause the DataFrame to be evaluated. If the API does not provide a method to express the SQL that you want to use, you can use :func:`functions.sqlExpr` as a workaround.
    - **Actions** cause the DataFrame to be evaluated. When you call a method that performs an action, Snowpark sends the SQL query for the DataFrame to the server for evaluation.

    **Transforming a DataFrame**

    The following examples demonstrate how you can transform a DataFrame.

    Example 5
        Using the :func:`select()` method to select the columns that should be in the
        DataFrame (similar to adding a ``SELECT`` clause)::

            >>> # Return a new DataFrame containing the product_id and amount columns of the prices table.
            >>> # This is equivalent to: SELECT PRODUCT_ID, AMOUNT FROM PRICES;
            >>> df_price_ids_and_amounts = df_prices.select(col("product_id"), col("amount"))

    Example 6
        Using the :func:`Column.as_` method to rename a column in a DataFrame (similar
        to using ``SELECT col AS alias``)::

            >>> # Return a new DataFrame containing the product_id column of the prices table as a column named
            >>> # item_id. This is equivalent to: SELECT PRODUCT_ID AS ITEM_ID FROM PRICES;
            >>> df_price_item_ids = df_prices.select(col("product_id").as_("item_id"))

    Example 7
        Using the :func:`filter` method to filter data (similar to adding a ``WHERE`` clause)::

            >>> # Return a new DataFrame containing the row from the prices table with the ID 1.
            >>> # This is equivalent to:
            >>> # SELECT * FROM PRICES WHERE PRODUCT_ID = 1;
            >>> df_price1 = df_prices.filter((col("product_id") == 1))

    Example 8
        Using the :func:`sort()` method to specify the sort order of the data (similar to adding an ``ORDER BY`` clause)::

            >>> # Return a new DataFrame for the prices table with the rows sorted by product_id.
            >>> # This is equivalent to: SELECT * FROM PRICES ORDER BY PRODUCT_ID;
            >>> df_sorted_prices = df_prices.sort(col("product_id"))

    Example 9
        Using :meth:`agg` method to aggregate results.

            >>> import snowflake.snowpark.functions as f
            >>> df_prices.agg(("amount", "sum")).collect()
            [Row(SUM(AMOUNT)=Decimal('30.00'))]
            >>> df_prices.agg(f.sum("amount")).collect()
            [Row(SUM(AMOUNT)=Decimal('30.00'))]
            >>> # rename the aggregation column name
            >>> df_prices.agg(f.sum("amount").alias("total_amount"), f.max("amount").alias("max_amount")).collect()
            [Row(TOTAL_AMOUNT=Decimal('30.00'), MAX_AMOUNT=Decimal('20.00'))]

    Example 10
        Using the :func:`group_by()` method to return a
        :class:`RelationalGroupedDataFrame` that you can use to group and aggregate
        results (similar to adding a ``GROUP BY`` clause).

        :class:`RelationalGroupedDataFrame` provides methods for aggregating results, including:

        - :func:`RelationalGroupedDataFrame.avg()` (equivalent to AVG(column))
        - :func:`RelationalGroupedDataFrame.count()` (equivalent to COUNT())
        - :func:`RelationalGroupedDataFrame.max()` (equivalent to MAX(column))
        - :func:`RelationalGroupedDataFrame.median()` (equivalent to MEDIAN(column))
        - :func:`RelationalGroupedDataFrame.min()` (equivalent to MIN(column))
        - :func:`RelationalGroupedDataFrame.sum()` (equivalent to SUM(column))

        >>> # Return a new DataFrame for the prices table that computes the sum of the prices by
        >>> # category. This is equivalent to:
        >>> #  SELECT CATEGORY, SUM(AMOUNT) FROM PRICES GROUP BY CATEGORY
        >>> df_total_price_per_category = df_prices.group_by(col("product_id")).sum(col("amount"))
        >>> # Have multiple aggregation values with the group by
        >>> import snowflake.snowpark.functions as f
        >>> df_summary = df_prices.group_by(col("product_id")).agg(f.sum(col("amount")).alias("total_amount"), f.avg("amount")).sort(col("product_id"))
        >>> df_summary.show()
        -------------------------------------------------
        |"PRODUCT_ID"  |"TOTAL_AMOUNT"  |"AVG(AMOUNT)"  |
        -------------------------------------------------
        |id1           |10.00           |10.00000000    |
        |id2           |20.00           |20.00000000    |
        -------------------------------------------------
        <BLANKLINE>

    Example 11
        Using windowing functions. Refer to :class:`Window` for more details.

            >>> from snowflake.snowpark import Window
            >>> from snowflake.snowpark.functions import row_number
            >>> df_prices.with_column("price_rank",  row_number().over(Window.order_by(col("amount").desc()))).show()
            ------------------------------------------
            |"PRODUCT_ID"  |"AMOUNT"  |"PRICE_RANK"  |
            ------------------------------------------
            |id2           |20.00     |1             |
            |id1           |10.00     |2             |
            ------------------------------------------
            <BLANKLINE>

    Example 12
        Handling missing values. Refer to :class:`DataFrameNaFunctions` for more details.

            >>> df = session.create_dataframe([[1, None, 3], [4, 5, None]], schema=["a", "b", "c"])
            >>> df.na.fill({"b": 2, "c": 6}).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |1    |2    |3    |
            |4    |5    |6    |
            -------------------
            <BLANKLINE>

    **Performing an action on a DataFrame**

    The following examples demonstrate how you can perform an action on a DataFrame.

    Example 13
        Performing a query and returning an array of Rows::

            >>> df_prices.collect()
            [Row(PRODUCT_ID='id1', AMOUNT=Decimal('10.00')), Row(PRODUCT_ID='id2', AMOUNT=Decimal('20.00'))]

    Example 14
        Performing a query and print the results::

            >>> df_prices.show()
            ---------------------------
            |"PRODUCT_ID"  |"AMOUNT"  |
            ---------------------------
            |id1           |10.00     |
            |id2           |20.00     |
            ---------------------------
            <BLANKLINE>

    Example 15
        Calculating statistics values. Refer to :class:`DataFrameStatFunctions` for more details.

            >>> df = session.create_dataframe([[1, 2], [3, 4], [5, -1]], schema=["a", "b"])
            >>> df.stat.corr("a", "b")
            -0.5960395606792697

    Example 16
        Performing a query asynchronously and returning a list of :class:`Row` objects::

            >>> df = session.create_dataframe([[float(4), 3, 5], [2.0, -4, 7], [3.0, 5, 6], [4.0, 6, 8]], schema=["a", "b", "c"])
            >>> async_job = df.collect_nowait()
            >>> async_job.result()
            [Row(A=4.0, B=3, C=5), Row(A=2.0, B=-4, C=7), Row(A=3.0, B=5, C=6), Row(A=4.0, B=6, C=8)]

    Example 17
        Performing a query and transforming it into :class:`pandas.DataFrame` asynchronously::

            >>> async_job = df.to_pandas(block=False)
            >>> async_job.result()
                 A  B  C
            0  4.0  3  5
            1  2.0 -4  7
            2  3.0  5  6
            3  4.0  6  8
    NFTsessionsnowflake.snowpark.Sessionplan	is_cached	_ast_stmtr   r   c                 D   || _         |+| j                   j                  j                  |      | _        nd| _        t	        |t
        t        f      rf|| _        |j                  j                  | j                  j                         |j                  j                  | j                  j                         nd| _        d| _        |r||j                  nd| _        d| _        || _        d| _        t	        |t
        t        f      r~t	        |j"                  t$              xr\ t	        |j"                  j&                  j(                  t*              xr, |j"                  j&                  j(                  j,                  du| _        n.t1        t	        |t*              xr |j,                  du      | _        d| _        t5        | d      | _        t9        |       | _        t=        |       | _        | j:                  j@                  x| _!        | _         | j:                  jD                  | _"        | j:                  jF                  | _#        | j:                  jH                  | _$        | j:                  jJ                  x| _&        | _%        tO        |       | _(        | jP                  jR                  | _*        | jP                  jV                  | _,        | jP                  jZ                  | _-        t]        |       | _/        d| _0        tb        jd                  r| j                  jf                   yy)a=  
        :param int _ast_stmt: The AST Bind atom corresponding to this dataframe value. We track its assigned ID in the
                             slot self._ast_id. This allows this value to be referred to symbolically when it's
                             referenced in subsequent dataframe expressions.
        NFr   )4r   	_analyzerresolve_planr   r2   r   _select_statementexpr_to_aliasupdate$df_aliased_col_name_to_real_col_name_DataFrame__ast_iduid_ast_id_statement_paramsr  _ops_after_aggfrom_r1   snowflake_plansource_planr<   xml_reader_udtf_all_variant_colsbool_readerr   _writerr   _statr   
_analyticsapprox_quantileapproxQuantilecorrcovcrosstab	sample_bysampleByr   _nadropdropnafillfillnareplacer   _air   context_debug_eager_schema_validation
attributes)selfr  r  r  r  r   s         r   __init__zDataFrame.__init__V  s^     0088>DJDJd_.ABC%)D"%%djj&>&>?55<<

?? &*D" ,5,A9==tDL!%("
 d_.ABC4::':; Vtzz88DDlSVJJ--99IIQUU " &*4.S43G3Gt3S&D" HL&tu=+D1
5d;59ZZ5O5OOd2JJOO	::>>

++)-)=)=='-hhmmhhmmxx'''-%)11 JJ!! 2r   dataframe_expr_builderc                 |    t        | j                  | j                  |        | j                  |j                  _        y)zx
        Given a field builder expression of the AST type Expr, points the builder to reference this dataframe.
        N)rY   r  r   dataframe_refid)r;  r=  s     r   _set_ast_refzDataFrame._set_ast_ref  s+    
 	 dmmTB26,,,,/r   c                     | j                   S N)r(  r;  s    r   statzDataFrame.stat  s    zzr   c                     | j                   S rC  )r)  rD  s    r   	analyticszDataFrame.analytics  s    r   c                     | j                   S rC  )r  rD  s    r   r  zDataFrame._ast_id  s    }}r   valuec                     || _         | j                  || j                  j                  |       | j                  || j                  j                  |       y y y rC  )r  r  add_df_ast_idr  )r;  rI  s     r   r  zDataFrame._ast_id  sX    ::!e&7JJ$$U+!!-%2C""007 3D-r   )statement_paramsblocklog_on_exceptioncase_sensitiver   rL  rM  rN  rO  c                     y rC   r;  rL  rM  rN  rO  r   s         r   collectzDataFrame.collect       	r   c                     y rC  rQ  rR  s         r   rS  zDataFrame.collect  rT  r   c          	      \   i }|r| j                   j                  j                         }t        |j                  j
                        }| j                  |j                         |t        |j                  |       ||_
        ||_        ||_        d|_        | j                   j                  j                  |       | j                   j                  j                  |      \  }	|t         <   t#        | j$                  |       5   | j&                  d||||d|cddd       S # 1 sw Y   yxY w)a  Executes the query representing this DataFrame and returns the result as a
        list of :class:`Row` objects.

        Args:
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            block: A bool value indicating whether this function will wait until the result is available.
                When it is ``False``, this function executes the underlying queries of the dataframe
                asynchronously and returns an :class:`AsyncJob`.
            case_sensitive: A bool value which controls the case sensitivity of the fields in the
                :class:`Row` objects returned by the ``collect``. Defaults to ``True``.

        See also:
            :meth:`collect_nowait()`
        NF)rL  rM  rN  rO  rQ  )r   
_ast_batchbindr\   exprdataframe_collectrA  r   rR   rL  rM  rO  rN  no_waitevalflushr]   rb   rS  '_internal_collect_with_tag_no_telemetry)
r;  rL  rM  rN  rO  r   kwargsstmtrY  r   s
             r   rS  zDataFrame.collect  s   4 ==++002D$TYY%@%@ADdgg&+,T-B-BDTUDJ"0D$4D! DLMM$$))$/ 261I1I1O1OPT1U.Av-.+DLL$? 	?4?? !1!1-	
 	 	 	s   D""D+)rL  rN  rO  r   c          
      l   i }|r| j                   j                  j                         }t        |j                  j
                        }| j                  |j                         |t        |j                  |       ||_
        ||_        d|_        | j                   j                  j                  |       | j                   j                  j                  |      \  }|t        <   t!        | j"                  |       5   | j$                  d|dt&        j(                  ||d|cddd       S # 1 sw Y   yxY w)a4  Executes the query representing this DataFrame asynchronously and returns: class:`AsyncJob`.
        It is equivalent to ``collect(block=False)``.

        Args:
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            case_sensitive: A bool value which is controls the case sensitivity of the fields in the
                :class:`Row` objects after collecting the result using :meth:`AsyncJob.result`. Defaults to
                ``True``.

        See also:
            :meth:`collect()`
        NTFrL  rM  	data_typerN  rO  rQ  )r   rW  rX  r\   rY  rZ  rA  r   rR   rL  rO  rN  r[  r\  r]  r]   rb   collect_nowaitr^  r   ROW)	r;  rL  rN  rO  r   r_  r`  rY  r   s	            r   rd  zDataFrame.collect_nowait  s   , ==++002D$TYY%@%@ADdgg&+,T-B-BDTU"0D$4D!DLMM$$))$/ 261I1I1O1OPT1U.Av-.+D,?,?F 	?4?? !1*..!1- 	 	 	s   :&D**D3rb  rc  r_  c                P   t        | |xs | j                        } | j                  j                  j                  | j
                  f||t        |xs | j                  | j                  j                  t        | j                  j                  j                  d            ||d|S )Ncollect_stacktrace_in_query_tagcollect_stacktrace)rM  rc  r  rN  rO  )r   r  r   r   executer  rw   	query_tagrq   confget)r;  rL  rM  rc  rN  rO  r_  s          r   r^  z1DataFrame._internal_collect_with_tag_no_telemetry?  s     >"<d&<&<
 +t}}""**JJ
N :D$:$:''!#'==#5#5#9#95$	 .)
 
 	
r   )rL  c                h   t        | j                  |       5  | j                  j                  j	                  | j
                  t        |xs | j                  | j                  j                  t        | j                  j                  j                  d                  cddd       S # 1 sw Y   yxY w)z.This method is only used in stored procedures.rg  rh  r  N)rb   _execute_and_get_query_idr   r   get_result_query_idr  rw   r  rk  rq   rl  rm  )r;  rL  s     r   rp  z#DataFrame._execute_and_get_query_idd  s    
 ,D,J,JDQ 	==&&::

"R$>(>(>MM++%'+}}'9'9'='=9(	# ; 
	 	 	s   BB((B1)rL  rM  rO  r   c                     y rC  rQ  r;  rL  rM  rO  r   s        r   to_local_iteratorzDataFrame.to_local_iteratorv       	r   c                     y rC  rQ  rs  s        r   rt  zDataFrame.to_local_iterator  ru  r   c                   i }|r| j                   j                  j                         }t        |j                  j
                        }| j                  |j                         |t        |j                  |       ||_
        ||_        | j                   j                  j                  |       | j                   j                  j                  |      \  }|t        <    | j                   j                  j                   | j"                  fd|t$        j&                  t)        |xs | j*                  | j                   j,                  t.        | j                   j0                  j3                  d            |d|S )aT  Executes the query representing this DataFrame and returns an iterator
        of :class:`Row` objects that you can use to retrieve the results.

        Unlike :meth:`collect`, this method does not load all data into memory
        at once.

        Example::

            >>> df = session.table("prices")
            >>> for row in df.to_local_iterator():
            ...     print(row)
            Row(PRODUCT_ID='id1', AMOUNT=Decimal('10.00'))
            Row(PRODUCT_ID='id2', AMOUNT=Decimal('20.00'))

        Args:
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            block: A bool value indicating whether this function will wait until the result is available.
                When it is ``False``, this function executes the underlying queries of the dataframe
                asynchronously and returns an :class:`AsyncJob`.
            case_sensitive: A bool value which controls the case sensitivity of the fields in the
                :class:`Row` objects returned by the ``to_local_iterator``. Defaults to ``True``.
        Trg  rh  )to_iterrM  rc  r  rO  )r   rW  rX  r\   rY  dataframe_to_local_iteratorrA  r   rR   rL  rM  rO  r\  r]  r]   r   rj  r  r   ITERATORrw   r  rk  rq   rl  rm  )	r;  rL  rM  rO  r   r_  r`  rY  r   s	            r   rt  zDataFrame.to_local_iterator  s;   B ==++002D$TYY%J%JKDdgg&+,T-B-BDTUDJ"0DMM$$))$/ 261I1I1O1OPT1U.Av-.*t}}""**JJ
&//N :D$:$:''!#'==#5#5#9#95$	 *
 
 	
r   c                 r   | j                   rt        j                  | j                         }| j                   j                  |_        | j                   j                  |_        | j                   j
                  |_        | j                   j                  |_        |S t        j                  | j                        S )z4Returns a shallow copy of the plan of the DataFrame.)
r  copycolumn_statesprojection_in_str_projection_in_strschema_query_schema_queryquery_params_query_paramsr  )r;  new_plans     r   
_copy_planzDataFrame._copy_plan  s    !!yy!7!78H%)%;%;%I%IH"*.*@*@*R*RH'%)%;%;%H%HH"%)%;%;%H%HH"O99TZZ((r   c                 N    t        | j                  | j                         d      S )z?Returns a shallow copy of the DataFrame without AST generation.Fr   )r   r   r  rD  s    r   _copy_without_astzDataFrame._copy_without_ast  s    (9UKKr   c                 f   d}| j                   j                  r_| j                   j                  j                         }t	        |j
                  j                  |       | j                  |j
                         t        | j                   | j                         || j                   j                        S )z4Implements shallow copy protocol for copy.copy(...).Nr  r   )
r   ast_enabledrW  rX  r\   rY  r?  rA  r   r  )r;  r`  s     r   __copy__zDataFrame.__copy__  s    ==$$==++002Ddii55t<dii(MMOOmm//	
 	
r   r   )rL  rM  r   c                     y rC  rQ  r;  rL  rM  r   r_  s        r   	to_pandaszDataFrame.to_pandas       r   c                     y rC  rQ  r  s        r   r  zDataFrame.to_pandas  ru  r   zpandas.DataFramec                   |r| j                   j                  j                         }t        |j                  j
                  |      }| j                  |j                         |t        |j                  |       ||_
        | j                   j                  j                  |       | j                   j                  j                  |      \  }|t        <   t        | j                  |       5   | j                   j                   j"                  | j$                  fd|t&        j(                  t+        |xs | j,                  | j                   j.                  t0        | j                   j2                  j5                  d            d|}ddd       |rt7        t8        j:                        s| j$                  j<                  d   j>                  jA                         jC                         }	tE        |	      }
|
rtF        jI                  d       t9        j:                  || j$                  jJ                  D cg c]%  }|
rtM        |jN                        n|jN                  ' c}      S S # 1 sw Y   xY wc c}w )	a  
        Executes the query representing this DataFrame and returns the result as a
        `pandas DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_.

        When the data is too large to fit into memory, you can use :meth:`to_pandas_batches`.

        Args:
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            block: A bool value indicating whether this function will wait until the result is available.
                When it is ``False``, this function executes the underlying queries of the dataframe
                asynchronously and returns an :class:`AsyncJob`.

        Note:
            1. This method is only available if pandas is installed and available.

            2. If you use :func:`Session.sql` with this method, the input query of
            :func:`Session.sql` can only be a SELECT statement.

            3. For TIMESTAMP columns:
            - TIMESTAMP_LTZ and TIMESTAMP_TZ are both converted to `datetime64[ns, tz]` in pandas,
            as pandas cannot distinguish between the two.
            - TIMESTAMP_NTZ is converted to `datetime64[ns]` (without timezone).
        NTrg  rh  )r  rM  rc  r  zThe query result format is set to JSON. The result of to_pandas() may not align with the result returned in the ARROW format. For best compatibility with to_pandas(), set the query result format to ARROW.)columns)(r   rW  rX  r\   rY  dataframe_to_pandasrA  r   rR   rL  rM  r\  r]  r]   rb   r  r   rj  r  r   PANDASrw   r  rk  rr   rl  rm  r   r   r   queriessqlr   lowerr   _loggerr   r:  r&   r   )r;  rL  rM  r   r_  r`  astr   resultqueryis_select_statementr  s               r   r  zDataFrame.to_pandas
  s   D ==++002D#DII$A$A4HCcff%+,S-A-ACSTCIMM$$))$/ 261I1I1O1OPT1U.Av-.+DNNDA 	0T]]((00

*11"R$>(>(>MM++#'+}}'9'9'='=9(	# F	" ff&6&67

**2.2288:@@B&=e&D#&OOi
 '' %)JJ$9$9 !  3 .dii8!%+
 
 O	 	:s   +BI'3*I3
'I0c                     y rC  rQ  r  s        r   to_pandas_batcheszDataFrame.to_pandas_batchesd  r  r   c                     y rC  rQ  r  s        r   r  zDataFrame.to_pandas_batchesp  ru  r   c                   |r| j                   j                  j                         }t        |j                  j
                  |      }| j                  |j                         |t        |j                  |       ||_
        | j                   j                  j                  |       | j                   j                  j                  |      \  }|t        <    | j                   j                  j                  | j                   fdd|t"        j$                  t'        |xs | j(                  | j                   j*                  t,        | j                   j.                  j1                  d            d|S )a  
        Executes the query representing this DataFrame and returns an iterator of
        pandas dataframes (containing a subset of rows) that you can use to
        retrieve the results.

        Unlike :meth:`to_pandas`, this method does not load all data into memory
        at once.

        Example::

            >>> df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> for pandas_df in df.to_pandas_batches():
            ...     print(pandas_df)
               A  B
            0  1  2
            1  3  4

        Args:
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            block: A bool value indicating whether this function will wait until the result is available.
                When it is ``False``, this function executes the underlying queries of the dataframe
                asynchronously and returns an :class:`AsyncJob`.

        Note:
            1. This method is only available if pandas is installed and available.

            2. If you use :func:`Session.sql` with this method, the input query of
            :func:`Session.sql` can only be a SELECT statement.
        Trg  rh  )r  rx  rM  rc  r  )r   rW  rX  r\   rY  dataframe_to_pandas_batchesrA  r   rR   rL  rM  r\  r]  r]   r   rj  r  r   PANDAS_BATCHrw   r  rk  rr   rl  rm  )r;  rL  rM  r   r_  r`  r  r   s           r   r  zDataFrame.to_pandas_batches|  s.   N ==++002D#DII$I$I4PCcff%+,S-A-ACSTCIMM$$))$/ 261I1I1O1OPT1U.Av-.*t}}""**JJ
&33N :D$:$:''#'==#5#5#9#95$	
 
 	
r   z1.28.0)versionzpyarrow.Tablec                    | j                   j                  j                  | j                  fddd|t	        |xs | j
                  | j                   j                  t        | j                   j                  j                  d            d|S )a  
        Executes the query representing this DataFrame and returns the result as a
        `pyarrow Table <https://arrow.apache.org/docs/python/generated/pyarrow.Table.html>`.

        When the data is too large to fit into memory, you can use :meth:`to_arrow_batches`.

        This function requires the optional dependenct snowflake-snowpark-python[pandas] be installed.

        Args:
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            block: A bool value indicating whether this function will wait until the result is available.
                When it is ``False``, this function executes the underlying queries of the dataframe
                asynchronously and returns an :class:`AsyncJob`.
        FTrg  rh  )r  rx  to_arrowrM  r  )
r   r   rj  r  rw   r  rk  rr   rl  rm  r  s        r   r  zDataFrame.to_arrow  s    2 +t}}""**JJ
N :D$:$:''#'==#5#5#9#95$	
 
 	
r   c                :    | j                   j                  j                  | j                  fddd|t        j
                  t        |xs | j                  | j                   j                  t        | j                   j                  j                  d            d|S )a  
        Executes the query representing this DataFrame and returns an iterator of
        pyarrow Tables (containing a subset of rows) that you can use to
        retrieve the results.

        Unlike :meth:`to_arrow`, this method does not load all data into memory
        at once.

        Args:
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            block: A bool value indicating whether this function will wait until the result is available.
                When it is ``False``, this function executes the underlying queries of the dataframe
                asynchronously and returns an :class:`AsyncJob`.
        FTrg  rh  )r  rx  r  rM  rc  r  )r   r   rj  r  r   rz  rw   r  rk  rr   rl  rm  r  s        r   to_arrow_batcheszDataFrame.to_arrow_batches  s    2 +t}}""**JJ
&//N :D$:$:''#'==#5#5#9#95$	
 
 	
r   r   namesc                   t        | \  }}t        d |D              st        d      t        | j                        t        |      k7  rct        dt        | j                         ddj                  d | j                  D               dt        |       ddj                  |       d	      d	}|r| j                  j                  j                         }t        |j                  j                  |      }|D ]0  }t        |j                  j                  j!                         |       2 ||j                  _        | j%                  |j&                         g }t)        | j                  |      D ].  \  }	}
|j+                  t-        |	      j/                  |
             0 | j1                  |||
      }|r|j2                  |_        |S )a  
        Creates a new DataFrame containing columns with the specified names.

        The number of column names that you pass in must match the number of columns in the existing
        DataFrame.

        Examples::

            >>> df1 = session.range(1, 10, 2).to_df("col1")
            >>> df2 = session.range(1, 10, 2).to_df(["col1"])

        Args:
            names: list of new column names
        c              3   <   K   | ]  }t        |t                y wrC  r   str.0r  s     r   	<genexpr>z"DataFrame.to_df.<locals>.<genexpr>)  s     9!:a%9   z>Invalid input type in to_df(), expected str or a list of strs.z7The number of columns doesn't match. Old column names (z): ,c              3   4   K   | ]  }|j                     y wrC  )r   )r  r  s     r   r  z"DataFrame.to_df.<locals>.<genexpr>2  s     ?$DII?s   z. New column names (.Nr  )r   all	TypeErrorlenr   r   joinr   rW  rX  r\   rY  dataframe_to_dfrS   	col_namesargsaddvariadicrA  r   zipr   r   r   r   r  r  )r;  r   r  r  is_variadicr`  r  r   new_colsr  r   r   s               r   to_dfzDataFrame.to_df  s   & "H!O	;9y99P  t||I.%%(%6$7s88?$,,??@ A%%(^$4C8K7LAO  ==++002D#DII$=$=tDC  J*3==+=+=+A+A+CSIJ%0CMM"cff%dllI6 	6JD$OOF4L..t45	6[[TY[GBJ	r   	index_colr  enforce_orderingzmodin.pandas.DataFramec                    ddl }ddlm} d}|r| j                  j                  j                         }t        |j                  j                  |      }| j                  |j                         |.|j                  j                  t        |t              r|n|g       |.|j                  j                  t        |t              r|n|g       d}	|s+t!        | j"                  d         dkD  rd}	t%        ddd	       |s|	rht'        t(        j*                        }
| j,                  }d| _        | j.                  j1                  |
d
dd       || _        |j3                  |
||d      }n%|j3                  | j"                  d   d   ||d      }|rC|j4                  |j6                  j8                  j:                  j<                  j>                  _        |S )a  
        Convert the Snowpark DataFrame to Snowpark pandas DataFrame.

        Args:
            index_col: A column name or a list of column names to use as index.
            columns: A list of column names for the columns to select from the Snowpark DataFrame. If not specified, select
                all columns except ones configured in index_col.
            enforce_ordering: If False, Snowpark pandas will provide relaxed consistency and ordering guarantees for the returned
                DataFrame object. Otherwise, strict consistency and ordering guarantees are provided. Please refer to the
                documentation of :func:`~modin.pandas.read_snowflake` for more details. If DDL or DML queries have been
                used in this query this parameter is ignored and ordering is enforced.


        Returns:
            :class:`~modin.pandas.DataFrame`
                A Snowpark pandas DataFrame contains index and data columns based on the snapshot of the current
                Snowpark DataFrame, which triggers an eager evaluation.

                If index_col is provided, the specified index_col is selected as the index column(s) for the result dataframe,
                otherwise, a default range index from 0 to n - 1 is created as the index column, where n is the number
                of rows. Please note that is also used as the start row ordering for the dataframe, but there is no
                guarantee that the default row ordering is the same for two Snowpark pandas dataframe created from
                the same Snowpark Dataframe.

                If columns are provided, the specified columns are selected as the data column(s) for the result dataframe,
                otherwise, all Snowpark DataFrame columns (exclude index_col) are selected as data columns.

        Note:
            Transformations performed on the returned Snowpark pandas Dataframe do not affect the Snowpark DataFrame
            from which it was created. Call
            - :func:`modin.pandas.to_snowpark <modin.pandas.to_snowpark>`
            to transform a Snowpark pandas DataFrame back to a Snowpark DataFrame.

            The column names used for columns or index_cols must be Normalized Snowflake Identifiers, and the
            Normalized Snowflake Identifiers of a Snowpark DataFrame can be displayed by calling df.show().
            For details about Normalized Snowflake Identifiers, please refer to the Note in :func:`~modin.pandas.read_snowflake`

            `to_snowpark_pandas` works only when the environment is set up correctly for Snowpark pandas. This environment
            may require version of Python and pandas different from what Snowpark Python uses If the environment is setup
            incorrectly, an error will be raised when `to_snowpark_pandas` is called.

            For Python version support information, please refer to:
            - the prerequisites section https://docs.snowflake.com/en/developer-guide/snowpark/python/snowpark-pandas#prerequisites
            - the installation section https://docs.snowflake.com/en/developer-guide/snowpark/python/snowpark-pandas#installing-the-snowpark-pandas-api

        See also:
            - :func:`modin.pandas.to_snowpark <modin.pandas.to_snowpark>`
            - :func:`modin.pandas.DataFrame.to_snowpark <modin.pandas.DataFrame.to_snowpark>`
            - :func:`modin.pandas.Series.to_snowpark <modin.pandas.Series.to_snowpark>`

        Example::
            >>> df = session.create_dataframe([[1, 2, 3]], schema=["a", "b", "c"])
            >>> snowpark_pandas_df = df.to_snowpark_pandas()  # doctest: +SKIP
            >>> snowpark_pandas_df      # doctest: +SKIP +NORMALIZE_WHITESPACE
               A  B  C
            0  1  2  3

            >>> snowpark_pandas_df = df.to_snowpark_pandas(index_col='A')  # doctest: +SKIP
            >>> snowpark_pandas_df      # doctest: +SKIP +NORMALIZE_WHITESPACE
               B  C
            A
            1  2  3
            >>> snowpark_pandas_df = df.to_snowpark_pandas(index_col='A', columns=['B'])  # doctest: +SKIP
            >>> snowpark_pandas_df      # doctest: +SKIP +NORMALIZE_WHITESPACE
               B
            A
            1  2
            >>> snowpark_pandas_df = df.to_snowpark_pandas(index_col=['B', 'A'], columns=['A', 'C', 'A'])  # doctest: +SKIP
            >>> snowpark_pandas_df      # doctest: +SKIP +NORMALIZE_WHITESPACE
                 A  C  A
            B A
            2 1  1  3  1
        r   NFr  r   Tenforce_ordering_ddlzTenforce_ordering is enabled when using DML/DDL operations regardless of user setting)warning_timeserrorifexists	temporary)mode
table_typer   )name_or_queryr  r  r  ) snowflake.snowpark.modin.pluginmodin.pandasr   r   rW  rX  r\   rY  to_snowpark_pandasrA  r   r  extendr   listr  r  r  r   r   rs   TABLEr  writesave_as_tableread_snowflaker  _query_compiler_modin_frameordered_dataframe_dataframe_refsnowpark_dataframe)r;  r  r  r  r   	snowflakepdr`  r  has_existing_ddl_dml_queriestemporary_table_nameast_idsnowpandas_dfs                r   r  zDataFrame.to_snowpark_pandasJ  s   h 	/! ==++002D#DII$@$@$GCcff%$$$!+It!<I9+ """j$.G7gYW',$CY(?$@1$D+/(&f ;#>$$$  \\FDLJJ$$$$&	 %  "DL--2#!%	 . M --"ll95a8#!&	 . M   ))66HHWWjjr r   itemc                    | j                   d uxr | j                  j                  }t        |t              r| j                  ||      S t        |t              r| j                  ||      S t        |t        t        f      r| j                  ||      S t        |t              r| j                  | j                  |         S t        dt        |             )Nr   zUnexpected item type: )r  r   r  r   r  r   r   filterr  tupler   int__getitem__r  r  type)r;  r  r   s      r   r  zDataFrame.__getitem__  s    LL,J1J1J	dC 88DI866f%;;ty;99tUm,;;ty;99c"##DLL$6774T$ZLABBr   r   c                     |j                         | j                  D cg c]  }|j                          c}vr$t        | j                  j                   d|       | j                  |      S c c}w )Nz object has no attribute )r  r  AttributeError	__class____name__r   )r;  r   r   s      r   __getattr__zDataFrame.__getattr__  sb    ::<4<<@a	@@ >>**++DTFK  xx~	  As   A.c                 .    | j                   j                  S )a  Returns all column names as a list.

        The returned column names are consistent with the Snowflake database object `identifier syntax <https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html>`_.

        ==================================   ==========================
        Column name used to create a table   Column name returned in str
        ==================================   ==========================
        a                                    'A'
        A                                    'A'
        "a"                                  '"a"'
        "a b"                                '"a b"'
        "a""b"                               '"a""b"'
        ==================================   ==========================
        )schemar  rD  s    r   r  zDataFrame.columns  s      {{   r   r   c                 "   d}|rKt        j                         }t        |j                        }| j	                  |j
                         ||_        |dk(  r t        t        | j                        |      S t        | j                  |      |      S )z1Returns a reference to a column in the DataFrame.N*)_ast)protoExprr\   dataframe_colrA  r   r   r   r+   r   _resolve)r;  r   r   rY  col_expr_asts        r   r   zDataFrame.col  st     ::<D,T-?-?@Lloo.$,L!s?$t||,488$--1==r   r  colsc          	      b
   t        | \  }}|st        d      g }d}d}g }	d}
g }|D ]  }t        |t              r| j                  rK|j                  t        |j                  |j                  |j                  d      j                                n|j                  |j                                t        |j                  t        t        f      r%|	j                  |j                  j                         |s||j                  |j                         t        |t              rd}|r4|2t        j                          }t#        ||d       |j                  |       t        ||| j                        }|j                   |j                                |	j                  |       t        |t$              r^|r&t        d|j&                   d|j&                   d      |}|rS|Qt)        | j*                  j,                  |       t        j                          }t/        ||       |j                  |       t1        |	      }t        |t2              r%t5        || j6                  | j8                        \  }}nT| j*                  j:                  j=                  t?        | j6                  |            }tA        || j6                  |      \  }}}|jC                  |xs |       |D cg c](  }| j*                  j:                  jE                  |i       * }}tG        d
       |]	 tI        |	      dz   tI        |      k(  r|	}ndg}| j*                  j:                  j=                  t?        | j6                  ||            }
|}d}|r|| j*                  j,                  jK                         }tM        |jN                  jP                  |      }| jS                  |jT                         ||jV                  _,        d|_-        |D ]*  }||jV                  j\                  j                  |       , | j8                  r|
r| j_                  | j*                  j:                  ja                  | j*                  j:                  jc                  |
| j*                  j:                        | j*                  j:                        je                  |      |      S | j_                  | j8                  je                  |      |      S | j_                  tg        ||
xs | j6                        |      S c c}w )aQ  Returns a new DataFrame with the specified Column expressions as output
        (similar to SELECT in SQL). Only the Columns specified as arguments will be
        present in the resulting DataFrame.

        You can use any :class:`Column` expression or strings for named columns.

        Example 1::
            >>> df = session.create_dataframe([[1, "some string value", 3, 4]], schema=["col1", "col2", "col3", "col4"])
            >>> df_selected = df.select(col("col1"), col("col2").substr(0, 10), df["col3"] + df["col4"])

        Example 2::

            >>> df_selected = df.select("col1", "col2", "col3")

        Example 3::

            >>> df_selected = df.select(["col1", "col2", "col3"])

        Example 4::

            >>> df_selected = df.select(df["col1"], df.col2, df.col("col3"))

        Example 5::

            >>> from snowflake.snowpark.functions import table_function
            >>> split_to_table = table_function("split_to_table")
            >>> df.select(df.col1, split_to_table(df.col2, lit(" ")), df.col("col3")).show()
            -----------------------------------------------
            |"COL1"  |"SEQ"  |"INDEX"  |"VALUE"  |"COL3"  |
            -----------------------------------------------
            |1       |1      |1        |some     |3       |
            |1       |1      |2        |string   |3       |
            |1       |1      |3        |value    |3       |
            -----------------------------------------------
            <BLANKLINE>

        Note:
            A `TableFunctionCall` can be added in `select` when the dataframe results from another join. This is possible because we know
            the hierarchy in which the joins are applied.

        Args:
            *cols: A :class:`Column`, :class:`str`, :class:`table_function.TableFunctionCall`, or a list of those. Note that at most one
                   :class:`table_function.TableFunctionCall` object is supported within a select call.
            _ast_stmt: when invoked internally, supplies the AST to use for the resulting dataframe.
            _emit_ast: Whether to emit AST statements.
        z%The input of select() cannot be emptyNT)_is_qualified_name)r  r  zDAt most one table function can be called inside a select(). Called 'z' and 'z'.)funczWThe input of select() must be Column, column name, TableFunctionCall, or a list of themr   r  )	left_cols
right_colsFanalyzerr   r  r  )4r   r   r   r   r$  r   _expr1_expr2r  _named_expressionr'   r,   r   r  r  r  rZ   r   user_visible_namerQ   r   rW  rX   r   r   r   r  r  r  r  rD   r   r  analyzer  r  rX  r\   rY  dataframe_selectrA  r   r  r  expr_variantr  
_with_plancreate_select_statementcreate_select_snowflake_planr   rK   )r;  r  r   r  exprsr  r  
table_functable_func_col_namesstring_col_names	join_planast_colser  r   ast_col	func_exprr  
alias_colstemp_join_planr   r  r`  r  s                           r   r   zDataFrame.select  s   r DTJ{DEE
#	 F	A!V$))LLHHahh4 &( LL,ammi9L-MN$++AMM,>,>?!2OOAFF+As##!2#(::<L'a>OOL1LT=S=S ZSZZ\* ''*A01$##-#?#?"@H[H[G\\^`  
!2)$--*B*BJO#jjlG1':FOOG,=:N	a!56+G4::t'='=,(Hj &*]]%<%<%D%D)$**i@&N /I!4::~/+Ax Z384HP(ADDMM++33C<($ (  m IF	P !: #$q(CJ6,	 E	//77!JJ'3	I  *==++002D#DII$>$>ECcff% +CHH$C $ 2&HHMM((12 !!MM++CC"mm55RR%0G0G S  "&!8!8	 D 
 fUm" '   ??4#9#9#@#@#GSW?XXwui.E4::FRVWWi(s   )-T,r  c          	      
   t        | \  }}|st        d      d}|r|| j                  j                  j	                         }t        |j                  j                  |      }| j                  |j                         ||j                  _        d|_        |D ]0  }t        |j                  j                  j                         |       2 n|}| j!                  t#        | D cg c]  }t%        |d       c}|      S c c}w )a  
        Projects a set of SQL expressions and returns a new :class:`DataFrame`.
        This method is equivalent to ``select(sql_expr(...))`` with :func:`select`
        and :func:`functions.sql_expr`.

        :func:`selectExpr` is an alias of :func:`select_expr`.

        Args:
            exprs: The SQL expressions.

        Examples::

            >>> df = session.create_dataframe([-1, 2, 3], schema=["a"])  # with one pair of [], the dataframe has a single column and 3 rows.
            >>> df.select_expr("abs(a)", "a + 2", "cast(a as string)").show()
            --------------------------------------------
            |"ABS(A)"  |"A + 2"  |"CAST(A AS STRING)"  |
            --------------------------------------------
            |1         |1        |-1                   |
            |2         |4        |2                    |
            |3         |5        |3                    |
            --------------------------------------------
            <BLANKLINE>

        z*The input of select_expr() cannot be emptyNTFr   r  )r   r   r   rW  rX  r\   rY  r  rA  r   r  r  r  rS   r  r  r   r   r   )r;  r  r   r  r  r`  r  rY  s           r   select_exprzDataFrame.select_expr  s    @ DUK{IJJ  }}//446'		(B(BDI!!#&&)$/!#' ! JD.sxx}}/@/@/BDIJ !{{ :5A /   
 	
s   #D c                   |st        d      t        | \  }}d}|r| j                  j                  j	                         }t        |j                  j                  |      }| j                  |j                         |D ]0  }t        |j                  j                  j                         |       2 ||j                  _        t               5 }g }	|D ]  }t!        |t"              r|	j%                  |       &t!        |t&              rt!        |j(                  t*              rddlm}
 t!        | j                  j0                  |
      r| j2                   |	j%                  | j4                  j6                  j9                  |j(                  j:                  |j(                  j<                               t!        |t&              rt!        |j(                  t>              ru|j(                  j@                  r_|	j%                  | j4                  jB                  j9                  |j(                  j<                  |j(                  j<                               t!        |t&              rAt!        |j(                  tD              r'|	j%                  |j(                  j<                         tG        jH                  t#        |             |	D ch c]  }tK        |       }}| jL                  D cg c]  }|j<                   }}|D cg c]	  }||vs| }}|stG        jN                         | jP                  r| j                  jR                  j9                  d      r^|D cg c]	  }||v s| }}|s| jU                  | jP                        }nH| jU                  | jP                  jW                  ||            }n| jY                  t[        |      d      }ddd       | j                  jR                  j9                  d      rt]        dj_                                nta        d	d
j_                                |r|jb                  |_2        |S c c}w c c}w c c}w c c}w # 1 sw Y   xY w)aW  Returns a new DataFrame that excludes the columns with the specified names
        from the output.

        This is functionally equivalent to calling :func:`select()` and passing in all
        columns except the ones to exclude. This is a no-op if schema does not contain
        the given column name(s).

        Example::

            >>> df = session.create_dataframe([[1, 2, 3]], schema=["a", "b", "c"])
            >>> df.drop("a", "b").show()
            -------
            |"C"  |
            -------
            |3    |
            -------
            <BLANKLINE>

        Args:
            *cols: the columns to exclude, as :class:`str`, :class:`Column` or a list
                of those.

        Raises:
            :class:`SnowparkClientException`: if the resulting :class:`DataFrame`
                contains no output columns.
        z#The input of drop() cannot be emptyNr   MockServerConnectionuse_simplified_query_generationFr   zDataFrame.drop[exclude]zDataFrame.drop[select]r   len_subcallsresource_usage)3r   r   r   rW  rX  r\   rY  dataframe_droprA  r   rU   r  r  r  r  rc   r   r  r   r   r  r'   #snowflake.snowpark.mock._connectionr  r   r  r  r  rm  expr_idr   r,   df_aliasr  r*   ra   DF_CANNOT_DROP_COLUMN_NAMEr   r   DF_CANNOT_DROP_ALL_COLUMNSr  rl  r  excluder   r  rd   get_resource_usagere   r  r  )r;  r   r  r  r  r`  r  r   resource_usage_collectorr  r  r  normalized_namesr  existing_nameskeep_col_namesr   drop_normalized_namesr   s                      r   r2  zDataFrame.drop+  s   > BCCCTJ{ ==++002D#DII$<$<dCCcff% T;CHHMM<M<M<OQRST +CHH#% 9	H)AE a%LLO6*z!--/SX!$--"5"57KLLL

0044MM111==3E3E q&)"1==2EF..LL

GGKKMM..0B0B
  6*zMM?0 LL!3!349TTA ;B 8==!
1==48LLADdiiANA)7UA1DT;TaUNU!5PPRR%%$--*<*<*@*@1+
 &6)!9OD)% ) -)?)?@B..661>B [[n!5[Gs9	Hv ==!!"CD)(;;=  (7JJL	 BJ	S  >AU)[9	H 9	HsQ   G8QQQ&Q9Q?	Q	QAQ	Q$Q(A(QQQ(patternc                    d}|rk|g| j                   j                  j                         }t        |j                  j
                  |      }||_        | j                  |j                         n|}| j                  r-| j                  | j                  j                  |      |      }n)| j                  t        g | j                  |      |      }t        |d       |S )an  Returns a new DataFrame with only the columns whose names match the specified
        pattern using case-insensitive ILIKE matching (similar to SELECT * ILIKE 'pattern' in SQL).

        Args:
            pattern: The ILIKE pattern to match column names against. You can use the following wildcards:
                - Use an underscore (_) to match any single character.
                - Use a percent sign (%) to match any sequence of zero or more characters.
                - To match a sequence anywhere within the column name, begin and end the pattern with %.

        Returns:
            DataFrame: A new DataFrame containing only columns matching the pattern.

        Raises:
            ValueError: If SQL simplifier is not enabled.
            SnowparkSQLException: If no columns match the specified pattern.

        Examples::

            >>> # Select all columns containing 'id' (case-insensitive)
            >>> df = session.create_dataframe([[1, "John", 101], [2, "Jane", 102]],
            ...                                 schema=["USER_ID", "Name", "dept_id"])
            >>> df.col_ilike("%id%").show()
            -------------------------
            |"USER_ID"  |"DEPT_ID"  |
            -------------------------
            |1          |101        |
            |2          |102        |
            -------------------------
            <BLANKLINE>
        Nr  )ilike_patternzDataFrame.col_ilike)r   rW  rX  r\   rY  dataframe_col_iliker,  rA  r   r  r  ilikerK   r  rd   )r;  r,  r  r   r`  r  r   s          r   	col_ilikezDataFrame.col_ilike  s    N  }}//446'		(E(EtL%!!#&&) !!!7!7!=!=g!FRVWBDJJg>$ ! B 	R./	r   rY  c                 6   t        |d      j                  }d}|rz|v| j                  j                  j	                         }t        |j                  j                  |      }| j                  |j                         t        |j                  |       n|}t        j                  r| j                  d| j                  vrt        || j                   d      }| j"                  r| j%                  | j                  j&                  j)                  | j                  j&                  j+                  || j                  j&                        | j                  j&                        |      }n| j%                  ||      }| j                  j-                         |_        |j                  j/                  d       |S | j"                  r,| j%                  | j"                  j1                  |      |      S | j%                  t        || j                   d	      |      S )
a$  Filters rows based on the specified conditional expression (similar to WHERE
        in SQL).

        Examples::

            >>> df = session.create_dataframe([[1, 2], [3, 4]], schema=["A", "B"])
            >>> df_filtered = df.filter((col("A") > 1) & (col("B") < 100))  # Must use parenthesis before and after operator &.

            >>> # The following two result in the same SQL query:
            >>> df.filter(col("a") > 1).collect()
            [Row(A=3, B=4)]
            >>> df.filter("a > 1").collect()  # use SQL expression
            [Row(A=3, B=4)]

        Args:
            expr: a :class:`Column` expression or SQL text.
            _ast_stmt: when invoked internally, supplies the AST to use for the resulting dataframe.

        :meth:`where` is an alias of :meth:`filter`.
        zfilter/whereNr  T)	is_havingr  r  r  F)r   r  r   rW  rX  r\   rY  dataframe_filterrA  r   rV   	conditionr8  $_is_snowpark_connect_compatible_moder  rH   r  r  r  r  r	  r
  r|  r  r  )	r;  rY  r  r   filter_col_exprr`  r  having_planr   s	            r   r  zDataFrame.filter  s   < .dNCOO  }}//446'		(B(BDI!!#&&):3==$O 
 88##/ 3 33 $**MK%%__MM++CC"mm55RR'$--2I2I S  "&!8!8	 D  # %  __[D_A $ 3 3 8 8 :B!!(+I%%**11/B" '   ??#JJ#
  #  r   )	ascendingr   r9  c          	      
    | j                   dg| }| xs | }t        j                  r|rt        d      |r#|!t	        |t
        t        f      st        d      d}|r| j                  j                  j                         }t        | \  }}t        |j                  j                  |      }	|D ]0  }
t        |	j                   j"                  j%                         |
       2 ||	j                   _        | j)                  |	j*                         t-        j.                         }|dn|}t	        |t0        t2        f      rt|D ]n  }t-        j.                         }t	        |t
              r||j4                  _        n||j8                  _        |j:                  j<                  j?                  |       p nIt	        |t
        t        f      r3t	        |t
              r||j4                  _        n||j8                  _        |	j@                  jC                  |       |r3|dn|}t        |      r
tE               n	tG               }tI        |      g}nxg }|t	        |t0        t2        f      r$|D cg c]  }|r
tE               n	tG                }}nZt	        |t
        t        f      r|r
tE               n	tG               g}n,t        djK                  tM        tO        |                        tQ        |      tQ        |      k7  r-t        djK                  tQ        |      tQ        |                  g }tS        tQ        |            D ]  }t	        ||   tT              rA|j?                  |r*tU        ||   jV                  ||   ||   jX                        n||          W|j?                  tU        ||   |r||   n	tE                             t        j                  r| jZ                  d| jZ                  vrt]        || j^                  d	      }| j`                  r| jc                  | j                  jd                  jg                  | j                  jd                  ji                  || j                  jd                  
      | j                  jd                        |      }n| jc                  ||      }| jZ                  jk                         |_-        |jZ                  j%                  d       |S | j`                  r*| jc                  | j`                  jm                  |            n&| jc                  t]        || j^                  d	            }|r|jn                  |_8        |S c c}w )a=  Sorts a DataFrame by the specified expressions (similar to ORDER BY in SQL).

        When called with no column arguments, sorts by all columns (ORDER BY ALL).

        Examples::

            >>> from snowflake.snowpark.functions import col

            >>> df = session.create_dataframe([[1, 2], [3, 4], [1, 4]], schema=["A", "B"])
            >>> df.sort(col("A"), col("B").asc()).show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            |1    |4    |
            |3    |4    |
            -------------
            <BLANKLINE>

            >>> df.sort(col("a"), ascending=False).show()
            -------------
            |"A"  |"B"  |
            -------------
            |3    |4    |
            |1    |2    |
            |1    |4    |
            -------------
            <BLANKLINE>

            >>> # The values from the list overwrite the column ordering.
            >>> df.sort(["a", col("b").desc()], ascending=[1, 1]).show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            |1    |4    |
            |3    |4    |
            -------------
            <BLANKLINE>

            >>> # Sort by all columns (ORDER BY ALL) - no columns specified
            >>> df.sort().show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            |1    |4    |
            |3    |4    |
            -------------
            <BLANKLINE>

            >>> # Sort by all columns (ORDER BY ALL) - no columns specified
            >>> df.sort([], ascending=False).show()
            -------------
            |"A"  |"B"  |
            -------------
            |3    |4    |
            |1    |4    |
            |1    |2    |
            -------------
            <BLANKLINE>

        Args:
            *cols: Column names as :class:`str`, :class:`Column` objects, or a list of
             columns to sort by. If no columns are provided, the DataFrame is sorted
             by all columns in the order they appear (equivalent to ``ORDER BY ALL`` in SQL).
            ascending: Sort order specification.

             - When sorting **specific columns**: A :class:`bool`, :class:`int`, or list of
               :class:`bool`/:class:`int` values. ``True`` (or 1) for ascending, ``False``
               (or 0) for descending. If a list is provided, its length must match the number
               of columns.
             - When sorting **all columns** (no columns specified): Must be a single
               :class:`bool` or :class:`int`, not a list. Applies the same sort order to
               all columns.
             - Defaults to ``True`` (ascending) when not specified.

        Note:
            The aliases ``order_by()`` and ``orderBy()`` have the same behavior.
        zsort()z*sort() needs at least one sort expression.NzWhen no columns are specified (ORDER BY ALL), ascending must be bool or int, not a list. To sort specific columns with different orders, specify the columns.Tz1ascending can only be boolean or list, but got {}zHThe length of col ({}) should be same with the length of ascending ({}).sort)is_order_by_appendr  r  r  F)9_convert_cols_to_exprsr8  r6  r   r   r%  r  r  r   rW  rX  r   r\   rY  dataframe_sortrU   r  r  r  r  rA  r   r  r  r  r  bool_valv	int64_vallist_valvsr   r9  CopyFromr=   r>   r@   formatr  r  r  ranger?   childnull_orderingr  rN   r  r  r  r  r	  r
  r|  r;  r  r  )r;  r9  r   r  r  is_order_by_allr`  _colsr  r  r   asc_expr_ast	asc_valueascasc_astorder
sort_exprsordersidx	sort_planr   s                        r   r;  zDataFrame.sort2  s   t ,++H<t<"(/%i77OIJJ %y4+6W  ==++002D "H!NE;#DII$<$<dCC T;CHHMM<M<M<OQRST +CHHcff% !::<L ) 1yI)dE]3$ =C#jjlG!#t,-0((*.1))+ )),,33G<= Ic{3i..7L))+/8L**,MM""<0  ) 1yI#'	?IK
E(/0JF$i$7NWXsSikjlBXFX	D#;7-6ikJLIF#&&,fSi-A&B 
 u:V,$99?E
CPVK9X 
 JSZ(  eCj)4%% " "!#J,,fSk5:;S;S #3Z %%!%*VfSkU& 88##/d111ZMI%%__MM++CC"mm55RR%0G0G S  "&!8!8	 D  # %  __Y$_? $ 3 3 8 8 :B!!&)I ))  6 6 ; ;J GH__TZZEJ  !XX
IG Ys   .Uz1.5.0c                 <   d}|rf| j                   j                  j                         }t        |j                  j
                  |      }||_        | j                  |j                         | j                         }||_
        | j                  j                  D ]n  }|j                  r0|j                  |j                  j                  |   |j                  <   |j                  |j                  j                  |   |j                  <   p |r|j                  |_        |S )a  Returns an aliased dataframe in which the columns can now be referenced to using `col(<df alias>, <column name>)`.

        Examples::
            >>> from snowflake.snowpark.functions import col
            >>> df1 = session.create_dataframe([[1, 6], [3, 8], [7, 7]], schema=["col1", "col2"])
            >>> df2 = session.create_dataframe([[1, 2], [3, 4], [5, 5]], schema=["col1", "col2"])

            Join two dataframes with duplicate column names
            >>> df1.alias("L").join(df2.alias("R"), col("L", "col1") == col("R", "col1")).select(col("L", "col1"), col("R", "col2")).show()
            ---------------------
            |"COL1L"  |"COL2R"  |
            ---------------------
            |1        |2        |
            |3        |4        |
            ---------------------
            <BLANKLINE>

            Self join:
            >>> df1.alias("L").join(df1.alias("R"), on="col1").select(col("L", "col1"), col("R", "col2")).show()
            --------------------
            |"COL1"  |"COL2R"  |
            --------------------
            |1       |6        |
            |3       |8        |
            |7       |7        |
            --------------------
            <BLANKLINE>

        Args:
            name: The alias as :class:`str`.
        N)r   rW  rX  r\   rY  dataframe_aliasr   rA  r   r  r   r  r:  r  r  r  r  )r;  r   r   r`  r  _copyr  s          r   r   zDataFrame.alias		  s    H ==++002D#DII$=$=tDCCHcff%&&(JJ)) 	D&& II ''LLTRII
 		 KK<<TB			  HHEMr   c                   t        |       d}|r| j                  j                  j                         }t	        |j
                  j                  |      }t        | \  }}|D ]0  }t        |j                  j                  j                         |       2 ||j                  _        | j                  |j                          | j                  d      j                   |ddi}|r|j"                  |_        |S )a  Aggregate the data in the DataFrame. Use this method if you don't need to
        group the data (:func:`group_by`).

        Args:
            exprs: A variable length arguments list where every element is

                - A Column object
                - A tuple where the first element is a column object or a column name and the second element is the name of the aggregate function
                - A list of the above

                or a ``dict`` maps column names to aggregate function names.

        Examples::

            >>> from snowflake.snowpark.functions import col, stddev, stddev_pop

            >>> df = session.create_dataframe([[1, 2], [3, 4], [1, 4]], schema=["A", "B"])
            >>> df.agg(stddev(col("a"))).show()
            ----------------------
            |"STDDEV(A)"         |
            ----------------------
            |1.1547003940416753  |
            ----------------------
            <BLANKLINE>

            >>> df.agg(stddev(col("a")), stddev_pop(col("a"))).show()
            -------------------------------------------
            |"STDDEV(A)"         |"STDDEV_POP(A)"     |
            -------------------------------------------
            |1.1547003940416753  |0.9428091005076267  |
            -------------------------------------------
            <BLANKLINE>

            >>> df.agg(("a", "min"), ("b", "max")).show()
            -----------------------
            |"MIN(A)"  |"MAX(B)"  |
            -----------------------
            |1         |4         |
            -----------------------
            <BLANKLINE>

            >>> df.agg({"a": "count", "b": "sum"}).show()
            -------------------------
            |"COUNT(A)"  |"SUM(B)"  |
            -------------------------
            |3           |10        |
            -------------------------
            <BLANKLINE>

        Note:
            The name of the aggregate function to compute must be a valid Snowflake `aggregate function
            <https://docs.snowflake.com/en/sql-reference/functions-aggregation.html>`_.

        See also:
            - :meth:`RelationalGroupedDataFrame.agg`
            - :meth:`DataFrame.group_by`
        NFr   r   )rt   r   rW  rX  r\   rY  dataframe_aggr   rS   r  r  r  r  rA  r   group_byaggr  r  )r;  r   r  r`  rY  r  r  r   s           r   rZ  zDataFrame.aggC	  s    B 	 ==++002D$TYY%<%<dCD!G!OE; E*4::??+>+>+@!DE"-DJJdgg&/T]]U]+//H%HBJ	r   z-snowflake.snowpark.RelationalGroupedDataFramec                    | j                   dg| }d}|r| j                  j                  j                         }t	        |j
                  j                  |      }| j                  |j                         t        | \  }|j                  _        |D ]0  }t        |j                  j                  j                         |       2 t        j                   j#                  | |t        j                   j$                  j'                         |      S )zPerforms a SQL
        `GROUP BY ROLLUP <https://docs.snowflake.com/en/sql-reference/constructs/group-by-rollup.html>`_.
        on the DataFrame.

        Args:
            cols: The columns to group by rollup.
        zrollup()Nr  )r=  r   rW  rX  r\   rY  dataframe_rolluprA  r   r   r  r  rU   r  r  r  snowparkRelationalGroupedDataFramerelational_grouped_dataframe_RollupType)r;  r   r  rollup_exprsr`  rY  col_listr   s           r   rollupzDataFrame.rollup	  s     3t22:EE ==++002D$TYY%?%?FDdgg&+QSW+X(Hdii( U;DIINN<N<N<PRSTU !!<<;;GGI	 = 
 	
r   c                R    | j                   dg| }d}|r|| j                  j                  j                         }t	        |j
                  j                  |      }t        | \  }|j                  _	        |D ]0  }t        |j                  j                  j                         |       2 | j                  |j                         n|}t        j                   j#                  | |t        j                   j$                  j'                         |      }	|r|j(                  |	_        |	S )a  Groups rows by the columns specified by expressions (similar to GROUP BY in
        SQL).

        This method returns a :class:`RelationalGroupedDataFrame` that you can use to
        perform aggregations on each group of data.

        Args:
            *cols: The columns to group by.

        Valid inputs are:

            - Empty input
            - One or multiple :class:`Column` object(s) or column name(s) (:class:`str`)
            - A list of :class:`Column` objects or column names (:class:`str`)

        Examples:

            >>> from snowflake.snowpark.functions import col, lit, sum as sum_, max as max_
            >>> df = session.create_dataframe([(1, 1),(1, 2),(2, 1),(2, 2),(3, 1),(3, 2)], schema=["a", "b"])
            >>> df.group_by().agg(sum_("b")).collect()
            [Row(SUM(B)=9)]
            >>> df.group_by("a").agg(sum_("b")).sort("a").collect()
            [Row(A=1, SUM(B)=3), Row(A=2, SUM(B)=3), Row(A=3, SUM(B)=3)]
            >>> df.group_by("a").agg(sum_("b").alias("sum_b"), max_("b").alias("max_b")).sort("a").collect()
            [Row(A=1, SUM_B=3, MAX_B=2), Row(A=2, SUM_B=3, MAX_B=2), Row(A=3, SUM_B=3, MAX_B=2)]
            >>> df.group_by(["a", lit("snow")]).agg(sum_("b")).sort("a").collect()
            [Row(A=1, LITERAL()='snow', SUM(B)=3), Row(A=2, LITERAL()='snow', SUM(B)=3), Row(A=3, LITERAL()='snow', SUM(B)=3)]
            >>> df.group_by("a").agg((col("*"), "count"), max_("b")).sort("a").collect()
            [Row(A=1, COUNT(LITERAL())=2, MAX(B)=2), Row(A=2, COUNT(LITERAL())=2, MAX(B)=2), Row(A=3, COUNT(LITERAL())=2, MAX(B)=2)]
            >>> df.group_by("a").median("b").sort("a").collect()
            [Row(A=1, MEDIAN(B)=Decimal('1.500')), Row(A=2, MEDIAN(B)=Decimal('1.500')), Row(A=3, MEDIAN(B)=Decimal('1.500'))]
            >>> df.group_by("a").function("avg")("b").sort("a").collect()
            [Row(A=1, AVG(B)=Decimal('1.500000')), Row(A=2, AVG(B)=Decimal('1.500000')), Row(A=3, AVG(B)=Decimal('1.500000'))]
        z
group_by()Nr  )r=  r   rW  rX  r\   rY  dataframe_group_byr   r  r  rU   r  r  rA  r   r  r]  r^  r_  _GroupByTyper  r  )
r;  r  r   r  grouping_exprsr`  rY  rb  r   r   s
             r   rY  zDataFrame.group_by	  s   V 544\IDI  }}//446()E)EtL/U0,$)), " YA?		@R@R@TVWXY !!$''* ::;;HHJ	 ; 
 BJ	r   grouping_setszsnowflake.snowpark.GroupingSetsc                @   d}|r| j                   j                  j                         }t        |j                  j
                  |      }| j                  |j                         t        | \  }|j                  _
        |D ]1  }|j                  j                  j                  |j                         3 t        j                  j!                  | t#        | D cg c]  }|j$                   c}t        j                  j&                  j)                         |      S c c}w )aq  Performs a SQL
        `GROUP BY GROUPING SETS <https://docs.snowflake.com/en/sql-reference/constructs/group-by-grouping-sets.html>`_.
        on the DataFrame.

        GROUP BY GROUPING SETS is an extension of the GROUP BY clause
        that allows computing multiple GROUP BY clauses in a single statement.
        The group set is a set of dimension columns.

        GROUP BY GROUPING SETS is equivalent to the UNION of two or
        more GROUP BY operations in the same result set.


        Examples::

            >>> from snowflake.snowpark import GroupingSets
            >>> df = session.create_dataframe([[1, 2, 10], [3, 4, 20], [1, 4, 30]], schema=["A", "B", "C"])
            >>> df.group_by_grouping_sets(GroupingSets([col("a")])).count().sort("a").collect()
            [Row(A=1, COUNT=2), Row(A=3, COUNT=1)]
            >>> df.group_by_grouping_sets(GroupingSets(col("a"))).count().sort("a").collect()
            [Row(A=1, COUNT=2), Row(A=3, COUNT=1)]
            >>> df.group_by_grouping_sets(GroupingSets([col("a")], [col("b")])).count().sort("a", "b").collect()
            [Row(A=None, B=2, COUNT=1), Row(A=None, B=4, COUNT=2), Row(A=1, B=None, COUNT=2), Row(A=3, B=None, COUNT=1)]
            >>> df.group_by_grouping_sets(GroupingSets([col("a"), col("b")], [col("c")])).count().sort("a", "b", "c").collect()
            [Row(A=None, B=None, C=10, COUNT=1), Row(A=None, B=None, C=20, COUNT=1), Row(A=None, B=None, C=30, COUNT=1), Row(A=1, B=2, C=None, COUNT=1), Row(A=1, B=4, C=None, COUNT=1), Row(A=3, B=4, C=None, COUNT=1)]


        Args:
            grouping_sets: The list of :class:`GroupingSets` to group by.
        Nr  )r   rW  rX  r\   rY   dataframe_group_by_grouping_setsrA  r   r   rh  r  r  r   r  r  r]  r^  r   _to_expressionr_  rf  )r;  r   rh  r`  rY  grouping_set_listgss          r   group_by_grouping_setsz DataFrame.group_by_grouping_sets
  s    R ==++002D$TYY%O%OQUVDdgg& 7F!""+' 8""''..rww78 !!<<)F)VW2RW;;HHJ	 = 
 	
Ws   D
c                    | j                   dg| }d}|r| j                  j                  j                         }t	        |j
                  j                  |      }| j                  |j                         t        | \  }|j                  _        |D ]0  }t        |j                  j                  j                         |       2 t        j                   j#                  | |t        j                   j$                  j'                         |      S )zPerforms a SQL
        `GROUP BY CUBE <https://docs.snowflake.com/en/sql-reference/constructs/group-by-cube.html>`_.
        on the DataFrame.

        Args:
            cols: The columns to group by cube.
        zcube()Nr  )r=  r   rW  rX  r\   rY  dataframe_cuberA  r   r   r  r  rU   r  r  r  r]  r^  r_  	_CubeType)r;  r   r  
cube_exprsr`  rY  rb  r   s           r   cubezDataFrame.cube>
  s     1T00ADA
 ==++002D$TYY%=%=tDDdgg&+QSW+X(Hdii( U;DIINN<N<N<PRSTU !!<<;;EEG	 = 
 	
r   c                    d}|rf|`| j                   j                  j                         }t        |j                  j
                  |      }| j                  |j                         n|}d}| j                   j                  j                  d      rt               5 }| j                  r,| j                  | j                  j                         |      }n&| j                  t        | j                        |      }ddd       t!        dj#                                nt               5 }| j%                  | j&                  j(                  D cg c](  }| j+                  t-        |j.                        d      * c}d      j1                  d      }ddd       t3        dd	j#                         
       |r|j4                  |_        |S # 1 sw Y   xY wc c}w # 1 sw Y   LxY w)zReturns a new DataFrame that contains only the rows with distinct values
        from the current DataFrame.

        This is equivalent to performing a SELECT DISTINCT in SQL.
        Nr  r  zDataFrame.distinct[select])r  Fr   zDataFrame.distinct[group_by]   r  )r   rW  rX  r\   rY  dataframe_distinctrA  r   rl  rm  rc   r  r  distinctrG   r  rd   r&  rY  r  fieldsr   r   r   rZ  re   r  r  )r;  r  r   r`  r  r'  r   fs           r   rw  zDataFrame.distinct_
  s     }}//446'		(D(DdK!!#&&) ==!!"CD') O-E))..779T ) B $**)=NBO ,7JJL () '-E]] "&!3!3 AFF!3uE $ #  ##& '  .7JJL	 BJ	?O O' 's+   AG($G-G
9GGGG&subsetc                t   t        | \  }}d}|r|| j                  j                  j                         }t	        |j
                  j                  |      }||j                  _        |D ]0  }t        |j                  j                  j                         |       2 | j                  |j                         n|}|s5| j                  d      }t        |dd       |r|j                   |_        |S t%               5 }	|D 
cg c]  }
| j'                  |
       }}
| j(                  D cg c]  }| j'                  |       }}t+               j-                   t/        j0                  j2                  j4                  | j6                  |       }t9               } | j:                  g ||j=                  |      ddij?                  t'        |      dk(  d      j;                  |d      }ddd       t        dd	jA                         	       |r|j                   |_        |S c c}
w c c}w # 1 sw Y   ExY w)
a  Creates a new DataFrame by removing duplicated rows on given subset of columns.

        If no subset of columns is specified, this function is the same as the :meth:`distinct` function.
        The result is non-deterministic when removing duplicated rows from the subset of columns but not all columns.

        For example, if we have a DataFrame ``df``, which has columns ("a", "b", "c") and contains three rows ``(1, 1, 1), (1, 1, 2), (1, 2, 3)``,
        the result of ``df.dropDuplicates("a", "b")`` can be either
        ``(1, 1, 1), (1, 2, 3)``
        or
        ``(1, 1, 2), (1, 2, 3)``

        Args:
            subset: The column names on which duplicates are dropped.

        :meth:`dropDuplicates` is an alias of :meth:`drop_duplicates`.
        NFr   zDataFrame.drop_duplicatesr   r  r   r   r  )!r   r   rW  rX  r\   rY  dataframe_drop_duplicatesr  r  rS   r  r  rA  r   rw  re   r  r  rc   r   r  r   overr  r]  Windowpartition_byorder_byr{   r   as_wherer&  )r;  r  r   rz  r  r`  r  argr   r'  xfilter_colsr   output_colsrownumrownum_names                   r   drop_duplicateszDataFrame.drop_duplicates
  s   . EfM  }}//446'		(K(KTR$/!! IC.sxx}}/@/@/BCHI!!#&&) /B$?aP!XX
I#% 	)A0671488A;7K7>BllK(488H-KKK\&&M	""))66DMM F
 78KS[S&**[*ASUSs;'1,>u5 	 	'3FFH		
 BJ	1 8K	 	s+   ;H. H$H.(H) B*H.$
H..H7	pivot_colvalueszsnowflake.snowpark.DataFramedefault_on_nullc           	         d}|r| j                   j                  j                         }t        |j                  j
                  |      }| j                  |j                         t        |j                  |       t        |j                  |       t        |j                  |       t        | d|||      \  }}}	}t        j                  j!                  |g t        j                  j"                  j%                  |d   |	|      |      S )a3	  Rotates this DataFrame by turning the unique values from one column in the input
        expression into multiple columns and aggregating results where required on any
        remaining column values.

        Only one aggregate is supported with pivot.

        Example::

            >>> create_result = session.sql('''create or replace temp table monthly_sales(empid int, amount int, month text)
            ... as select * from values
            ... (1, 10000, 'JAN'),
            ... (1, 400, 'JAN'),
            ... (2, 4500, 'JAN'),
            ... (2, 35000, 'JAN'),
            ... (1, 5000, 'FEB'),
            ... (1, 3000, 'FEB'),
            ... (2, 200, 'FEB') ''').collect()
            >>> df = session.table("monthly_sales")
            >>> df.pivot("month", ['JAN', 'FEB']).sum("amount").sort(df["empid"]).show()
            -------------------------------
            |"EMPID"  |"'JAN'"  |"'FEB'"  |
            -------------------------------
            |1        |10400    |8000     |
            |2        |39500    |200      |
            -------------------------------
            <BLANKLINE>

            >>> df = session.table("monthly_sales")
            >>> df.pivot("month").sum("amount").sort("empid").show()
            -------------------------------
            |"EMPID"  |"'FEB'"  |"'JAN'"  |
            -------------------------------
            |1        |8000     |10400    |
            |2        |200      |39500    |
            -------------------------------
            <BLANKLINE>

            >>> subquery_df = session.table("monthly_sales").select(col("month")).filter(col("month") == "JAN")
            >>> df = session.table("monthly_sales")
            >>> df.pivot("month", values=subquery_df).sum("amount").sort("empid").show()
            ---------------------
            |"EMPID"  |"'JAN'"  |
            ---------------------
            |1        |10400    |
            |2        |39500    |
            ---------------------
            <BLANKLINE>

        Args:
            pivot_col: The column or name of the column to use.
            values: A list of values in the column,
                or dynamic based on the DataFrame query,
                or None (default) will use all values of the pivot column.
            default_on_null: Expression to replace empty result values.
        NzDataFrame.pivotr   r  )r   rW  rX  r\   rY  dataframe_pivotrA  r   rU   r  rS   r  r  r   r  r]  r^  r_  
_PivotType)
r;  r  r  r  r   r`  r  	target_dfpcpivot_valuess
             r   pivotzDataFrame.pivot
  s    D ==++002D#DII$=$=tDCcff%7yQ&szz6:&s':':OL7N#Y8
4	2|_ !!<<;;FF1|_  = 
 	
r   value_columnname_columncolumn_listinclude_nullsc                 0   | j                  d|      }d}|r| j                  j                  j                         }t	        |j
                  j                  |      }| j                  |j                         ||_	        ||_
        ||_        |D ]&  }	t        |j                  j                         |	       ( t        ||||| j                         }
ddlm} | j&                  rt)        | j                  j*                  |      r | j                  j*                  j,                  sO| j/                  t1        t3        |
| j                  j4                        | j                  j4                              n| j/                  |
|      }|r|j6                  |_        |S )ah  Rotates a table by transforming columns into rows.
        UNPIVOT is a relational operator that accepts two columns (from a table or subquery), along with a list of columns, and generates a row for each column specified in the list. In a query, it is specified in the FROM clause after the table name or subquery.
        Note that UNPIVOT is not exactly the reverse of PIVOT as it cannot undo aggregations made by PIVOT.

        Args:
            value_column: The name to assign to the generated column that will be populated with the values from the columns in the column list.
            name_column: The name to assign to the generated column that will be populated with the names of the columns in the column list.
            column_list: The names of the columns in the source table or subequery that will be narrowed into a single pivot column. The column names will populate ``name_column``, and the column values will populate ``value_column``.
            include_nulls: If True, include rows with NULL values in ``name_column``. The default value is False.
        Example::

            >>> df = session.create_dataframe([
            ...     (1, 'electronics', 100, 200),
            ...     (2, 'clothes', 100, 300)
            ... ], schema=["empid", "dept", "jan", "feb"])
            >>> df = df.unpivot("sales", "month", ["jan", "feb"]).sort("empid")
            >>> df.show()
            ---------------------------------------------
            |"EMPID"  |"DEPT"       |"MONTH"  |"SALES"  |
            ---------------------------------------------
            |1        |electronics  |JAN      |100      |
            |1        |electronics  |FEB      |200      |
            |2        |clothes      |JAN      |100      |
            |2        |clothes      |FEB      |300      |
            ---------------------------------------------
            <BLANKLINE>
        z	unpivot()Nr   r  r  r  r  )r=  r   rW  rX  r\   rY  dataframe_unpivotrA  r   r  r  r  rU   r  r  rO   r  r   r  r  r   r   _suppress_not_implemented_errorr  r2   r1   r  r  r  )r;  r  r  r  r  r   column_exprsr`  r  r   unpivot_planr  r   s                r   unpivotzDataFrame.unpivot7  s]   L 22;L ==++002D#DII$?$?FCcff%+C)CO -C  V;COO<O<O<QSTUV +|]DJJ

 	M %%4==..0DEMM''GG OO-$t}}/F/F "]]44	 > 	" BJ	r   r  offsetc                 N   |ru|n| j                   j                  j                         }t        |j                  j
                  |      }| j                  |j                         ||_        ||_	        n|}d}nd}t        j                  r(| j                  d| j                  vrt        t        |      t        |      | j                  d      }| j                   r| j#                  | j                   j$                  j'                  | j                   j$                  j)                  || j                   j$                        | j                   j$                        |      }n| j#                  ||      }| j                  j+                         |_        |j                  j-                  d       |S | j                   r.| j#                  | j                   j/                  ||      |      S | j#                  t        t        |      t        |      | j                        |      S )	a0  Returns a new DataFrame that contains at most ``n`` rows from the current
        DataFrame, skipping ``offset`` rows from the beginning (similar to LIMIT and OFFSET in SQL).

        Note that this is a transformation method and not an action method.

        Args:
            n: Number of rows to return.
            offset: Number of rows to skip before the start of the result set. The default value is 0.
            _ast_stmt: Overridding AST statement. Used in cases where this function is invoked internally.
            _emit_ast: Whether to emit AST statements.

        Example::

            >>> df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df.limit(1).show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            -------------
            <BLANKLINE>
            >>> df.limit(1, offset=1).show()
            -------------
            |"A"  |"B"  |
            -------------
            |3    |4    |
            -------------
            <BLANKLINE>
        NlimitT)is_limit_appendr  r  r  )r  )r   rW  rX  r\   rY  dataframe_limitrA  r   r  r  r8  r6  r  r7   r)   r  r  r  r  r	  r
  r|  r  r  )	r;  r  r  r  r   r`  r  
limit_planr   s	            r   r  zDataFrame.limit  s   N  }}//446'		(A(A4H!!#&&)#
 D
 88##/t222
GFOTZZJ %%__MM++CC"mm55RR&1H1H S  "&!8!8	 D  # %  __Z4_@ $ 3 3 8 8 :B!!'*I%%**0060Bd '   ??gaj'&/4::>$ #  r   otherc                    |r| j                   j                  j                         }t        |j                  j
                  |      }d|_        d|_        d|_        |j                  |j                         | j                  |j                         | j                  rg| j                  | j                  j                  |j                  xs+ t        |j                   | j                   j"                        t$                    n0| j                  t'        | j                   |j                   d            }|rj(                  |_        |S )a  Returns a new DataFrame that contains all the rows in the current DataFrame
        and another DataFrame (``other``), excluding any duplicate rows. Both input
        DataFrames must contain the same number of columns.

        Example::
            >>> df1 = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[0, 1], [3, 4]], schema=["c", "d"])
            >>> df1.union(df2).sort("a").show()
            -------------
            |"A"  |"B"  |
            -------------
            |0    |1    |
            |1    |2    |
            |3    |4    |
            -------------
            <BLANKLINE>

        Args:
            other: the other :class:`DataFrame` that contains the rows to include.
        Fr  operatoris_all)r   rW  rX  r\   rY  dataframe_unionr  by_nameallow_missing_columnsrA  r  r   r  r  set_operatorr1   r  r  r/   	UnionPlanr  r  r;  r  r   r`  r  r   s         r   unionzDataFrame.union  s   0 ==++002D#DII$=$=tDCCGCK(-C%syy)cff% %% OO&&33++ *dmm.E.E ' 4  4::u{{5!QR 	 BJ	r   c                    |r| j                   j                  j                         }t        |j                  j
                  |      }d|_        d|_        d|_        |j                  |j                         | j                  |j                         | j                  rg| j                  | j                  j                  |j                  xs+ t        |j                   | j                   j"                        t$                    n0| j                  t'        | j                   |j                   d            }|rj(                  |_        |S )a  Returns a new DataFrame that contains all the rows in the current DataFrame
        and another DataFrame (``other``), including any duplicate rows. Both input
        DataFrames must contain the same number of columns.

        Example::

            >>> df1 = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[0, 1], [3, 4]], schema=["c", "d"])
            >>> df1.union_all(df2).show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            |3    |4    |
            |0    |1    |
            |3    |4    |
            -------------
            <BLANKLINE>

        Args:
            other: the other :class:`DataFrame` that contains the rows to include.
        TFr  r  r  )r   rW  rX  r\   rY  r  r  r  r  rA  r  r   r  r  r  r1   r  r  r0   r  r  r  r  s         r   	union_allzDataFrame.union_all  s   6 ==++002D#DII$=$=tDCCGCK(-C%syy)cff% %% OO&&33++ *dmm.E.E + 4  4::u{{4!PQ 	 BJ	r   r  c                 R   d}|r| j                   j                  j                         }t        |j                  j
                  |      }d|_        d|_        ||_        | j                  |j                         |j                  |j                         | j                  |d||      S )a  Returns a new DataFrame that contains all the rows in the current DataFrame
        and another DataFrame (``other``), excluding any duplicate rows.

        This method matches the columns in the two DataFrames by their names, not by
        their positions. The columns in the other DataFrame are rearranged to match
        the order of columns in the current DataFrame.

        Example::

            >>> df1 = session.create_dataframe([[1, 2]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[2, 1]], schema=["b", "a"])
            >>> df1.union_by_name(df2).show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            -------------
            <BLANKLINE>

        Example::

            >>> df1 = session.create_dataframe([[1, 2]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[2, 1, 3]], schema=["b", "a", "c"])
            >>> df1.union_by_name(df2, allow_missing_columns=True).sort("c").show()
            --------------------
            |"A"  |"B"  |"C"   |
            --------------------
            |1    |2    |NULL  |
            |1    |2    |3     |
            --------------------
            <BLANKLINE>

        Args:
            other: the other :class:`DataFrame` that contains the rows to include.
            allow_missing_columns: When true includes missing columns in the final result. Missing values are Null filled. Default False.
        NFTr  r  r  r   rW  rX  r\   rY  r  r  r  r  rA  r   r  _union_by_name_internalr;  r  r  r   r`  r  s         r   union_by_namezDataFrame.union_by_nameH  s    Z ==++002D#DII$=$=tDCCGCK(=C%cff%syy)++"7	 , 
 	
r   c                 R   d}|r| j                   j                  j                         }t        |j                  j
                  |      }d|_        d|_        ||_        | j                  |j                         |j                  |j                         | j                  |d||      S )a  Returns a new DataFrame that contains all the rows in the current DataFrame
        and another DataFrame (``other``), including any duplicate rows.

        This method matches the columns in the two DataFrames by their names, not by
        their positions. The columns in the other DataFrame are rearranged to match
        the order of columns in the current DataFrame.

        Example::

            >>> df1 = session.create_dataframe([[1, 2]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[2, 1]], schema=["b", "a"])
            >>> df1.union_all_by_name(df2).show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            |1    |2    |
            -------------
            <BLANKLINE>

        Example::

            >>> df1 = session.create_dataframe([[1, 2], [1, 2]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[2, 1, 3]], schema=["b", "a", "c"])
            >>> df1.union_all_by_name(df2, allow_missing_columns=True).show()
            --------------------
            |"A"  |"B"  |"C"   |
            --------------------
            |1    |2    |NULL  |
            |1    |2    |NULL  |
            |1    |2    |3     |
            --------------------
            <BLANKLINE>

        Args:
            other: the other :class:`DataFrame` that contains the rows to include.
            allow_missing_columns: When true includes missing columns in the final result. Missing values are Null filled. Default False.
        NTr  r  r  s         r   union_all_by_namezDataFrame.union_all_by_name  s    ^ ==++002D#DII$=$=tDCCGCK(=C%cff%syy)++"7	 , 
 	
r   r  c                    | j                   D ch c]  }|j                   }}| j                   D ci c]  }|j                  | }}|j                   D ch c]  }|j                   }}|j                   D ci c]  }|j                  | }	}||z
  }
||z
  }dt        t           dt        dt        dt        fd}|
s|rH|r0| }|}|
r
 ||
||      }|r
 ||||      }|j                  |||      S t        j                  |
|      |j                         D cg c]  }|	|   	 }}| j                  j                  }|r7|j                  r+| j                  |j                  j                  |            }n%| j                  t        ||j                              }|rs| j                  | j                  j!                  |j                  xs+ t#        |j                  | j                  j$                        |rt&        nt(              |	      }|S | j                  t+        | j                  |j                  |      |	      }|S c c}w c c}w c c}w c c}w c c}w )
Nmissing_colsr  from_dfr   c           
      |   |j                   j                  D ci c]  }|j                  |j                   }}| D ch c]   }t	        |t                     j                  " }} |j                  dg|D cg c].  }t        d      j                  ||         j                  |      0 c} S c c}w c c}w c c}w )zr
            Adds null filled columns to a dataframe using typing information from another dataframe.
            r  N)
r  rx  r   datatyper   r   r   r   castr   )r  r  r  fielddt_mapr   materialized_namess          r   	add_nullsz4DataFrame._union_by_name_internal.<locals>.add_nulls  s     ?Fnn>S>STUejj%..0TFT >J"69C,11" "  5<<DVWS#d)..-33C8W  U" Xs   B/%B463B9
)r  r  r  r  r  )r   r   r   r  r   r  ra   #DF_CANNOT_RESOLVE_COLUMN_NAME_AMONGkeysr   sql_simplifier_enabledr  r  r   rK   r  r  r1   r  r0   r/   r  )r;  r  r  r  r  r  r  left_attr_mapr  right_attr_mapmissing_leftmissing_rightr  leftrightr   r  r  right_childr   s                       r   r  z!DataFrame._union_by_name_internal  sS    ,0<<84TYY8	859\\BTDBB,1MM:Ddii:
:6;mmDd$))T/DD!I-!J.	c(	+4	?H		$ =$$\4?D %mUDAE33&I 4   6YY -  1>0B0B0DE$EE!%!E!E!e&=&=//%*A*A*H*H*OPK//'%*EFK!&&3311 *#))DMM4K4K /5]) 4  $ ! 	B 	 $**k&7&7@I ! B 	A 9B:DN Fs   II
I9IIc                 l   d}|rz| j                   j                  j                         }t        |j                  j
                  |      }|j                  |j                         | j                  |j                         | j                  rg| j                  | j                  j                  |j                  xs+ t        |j                  | j                   j                        t                    n.| j                  t!        | j                  |j                              }|r|j"                  |_        |S )a  Returns a new DataFrame that contains the intersection of rows from the
        current DataFrame and another DataFrame (``other``). Duplicate rows are
        eliminated.

        Example::

            >>> df1 = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[1, 2], [5, 6]], schema=["c", "d"])
            >>> df1.intersect(df2).show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            -------------
            <BLANKLINE>

        Args:
            other: the other :class:`DataFrame` that contains the rows to use for the
                intersection.
        Nr  r  )r   rW  rX  r\   rY  dataframe_intersectrA  r  r   r  r  r  r1   r  r  r.   r   r  r  r  s         r   	intersectzDataFrame.intersect  s    0 ==++002D#DII$A$A4HCsyy)cff% %% OO&&33++ *dmm.E.E + 4  4::u{{!CD 	 BJ	r   c                 n   d}|rz| j                   j                  j                         }t        |j                  j
                  |      }|j                  |j                         | j                  |j                         | j                  rh| j                  | j                  j                  |j                  xs+ t        |j                  | j                   j                        t                    }n/| j                  t!        | j                  |j                              }|r|j"                  |_        |S )a  Returns a new DataFrame that contains all the rows from the current DataFrame
        except for the rows that also appear in the ``other`` DataFrame. Duplicate rows are eliminated.

        Example::

            >>> df1 = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[1, 2], [5, 6]], schema=["c", "d"])
            >>> df1.subtract(df2).show()
            -------------
            |"A"  |"B"  |
            -------------
            |3    |4    |
            -------------
            <BLANKLINE>

        :meth:`minus` and :meth:`subtract` are aliases of :meth:`except_`.

        Args:
            other: The :class:`DataFrame` that contains the rows to exclude.
        Nr  r  )r   rW  rX  r\   rY  dataframe_exceptrA  r  r   r  r  r  r1   r  r  r-   r   r  r  r  s         r   except_zDataFrame.except_A  s    0 ==++002D#DII$>$>ECsyy)cff%!!&&33++ *dmm.E.E ( 4 B 

EKK!@ABBJ	r   r  howc                    t        |j                  d      xs |xs d      }t        | j                  |j                  t	        |      dd      }d}|r| j
                  j                  j                         }t        |j                  j                  |      }| j                  |j                         |j                  |j                         t        |t              rd|j                   _        ntt        |t$              rd|j                   _        nRt        |t(              rd|j                   _        n0t        |t,              rd|j                   _        nt1        d|       | j2                  r| j
                  j4                  j7                  | j
                  j4                  j9                  || j
                  j4                        | j
                  j4                        }	| j;                  |	|      S | j;                  ||      S )	aw  Performs a natural join of the specified type (``how``) with the
        current DataFrame and another DataFrame (``right``).

        Args:
            right: The other :class:`DataFrame` to join.
            how: We support the following join types:

                - Inner join: "inner" (the default value)
                - Left outer join: "left", "leftouter"
                - Right outer join: "right", "rightouter"
                - Full outer join: "full", "outer", "fullouter"

                You can also use ``join_type`` keyword to specify this condition.
                Note that to avoid breaking changes, currently when ``join_type`` is specified,
                it overrides ``how``.

        Examples::
            >>> df1 = session.create_dataframe([[1, 2], [3, 4], [5, 6]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[1, 7], [3, 8]], schema=["a", "c"])
            >>> df1.natural_join(df2).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |1    |2    |7    |
            |3    |4    |8    |
            -------------------
            <BLANKLINE>

            >>> df1 = session.create_dataframe([[1, 2], [3, 4], [5, 6]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[1, 7], [3, 8]], schema=["a", "c"])
            >>> df1.natural_join(df2, "left").show()
            --------------------
            |"A"  |"B"  |"C"   |
            --------------------
            |1    |2    |7     |
            |3    |4    |8     |
            |5    |6    |NULL  |
            --------------------
            <BLANKLINE>
        r   innerNTUnsupported join type r  r  r  )r%   rm  r   r  r!   r   rW  rX  r\   rY  dataframe_natural_joinrA  r   r   r   r   r   join_type__innerr   join_type__left_outerr#   join_type__right_outerr   join_type__full_outerr   r  r  r	  r
  r  )
r;  r  r  r   r_  r   r  r`  r  select_plans
             r   natural_joinzDataFrame.natural_joinr  s   b %VZZ%<%N%NwO	JJKK	"
	 ==++002D#DII$D$DdKCcgg&sww')U+15.Iy16:3Iz27;4Iy16:3 #9)!EFF!!--11IImm--JJ!]]44 K  00 J K ??;$???yD99r   r   )r   r   r   onr   r   c                h   t               }t        | ||g ||      \  }}||j                  nd}	t        |j                  |j                  ||	d      }
d}|r| j
                  j                  j                         }t        |j                  j                  |      }| j                  |j                         |j                  |j                         |	t        |j                  |       |r||j                   _        |r||j$                  _        | j&                  r| j
                  j(                  j+                  | j
                  j(                  j-                  |
| j
                  j(                        | j
                  j(                        }| j/                  ||      S | j/                  |
|      S )aj  Performs an inner lateral join with the current DataFrame and another DataFrame (``right``).

        Args:
            right: The other :class:`DataFrame` to join.
            on: A :class:`Column` expression for the lateral join condition.
                This condition will be used to filter the right DataFrame in the
                lateral subquery (e.g., `WHERE t1.a = t2.a`).
            lsuffix: Suffix to add to the overlapping columns of the left DataFrame.
            rsuffix: Suffix to add to the overlapping columns of the right DataFrame.

        Note:
            When both ``lsuffix`` and ``rsuffix`` are empty, the overlapping columns will have random column names in the resulting DataFrame.
            You can reference to these randomly named columns using :meth:`Column.alias`.

        Examples::
            >>> df1 = session.create_dataframe([[1, 2], [3, 4], [5, 6]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[1, 7], [3, 8]], schema=["a", "c"])
            >>> df1.lateral_join(df2, df1.a == df2.a).select(df1.a.alias("a_1"), df2.a.alias("a_2"), df1.b, df2.c).show()
            -----------------------------
            |"A_1"  |"A_2"  |"B"  |"C"  |
            -----------------------------
            |1      |1      |2    |7    |
            |3      |3      |4    |8    |
            -----------------------------
            <BLANKLINE>

            >>> # With lsuffix and rsuffix for column disambiguation
            >>> df1.lateral_join(df2, df1.b * 2 > df2.c, lsuffix="_l", rsuffix="_r").select("*").show()
            -----------------------------
            |"A_L"  |"B"  |"A_R"  |"C"  |
            -----------------------------
            |3      |4    |1      |7    |
            |5      |6    |1      |7    |
            |5      |6    |3      |8    |
            -----------------------------
            <BLANKLINE>
        r   Nr  r  r  )r"   r  r  r   r  r   rW  rX  r\   rY  dataframe_lateral_joinrA  r   r   rT   	join_exprr   rI  r   r  r  r	  r
  r  )r;  r  r  r   r   r   lateral_join_typer   r   on_exprr  r`  r  r  s                 r   lateral_joinzDataFrame.lateral_join  sw   ` (M"%*B

c %'N"..IIII
	 ==++002D#DII$D$DdKCcgg&sww'"/rB$+!$+!!!--11IImm--JJ!]]44 K  00 J K ??;$???yD99r   )r   r   match_conditionr   r  c          	      	   |j                  d      xs |}	|j                  d      xs |}
|
xs d}t        |      }t        |t              rx| |u s| j                  |j                  u rt        j                         t        |t              sMt        |t              rS|j                         j                         j                  dd      j                  d      rt        |	      rt        d      t        |t              s1t        |t              r.|j                         j                         dk(  r|t!        d
      |t!        d|
 d      t        |	      du rg }	nt        |	t              r|	g}	nt        |	t"              r|	}	nt        |	t$              rmt'        |	      dkD  r_t)        |	D cg c]  }t        |t               c}      s8t+        d t-        |	      D              \  }}t/        dt1        |       d|       t        |	t$              st/        dt1        |	             d	}|r~| j2                  j4                  j7                         }t9        |j:                  j<                  |      }| j?                  |j@                         |j?                  |jB                         t        |tD              rd|jF                  _$        nt        |tJ              rd|jF                  _&        nt        |tN              rd|jF                  _(        nt        |tR              rd|jF                  _*        nt        |t              rd|jF                  _+        ntt        |tX              rd|jF                  _-        nRt        |t\              rd|jF                  _/        n0t        |t              rd|jF                  _0        nt!        d|       |j                  d|      }|t        |t"        t        f      rtc        |jd                  |       ngt        |t$              r@|D ]:  }tc        |jd                  jf                  jh                  jk                         |       < nt/        dt1        |             |tm        |jn                  |       |r||jp                  _9        |r||jt                  _9        | jw                  ||	|||||      S t/        d      c c}w )a-  Performs a join of the specified type (``how``) with the current
        DataFrame and another DataFrame (``right``) on a list of columns
        (``on``).

        Args:
            right: The other :class:`DataFrame` to join.
            on: A column name or a :class:`Column` object or a list of them to be used for the join.
                When a list of column names are specified, this method assumes the named columns are present in both dataframes.
                You can use keyword ``using_columns`` to specify this condition. Note that to avoid breaking changes, when
                `using_columns`` is specified, it overrides ``on``.
            how: We support the following join types:

                - Inner join: "inner" (the default value)
                - Left outer join: "left", "leftouter"
                - Right outer join: "right", "rightouter"
                - Full outer join: "full", "outer", "fullouter"
                - Left semi join: "semi", "leftsemi"
                - Left anti join: "anti", "leftanti"
                - Cross join: "cross"
                - Asof join: "asof"

                You can also use ``join_type`` keyword to specify this condition.
                Note that to avoid breaking changes, currently when ``join_type`` is specified,
                it overrides ``how``.
            lsuffix: Suffix to add to the overlapping columns of the left DataFrame.
            rsuffix: Suffix to add to the overlapping columns of the right DataFrame.
            match_condition: The match condition for asof join.

        Note:
            When both ``lsuffix`` and ``rsuffix`` are empty, the overlapping columns will have random column names in the resulting DataFrame.
            You can reference to these randomly named columns using :meth:`Column.alias` (See the first usage in Examples).

        See Also:
            - Usage notes for asof join: https://docs.snowflake.com/sql-reference/constructs/asof-join#usage-notes

        Examples::
            >>> from snowflake.snowpark.functions import col
            >>> df1 = session.create_dataframe([[1, 2], [3, 4], [5, 6]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[1, 7], [3, 8]], schema=["a", "c"])
            >>> df1.join(df2, df1.a == df2.a).select(df1.a.alias("a_1"), df2.a.alias("a_2"), df1.b, df2.c).show()
            -----------------------------
            |"A_1"  |"A_2"  |"B"  |"C"  |
            -----------------------------
            |1      |1      |2    |7    |
            |3      |3      |4    |8    |
            -----------------------------
            <BLANKLINE>
            >>> # refer a single column "a"
            >>> df1.join(df2, "a").select(df1.a.alias("a"), df1.b, df2.c).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |1    |2    |7    |
            |3    |4    |8    |
            -------------------
            <BLANKLINE>
            >>> # rename the ambiguous columns
            >>> df3 = df1.to_df("df1_a", "b")
            >>> df4 = df2.to_df("df2_a", "c")
            >>> df3.join(df4, col("df1_a") == col("df2_a")).select(col("df1_a").alias("a"), "b", "c").show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |1    |2    |7    |
            |3    |4    |8    |
            -------------------
            <BLANKLINE>

            >>> # join multiple columns
            >>> mdf1 = session.create_dataframe([[1, 2], [3, 4], [5, 6]], schema=["a", "b"])
            >>> mdf2 = session.create_dataframe([[1, 2], [3, 4], [7, 6]], schema=["a", "b"])
            >>> mdf1.join(mdf2, ["a", "b"]).show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            |3    |4    |
            -------------
            <BLANKLINE>
            >>> mdf1.join(mdf2, (mdf1["a"] < mdf2["a"]) & (mdf1["b"] == mdf2["b"])).select(mdf1["a"].as_("new_a"), mdf1["b"].as_("new_b")).show()
            ---------------------
            |"NEW_A"  |"NEW_B"  |
            ---------------------
            |5        |6        |
            ---------------------
            <BLANKLINE>
            >>> # use lsuffix and rsuffix to resolve duplicating column names
            >>> mdf1.join(mdf2, (mdf1["a"] < mdf2["a"]) & (mdf1["b"] == mdf2["b"]), lsuffix="_left", rsuffix="_right").show()
            -----------------------------------------------
            |"A_LEFT"  |"B_LEFT"  |"A_RIGHT"  |"B_RIGHT"  |
            -----------------------------------------------
            |5         |6         |7          |6          |
            -----------------------------------------------
            <BLANKLINE>
            >>> mdf1.join(mdf2, (mdf1["a"] < mdf2["a"]) & (mdf1["b"] == mdf2["b"]), rsuffix="_right").show()
            -------------------------------------
            |"A"  |"B"  |"A_RIGHT"  |"B_RIGHT"  |
            -------------------------------------
            |5    |6    |7          |6          |
            -------------------------------------
            <BLANKLINE>
            >>> # examples of different joins
            >>> df5 = session.create_dataframe([3, 4, 5, 5, 6, 7], schema=["id"])
            >>> df6 = session.create_dataframe([5, 6, 7, 7, 8, 9], schema=["id"])
            >>> # inner join
            >>> df5.join(df6, "id", "inner").sort("id").show()
            --------
            |"ID"  |
            --------
            |5     |
            |5     |
            |6     |
            |7     |
            |7     |
            --------
            <BLANKLINE>
            >>> # left/leftouter join
            >>> df5.join(df6, "id", "left").sort("id").show()
            --------
            |"ID"  |
            --------
            |3     |
            |4     |
            |5     |
            |5     |
            |6     |
            |7     |
            |7     |
            --------
            <BLANKLINE>
            >>> # right/rightouter join
            >>> df5.join(df6, "id", "right").sort("id").show()
            --------
            |"ID"  |
            --------
            |5     |
            |5     |
            |6     |
            |7     |
            |7     |
            |8     |
            |9     |
            --------
            <BLANKLINE>
            >>> # full/outer/fullouter join
            >>> df5.join(df6, "id", "full").sort("id").show()
            --------
            |"ID"  |
            --------
            |3     |
            |4     |
            |5     |
            |5     |
            |6     |
            |7     |
            |7     |
            |8     |
            |9     |
            --------
            <BLANKLINE>
            >>> # semi/leftsemi join
            >>> df5.join(df6, "id", "semi").sort("id").show()
            --------
            |"ID"  |
            --------
            |5     |
            |5     |
            |6     |
            |7     |
            --------
            <BLANKLINE>
            >>> # anti/leftanti join
            >>> df5.join(df6, "id", "anti").sort("id").show()
            --------
            |"ID"  |
            --------
            |3     |
            |4     |
            --------
            <BLANKLINE>

        Note:
            When performing chained operations, this method will not work if there are
            ambiguous column names. For example,

            >>> df1.filter(df1.a == 1).join(df2, df1.a == df2.a).select(df1.a.alias("a"), df1.b, df2.c) # doctest: +SKIP

            will not work because ``df1.filter(df1.a == 1)`` has produced a new dataframe and you
            cannot refer to ``df1.a`` anymore. Instead, you can do either

            >>> df1.join(df2, (df1.a == 1) & (df1.a == df2.a)).select(df1.a.alias("a"), df1.b, df2.c).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |1    |2    |7    |
            -------------------
            <BLANKLINE>

            or

            >>> df3 = df1.filter(df1.a == 1)
            >>> df3.join(df2, df3.a == df2.a).select(df3.a.alias("a"), df3.b, df2.c).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |1    |2    |7    |
            -------------------
            <BLANKLINE>

        Examples::
            >>> # asof join examples
            >>> df1 = session.create_dataframe([['A', 1, 15, 3.21],
            ...                                 ['A', 2, 16, 3.22],
            ...                                 ['B', 1, 17, 3.23],
            ...                                 ['B', 2, 18, 4.23]],
            ...                                schema=["c1", "c2", "c3", "c4"])
            >>> df2 = session.create_dataframe([['A', 1, 14, 3.19],
            ...                                 ['B', 2, 16, 3.04]],
            ...                                schema=["c1", "c2", "c3", "c4"])
            >>> df1.join(df2, on=["c1", "c2"], how="asof", match_condition=(df1.c3 >= df2.c3)) \
            ...     .select(df1.c1, df1.c2, df1.c3.alias("C3_1"), df1.c4.alias("C4_1"), df2.c3.alias("C3_2"), df2.c4.alias("C4_2")) \
            ...     .order_by("c1", "c2").show()
            ---------------------------------------------------
            |"C1"  |"C2"  |"C3_1"  |"C4_1"  |"C3_2"  |"C4_2"  |
            ---------------------------------------------------
            |A     |1     |15      |3.21    |14      |3.19    |
            |A     |2     |16      |3.22    |NULL    |NULL    |
            |B     |1     |17      |3.23    |NULL    |NULL    |
            |B     |2     |18      |4.23    |16      |3.04    |
            ---------------------------------------------------
            <BLANKLINE>
            >>> df1.join(df2, on=(df1.c1 == df2.c1) & (df1.c2 == df2.c2), how="asof",
            ...     match_condition=(df1.c3 >= df2.c3), lsuffix="_L", rsuffix="_R") \
            ...     .order_by("C1_L", "C2_L").show()
            -------------------------------------------------------------------------
            |"C1_L"  |"C2_L"  |"C3_L"  |"C4_L"  |"C1_R"  |"C2_R"  |"C3_R"  |"C4_R"  |
            -------------------------------------------------------------------------
            |A       |1       |15      |3.21    |A       |1       |14      |3.19    |
            |A       |2       |16      |3.22    |NULL    |NULL    |NULL    |NULL    |
            |B       |1       |17      |3.23    |NULL    |NULL    |NULL    |NULL    |
            |B       |2       |18      |4.23    |B       |2       |16      |3.04    |
            -------------------------------------------------------------------------
            <BLANKLINE>
            >>> df1 = df1.alias("L")
            >>> df2 = df2.alias("R")
            >>> df1.join(df2, using_columns=["c1", "c2"], how="asof",
            ...         match_condition=(df1.c3 >= df2.c3)).order_by("C1", "C2").show()
            -----------------------------------------------
            |"C1"  |"C2"  |"C3L"  |"C4L"  |"C3R"  |"C4R"  |
            -----------------------------------------------
            |A     |1     |15     |3.21   |14     |3.19   |
            |A     |2     |16     |3.22   |NULL   |NULL   |
            |B     |1     |17     |3.23   |NULL   |NULL   |
            |B     |2     |18     |4.23   |16     |3.04   |
            -----------------------------------------------
            <BLANKLINE>
        r   r   r  r   r   crossz)Cross joins cannot take columns as input.asofNz9match_condition cannot be None when performing asof join.z?match_condition is only accepted with join type 'asof' given: ''Fr   c              3   J   K   | ]  \  }}t        |t              s||f  y wrC  r  )r  rR  r   s      r   r  z!DataFrame.join.<locals>.<genexpr>W  s*      ( S%c3/ #J(s   !#zIAll list elements for 'on' or 'using_columns' must be string type. Got: 'z' at index z$Invalid input type for join column: Tr  r   r   r  r  z(Invalid type for join. Must be Dataframe)<rm  r%   r   r   r  ra   DF_SELF_JOIN_NOT_SUPPORTEDr   r  r   r  r6  r   rv   	Exceptionr   r   r   r   r  r  r   	enumerater  r  r   rW  rX  r\   rY  dataframe_joinrA  r   r   r   r   r  r   r  r#   r  r   r  join_type__crossr    join_type__left_semir   join_type__left_antijoin_type__asofrU   r  rB  rC  r  rT   r  r   rI  r   _join_dataframes)r;  r  r  r  r   r   r  r   r_  r   original_join_typejoin_type_argr   r   bad_idxbad_colr`  r  	join_colsr   s                       r   r  zDataFrame.join  s2   ` 

?39r#ZZ4;*5g$]3	eY'u}

ekk 95PPRR)U+9c*OO%++-55c2>II'R!-0#$OPP 9d+i-OO%++-7"*$S  #.$YZlYmmno 
 m,5 "M3/!.M62 -=(3&*]KcZS1KL#' ($-m$<( $ 
  !']O;wiA   x8:4;N:OP 
 D}}//446'		(@(@$G!!#''*""377+i/59CMM2	95:>CMM7	:6;?CMM8	95:>CMM7	5159CMM2	849=CMM6	849=CMM6	4048CMM1$'=m_%MNN"JJ;	(!)fc];CMM9 $Ix8!* AG # 6 6 9 9 = = ?
 (B4	?BST  #.3++_ (/CKK%(/CKK%(( / )   BCCY Ls   -Sr  func_argumentsfunc_named_argumentsc                   d}d}|rt        | j                  j                  |       | j                  j                  j                         }t	        |j
                  j                  |      }| j                  |j                         t        |j                  |g|i | t        |g|i |}d}d}	|j                  r| j                  j                  j                  t        | j                   |            }
t#        || j                   |
      \  }}}|D cg c](  }| j                  j                  j%                  |i       * }	}| j                  j                  j                  t        | j                   ||	            }g ||}| j                  j&                  r| j                  j                  j)                  t+        || j                   | j                  j                  |	      | j                  j                        }|r|j-                  |      }| j/                  ||      S |r| j/                  t1        |      |      S | j/                  t        | j                   ||	      |      S c c}w )a|  Lateral joins the current DataFrame with the output of the specified table function.

        References: `Snowflake SQL functions <https://docs.snowflake.com/en/sql-reference/functions-table.html>`_.

        Example 1
            Lateral join a table function by using the name and parameters directly:

            >>> df = session.sql("select 'James' as name, 'address1 address2 address3' as addresses")
            >>> df.join_table_function("split_to_table", df["addresses"], lit(" ")).show()
            --------------------------------------------------------------------
            |"NAME"  |"ADDRESSES"                 |"SEQ"  |"INDEX"  |"VALUE"   |
            --------------------------------------------------------------------
            |James   |address1 address2 address3  |1      |1        |address1  |
            |James   |address1 address2 address3  |1      |2        |address2  |
            |James   |address1 address2 address3  |1      |3        |address3  |
            --------------------------------------------------------------------
            <BLANKLINE>

        Example 2
            Lateral join a table function by calling:

            >>> from snowflake.snowpark.functions import table_function
            >>> split_to_table = table_function("split_to_table")
            >>> df = session.sql("select 'James' as name, 'address1 address2 address3' as addresses")
            >>> df.join_table_function(split_to_table(df["addresses"], lit(" "))).show()
            --------------------------------------------------------------------
            |"NAME"  |"ADDRESSES"                 |"SEQ"  |"INDEX"  |"VALUE"   |
            --------------------------------------------------------------------
            |James   |address1 address2 address3  |1      |1        |address1  |
            |James   |address1 address2 address3  |1      |2        |address2  |
            |James   |address1 address2 address3  |1      |3        |address3  |
            --------------------------------------------------------------------
            <BLANKLINE>

        Example 3
            Lateral join a table function with the partition and order by clause:

            >>> from snowflake.snowpark.functions import table_function
            >>> split_to_table = table_function("split_to_table")
            >>> df = session.create_dataframe([
            ...     ["John", "James", "address1 address2 address3"],
            ...     ["Mike", "James", "address4 address5 address6"],
            ...     ["Cathy", "Stone", "address4 address5 address6"],
            ... ],
            ... schema=["first_name", "last_name", "addresses"])
            >>> df.join_table_function(split_to_table(df["addresses"], lit(" ")).over(partition_by="last_name", order_by="first_name")).show()
            ----------------------------------------------------------------------------------------
            |"FIRST_NAME"  |"LAST_NAME"  |"ADDRESSES"                 |"SEQ"  |"INDEX"  |"VALUE"   |
            ----------------------------------------------------------------------------------------
            |John          |James        |address1 address2 address3  |1      |1        |address1  |
            |John          |James        |address1 address2 address3  |1      |2        |address2  |
            |John          |James        |address1 address2 address3  |1      |3        |address3  |
            |Mike          |James        |address4 address5 address6  |2      |1        |address4  |
            |Mike          |James        |address4 address5 address6  |2      |2        |address5  |
            |Mike          |James        |address4 address5 address6  |2      |3        |address6  |
            |Cathy         |Stone        |address4 address5 address6  |3      |1        |address4  |
            |Cathy         |Stone        |address4 address5 address6  |3      |2        |address5  |
            |Cathy         |Stone        |address4 address5 address6  |3      |3        |address6  |
            ----------------------------------------------------------------------------------------
            <BLANKLINE>

        Example 4
            Lateral join a table function with aliasing the output column names:

            >>> from snowflake.snowpark.functions import table_function
            >>> split_to_table = table_function("split_to_table")
            >>> df = session.sql("select 'James' as name, 'address1 address2 address3' as addresses")
            >>> df.join_table_function(split_to_table(col("addresses"), lit(" ")).alias("seq", "idx", "val")).show()
            ------------------------------------------------------------------
            |"NAME"  |"ADDRESSES"                 |"SEQ"  |"IDX"  |"VAL"     |
            ------------------------------------------------------------------
            |James   |address1 address2 address3  |1      |1      |address1  |
            |James   |address1 address2 address3  |1      |2      |address2  |
            |James   |address1 address2 address3  |1      |3      |address3  |
            ------------------------------------------------------------------
            <BLANKLINE>

        Args:

            func_name: The SQL function name.
            func_arguments: The positional arguments for the SQL function.
            func_named_arguments: The named arguments for the SQL function, if it accepts named arguments.

        Returns:
            A new :class:`DataFrame` that has the columns carried from this :class:`DataFrame`, plus new columns and rows from the lateral join with the table function.

        See Also:
            - :meth:`Session.table_function`, which creates a new :class:`DataFrame` by using the SQL table function.

        N)r  )
other_planr  r  r  r  )rQ   r   rW  rX  r\   rY  dataframe_join_table_functionrA  r   rX   fnr   aliasesr  r  rD   r  r   r  r  r	  r3   r   r  rK   )r;  r  r   r  r  r`  r  r  project_colsnew_col_namesr  old_colsr  r  r   r  r  s                    r   join_table_functionzDataFrame.join_table_function  sJ   H !$--":":DA==++002D#DII$K$KTRCcgg&)   '	 6
!
%9
	 !]]44<<!$**i8N .H4::~.*Hh
 EM=@''//R8M  //77!$**iMRI 4X3
3L==//--11II)#zz!]]44,	 00 J K )00>??;$?????7<#Ct?TTdjj)N  
 	
=s   -I1c                   d}|r| j                   j                  j                         }t        |j                  j
                  |      }| j                  |j                         |j                  |j                         |r||j                  _
        |r||j                  _
        | j                  |t        d      d|||      S )a  Performs a cross join, which returns the Cartesian product of the current
        :class:`DataFrame` and another :class:`DataFrame` (``right``).

        If the current and ``right`` DataFrames have columns with the same name, and
        you need to refer to one of these columns in the returned DataFrame, use the
        :func:`col` function on the current or ``right`` DataFrame to disambiguate
        references to these columns.

        Example::

            >>> df1 = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df2 = session.create_dataframe([[5, 6], [7, 8]], schema=["c", "d"])
            >>> df1.cross_join(df2).sort("a", "b", "c", "d").show()
            -------------------------
            |"A"  |"B"  |"C"  |"D"  |
            -------------------------
            |1    |2    |5    |6    |
            |1    |2    |7    |8    |
            |3    |4    |5    |6    |
            |3    |4    |7    |8    |
            -------------------------
            <BLANKLINE>
            >>> df3 = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df4 = session.create_dataframe([[5, 6], [7, 8]], schema=["a", "b"])
            >>> df3.cross_join(df4, lsuffix="_l", rsuffix="_r").sort("a_l", "b_l", "a_r", "b_r").show()
            ---------------------------------
            |"A_L"  |"B_L"  |"A_R"  |"B_R"  |
            ---------------------------------
            |1      |2      |5      |6      |
            |1      |2      |7      |8      |
            |3      |4      |5      |6      |
            |3      |4      |7      |8      |
            ---------------------------------
            <BLANKLINE>

        Args:
            right: the right :class:`DataFrame` to join.
            lsuffix: Suffix to add to the overlapping columns of the left DataFrame.
            rsuffix: Suffix to add to the overlapping columns of the right DataFrame.

        Note:
            If both ``lsuffix`` and ``rsuffix`` are empty, the overlapping columns will have random column names in the result DataFrame.
            If either one is not empty, the overlapping columns won't have random names.
        Nr  r   r   r  )r   rW  rX  r\   rY  dataframe_cross_joinrA  r   r   r   rI  r   _join_dataframes_internalr%   )r;  r  r   r   r   r`  r  s          r   
cross_joinzDataFrame.cross_joinE  s    n ==++002D#DII$B$BDICcgg&sww'$+!$+!--W% . 
 	
r   r  r   r   c          	      N   t        |t              r| j                  |||||||      S t        |t        t        f      rct        t        d            }|D ]3  }	t        |	      }
|| j                  |
      |j                  |
      k(  z  }5 | j                  ||||||      S t        | |||||      \  }}t        |t              st        ||      }t        |j                  |j                  |d ||j                  nd       }| j                  r| j                  | j                   j"                  j%                  | j                   j"                  j'                  || j                   j"                        | j                   j"                        |      S | j                  ||      S )N)
join_exprsr   r   r  r  Tr	  r   r  r  r  )r   r   r  r    r   r)   r   r   r  r   r$   r   r  r  r  r  r   r  r	  r
  )r;  r  r   r   r   r   r  r  	join_condr   quotedr   r   join_logical_plans                 r   r  zDataFrame._join_dataframes  s    mV,11( /# 2   i(H!56wt}-I" P#A%&)9UYYv=N)NO	P 11# 2   %HC 5 &i?	 $				/>/J++PT! %%MM++CC"mm55RR-8O8O S  "&!8!8	 D  ( '   ??#4	?JJr   r  c                   t        | ||g ||      \  }}	||j                  nd }
||j                  nd }t        |j                  |	j                  ||
|      }| j                  r| j                  | j                  j                  j                  | j                  j                  j                  || j                  j                        | j                  j                        |      S | j                  ||      S )Nr   r  r  r  )
r  r  r   r  r  r  r   r  r	  r
  )r;  r  r   r  r   r   r  r  r   r   join_condition_exprmatch_condition_exprr  s                r   r  z#DataFrame._join_dataframes_internal  s    #%B

c 9C8Nj44TX+:+FO''D 	 !IIII 
 !!??''??--11NN)!%!8!8 O  "]]44 @  $ # 	 	 0IFFr   )keep_column_orderast_stmtr   r   r  r  c                \   |~|r|| j                   j                  j                         }t        |j                  j
                  |      }||_        t        |j                  |       | j                  |j                         | j                  |g|g||d      }|r|j                  |_        |S )a  
        Returns a DataFrame with an additional column with the specified name
        ``col_name``. The column is computed by using the specified expression ``col``.

        If a column with the same name already exists in the DataFrame, that column is
        replaced by the new column.

        Example 1::

            >>> df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df.with_column("mean", (df["a"] + df["b"]) / 2).show()
            ------------------------
            |"A"  |"B"  |"MEAN"    |
            ------------------------
            |1    |2    |1.500000  |
            |3    |4    |3.500000  |
            ------------------------
            <BLANKLINE>

        Example 2::

            >>> from snowflake.snowpark.functions import udtf
            >>> @udtf(output_schema=["number"])
            ... class sum_udtf:
            ...     def process(self, a: int, b: int) -> Iterable[Tuple[int]]:
            ...         yield (a + b, )
            >>> df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df.with_column("total", sum_udtf(df.a, df.b)).sort(df.a).show()
            -----------------------
            |"A"  |"B"  |"TOTAL"  |
            -----------------------
            |1    |2    |3        |
            |3    |4    |7        |
            -----------------------
            <BLANKLINE>

        Args:
            col_name: The name of the column to add or replace.
            col: The :class:`Column` or :class:`table_function.TableFunctionCall` with single column output to add or replace.
            keep_column_order: If ``True``, the original order of the columns in the DataFrame is preserved when reaplacing a column.
        Fr  r  r   )r   rW  rX  r\   rY  dataframe_with_columnr   rW   r   rA  r   with_columnsr  r  )r;  r   r   r  r  r   rY  r   s           r   with_columnzDataFrame.with_column  s    h 	}}//446H$X]]%H%H(SD$DM7#Fdgg&JE/  
 !BJ	r   r  r  c                   |D cg c]  }t        |       }}t        |      }t        |      t        |      k7  rt        d      t	        d |D              }	|	dk(  rjt        |      t        |      k7  r$t        dt        |       dt        |       d      t        ||      D 
cg c]  \  }
} |j                  |
       }}
}n|	dkD  rt        d|	 d	      t        |      t        |      k  rt        d
      g }d}t        t        |            D ]  }||   }t        |t              r*|||z      }
|j                   |j                  |
             Bt        |      t        |      z
  }||||z   dz    }|j                   |j                  |         ||r| j                  j                  j                         }t        |j                  j                   |      }|D ]  }|j"                  j                  |        |D ]&  }t%        |j&                  j)                         |       ( | j+                  |j,                         |	dkD  s|s7| j.                  D cg c]  }|j0                  |vrt        |       }}g ||}nt        ||      D 
ci c]  \  }
}|
|
 }}
}g }t               }| j.                  D ][  }t        |j0                        }||v r&|j                  ||          |j)                  |       B|j                  t        |             ] |j3                         D ]  \  }
}|
|vs|j                  |        | j5                  ||d      }|r|j6                  |_        |S c c}w c c}}
w c c}w c c}}
w )a	  Returns a DataFrame with additional columns with the specified names
        ``col_names``. The columns are computed by using the specified expressions
        ``values``.

        If columns with the same names already exist in the DataFrame, those columns
        are removed and appended at the end by new columns.

        Example 1::

            >>> from snowflake.snowpark.functions import udtf
            >>> @udtf(output_schema=["number"])
            ... class sum_udtf:
            ...     def process(self, a: int, b: int) -> Iterable[Tuple[int]]:
            ...         yield (a + b, )
            >>> df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df.with_columns(["mean", "total"], [(df["a"] + df["b"]) / 2, sum_udtf(df.a, df.b)]).sort(df.a).show()
            ----------------------------------
            |"A"  |"B"  |"MEAN"    |"TOTAL"  |
            ----------------------------------
            |1    |2    |1.500000  |3        |
            |3    |4    |3.500000  |7        |
            ----------------------------------
            <BLANKLINE>

        Example 2::

            >>> from snowflake.snowpark.functions import table_function
            >>> split_to_table = table_function("split_to_table")
            >>> df = session.sql("select 'James' as name, 'address1 address2 address3' as addresses")
            >>> df.with_columns(["seq", "idx", "val"], [split_to_table(df.addresses, lit(" "))]).show()
            ------------------------------------------------------------------
            |"NAME"  |"ADDRESSES"                 |"SEQ"  |"IDX"  |"VAL"     |
            ------------------------------------------------------------------
            |James   |address1 address2 address3  |1      |1      |address1  |
            |James   |address1 address2 address3  |1      |2      |address2  |
            |James   |address1 address2 address3  |1      |3      |address3  |
            ------------------------------------------------------------------
            <BLANKLINE>

        Args:
            col_names: A list of the names of the columns to add or replace.
            values: A list of the :class:`Column` objects or :class:`table_function.TableFunctionCall` object
                    to add or replace.
            keep_column_order: If ``True``, the original order of the columns in the DataFrame is preserved when reaplacing a column.
        zGThe same column name is used multiple times in the col_names parameter.c              3   D   K   | ]  }t        |t              rd nd  yw)r   r   N)r   r   )r  r   s     r   r  z)DataFrame.with_columns.<locals>.<genexpr>  s$      #
?BC!23A:#
s    r   zThe size of column names (z') is not equal to the size of columns ()r   zAOnly one table function call accepted inside with_columns call, (z
) providedzaThe size of column names must be equal to the size of the output columns. Fewer columns provided.Fr  )r   r   r  r   sumr  r  rF  r   r   r   r   rW  rX  r\   rY  dataframe_with_columnsr  rW   r  r  rA  r   r   r   itemsr   r  r  )r;  r  r  r  r  r   r  qualified_namesnew_column_namesnum_table_func_callsr   r   r  r  ir  rY  r   rI  r  r  
final_colsnew_colreplaced_mapusedfield_quotedr   r   s                               r   r  zDataFrame.with_columnsB  s   r 3<<Q:a=<</y>S!122Y   # #
FL#
  
  1$9~V, 0Y0@@ghklrhsgttuv  8;?F7ST)$THT!A%SThSiist  9~F+ w  HF3v;' 5Qic6*$QZ0DOOGCGGDM2 ^c&k9F%a!f*q.9EOOGCGGUO45 00557I$Y^^%J%JIVD% 0%%h/0 V;DKKOO<MuUVdgg&  !#+< "\\::%55 uH 
 08/h/J
 473Q"/$gL  J5D 5)%**5</%%l<&@AHH\* %%fUm45 (--/ )at#%%a()
 [[yE[J"BJ	o =  UFs   M3'M8 M>Nc                     y rC  rQ  r;  rL  rM  r   s       r   r   zDataFrame.count       	r   c                     y rC  rQ  r,  s       r   r   zDataFrame.count  r-  r   c                   i }|r| j                   j                  j                         }t        |j                  j
                        }| j                  |j                         |t        |j                  |       ||_
        | j                   j                  j                  |       | j                   j                  j                  |      \  }|t        <   t        | j                  |       5  | j!                  dd      }t#        |dd        |j$                  d
||t&        j(                  d|}	|r|	d	   d	   n|	cddd       S # 1 sw Y   yxY w)a!  Executes the query representing this DataFrame and returns the number of
        rows in the result (similar to the COUNT function in SQL).

        Args:
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            block: A bool value indicating whether this function will wait until the result is available.
                When it is ``False``, this function executes the underlying queries of the dataframe
                asynchronously and returns an :class:`AsyncJob`.
        N)r  r   Fr   zDataFrame.countr   r|  )rL  rM  rc  r   rQ  )r   rW  rX  r\   rY  dataframe_countrA  r   rR   rL  rM  r\  r]  r]   rb   r   rZ  re   _internal_collect_with_tagr   COUNT)
r;  rL  rM  r   r_  r`  rY  r   r   r  s
             r   r   zDataFrame.count  s+   $ ==++002D$TYY%>%>?Ddgg&+,T-B-BDTUDJMM$$))$/ 261I1I1O1OPT1U.Av-.+DJJ= 		5.E:B$5AF2R22 !1*00 	F $)6!9Q<f		5 		5 		5s   ,AEEc                 p   | j                   | j                  j                  | j                  j                  rst        j                         }t        |j                         || j                  _        | j                  | j                  j                  j                  j                         | j                  S )ac  Returns a new :class:`DataFrameWriter` object that you can use to write the data in the :class:`DataFrame` to
        a Snowflake database or a stage location

        Example::
            >>> df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> df.write.mode("overwrite").save_as_table("saved_table", table_type="temporary")
            >>> session.table("saved_table").show()
            -------------
            |"A"  |"B"  |
            -------------
            |1    |2    |
            |3    |4    |
            -------------
            <BLANKLINE>
            >>> stage_created_result = session.sql("create temp stage if not exists test_stage").collect()
            >>> df.write.copy_into_location("@test_stage/copied_from_dataframe")  # default CSV
            [Row(rows_unloaded=2, input_bytes=8, output_bytes=28)]
        )r  r'  r  r   r  r  r  r\   dataframe_writerrA  r   )r;  writers     r   r  zDataFrame.write  s~    * LL$!!)))ZZ\Ff556 &DLLdll//@@CCD||r   )	filesr,  validation_modetarget_columnstransformationsformat_type_optionsrL  iceberg_configr   
table_namer6  r7  r8  r9  r:  r;  copy_optionsc       	            i }d}|
rE| j                   j                  j                         }t        |j                  j
                  |      }t        |j                  |       ||j                  j                  |       |||j                  _        |||j                  _        ||j                  j                  |       |+|D ]&  }t        |j                  j!                         |       ( |A|D ]<  }|j"                  j!                         }||_        t        |j&                  ||          > |t)        |j*                  |       |A|D ]<  }|j,                  j!                         }||_        t        |j&                  ||          > |	O|	j/                         D ]<  \  }}|j0                  j!                         }||_        t        |j&                  |       > | j3                  |j4                         | j                   j                  j7                  |       | j                   j                  j9                  |      \  }|t:        <   ddlm} tA        | j                   jB                  |      rZ| j                   jB                  jD                  r:tG        | j                   tI        |d||      ||
      } |jJ                  dd|i|S | jL                  r| jL                  jN                  stQ        d      |rtS        |      nd}|rtS        |      nd}|r<|r:tU        |      tU        |      k7  r#tW        d	tU        |       d
tU        |             tA        |tX              r|ndj[                  |      }t]        |       tA        |tX              rt_        |      n|}|xs% | jL                  j`                  jc                  d      }te        | jL                  j`                        \  }}|xs |}|xs% | jL                  j`                  jc                  d      }|xs% | jL                  j`                  jc                  d      }d}| jL                  jf                  r2|s0|s.| jL                  jh                  }| jL                  jj                  }d}|r|D cg c]  }tm        |d       c}nd}|xs |}|xs% | jL                  j`                  jc                  d      }|r|D cg c]  }to        |       c}nd}|r,|D cg c]   }tA        |tp              r|jr                  n|" c}nd}tG        | j                   tI        || jL                  jN                  || jL                  jt                  ||||||| jL                  jv                  | jL                  j`                  ||	      ||
      } |jJ                  dd|i|S c c}w c c}w c c}w )a9  Executes a `COPY INTO <table> <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html>`__ command to load data from files in a stage location into a specified table.

        It returns the load result described in `OUTPUT section of the COPY INTO <table> command <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#output>`__.
        The returned result also depends on the value of ``validation_mode``.

        It's slightly different from the ``COPY INTO`` command in that this method will automatically create a table if the table doesn't exist and the input files are CSV files whereas the ``COPY INTO <table>`` doesn't.

        To call this method, this DataFrame must be created from a :class:`DataFrameReader`.

        Example::

            >>> # Create a CSV file to demo load
            >>> import tempfile
            >>> with tempfile.NamedTemporaryFile(mode="w+t") as t:
            ...     t.writelines(["id1, Product A", "\n" "id2, Product B"])
            ...     t.flush()
            ...     create_stage_result = session.sql("create temp stage if not exists test_stage").collect()
            ...     put_result = session.file.put(t.name, "@test_stage/copy_into_table_dir", overwrite=True)
            >>> # user_schema is used to read from CSV files. For other files it's not needed.
            >>> from snowflake.snowpark.types import StringType, StructField, StringType
            >>> from snowflake.snowpark.functions import length
            >>> user_schema = StructType([StructField("product_id", StringType()), StructField("product_name", StringType())])
            >>> # Use the DataFrameReader (session.read below) to read from CSV files.
            >>> df = session.read.schema(user_schema).csv("@test_stage/copy_into_table_dir")
            >>> # specify target column names.
            >>> target_column_names = ["product_id", "product_name"]
            >>> drop_result = session.sql("drop table if exists copied_into_table").collect()  # The copy will recreate the table.
            >>> copied_into_result = df.copy_into_table("copied_into_table", target_columns=target_column_names, force=True)
            >>> session.table("copied_into_table").show()
            ---------------------------------
            |"PRODUCT_ID"  |"PRODUCT_NAME"  |
            ---------------------------------
            |id1           | Product A      |
            |id2           | Product B      |
            ---------------------------------
            <BLANKLINE>

        The arguments of this function match the optional parameters of the `COPY INTO <table> <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#optional-parameters>`__.

        Args:
            table_name: A string or list of strings representing table name.
                If input is a string, it represents the table name; if input is of type iterable of strings,
                it represents the fully-qualified object identifier (database name, schema name, and table name).
            files: Specific files to load from the stage location.
            pattern: The regular expression that is used to match file names of the stage location.
            validation_mode: A ``str`` that instructs the ``COPY INTO <table>`` command to validate the data files instead of loading them into the specified table.
                Values can be "RETURN_n_ROWS", "RETURN_ERRORS", or "RETURN_ALL_ERRORS". Refer to the above mentioned ``COPY INTO <table>`` command optional parameters for more details.
            target_columns: Name of the columns in the table where the data should be saved.
            transformations: A list of column transformations.
            format_type_options: A dict that contains the ``formatTypeOptions`` of the ``COPY INTO <table>`` command.
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            iceberg_config: A dictionary that can contain the following iceberg configuration values:

                * partition_by: specifies one or more partition expressions for the Iceberg table.
                    Can be a single Column, column name, SQL expression string, or a list of these.
                    Supports identity partitioning (column names) as well as partition transform functions
                    like bucket(), truncate(), year(), month(), day(), hour().

                * external_volume: specifies the identifier for the external volume where
                    the Iceberg table stores its metadata files and data in Parquet format

                * catalog: specifies either Snowflake or a catalog integration to use for this table

                * base_location: the base directory that snowflake can write iceberg metadata and files to

                * target_file_size: specifies a target Parquet file size for the table.
                    Valid values: 'AUTO' (default), '16MB', '32MB', '64MB', '128MB'

                * catalog_sync: optionally sets the catalog integration configured for Polaris Catalog

                * storage_serialization_policy: specifies the storage serialization policy for the table

                * iceberg_version: Overrides the version of iceberg to use. Defaults to 2 when unset.

            copy_options: The kwargs that is used to specify the ``copyOptions`` of the ``COPY INTO <table>`` command.
        Nr   r  test_file_path)	file_pathr=  r:  r  rL  zcTo copy into a table, the DataFrame must be created from a DataFrameReader and specify a file path.z|Number of column names provided to copy into does not match the number of transformations provided. Number of column names: z, number of transformations: r  PATTERNTARGET_COLUMNSTRANSFORMATIONSFTcopy_into_tableVALIDATION_MODE)r@  r6  file_formatr,  column_namesr9  r=  r:  r7  user_schemacur_optionscreate_table_from_infer_schemar;  rQ  )<r   rW  rX  r\   rY  dataframe_copy_into_tabler_   r<  r6  r  r,  rI  r7  r8  rS   r9  r  r:  _1_2rR   rL  r=  r!  r;  rA  r   r\  r]  r]   r   r  r   r   r  r   r5   r^  r&  
_file_pathr   r  r  r   r  r  r   r   _cur_optionsrm  r|   _infer_schema_infer_schema_transformations_infer_schema_target_columnsr   r   r   r  
_file_type_user_schema)r;  r<  r6  r,  r7  r8  r9  r:  rL  r;  r   r=  r_  r`  rY  tkentryr@  r   r  r   full_table_namereader_format_type_optionsreader_copy_optionsrJ  columnr   normalized_column_namestransformation_expss                                 r   rD  zDataFrame.copy_into_table5  s   ~ ==++002D$TYY%H%H$ODT__j9 

!!%("%,"*-<$$*)##**>:*( NA.t/C/C/G/G/I1MN"., QA 4488:E EH.uxx9LQ9OPQ  +,T-B-BDTU'% JA --113E EH.uxxaIJ )*002 8DAq++//1AAD.qttQ78 dgg&MM$$))$/ 261I1I1O1OPT1U.Av-. 	M t}}**,@A##CC !.!-(;	 #
B >2== !15;  ||4<<#:#:,u  3A~.d4C%0N#s?';; O  PS  Tb  Pc  Od  dA  BE  FU  BV  AW  X 
 %Z5J388J;O 	 	_-,6z3,GZ(Z 	 ET\\66::9E:ULL%%;
7"$7 2O5O' 
4<<+D+D+H+H,
 * 
T\\-F-F-J-J.

 */&<<%%."llHHO!\\FFN-1*  FUU6^F$56U 	
 $:':) 
T\\-F-F-J-J.

  3AAhZ!A 	   . '1&@""fL
  	 MM,,11 LL334 3)$7 / LL55 LL55/M-  '
. :r99 
-
17
 	
Y V B
s   V<!W>%WrL  r   	max_widthc                B   t        | j                  |       5  t        | j                  ||t	        |xs | j
                  | j                  j                  t        | j                  j                  j                  d            |             ddd       y# 1 sw Y   yxY w)a  Evaluates this DataFrame and prints out the first ``n`` rows with the
        specified maximum number of characters per column.

        Args:
            n: The number of rows to print out.
            max_width: The maximum number of characters to print out for each column.
                If the number of characters exceeds the maximum, the method prints out
                an ellipsis (...) at the end of the column.
            statement_params: Dictionary of statement level parameters to be set while executing this action.
        rg  rh  )r  r   N)rb   showprint_show_stringrw   r  r   rk  rr   rl  rm  )r;  r  r_  rL  r   s        r   ra  zDataFrame.show0  s    ( ,DIIt< 	!!&V(BD,B,B//'+/==+=+=+A+A=,	' ( " 	 	 	s   A5BBz0.7.0z.Use `DataFrame.join_table_function()` instead.z(Use :meth:`join_table_function` instead.)r  extra_warning_textextra_doc_stringinputpathouter	recursiver  c           	         t        |       d}|r| j                  j                  j                         }t	        |j
                  j                  |      }| j                  |j                         t        |j                  |       |||j                  _        ||_        ||_        |j                         }|j                         dk(  rd|j                   _        n6|j                         dk(  rd|j                   _        nd|j                   _        t)        |t*              r| j-                  |      }| j/                  t1        |j2                  ||||      |      S )a  Flattens (explodes) compound values into multiple rows.

        It creates a new ``DataFrame`` from this ``DataFrame``, carries the existing columns to the new ``DataFrame``,
        and adds the following columns to it:

            - SEQ
            - KEY
            - PATH
            - INDEX
            - VALUE
            - THIS

        References: `Snowflake SQL function FLATTEN <https://docs.snowflake.com/en/sql-reference/functions/flatten.html>`_.

        If this ``DataFrame`` also has columns with the names above, you can disambiguate the columns by renaming them.

        Example::

            >>> table1 = session.sql("select parse_json(numbers) as numbers from values('[1,2]') as T(numbers)")
            >>> flattened = table1.flatten(table1["numbers"])
            >>> flattened.select(table1["numbers"], flattened["value"].as_("flattened_number")).show()
            ----------------------------------
            |"NUMBERS"  |"FLATTENED_NUMBER"  |
            ----------------------------------
            |[          |1                   |
            |  1,       |                    |
            |  2        |                    |
            |]          |                    |
            |[          |2                   |
            |  1,       |                    |
            |  2        |                    |
            |]          |                    |
            ----------------------------------
            <BLANKLINE>

        Args:
            input: The name of a column or a :class:`Column` instance that will be unseated into rows.
                The column data must be of Snowflake data type VARIANT, OBJECT, or ARRAY.
            path: The path to the element within a VARIANT data structure which needs to be flattened.
                The outermost element is to be flattened if path is empty or ``None``.
            outer: If ``False``, any input rows that cannot be expanded, either because they cannot be accessed in the ``path``
                or because they have zero fields or entries, are completely omitted from the output.
                Otherwise, exactly one row is generated for zero-row expansions
                (with NULL in the KEY, INDEX, and VALUE columns).
            recursive: If ``False``, only the element referenced by ``path`` is expanded.
                Otherwise, the expansion is performed for all sub-elements recursively.
            mode: Specifies which types should be flattened "OBJECT", "ARRAY", or "BOTH".

        Returns:
            A new :class:`DataFrame` that has the columns carried from this :class:`DataFrame`, the flattened new columns and new rows.

        See Also:
            - :meth:`Session.flatten`, which creates a new :class:`DataFrame` by flattening compound values into multiple rows.
        NOBJECTTARRAYr  )ru   r   rW  rX  r\   rY  dataframe_flattenrA  r   rS   rf  rg  rI  rh  ri  r   r  flatten_mode_objectflatten_mode_arrayflatten_mode_bothr   r  r   _lateralrA   r  )	r;  rf  rg  rh  ri  r  r   r`  rY  s	            r   flattenzDataFrame.flattenU  s   N 	4  ==++002D$TYY%@%@$GDdgg&&tzz59"&		DJ&DN::<Dzz|x'04		-(/3		,.2		+eS!HHUOE}}E--tUItL  
 	
r   table_functionc                    ddl m} t        | j                  j                  |      rt        | j                  |      S | j                  j                  j                  t        | j                  |            j                  D cg c]  }|j                   }}t        |      j                         D cg c]  \  }}|dkD  s| }}}t        |      dk(  r,t        | j                  t        | j                  |      |      S t        d      }	| j!                  | j"                  D cg c]  }t%        | |j                  |	d |       c}d      }
t        | j                  t        |
j                  |      |      S c c}w c c}}w c c}w )	Nr   r  r  r   a)r   r   Fr   )r   r  r   r   r   r   r  r  rB   r  r:  r   r   r!  r  r   r   r   r   )r;  rs  r  r  r  result_columnsrV  r@  r   r   rG  s              r   rq  zDataFrame._lateral  sh    	Mdmm))+?@T]]i@@ //77

N3j	
 II
 
 +2.*A*G*G*IS$!QQQRUASS A%wtzz>Bi  "#& !LL	  !II%5	   
 MM75;;?9
 	
3
 T	s   E=4FF'!Fc                    | j                   j                  d   j                  j                         j	                         }|r| j
                  j                  j                         }t        |j                  j                        }| j                  |j                         ||_        | j
                  j                  j                  |       | j
                  j                  j                  |      \  }|t         <   t#        |      rI | j
                  j$                  j&                  | j)                  |d      j                   fi |\  }}	||	fS  | j
                  j$                  j&                  | j                   fi |\  }
}	|
d | }||	fS )Nr  Fr   )r  r  r  r   r  r   rW  rX  r\   rY  dataframe_showrA  r   r  r\  r]  r]   r   r   get_result_and_metadatar  )r;  r  r   r_  r  r`  r  r   r  metaress              r   _get_result_and_meta_for_showz'DataFrame._get_result_and_meta_for_show  sO   

""2&**00288:==++002D#DII$<$<=Ccff%CEMM$$))$/151I1I1O1OPT1U.Av-."5)F4==..FF

1
.448>LFD t| D++CC

$IC !WFt|r   c                     | j                   ||fi |\  }}t        |      }g g }|D ]9  }	|	j                  }
j                  t        |
             |j                  |
       ; g }|D ]	  }g }t	        |      D ]i  \  }}|t        |      j                  d      ndg}|D ]/  }t        t        |      |         |<   t        |         |<   1 |j                  |       k t        d |D              }g }t        |      D ]Z  }g }t        t        |            D ].  }t        ||         |kD  r||   |   nd}|j                  |       0 |j                  |       \ |j                  |        D cg c]  }|dz   	 c}t              |z   dz   }d|z  dz   }dt        t
           d	t
        ffd
| |      z   |z   |rdj                  fd|D              z   |z   S  g       z   |z   S c c}w )N
NULLc              3   2   K   | ]  }t        |        y wrC  )r  )r  lis     r   r  z)DataFrame._show_string.<locals>.<genexpr>  s     5SW5   r   ru  r   -rowr   c                 6   g }| rbt        |       D ]R  \  }}t        |      kD  r|d dz
   dz   j                  |d      }n|j                  |d      }|j                  |       T nD cg c]  }d|z  	 }}ddj	                  d |D               dS c c}w )Nr   ... |c              3       K   | ]  }|  y wrC  rQ  )r  toks     r   r  z@DataFrame._show_string.<locals>.row_to_string.<locals>.<genexpr>1  s     66s   |
)r  r  ljustr   r  )r  tokenssegmentsize	formatted	col_widthr_  s        r   row_to_stringz-DataFrame._show_string.<locals>.row_to_string%  s    F%(i%8 -MGT7|i/%,_y1}%=%E$L$LTSV$W	$+MM$$<	MM),- 2;;#*;;sxx6v667s;; <s   ,Bc              3   .   K   | ]  } |        y wrC  rQ  )r  br  s     r   r  z)DataFrame._show_string.<locals>.<genexpr>8  s     6A}Q'6s   )r|  r  r   r   r  r  splitr   r   rF  r  r  r   r  )r;  r  r_  r   r_  r  rz  	col_countheaderr  r   bodyr  linesr%  r@  textsrU  
line_countr{  line_numbernew_linecolIndexwtotal_widthliner  r  s     `                       @@r   rc  zDataFrame._show_string  sZ    :t99!YQ&Q I		 	 E::DSY'MM$	 
  	CE!# $1./mAT*& @A#&s1vy|#<IaL#&y)A,#?IaL@ U#$ 5u55JC$Z0 	% %c%j 1 'H uX/+= h4 
 OOA&' 

8$	% KK-	2 %..qQU.	)ny014[ 4'	<tCy 	<S 	< F#$ ;?rww666	W
 	

 ERRTDU	W
 	
' /s   =G:num_rowstruncatevertical_spark_column_names_spark_session_tzc                   &  | j                   |dz   fddi|\  }}t        |      dk(  r|d   j                  dk(  rg }g }g }dt        dt        dt
        f&fd	&||D 	cg c]  }	|	j                   c}	n|}
g }|D ]  }g }t        |      D ]  \  }} &|| j                  j                  |   j                        }t        |t              r|rdnd}n|}d|cxk  rt        |      k  rn n|dk  r|d
| }n|d
|dz
   dz   }|j                  |        |j                  |        |
g|z   }t        |      dz
  |kD  }|d
|dz    }g }t        |      }d}|s@|g|z  }|D ]0  }t        |      D ]   \  }}t        ||   t        |            ||<   " 2 |D cg c]K  }t        |      D cg c]2  \  }}|dkD  r|j                  ||         n|j!                  ||         4 c}}M }}}}ddj#                  d |D              z   dz   }|j                  |       |j                  ddj#                  |d         z   dz          |j                  |       |dd
 D ](  }|j                  ddj#                  |      z   dz          * |j                  |       n|d   }|dd
 }t        |t        d |D                    }t        |      dk(  r|nt        |t        d |D                    } t        |      D ]  \  }}d| j!                  || z   dz   d      }!|j                  |!dz          t        |      D ]B  \  }"}||"   j!                  |      }#|j!                  |       }$|j                  d|# d|$ d       D  |r#t        |dd
       dk(  r|j                  d       n#|r!|dk(  rdnd}%|j                  d | d|% d       d!j#                  |      S c c}	w c c}}w c c}}}w )"z7Spark's show() logic - translated from scala to python.r   r   Fr   z""cellr  r   c                    dt         j                   dt        fd}| d}nt        | t              r| rdnd}nt        | t              st        | t
              r/ddj                  | D cg c]  }t        |d	       c}       d
}nt        | t         j                         r7t        |t              r'|j                  t        j                  k(  r
 ||       }nKt        | t         j                         r\t        |t              rLr@| j                  4| j                  t                    j                  d       } ||      }n ||       }nt        | t               rTt        |t"              rDddj                  | D cg c]!  } ||j$                  xs
 t'                     # c}      z   d
z   }nqt        | t(              rt        |t*              rddj                  t-        | j/                               D cg c]F  \  }} ||j0                  xs
 t'                      d ||j2                  xs
 t'                      H c}}      z   dz   }nt        | t(              rlt        |t4              r\ddj                  |j6                  D cg c]/  } | |j8                     |j:                  xs
 t'                      1 c}      z   dz   }nSt        | t<              r`t        |t>              rP| t=        d      k(  rd}n!| t=        d      k(  rd}nt        |       j                  dd      j                  dd      }nt        | t              rTt        |t@              rDtC        |dt@        jD                        }	tC        |dt@        jF                        }
tI        | |	|
      }nt        | t        t         jJ                  f      rTt        |tL              rDtC        |dtL        jN                        }	tC        |dtL        jP                        }
tS        | |	|
      }nt        |       }|j                  dd      S c c}w c c}w c c}}w c c}w )Ndtr   c                    | j                   dk(  rU| j                  dd| j                  dd| j                  dd| j                  dd| j
                  dd| j                  dS | j                  dd| j                  dd| j                  dd| j                  dd| j
                  dd| j                  dd| j                   d}|j                  d	      j                  d      S )
Nr   04dr  02dr  :r  06d0)microsecondyearmonthdayhourminutesecondrstrip)r  base_formats     r   format_timestamp_sparkzQDataFrame._show_string_spark.<locals>.cell_to_str.<locals>.format_timestamp_sparkR  s   >>Q& ggc]!BHHS>266#,aPS}TUVXV_V_`cUddefhfofopsetuu%'WWSM288C."&&QrwwWZm[\]_]f]fgj\kklmomvmvwzl{{|}  ~L  ~L  MP  }Q  #RK&--c299#>>r   r  truefalse[r  02X])tzinfoz, {z -> }infInfinityz-infz	-Infinityze+Eze-zE-start_field	end_fieldr~  z\n)*datetimer  r   r%  bytes	bytearrayr  rE  r   tzr   NTZr  
astimezoner   r6  r  r   element_typer   dictr   sortedr!  key_type
value_typer   rx  r   r  floatr   r   getattrYEARMONTHrm   	timedeltar   DAYSECONDrl   )r  r  r  r{  r  converted_dtr@  rV  r  r  r  r  cell_to_strs              r   r  z1DataFrame._show_string_spark.<locals>.cell_to_strQ  s   ?8+<+< ? ? |D$' $f'D%(JtY,G#((d#CF1e$4#CDEQG4!2!23x7KK#4#8#88,T2D("3"34-: %)@#'??8<M3N#O#W#W# $X $L 1>C06CD$'Jx,Kii &* ! (8+@+@+PJLQ   D$'Jx,Iii )/tzz|(< $1  +1h.?.?.O:<PQQUVabcemexex  fI  }G  }I  WJ  VK  L   D$'Jx,Lii *2 %  +4

+;U^^=[z|\]^   D%(Z/-R5<'$CU6]*%Cd)++D#6>>tTJCD#&:h@U+V%m-B-G-G $Hk;P;V;VW	<+y D3(:(:";<-B &h?R?V?VW#Hk;N;U;UV	:4iX$i;;tU++S $D*s   *P7
 &P<
<AQ4Q
N   r   r   r  +c              3   &   K   | ]	  }d |z    yw)r  NrQ  )r  widths     r   r  z/DataFrame._show_string_spark.<locals>.<genexpr>  s      Eu Es   z+
r  r  c              3   2   K   | ]  }t        |        y wrC  r   )r  r   s     r   r  z/DataFrame._show_string_spark.<locals>.<genexpr>  s     &W4'8'>&Wr  c              3   @   K   | ]  }|D ]  }t        |         y wrC  r  )r  r  r  s      r   r  z/DataFrame._show_string_spark.<locals>.<genexpr>   s$     UCQTU)$/U/Us   z-RECORD    r  r~  r  z | z 
z(0 rows)r  rowszonly showing top r   )r|  r  r   r   r   r  r  r  rx  r  r   r%  r   r   r   rjustr  r  )'r;  r  r  r  r  r  r_  r  rz  r  r  res_rowsres_rowprocessed_rowr%  res_cell	str_valuetruncate_lengthtmp_rowshas_more_datar  sbnum_colsminimum_col_width
col_widthsr  r  padded_rowssepfield_names	data_rowsfield_name_col_widthdata_col_width
row_headerj
field_namedata
row_stringr  s'        `                                @r   _show_string_sparkzDataFrame._show_string_spark<  s    :t99qL
$)
-3

 t9>d1glld2DF"$Y	,c Y	,X Y	,# Y	,| #* &**EUZZ*$ 	  	+GM(1 08'$++2D2DQ2G2P2PQ	h-,4b!O&.O7Y7&*$-.>$?	$-.C!0C$Du$L	$$Y/0 OOM*!	+& 8h&H)H4(Q,'t9+,x7J  P(~ PGAt$'
17H7N$OJqMPP  
 
  $-S>  4 $a< 

:a=1!ZZ
167
K 
  E* EEEMC IIcNIIcCHH[^44u<=IIcN #12 7		#-567IIcN q'KQRI $'!3&W;&W#W$ 
 y>Q& "%U9UU  $I. 	<3's^11(>9A=s
 		*t+,(~ <GAt!,Q!5!56J!KJ::n5DII*Sc:;<	< DH*IIj!"*a-VJII)(1ZLCDwwr{Q +Z
s   #OO$07O'O$O$	func_namec                     t        |t              r|S t        |t        t        f      r#t	        d |D              rdj                  |      S t        d| d      )zOHelper function for views to create correct name. Raises TypeError invalid namec              3   <   K   | ]  }t        |t                y wrC  r  r  s     r   r  z2DataFrame._format_name_for_view.<locals>.<genexpr>  s     2T!:a3E2Tr  r  zThe input name of z'() must be a str or list/tuple of strs.)r   r  r  r  r  r  r  )r;  r  r   s      r   _format_name_for_viewzDataFrame._format_name_for_view  sV     dC KdT5M*s2Tt2T/T88D>! +RS
 	
r   )commentrL  copy_grantsr   r  r  c                t   | j                  d|      }d}|r| j                  j                  j                         }t	        |j
                  j                  |      }d|_        ||_        | j                  |j                         t        |j                  |       |||j                  _        |t        |j                   |       | j#                  |t%               ||t'        |xs | j(                  | j                  j*                  t,        | j                  j.                  j1                  d            |      S )a(  Creates a view that captures the computation expressed by this DataFrame.

        For ``name``, you can include the database and schema name (i.e. specify a
        fully-qualified name). If no database name or schema name are specified, the
        view will be created in the current database or schema.

        ``name`` must be a valid `Snowflake identifier <https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html>`_.

        Args:
            name: The name of the view to create or replace. Can be a list of strings
                that specifies the database name, schema name, and view name.
            comment: Adds a comment for the created view. See
                `COMMENT <https://docs.snowflake.com/en/sql-reference/sql/comment>`_.
            copy_grants: A boolean value that specifies whether to retain the access permissions from the original view
                when a new view is created. Defaults to False.
            statement_params: Dictionary of statement level parameters to be set while executing this action.
        create_or_replace_viewNFrg  rh  r  r  r  r  )r  r   rW  rX  r\   rY   dataframe_create_or_replace_viewis_tempr  rA  r   r^   r   r  rI  rR   rL  _do_create_or_replace_viewrJ   rw   r  rk  rr   rl  rm  	r;  r   r  rL  r  r   formatted_namer`  rY  s	            r   r  z DataFrame.create_or_replace_view&  s   : 334LdS ==++002D$TYY%O%OQUVD DL*Ddgg&DIIt,"%,"+,T-B-BDTU..O#N :D$:$:''#'==#5#5#9#95$	  / 
 	
r   	overwrite)r  r  refresh_mode
initializeclustering_keysis_transientdata_retention_timemax_data_extension_timerL  r;  r  r   	warehouselagr  r  r  r  r  r  c                   | j                  d|      }t        |t              st        d      t        |t              st        d      d}|rK| j                  j
                  j                         }t        |j                  j                  |      }| j                  |j                         t        |j                  |       ||_        ||_        |||j                   _        t%        |j&                  |       |||j(                  _        |||j*                  _        |+|D ]&  }t-        |j.                  j1                         |       ( |	|_        |
|
|j4                  _        |||j6                  _        |t9        |j:                  |       ||_        ddlm } t        | j                  jB                  |      r7| j                  jB                  jD                  rtF        jI                  d       g S tK        |jM                         tN        d      }| jQ                  |||||||||	|
|tS        || j                  jT                  tV        | j                  jX                  j[                  d	      
      ||      S )a  Creates a dynamic table that captures the computation expressed by this DataFrame.

        For ``name``, you can include the database and schema name (i.e. specify a
        fully-qualified name). If no database name or schema name are specified, the
        dynamic table will be created in the current database or schema.

        ``name`` must be a valid `Snowflake identifier <https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html>`_.

        Args:
            name: The name of the dynamic table to create or replace. Can be a list of strings
                that specifies the database name, schema name, and view name.
            warehouse: The name of the warehouse used to refresh the dynamic table.
            lag: specifies the target data freshness
            comment: Adds a comment for the created table. See
                `COMMENT <https://docs.snowflake.com/en/sql-reference/sql/comment>`_.
            mode: Specifies the behavior of create dynamic table. Allowed values are:
                - "overwrite" (default): Overwrite the table by dropping the old table.
                - "errorifexists": Throw and exception if the table already exists.
                - "ignore": Ignore the operation if table already exists.
            refresh_mode: Specifies the refresh mode of the dynamic table. The value can be "AUTO",
                "FULL", or "INCREMENTAL".
            initialize: Specifies the behavior of initial refresh. The value can be "ON_CREATE" or
                "ON_SCHEDULE".
            clustering_keys: Specifies one or more columns or column expressions in the table as the clustering key.
                See `Clustering Keys & Clustered Tables <https://docs.snowflake.com/en/user-guide/tables-clustering-keys>`_
                for more details.
            is_transient: A boolean value that specifies whether the dynamic table is transient.
            data_retention_time: Specifies the retention period for the dynamic table in days so that
                Time Travel actions can be performed on historical data in the dynamic table.
            max_data_extension_time: Specifies the maximum number of days for which Snowflake can extend
                the data retention period of the dynamic table to prevent streams on the dynamic table
                from becoming stale.
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            iceberg_config: A dictionary that can contain the following iceberg configuration values:

                - partition_by: specifies one or more partition expressions for the Iceberg table.
                  Can be a single Column, column name, SQL expression string, or a list of these.
                  Supports identity partitioning (column names) as well as partition transform functions
                  like bucket(), truncate(), year(), month(), day(), hour().
                - external_volume: specifies the identifier for the external volume where
                  the Iceberg table stores its metadata files and data in Parquet format.
                - catalog: specifies either Snowflake or a catalog integration to use for this table.
                - base_location: the base directory that snowflake can write iceberg metadata and files to.
                - target_file_size: specifies a target Parquet file size for the table.
                  Valid values: 'AUTO' (default), '16MB', '32MB', '64MB', '128MB'
                - catalog_sync: optionally sets the catalog integration configured for Polaris Catalog.
                - storage_serialization_policy: specifies the storage serialization policy for the table.
            copy_grants: A boolean value that specifies whether to retain the access permissions from the original view
                when a new view is created. Defaults to False.


        Note:
            See `understanding dynamic table refresh <https://docs.snowflake.com/en/user-guide/dynamic-tables-refresh>`_.
            for more details on refresh mode.
        create_or_replace_dynamic_tablezKThe warehouse input of create_or_replace_dynamic_table() can only be a str.zEThe lag input of create_or_replace_dynamic_table() can only be a str.Nr   r  z\create_or_replace_dynamic_table not supported in local testing mode, returning empty result.z`mode`rg  rh  )r   r  r  create_moder  r  r  r  r  r  r  r  r;  r  ).r  r   r  r  r   rW  rX  r\   rY  )dataframe_create_or_replace_dynamic_tablerA  r   r_   r   r  r  r  rI  r[   r  r  r  rU   r  r  r  r  r  rR   rL  r  r   r  r   r  r  errorr   r  r6   #_do_create_or_replace_dynamic_tablerw   rk  rr   rl  rm  )r;  r   r  r  r  r  r  r  r  r  r  r  rL  r;  r  r   r  r`  rY  col_or_namer  r  s                         r   r  z)DataFrame.create_or_replace_dynamic_tableb  sT   Z 33-t
 )S)]  #s#W 
 ==++002D$		CCTD dgg&TYY-&DNDH"%,"499d+'*6!!'%(2%*#2 K?,,002K !-D".1D((.&25L,,2+,T-B-BDTU*DL t}}**,@A##CCMMn I!$**,0FQ77#%!+% 3$;N ''#'==#5#5#9#95$	 *#+ 8 
 	
r   c                t   | j                  d|      }d}|r| j                  j                  j                         }t	        |j
                  j                  |      }d|_        | j                  |j                         t        |j                  |       |||j                  _        |t        |j                  |       ||_        | j#                  |t%               ||t'        |xs | j(                  | j                  j*                  t,        | j                  j.                  j1                  d            |      S )a  Creates or replace a temporary view that returns the same results as this DataFrame.

        You can use the view in subsequent SQL queries and statements during the
        current session. The temporary view is only available in the session in which
        it is created.

        For ``name``, you can include the database and schema name (i.e. specify a
        fully-qualified name). If no database name or schema name are specified, the
        view will be created in the current database or schema.

        ``name`` must be a valid `Snowflake identifier <https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html>`_.

        Args:
            name: The name of the view to create or replace. Can be a list of strings
                that specifies the database name, schema name, and view name.
            comment: Adds a comment for the created view. See
                `COMMENT <https://docs.snowflake.com/en/sql-reference/sql/comment>`_.
            copy_grants: A boolean value that specifies whether to retain the access permissions from the original view
                when a new view is created. Defaults to False.
            statement_params: Dictionary of statement level parameters to be set while executing this action.
        create_or_replace_temp_viewNTrg  rh  r  )r  r   rW  rX  r\   rY  r  r  rA  r   r^   r   r  rI  rR   rL  r  r	  rI   rw   r  rk  rr   rl  rm  r
  s	            r   r  z%DataFrame.create_or_replace_temp_view  s   B 334QSWX ==++002D$TYY%O%OQUVDDLdgg&DIIt,"%,"+,T-B-BDTU*D..O#N :D$:$:''#'==#5#5#9#95$	  / 
 	
r   )r  rL  r   c                f   | j                  d|      }d}|r| j                  j                  j                         }t	        |j
                  j                  |      }d|_        | j                  |j                         t        |j                  |       |||j                  _        |t        |j                  |       | j!                  |t#               |dt%        |xs | j&                  | j                  j(                  t*        | j                  j,                  j/                  d            |      S )ak  Creates a temporary view that returns the same results as this DataFrame.
        If it already exists, an exception will be raised.

        You can use the view in subsequent SQL queries and statements during the
        current session. The temporary view is only available in the session in which
        it is created.

        For ``name``, you can include the database and schema name (i.e. specify a
        fully-qualified name). If no database name or schema name are specified, the
        view will be created in the current database or schema.

        ``name`` must be a valid `Snowflake identifier <https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html>`_.

        Args:
            name: The name of the view to create or replace. Can be a list of strings
                that specifies the database name, schema name, and view name.
            comment: Adds a comment for the created view. See
                `COMMENT <https://docs.snowflake.com/en/sql-reference/sql/comment>`_.
            statement_params: Dictionary of statement level parameters to be set while executing this action.
        create_or_temp_viewNTFrg  rh  )r  r6  r  r  )r  r   rW  rX  r\   rY  r  r  rA  r   r^   r   r  rI  rR   rL  r	  rI   rw   r  rk  rr   rl  rm  )r;  r   r  rL  r   r  r`  rY  s           r   create_temp_viewzDataFrame.create_temp_viewF  s   > 334I4P ==++002D$TYY%O%OQUVDDLdgg&DIIt,"%,"+,T-B-BDTU..ON :D$:$:''#'==#5#5#9#95$	  / 
 	
r   	view_name	view_typer6  c                     t        |       t        |||||| j                        } | j                  j                  j
                  | j                  j                  j                  |      fi |S )N)r   r"  r  r6  r  rG  )r   rF   r  r   r   rj  r  r  )	r;  r!  r"  r  r6  r  r  r_  cmds	            r   r	  z$DataFrame._do_create_or_replace_view  sm     	Y'#**
 +t}}""**MM##++C0
4:
 	
r   r  c                 L   t        |       |r$|D cg c]  }t        |d      j                   c}ng }t        |||||||||	|
|| j                  ||      } | j
                  j                  j                  | j
                  j                  j                  |      fi |S c c}w )Nz)DataFrame.create_or_replace_dynamic_table)r   r  r  r  r  r  r  clustering_exprsr  r  r  rG  r;  r  )
r   r   r  rE   r  r   r   rj  r  r  )r;  r   r  r  r  r  r  r  r  r  r  r  r;  r  r_  r   r&  r$  s                     r   r  z-DataFrame._do_create_or_replace_dynamic_table  s    " 	T"  +	  D+  	 (#%!-% 3$;**)#
" +t}}""**MM##++C0
4:
 	
5s   B!c                     y rC  rQ  r;  r  rL  rM  r   s        r   firstzDataFrame.first  ru  r   c                     y rC  rQ  r(  s        r   r)  zDataFrame.first  ru  r   c                X   t        |t              s|t        dt        |             i }|r| j                  j
                  j                         }t        |j                  j                  |      }|t        |j                  |       | j                  |j                         ||_        |dn||_        | j                  j
                  j!                  |       | j                  j
                  j#                  |      \  }|t$        <   |C| j'                  dd      }	t)        |	dd        |	j*                  d
||d|}
|s|
S |
r|
d	   S dS |d	k  r!t-        | d        | j*                  d
||d|S | j'                  |d      }	t)        |	dd        |	j*                  d
||d|S )a  Executes the query representing this DataFrame and returns the first ``n``
        rows of the results.

        Args:
            n: The number of rows to return.
            statement_params: Dictionary of statement level parameters to be set while executing this action.
            block: A bool value indicating whether this function will wait until the result is available.
                When it is ``False``, this function executes the underlying queries of the dataframe
                asynchronously and returns an :class:`AsyncJob`.

        Returns:
             A list of the first ``n`` :class:`Row` objects if ``n`` is not ``None``. If ``n`` is negative or
             larger than the number of rows in the result, returns all rows in the
             results. ``n`` is ``None``, it returns the first :class:`Row` of
             results, or ``None`` if it does not exist.
        Nz,Invalid type of argument passed to first(): r   Fr   zDataFrame.firstr|  )rL  rM  r   rQ  )r   r  r   r  r   rW  rX  r\   rY  dataframe_firstrR   rL  rA  r   rM  numr\  r]  r]   r  re   r1  rd   )r;  r  rL  rM  r   r_  r`  r  r   r   r  s              r   r)  zDataFrame.first  s   4 !S!amKDQRG9UVV ==++002D#DII$=$=tDC+,S-A-ACSTcff%CI9a!CGMM$$))$/ 261I1I1O1OPT1U.Av-.9A/B$5AF2R22 !1BHF  &6!90D0U012422 !1BH  A/B$5AF0200 !1BH r   fracc                    t         j                  ||       d}|r| j                  j                  j	                         }t        |j                  j                  |      }|r||j                  _	        |r||j                  _	        | j                  |j                         t        | j                  ||      }| j                  r| j!                  | j                  j"                  j%                  | j                  j"                  j'                  || j                  j"                        | j                  j"                        |      S | j!                  ||      S )ag  Samples rows based on either the number of rows to be returned or a
        percentage of rows to be returned.

        Args:
            frac: the percentage of rows to be sampled.
            n: the number of rows to sample in the range of 0 to 1,000,000 (inclusive).
        Returns:
            a :class:`DataFrame` containing the sample of rows.
        N)probability_fraction	row_countr  r  r  )r   _validate_sample_inputr   rW  rX  r\   rY  dataframe_sampler0  rI  r-  rA  r   rM   r  r  r  r  r	  r
  )r;  r.  r  r   r`  r  sample_plans          r   samplezDataFrame.sample)  s    " 	((q1==++002D#DII$>$>EC15((. !cff%TZZdaP!!??''??--11NN#dmm.E.E O  "]]44	 @   #   {d;;r   c                     | |t        d      | | dk  s| dkD  rt        d|  d      ||dk  rt        d| d      y y )	NzG'frac' and 'n' cannot both be None. One of those values must be definedg        g      ?z'frac' value z1 is out of range (0 <= probability_fraction <= 1)r   z
'n' value z must be greater than 0)r   )r.  r  s     r   r2  z DataFrame._validate_sample_inputT  sz    <AI6  tczv &C D  =QUz!,CDEE #=r   c                     | j                   S )z
        Returns a :class:`DataFrameNaFunctions` object that provides functions for
        handling missing values in the DataFrame.
        )r1  rD  s    r   nazDataFrame.nac       xxr   c                     | j                   S )z~
        Returns a :class:`DataFrameAIFunctions` object that provides AI-powered functions
        for the DataFrame.
        )r7  rD  s    r   aizDataFrame.aik  r9  r   c                     | j                   S )z
        Returns a :class:`snowflake.snowpark.Session` object that provides access to the session the current DataFrame is relying on.
        )r   rD  s    r   r  zDataFrame.sessions  s    
 }}r   )strings_include_math_statsr   c                   d}|r| j                   j                  j                         }t        |j                  j
                  |      }| j                  |j                         t        | \  }|j                  _
        |D ]0  }t        |j                  j                  j                         |       2 ||_        t        | }t!        |      dkD  r| j#                  |d      n| }|j$                  j&                  D 	ci c]9  }	t)        |	j*                  t,        t.        f      r|	j0                  |	j*                  ; }
}	t2        t4        t6        t8        t:        d}t!        |
      dk(  r| j                   j=                  t?        |jA                               dgd      }tC        |d| jD                  jF                  |jD                  jF                  	       |r|jH                  |_%        |S tM               5 }d}|jO                         D ]3  \  }}g }|
jO                         D ]  \  }}t)        |t,              r`|s|d
vr!|jQ                  tS         ||                   =|jQ                  tS         |tU        d                  jW                  |             v|jQ                   ||               | jY                  |d      j[                  t?        |
jA                               d      j"                  tU        |      jW                  d      g|
jA                         ddi}|r|j]                  |d      n|}6 	 ddd       tC        d| jD                  jF                  |jD                  jF                  j_                         ja                                |r|jH                  |_%        |S c c}	w # 1 sw Y   wxY w)aE  
        Computes basic statistics for numeric columns, which includes
        ``count``, ``mean``, ``stddev``, ``min``, and ``max``. If no columns
        are provided, this function computes statistics for all numerical or
        string columns. Non-numeric and non-string columns will be ignored
        when calling this method.

        Example::
            >>> df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
            >>> desc_result = df.describe().sort("SUMMARY").show()
            -------------------------------------------------------
            |"SUMMARY"  |"A"                 |"B"                 |
            -------------------------------------------------------
            |count      |2.0                 |2.0                 |
            |max        |3.0                 |4.0                 |
            |mean       |2.0                 |3.0                 |
            |min        |1.0                 |2.0                 |
            |stddev     |1.4142135623730951  |1.4142135623730951  |
            -------------------------------------------------------
            <BLANKLINE>

        Args:
            cols: The names of columns whose basic statistics are computed.
            strings_include_math_stats: Whether StringType columns should have mean and stddev stats included.
        Nr   Fr   )r   r   r   r   r   summary)r  r   zDataFrame.describe)precallssubcalls)r   r   r   r@  rA  r  )1r   rW  rX  r\   rY  dataframe_describerA  r   r   r  r  rU   r  r  r=  r   r  r   r  rx  r   r  r   r   r   r   r   r   min_max_create_dataframer  r  re   r  	api_callsr  r  rc   r!  r   r   r   r  rZ  r  r  r|  r&  )r;  r=  r   r  r`  rY  rb  r   r   r  numerical_string_col_type_dictstat_func_dictr'  res_dfr   r  agg_colsrU  agg_stat_dfs                      r   describezDataFrame.describez  sT   @ ==++002D$TYY%A%A4HDdgg&+QSW+X(Hdii( U;DIINN<N<N<PRSTU.HD+,d336t9q=T[[[/d
 ))*
%..:|*DE JJ&*
& *
 
 -.!3//^((*+YK5 0 B
  $--++	 !XX
I $% 	)AF,224 
d:@@B 1DAq "!Z05 F : %OOGDG,<=$OOGDTO,D,H,H,KL Q01DHHXH7U4 > C C EFRWUXVD	i0 8<<>	
 #(   LLL>$ 1	@ 	 ZZ))\\++0023FFH	
 !XXFN[*
D	 	s   =>N/3E
N44N=col_or_mapper
new_columnc           	         d}d}|r| j                   j                  j                         }t        |j                  j
                  |      }| j                  |j                         |||j                  _	        t        |j                  |       || j                  |||d      S t        |t              s!t        dt!        |      j"                         t%        |      dk(  rt        d      t'        |j)                          \  }}|D ]7  }t        |t*              rt-        d| dt!        |      j"                   d	       | j/                  |      }	|	D 
cg c]  }
t1        |
       }}
t'        ||      D ci c]  \  }}||
 }}}t3        || j4                        }| j6                  r| j                   j8                  j;                  | j                   j8                  j=                  || j                   j8                  
      | j                   j8                        }| j?                  ||      S | j?                  ||      S c c}
w c c}}w )af  
        Returns a DataFrame with the specified column ``col_or_mapper`` renamed as ``new_column``. If ``col_or_mapper``
        is a dictionary, multiple columns will be renamed in the returned DataFrame.

        Example::
            >>> # This example renames the column `A` as `NEW_A` in the DataFrame.
            >>> df = session.sql("select 1 as A, 2 as B")
            >>> df_renamed = df.rename(col("A"), "NEW_A")
            >>> df_renamed.show()
            -----------------
            |"NEW_A"  |"B"  |
            -----------------
            |1        |2    |
            -----------------
            <BLANKLINE>
            >>> # This example renames the column `A` as `NEW_A` and `B` as `NEW_B` in the DataFrame.
            >>> df = session.sql("select 1 as A, 2 as B")
            >>> df_renamed = df.rename({col("A"): "NEW_A", "B":"NEW_B"})
            >>> df_renamed.show()
            ---------------------
            |"NEW_A"  |"NEW_B"  |
            ---------------------
            |1        |2        |
            ---------------------
            <BLANKLINE>

        Args:
            col_or_mapper: The old column instance or column name to be renamed, or the dictionary mapping from column instances or columns names to their new names (string)
            new_column: The new column name (string value), if a single old column is given
        NFr  zVIf new_column parameter is not specified, col_or_mapper needs to be of type dict, not r   z(col_or_mapper dictionary cannot be emptyz'You cannot rename a column using value z	 of type z as it is not a string.r  r  r  ) r   rW  rX  r\   rY  dataframe_renamerA  r   rO  rI  rS   rN  with_column_renamedr   r  r   r  r  r  r  r!  r  r  *_get_column_names_from_column_or_name_listr   rL   r  r  r  r	  r
  r  )r;  rN  rO  r   r  rY  column_or_name_listrename_listr   r  r  normalized_name_listrV  r@  
rename_maprename_planr  s                    r   renamezDataFrame.rename  s@   N 	00557I$Y^^%D%DiPDdgg&%(2%&t'9'9=I!++zY% ,   -.M*3346 
 }"GHH+.0C0C0E+F([ 	DdC(=dV9TRVZM`M`La b' ( 	 ??@ST7<=!
1=='*+?'MNtq!adN
NZ4!!--11IImm--JJ$--*A*A K  00	 J K ??;)?DD{i@@  >Ns   I	=Iexistingnewc                    t        |      }t        |t              rt        |      }nzt        |t              rRt        |j                  t
              ryddlm} t        | j                  j                  |      r| j                   |j                  }| j                  j                  j                  |j                  |j                        }nt        |j                  t               re|j                  j"                  rO| j                  j$                  j                  |j                  j                  |j                  j                        }nWt        |j                  t&              r|j                  j                  }n&t)        d| d      t+        t        |       d      ddlm}	 |	j0                  r<| j2                  D 
cg c]&  }
t        |
j                        t        |      k(  s%|
( }}
nE| j2                  D 
cg c]0  }
|
j                  j5                         |j5                         k(  s/|
2 }}
|st)        d| d      t7        |      d	kD  r t9        j:                  ||t7        |            | j2                  D cg c]6  }||j                  k(  rt        |      j=                  |      n
t        |      8 }}|~|r|| j                  j>                  jA                         }tC        |jD                  jF                  |      }| jI                  |jJ                         ||_&        tO        |jP                  |       | jS                  ||d
      S c c}
w c c}
w c c}w )a  Returns a DataFrame with the specified column ``existing`` renamed as ``new``.

        Example::

            >>> # This example renames the column `A` as `NEW_A` in the DataFrame.
            >>> df = session.sql("select 1 as A, 2 as B")
            >>> df_renamed = df.with_column_renamed(col("A"), "NEW_A")
            >>> df_renamed.show()
            -----------------
            |"NEW_A"  |"B"  |
            -----------------
            |1        |2    |
            -----------------
            <BLANKLINE>

        Args:
            existing: The old column instance or column name to be renamed.
            new: The new column name.
        r   r  zUnable to rename column z because it doesn't exist.( must be a column name or Column object.)r8  zUnable to rename column "z" because it doesn't exist.r   Fr  )*r   r   r  r   r  r'   r   r  r   r   r  r  r  rm  r!  r   r,   r"  r  r*   r   r  snowflake.snowparkr8  r6  r   r   r  ra   .DF_CANNOT_RENAME_COLUMN_BECAUSE_MULTIPLE_EXISTr  rW  rX  r\   rY  dataframe_with_column_renamedrA  r   new_namerU   r   r   )r;  rZ  r[  r  r   new_quoted_nameold_namer  attr8  r  to_be_renamednew_columnsrY  s                 r   rR  zDataFrame.with_column_renamedN  s   8 %S/h$!(+H&)(..	:Tdmm113GHKK**::3377SXXN8//1DE((11::JJNN((--x/C/C/H/H H00/B#//44 .xj8RS  s8}o-UVWW.77<<:aff+=HAU+UM 
  <<166<<>X^^=M+MM  +H:5QR  !#1``/3}+= 
 ||
 19CHH0DF3KOOO,&QT+U
 
 00557I$<<iD dgg&DM7(K{{;)u{MM;
s   &M*M?0M0M;Mr   c                   t        | j                  |       5  ddlm} ddd       | j                  j                  dt        t        j                         d      }d}|r| j                  j                  j                         }t        |j                  j                  |      }| j                  |j                         |t!        |j"                  |       t%        ||j                  j                  j&                  j(                         t+        | j                  j,                        r9| j.                  }d| _        | j0                  j3                  |dd       || _        n| j5                  t7        |gdt8        j:                  | j<                  t>        j@                  d	            }i |xs | jB                  xs i d
|i}	| j                  j,                  jE                  |j<                  tG        |	| j                  jH                  tJ        | j                  jL                  jO                  d                   tP        jR                  jT                  jW                  || j                  dd      }
d|
_,        |r|jZ                  |
_        | j                  j\                  j^                  | j<                  j`                  | j                  j\                  j^                  jb                  v r| j                  j\                  j^                  jb                  | j<                  j`                     }|je                         | j                  j\                  j^                  jb                  |
j<                  j`                  <   |
S # 1 sw Y   exY w)a^  Caches the content of this DataFrame to create a new cached Table DataFrame.

        All subsequent operations on the returned cached DataFrame are performed on the cached data
        and have no effect on the original DataFrame.

        You can use :meth:`Table.drop_table` or the ``with`` statement to clean up the cached result when it's not needed.
        Refer to the example code below.

        Note:
            An error will be thrown if a cached result is cleaned up and it's used again,
            or any other DataFrames derived from the cached result are used again.

        Examples::
            >>> create_result = session.sql("create temp table RESULT (NUM int)").collect()
            >>> insert_result = session.sql("insert into RESULT values(1),(2)").collect()

            >>> df = session.table("RESULT")
            >>> df.collect()
            [Row(NUM=1), Row(NUM=2)]

            >>> # Run cache_result and then insert into the original table to see
            >>> # that the cached result is not affected
            >>> df1 = df.cache_result()
            >>> insert_again_result = session.sql("insert into RESULT values (3)").collect()
            >>> df1.collect()
            [Row(NUM=1), Row(NUM=2)]
            >>> df.collect()
            [Row(NUM=1), Row(NUM=2), Row(NUM=3)]

            >>> # You can run cache_result on a result that has already been cached
            >>> df2 = df1.cache_result()
            >>> df2.collect()
            [Row(NUM=1), Row(NUM=2)]

            >>> df3 = df.cache_result()
            >>> # Drop RESULT and see that the cached results still exist
            >>> drop_table_result = session.sql(f"drop table RESULT").collect()
            >>> df1.collect()
            [Row(NUM=1), Row(NUM=2)]
            >>> df2.collect()
            [Row(NUM=1), Row(NUM=2)]
            >>> df3.collect()
            [Row(NUM=1), Row(NUM=2), Row(NUM=3)]
            >>> # Clean up the cached result
            >>> df3.drop_table()
            >>> # use context manager to clean up the cached result after it's use.
            >>> with df2.cache_result() as df4:
            ...     df4.collect()
            [Row(NUM=1), Row(NUM=2)]

        Args:
            statement_params: Dictionary of statement level parameters to be set while executing this action.

        Returns:
             A :class:`Table` object that holds the cached result in a temporary table.
             All operations on this new DataFrame have no effect on the original.

        Note:
            A temporary table is created to store the cached result and a :class:`Table` object is returned.
            You can retrieve the table name by accessing :attr:`Table.table_name`. Note that this temporary
            Snowflake table

                - may be automatically removed when the Table object is no longer referenced if
                  :attr:`Session.auto_clean_up_temp_table_enabled` is set to ``True``.

                - will be dropped after the session is closed.

            To retain a persistent table, consider using :meth:`DataFrameWriter.save_as_table` to persist
            the cached result.
        r   r  Nr   TF)create_temp_tabler   temp)creation_sourcer  cache_result_temp_tablerg  rh  ro  )is_temp_table_for_cleanupr   )3rb   cache_resultr   r  r   $get_fully_qualified_name_if_possibler   rs   r  rW  rX  r\   rY  dataframe_cache_resultrA  r   rR   rL  r`   object_namer   r   r   r  r  r  r  r:   r9   ERROR_IF_EXISTSr  r;   CACHE_RESULTr  rj  rw   rk  rr   rl  rm  r  r]  tabler   r  r  dataframe_profiler_query_historyuuid_dataframe_queriesr|  )r;  rL  r   r  temp_table_namer`  rY  r  r   !statement_params_for_cache_result	cached_dforiginal_queriess               r   rm  zDataFrame.cache_result  s   \ ,D,=,=tD 	QP	Q --LL+N,@,@AB!D

 ==++002D$TYY%E%EtLDdgg&+,T-B-BDTU
 !A!A!M!M!R!R dmm))+?@\\FDLJJ$$45 %  "DL$$%,,JJ$7$D$D%	B1#Ct'='=C1)?1- MM''"R5MM++#'+}}'9'9'='=9(	# ( 
 &&,,22MM&*	 3 
	 #	 $I MM,,;;G

}}//>>QQR
 00??RRJJOO  !%%' MM,,;;NN$$ e	Q 	Qs   NNweightsseedc                   |st        d      d}|r| j                  j                  j                         }t	        |j
                  j                  |      }|D ]  }|j                  j                  |        |r||j                  _
        |rt        |j                  |       | j                  |j                         t        |      dk(  r| gS |D ]  }|dk  s	t        d       t!        t"        j$                        }t'               5 }	| j                  j(                  j+                  d      r|d}
|&t-        j.                  |      }|j-                         }nt-        j,                         }| j1                  |t3        t5        dt7        |d	
      d	
      d	
      t8        z  d	
      }nDd}
| j1                  |t3        t;        |d	
      d	
      t8        z  d	
      j=                  |d	      }t?        |      }dgtA        tC        jD                  |D cg c]  }||z  	 c}            D cg c]  }tG        |t8        z         c}z   }tI        |dd |dd       }|D cg c]D  \  }}|jK                  tM        |      |k\  tM        |      |k  z  d	
      jO                  |d	
      F }}}ddd       D ]K  }tQ        |
| jR                  jT                  |jR                  jT                  dd 	jW                                M |rtY        |      D ]  \  }}| j                  j                  j                         }t	        |j
                  jZ                  ||      }|j\                  |_/        |j`                  jc                         }te        ||       |j\                  |_3         |S c c}w c c}w c c}}w # 1 sw Y   xY w)a  
        Randomly splits the current DataFrame into separate DataFrames,
        using the specified weights.

        Args:
            weights: Weights to use for splitting the DataFrame. If the
                weights don't add up to 1, the weights will be normalized.
                Every number in ``weights`` has to be positive. If only one
                weight is specified, the returned DataFrame list only includes
                the current DataFrame.
            seed: The seed used by the randomness generator for splitting.

                .. caution:: By default, reusing a seed value doesn't guarantee reproducible results.
            statement_params: Dictionary of statement level parameters to be set while executing this action.

        Example::

            >>> df = session.range(10000)
            >>> weights = [0.1, 0.2, 0.3]
            >>> df_parts = df.random_split(weights)
            >>> len(df_parts) == len(weights)
            True

        Note:
            1. When multiple weights are specified, the current DataFrame will
            be cached before being split.

            2. When a weight or a normailized weight is less than ``1e-6``, the
            corresponding split dataframe will be empty.

            3. To get reproducible seeding behavior, configure the DataFrame's :py:class:`Session`
            to use simplified querying:

            .. code-block::

                >>> session.conf.set("use_simplified_query_generation", True)
        zGweights can't be None or empty and its values must be positive numbers.Nr   r   z weights must be positive numbersr  zDataFrame.random_split[hash]r  Fr   z$DataFrame.random_split[cache_result]r^  r  rB  )
target_idx)4r   r   rW  rX  r\   rY  dataframe_random_splitr|  r   r}  rI  rR   rL  rA  r   r  r   rs   COLUMNrc   rl  rm  r   Randomr  abs_hash_r   _ONE_MILLIONrandom_rm  r  r  	itertools
accumulater  r  r  r   r2  re   r  rG  r&  r  object_get_itemr  objr  r  rS   r  )r;  r|  r}  rL  r   r`  r  r  temp_column_namer'  api_namelocal_randompython_seedintermediate_dfsum_weightsnormalized_cum_weightsnormalized_boundarieslower_boundupper_boundres_dfsr   r%  obj_stmtobj_exprr  s                            r   random_splitzDataFrame.random_splitJ  s   ^ Y 
 ==++002D#DII$D$DdKC &""1%&!%,S-A-ACSTcff%w<16M I6$%GHHI  ;>;P;PQ') ,-E==%%))*KL=H''-}}T':&2&9&9&;&,mmo&*&6&6(! #S%FRW ',	 '' #( '7 
'O  FH&*&6&6(WTU;uM&'"'	 '7 '
 #l4DPUlV $ "'l*+!!,,w-O!a+o-OP0 L()0 *& ),*3B/1G1K)% 5J 1[ $))-.+=/0;>@"' *  d+ud=	> K,\  #!ZZ11XX//4#;#N#N#P &w/ .EAr#}}77<<>H  1 55xA H $(88HL"--++-C.sA6!)BJ. NU .P0K, ,s8   	DN;N+ N;-N0N;"A	N5+N;+N;;Oz1.36.0output_fileverbosec                    | j                   j                  j                  t        j	                  d       y| j                   j                  j                  }| j
                  j                  |j                  vr.t        j	                  d| j
                  j                   d       y	 t        | j                   |      }| j
                  j                  |j                  v r2|j                  |j                  | j
                  j                            |j                  | j
                  j                     D ]  }|j                  ||        	 |j                          y# j                          w xY w)z
        Get the execution profile of the dataframe. Output is written to the file specified by output_file if provided,
        otherwise it is written to the console.
        Nz[No query history found. Enable dataframe profiler using session.dataframe_profiler.enable()z.No queries found for dataframe with plan uuid zK. Make sure to evaluate the dataframe before calling get_execution_profile.)r   rt  ru  r  r   r  rv  dataframe_queriesr   _describe_queriesprint_describe_queriesprofile_queryclose)r;  r  r  query_historyprofilerquery_ids         r   get_execution_profilezDataFrame.get_execution_profile  s!    ==++::BOOm 88GG::??-"A"AAOO@@Q  R]  ^ 		$T]]K@Hzz-"A"AA//!33DJJOOD *;;DJJOOL :&&x9: NNHNNs   (B$E E0c                 (   | j                   j                  }|t        j                     D cg c]  }|j                  j                          c}|t        j                     D cg c]  }|j                  j                          c}dS c c}w c c}w )a  
        Returns a ``dict`` that contains a list of queries that will be executed to
        evaluate this DataFrame with the key `queries`, and a list of post-execution
        actions (e.g., queries to clean up temporary objects) with the key `post_actions`.
        )r  post_actions)r  execution_queriesr4   QUERIESr  r   POST_ACTIONS)r;  plan_queriesr  s      r   r  zDataFrame.queries  s}     zz33 0<M<Q<Q/R&+		! 0<M<V<V/W&+		!	
 	
s   !B
$!Bc                 6    t        | j                                y)a^  
        Prints the list of queries that will be executed to evaluate this DataFrame.
        Prints the query execution plan if only one SELECT/DML/DDL statement will be executed.

        For more information about the query execution plan, see the
        `EXPLAIN <https://docs.snowflake.com/en/sql-reference/sql/explain.html>`_ command.
        N)rb  _explain_stringrD  s    r   explainzDataFrame.explain  s     	d""$%r   c                 J   | j                   j                  t        j                     }dj	                  d t        |      D              }d| }t        |      dk(  rD| j                  j                  |d   j                        }|r| d| }n|d   j                   d}| dS )	Nz
---
c              3   f   K   | ])  \  }}|d z    d|j                   j                           + yw)r   z.
N)r  r   )r  r%  r  s      r   r  z,DataFrame._explain_string.<locals>.<genexpr>  s4      (
/7q%qse3uyy()*(
s   /1z8---------DATAFRAME EXECUTION PLAN----------
Query List:
r   r   z
Logical Execution Plan:
z can't be explainedz-
--------------------------------------------)
r  r  r4   r  r  r  r  r   _explain_queryr  )r;  r  output_queriesmsg	exec_plans        r   r  zDataFrame._explain_string  s    zz33M4I4IJ" (
;D\;R(
 
  |!44\!_5H5HII8D%a,,--@ADEEr   c                 J   t        |      t        t        fd| j                              }t        t        d |            }t	        |      dk(  r|d   j                        S | j                  D cg c]  }|j                   }}t        j                  ||      c c}w )Nc                 4    t        | j                        k(  S rC  )r   r   )r  normalized_col_names    r   <lambda>z$DataFrame._resolve.<locals>.<lambda>-  s    Z		26II r   c                 $    t        | t               S rC  )r   r,   )r  s    r   r  z$DataFrame._resolve.<locals>.<lambda>4  s    Jt5H$I I r   r   r   )	r   r  r  r   r  	with_namer   ra   DF_CANNOT_RESOLVE_COLUMN_NAME)r;  r   r  r  all_colsr  s        @r   r  zDataFrame._resolve)  s    (2I4<<
 I4P
 t9>7$$%899.2ll;d		;H;1OO(  <s   5B c                     | j                   r | j                   j                  j                  S | j                  j                  S rC  )r  r}  
projectionr  outputrD  s    r   r   zDataFrame._output?  s=     %% ""00;;	
 ""	
r   c                 T    t        j                  | j                  j                        S )zmThe definition of the columns in this DataFrame (the "relational schema" for
        the DataFrame).
        )r   _from_attributesr  r:  rD  s    r   r  zDataFrame.schemaG  s    
 **4::+@+@AAr   c                     t        | j                  j                  | j                  j                        D cg c]  \  }}|t	        |j
                        f }}}|S c c}}w rC  )r  r  r  rx  rn   r  )r;  r   r  dtypess       r   r  zDataFrame.dtypesN  s[      #4;;#4#4dkk6H6HI
e )%..9:
 
 	
s   !Ac                 ~    t        | j                  ||      }| j                  |_        ||j                  |_        |S )ze
        :param proto.Bind ast_stmt: The AST statement protobuf corresponding to this value.
        r  )r   r   r  r  r  )r;  r  r  r   s       r   r  zDataFrame._with_planV  s:     t}}di@#55 "BJ	r   c                    g }|D ]  }t        |t              r|j                  |       &t        |t              rt        |j                  t
              rddlm} t        | j                  j                  |      r| j                   |j                  | j                  j                  j                  |j                  j                  |j                  j                               t        |t              rt        |j                  t               ru|j                  j"                  r_|j                  | j                  j$                  j                  |j                  j                  |j                  j                               t        |t              rAt        |j                  t&              r'|j                  |j                  j                         t)        t        |       d       |S )Nr   r  r]  )r   r  r   r   r  r'   r   r  r   r   r  r  r  rm  r!  r   r,   r"  r  r*   r  )r;  r  r  r   r  s        r   rS  z4DataFrame._get_column_names_from_column_or_name_listb  sT     	UA!S!QAv&:ammY+OTdmm113GHKKJJ,,00--q}}/A/A 1f%q}}.ABMM**JJCCGG**AMM,>,>
 Av&:amm_+UQ]]//03q6(*R STT7	U: r   calling_methodc                 l    dt         dt        ffd}t        | D cg c]
  } ||       }}|S c c}w )zVConvert a string or a Column, or a list of string and Column objects to expression(s).r   r   c                     t        | t              rt        |       j                  S t        | t              r| j                  S t	        dj                              )NzS{} only accepts str and Column objects, or a list containing str and Column objects)r   r  r   r  r  rE  )r   r  s    r   convertz1DataFrame._convert_cols_to_exprs.<locals>.convert  sM    #s#c{...C(&&&,f^&< r   )ri   r(   r   )r;  r  r  r  r   r  s    `    r   r=  z DataFrame._convert_cols_to_exprs  s@    		 		* 		 *G)MN#NN Os   1leveltranslate_columnstranslate_typesc                    dfd	|xs i }dj                  | j                  j                  D cg c]D  } |j                  |j                  |j                        |j
                  |j                        F c}      }d| S c c}w )Nc           	         |k\  ryd|z  }|dz   }|dt        |       dnd}g }|j                  j                  }d }rj                  ||      }t	        |t
              r d|j                  |      g}nt	        |t              r+ d|j                  |       d	|j                  |      g}ngt	        |t              rL|j                  D 	cg c]6  }	 t        |	j                  d
      |	j                  |	j                  |      8 }}	nt        |      }dj!                  | d|  d|xs | | g|D 
cg c]  }
|
s|
 
 c}
z         S c c}	w c c}
w )Nr   z |  r   z (nullable = r  element)depthkeyrI  T)	keep_caser~  z |-- z: )r  r  r  rm  r   r   r  r   r  r  r   rx  r   r   r  nullabler  )r   dtyper  r  r   nullable_strextra_linestype_str
translatedr  r  _format_datatyper  r  s              r   r  z2DataFrame._format_schema.<locals>._format_datatype  s    Ue^e^FAIE 5=4H-Ha0b  K//HJ,008D
 %+$Y0B0B%P E7+$UENN%H$We.>.>eL E:. "'  %"5::>	  u:99heD6J,B(+CL>R *5=dV9=> " >s   ;E=E
E
r~  zroot
Nr   )r  r  r:  rm  r   r  r  )r;  r  r  r  r  schema_tmp_strr  s    ` `  @r   _format_schemazDataFrame._format_schema  s    -	^ .3 !JJ11  !%))$))TYY?MMMM	
 '((s   A	B
c                 8    t        | j                  |             y)a  
        Prints the schema of a dataframe in tree format.

        Args:
            level: The level to print to for nested schemas.

        Examples::
            >>> df = session.create_dataframe([(1, "a"), (2, "b")], schema=["a", "b"])
            >>> df.print_schema()
            root
             |-- "A": LongType() (nullable = False)
             |-- "B": StringType() (nullable = False)
        N)rb  r  )r;  r  s     r   print_schemazDataFrame.print_schema  s     	d!!%()r   )NNFNT)r   r   )NNFT)T)NT)NNT)FT)r   NT)FFNrC  )NN)
   2   )NFFBOTHT)r  r  T)r  TFNN)TFN)	NNNNFNNNF)r   r  )NF)r   N)NNN)r  
__module____qualname____doc__r   r   r8   r%  r  Bindr<  r   rA  propertyr   rE  r   rG  r  r  setterr   r
   r  r   r   rS  r   rg   r   rd  r   re  r^  r1  rp  r   rt  r  r  r  r   r   r   r  r  rz   r  r  rf   r   r  r  r   r   r  r  r  r   ri   r   r   r  
selectExprr2  r1  rj   r  r;  r   rZ  rh   rc  rY  rn  rs  rw  r  rk   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r   r   r  r  rD  ra  rx   rr  rC   rq  r|  rc  r  r  r  r  r  r   rP   r	  r6   r  r)  taker  r5  staticmethodr2  r   r8  r   r;  r  rM  rY  rR  rm  r  r  r  r  r  r(   r*   r  r   r'   r   r   r  r  r  rS  r=  r  r  r  r*  r+  r,  r-  r.  r/  r0  r3  r4  r5  r6  createOrReplaceTempViewcreateOrReplaceViewcreateTempView	crossJoindropDuplicatesgroupByminussubtracttoDFtoPandasunionAllunionAllByNameunionByName
withColumnwithColumnRenamedtoLocalIteratorrandomSplitr  orderByprintSchemarQ  r   r   r   r   m  s%   fP  ;?&**.J"67J" {#J" 	J"
 EJJ'J" J" 
J" J"X?3 ?4 ? ,   6   #   ^^8Xc] 8t 8 8  6:!&#	 #4S>2	 		
 	 	 	 
c	  	  6:!&#	 #4S>2	 		
 	 	 	 
	  	  6:!&#1 #4S>21 	1
 1 1 1 
tCy("	#1  1f  6:!&#- #4S>2- 	-
 - - 
-  -d 6:&6&:&:!&#
 #4S>2
 	

 $
 
 
 
 
tCy("	#
B ":/" >B#+DcN#;	 "  6:# #4S>2 	
   
#    6:# #4S>2 	
   
    6:#?
 #4S>2?
 	?

 ?
 ?
 
x}h&	'?
  ?
B
)K 
)L
 		 :>"	 'tCH~6	 		
 	 38n	 	 
 
	  6: #4S>2 	
  sCx. 
    6:S #4S>2S 	S
 S sCx.S 
!8+	,S  Sj 		 :>"	 'tCH~6	 		
 	 38n	 f&&'	 
 
	  6: #4S>2 	
  sCx. 
    6:@
 #4S>2@
 	@

 @
 sCx.@
 
x*+X5	6@
  @
D (# 6:%
 #4S>2%
 	%

 %
 sCx.%
 
(	)%
   $%
N (# 6:&
 #4S>2&
 	&

 &
 sCx.&
 
x((2	3&
   $&
P CG1C#./1<@1	1  1f  6:'+!&OE#tCy.12O $s)$O 	O
 O 
"O  ObCc64&C D C  !c ! !" >C >D >F > >  +/WX, 112U<)::;<>
WX EJJ'WX WX 
WX  WXr  !%	6
c8C=()6
 ::6
 	6

 
6
  6
p JTXx<,)??@xMQx	x xt  !%	77 ::7 	7
 
7  7r  !%	MM ::M 	M
 
M  M^  IM	S\8L#99:S E$T%c	2B-C"CDES 	S
 
S  Sj '"6# 6$ 6  #6p  QfeL#$56S#XFGQ Q 
	Q  Qf )TX
<,)??@
MQ
	8
  )
> ) +/	E\8L#99:E EJJ'E 	E
 
9E  )EN ) 8
-679
8
 8
 
98
  )8
t )TX
<,)??@
MQ
	8
  )
> >B447;4	4 4l  !%	FsHSM)*F ::F 	F
 
F FP ) 15T
T
 (;')GGH
T
 "+.T
 T
 
9T
  )T
l  $LL L ,'	L
 L L 
L  L\   $RR R ::	R
 R 
R  Rh 0; 04 0; 0  0d 3{ 3t 3{ 3  3j  ',	:
:
  $:
 	:

 
:
  :
x  ',	<
<
  $<
 	<

 
<
  <
B &+ $GG G  $	G
 ::G 
GR .{ .t .{ .  .` -[ -T -[ -  -^  "	Q:Q: c]Q: 	Q: 
Q:  Q:f   $R:
 R:R: VR:
 R: R: R: 
R:  R:h  <@!	BD ,0BDBD U<#678BD c]	BD BD BD "&)BD BD 
BD  BDH 
 	^
Cc$556^
 &^
 	^

 !-^
 
^
  ^
@ 
 G
G
 	G

 G
 G
 
G
  G
^ ,0 $BKBK VXc]23BK 	BK BK BK "&)BK ::BK 
BKT ,0 $$G$G $G V$	$G $G $G "&)$G ::$G 
$GL  #(#DD 6,,-D
  D **D D 
D  DL  #( $N9N U6#4456N
  N ::N N 
N  N`  6: #4S>2 	
  
    6: #4S>2 	
  
    6:)5 #4S>2)5 	)5
 )5 
sH}	)5 )5V   < 
 *.!%)-26<@8<59)-w
#x},-w
 &	w

 #w
 "#w
 !#/w
 "(<"89w
 &d38n5w
 #4S>2w
 !w
 w
 w
 
cw
  w
r  !
 6:!! !
 #4S>2! ! 
!  !F KC
  #\
\
 sm\
 	\

 \
 \
 \
 
\
  \
~ PT#
5#
BG**#
	#
Js t 4 CGB
B
&)B
;?B
	B
L %))-!%ZZ c	"Z 	Z
 "#YZ Z 
Zx

$)#x}*<$=
	
 
 "&59!8
C#&'8
 #	8

 #4S>28
 8
 8
 
c8
  8
t  "&&*$(<@"-11559)-!#_
C#&'_
 	_

 _
 #_
 _
 sm_
 SM_
 "(<"89_
 _
 &c]_
 "*#_
 #4S>2_
 !_
  !_
" #_
$ 
c%_
  _
B 
 "&59!=
C#&'=
 #	=

 #4S>2=
 =
 =
 
c=
  =
~ 
 "&59:
C#&':
 #	:

 #4S>2:
 :
 
c:
  :
B !*.

 
 #	

 
 
 EJJ'
< "&&*$(<@"-115)-!/
/
 /
 	/

 ,/
 #/
 sm/
 SM/
 "(<"89/
 /
 &c]/
 "*#/
 !/
 /
b    6:C= #4S>2	
   
x}d3i'	(      6:C= #4S>2	
   
     ? 6:?C=? #4S>2	?
 ? ? 
x}d3i1	2? ?B D !%	'<uo'< C='< 	'<
 
'<  '<R FXe_ F F F (   (      $)	zS$s)^$z 	z
 
z zx  	SA\4/0SA SA 	SA  SAj 
 +/VNVN VN EJJ'	VN
 VN 
VN  VNp  6:	^ #4S>2^ 	^
 
^  ^@  #Q
 6:QeQ smQ
 #4S>2Q Q 
k	Q Qf (#AF#C=:>	 $: 
c49n- 
 
 &F F$ z?/J)K , 
i 
 
 B
 B B U38_-  
!,'!	!F \8L#99: 
j		.  $,0*.	B)}B) $D>B) "$	B)
 
B)H*(3- *4 *  E (>'M'MMN_!&&D
 
$
$C%..H1;;;Hy!&&F!&&F"**G :0%NI$NGEHDHH&NKJ+'OKHGKr   F)output_column_namesimportspackages	immutabler  
vectorizedmax_batch_size	dataframer  output_typesr  r  r   r  r  r  r  c                   t        |      dk(  rt        d      |(t        t        |            D 
cg c]
  }
d|
dz     }}
n"t        |      t        |      k7  rt        d      | j                  |xs1 t	        | j
                  j                         j                               }t        |d      }| j                  j                  D cg c]  }|j                   }}t        t        |            D 
cg c]  }
d|
 	 }}
t        t        ||      D cg c]  \  }}t        ||       c}}      }|r(t        |t              }t!        |      g}t!        ||      }t#        |      D 
cg c].  \  }
} d|
t              z   dz          j%                  |      0 }}
}|rd	  G fd
d      }nd  G fdd      }| j
                  j&                  j)                  |||||||	      } | j+                   | j-                  |            j.                  | S c c}
w c c}w c c}
w c c}}w c c}}
w )a  Returns a new DataFrame with the result of applying `func` to each of the
    rows of the specified DataFrame.

    This function registers a temporary `UDTF
    <https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-tabular-functions>`_ and
    returns a new DataFrame with the result of applying the `func` function to each row of the
    given DataFrame.

    Args:
        dataframe: The DataFrame instance.
        func: A function to be applied to every row of the DataFrame.
        output_types: A list of types for values generated by the ``func``
        output_column_names: A list of names to be assigned to the resulting columns.
        imports: A list of imports that are required to run the function. This argument is passed
            on when registering the UDTF.
        packages: A list of packages that are required to run the function. This argument is passed
            on when registering the UDTF.
        immutable: A flag to specify if the result of the func is deterministic for the same input.
        partition_by: Specify the partitioning column(s) for the UDTF.
        vectorized: A flag to determine if the UDTF process should be vectorized. See
            `vectorized UDTFs <https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-tabular-vectorized#udtfs-with-a-vectorized-process-method>`_.
        max_batch_size: The maximum number of rows per input pandas DataFrame when using vectorized option.

    Example 1::

        >>> from snowflake.snowpark.types import IntegerType
        >>> from snowflake.snowpark.dataframe import map
        >>> import pandas as pd
        >>> df = session.create_dataframe([[10, "a", 22], [20, "b", 22]], schema=["col1", "col2", "col3"])
        >>> new_df = map(df, lambda row: row[0] * row[0], output_types=[IntegerType()])
        >>> new_df.order_by("c_1").show()
        ---------
        |"C_1"  |
        ---------
        |100    |
        |400    |
        ---------
        <BLANKLINE>

    Example 2::

        >>> new_df = map(df, lambda row: (row[1], row[0] * 3), output_types=[StringType(), IntegerType()])
        >>> new_df.order_by("c_1").show()
        -----------------
        |"C_1"  |"C_2"  |
        -----------------
        |a      |30     |
        |b      |60     |
        -----------------
        <BLANKLINE>

    Example 3::

        >>> new_df = map(
        ...     df,
        ...     lambda row: (row[1], row[0] * 3),
        ...     output_types=[StringType(), IntegerType()],
        ...     output_column_names=['col1', 'col2']
        ... )
        >>> new_df.order_by("col1").show()
        -------------------
        |"COL1"  |"COL2"  |
        -------------------
        |a       |30      |
        |b       |60      |
        -------------------
        <BLANKLINE>

    Example 4::

        >>> new_df = map(df, lambda pdf: pdf['COL1']*3, output_types=[IntegerType()], vectorized=True, packages=["pandas"])
        >>> new_df.order_by("c_1").show()
        ---------
        |"C_1"  |
        ---------
        |30     |
        |60     |
        ---------
        <BLANKLINE>

    Example 5::

        >>> new_df = map(
        ...     df,
        ...     lambda pdf: (pdf['COL1']*3, pdf['COL2']+"b"),
        ...     output_types=[IntegerType(), StringType()],
        ...     output_column_names=['A', 'B'],
        ...     vectorized=True,
        ...     packages=["pandas"],
        ... )
        >>> new_df.order_by("A").show()
        -------------
        |"A"  |"B"  |
        -------------
        |30   |ab   |
        |60   |bb   |
        -------------
        <BLANKLINE>

    Example 6::

        >>> new_df = map(
        ...     df,
        ...     lambda pdf: ((pdf.shape[0],) * len(pdf), (pdf.shape[1],) * len(pdf)),
        ...     output_types=[IntegerType(), IntegerType()],
        ...     output_column_names=['rows', 'cols'],
        ...     partition_by="col3",
        ...     vectorized=True,
        ...     packages=["pandas"],
        ... )
        >>> new_df.show()
        -------------------
        |"ROWS"  |"COLS"  |
        -------------------
        |2       |3       |
        |2       |3       |
        -------------------
        <BLANKLINE>

    Note:
        1. The result of the `func` function must be either a scalar value or
        a tuple containing the same number of elements as specified in the
        `output_types` argument.

        2. When using the `vectorized` option, the `func` function must accept
        a pandas DataFrame as input and return either a pandas DataFrame, or a
        tuple of pandas Series/arrays.
    r   zoutput_types cannot be empty.c_r   zB'output_column_names' and 'output_types' must be of the same size.zsnowflake-snowpark-pythonr   $c                 `    t        | t        j                        st        | t              r| S | fS rC  )r   r   r   r  r  s    r   wrap_resultzmap.<locals>.wrap_result  s(    &&"2"23z&%7P9r   c                       e Zd Z fdZy)map.<locals>._MapFuncc                        |            S rC  rQ  )r;  pdfr  r  s     r   processzmap.<locals>._MapFunc.process  s    "49--r   Nr  r  r  r  )r  r  s   r   _MapFuncr    s    .r   r  c                 b    t        | t              rt        |       S t        | t              r| S | fS rC  )r   r   r  r
  s    r   r  zmap.<locals>.wrap_result  s-    &#&V}$FE*y r   c                       e Zd Z fdZy)r  c              7   D   K   t         }   ||              y wrC  r   )r;  argvinput_args_to_row
df_columnsr  r  s      r   r  zmap.<locals>._MapFunc.process  s(     $'$4!!$'8$'?"@AAs    Nr  )r  r  r  s   r   r  zmap.<locals>._MapFunc  s	    Br   )output_schemainput_typesinput_namesr  r   r  r  )r  )r  r   rF  r  r  r   get_packagesr  rp   r  rx  r  r   r  r   r   r   r  r   udtfregisterr  r~  r   )r  r  r  r  r  r   r  r  r  r  r%  r  r  udtf_output_colsr   type_r  r   output_columnsr  map_udtfr  r  s    `                   @@r   mapr#    sU   Z <A899"38\9J3KLaAaC5zLL	 	!S%6	6P
 	
 ""JK4	 2 2 ? ? A H H JKH/:UVH/8/?/?/F/FGe5>>GKG).s</@)ABA!A3BB367G3VWZS%S%	 WM  4HfE*;78+L:JK %%89	 Ax 	aC
O#a'()*00	
N  	
	. 	.	!	B 	B
 !!&&//#% 0 	H9((*"""=fn I M HBWs   H)H.9H3H8
13H>)r  r  r   r  r  r  c                    t        |t              rt        |      }|xs | j                  }fd}| j	                  |      j                  ||||||      S )a  Returns a new DataFrame with the result of applying `func` to each batch of data in
    the dataframe. Func is expected to be a python function that takes an iterator of pandas
    DataFrames as both input and provides them as output. Number of input and output DataFrame
    batches can be different.

    This function registers a temporary `UDTF
    <https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-tabular-functions>`_

    Args:
        dataframe: The DataFrame instance.
        func: A function to be applied to the batches of rows.
        schema: A StructType or type string that represents the expected output schema
            of the `func` parameter.
        partition_by: A column or list of columns that will be used to partition the data
            before passing it to the func.
        imports: A list of imports that are required to run the function. This argument is passed
            on when registering the UDTF.
        packages: A list of packages that are required to run the function. This argument is passed
            on when registering the UDTF.
        immutable: A flag to specify if the result of the func is deterministic for the same input.
        max_batch_size: The maximum number of rows per input pandas DataFrame when using vectorized option.

    Example 1::

        >>> from snowflake.snowpark.dataframe import map_in_pandas
        >>> df = session.create_dataframe([(1, 21), (2, 30), (3, 30)], schema=["ID", "AGE"])
        >>> def filter_func(iterator):
        ...     for pdf in iterator:
        ...         yield pdf[pdf.ID == 1]
        ...
        >>> map_in_pandas(df, filter_func, df.schema).show()
        ----------------
        |"ID"  |"AGE"  |
        ----------------
        |1     |21     |
        ----------------
        <BLANKLINE>

    Example 2::

        >>> def mean_age(iterator):
        ...     for pdf in iterator:
        ...         yield pdf.groupby("ID").mean().reset_index()
        ...
        >>> map_in_pandas(df, mean_age, "ID: bigint, AGE: double").order_by("ID").show()
        ----------------
        |"ID"  |"AGE"  |
        ----------------
        |1     |21.0   |
        |2     |30.0   |
        |3     |30.0   |
        ----------------
        <BLANKLINE>

    Example 3::

        >>> def double_age(iterator):
        ...     for pdf in iterator:
        ...         pdf["DOUBLE_AGE"] = pdf["AGE"] * 2
        ...         yield pdf
        ...
        >>> map_in_pandas(df, double_age, "ID: bigint, AGE: bigint, DOUBLE_AGE: bigint").order_by("ID").show()
        -------------------------------
        |"ID"  |"AGE"  |"DOUBLE_AGE"  |
        -------------------------------
        |1     |21     |42            |
        |2     |30     |60            |
        |3     |30     |60            |
        -------------------------------
        <BLANKLINE>

    Example 4::

        >>> def count(iterator):
        ...     for pdf in iterator:
        ...         rows, _ = pdf.shape
        ...         pdf["COUNT"] = rows
        ...         yield pdf
        >>> map_in_pandas(df, count, "ID: bigint, AGE: bigint, COUNT: bigint", partition_by="AGE", max_batch_size=2).order_by("ID").show()
        --------------------------
        |"ID"  |"AGE"  |"COUNT"  |
        --------------------------
        |1     |21     |1        |
        |2     |30     |2        |
        |3     |30     |2        |
        --------------------------
        <BLANKLINE>
    c                 >    dd l } |j                   | g            S r  )r   concat)rf  r   r  s     r   wrapped_funcz#map_in_pandas.<locals>.wrapped_funcY  s    v}}T5']++r   )r  r   r  r  )r   r  ro   r  rY  applyInPandas)	r  r  r  r  r  r   r  r  r'  s	    `       r   map_in_pandasr)    sf    F &#+F349#4#4L,
 l+99% :  r   (  r|  r  r  r   resyscollectionsr   	functoolsr   loggingr   typesr   typingr   r   r	   r
   r   r   r   r   r   r   r   zoneinfor   r^  r  snowflake.snowpark.contextr]  r8  4snowflake.snowpark._internal.proto.generated.ast_pb2	_internalr  	generatedast_pb2snowflake.connector.optionsr   r   r   6snowflake.snowpark._internal.analyzer.binary_plan_noder   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r  r$   r%   4snowflake.snowpark._internal.analyzer.analyzer_utilsr&   0snowflake.snowpark._internal.analyzer.expressionr'   r(   r)   r*   r+   r,   6snowflake.snowpark._internal.analyzer.select_statementr-   r.   r/   r0   r1   r2   r3   4snowflake.snowpark._internal.analyzer.snowflake_planr4   9snowflake.snowpark._internal.analyzer.snowflake_plan_noder5   r6   r7   r8   r9   r:   r;   r<   5snowflake.snowpark._internal.analyzer.sort_expressionr=   r>   r?   r@   4snowflake.snowpark._internal.analyzer.table_functionrA   rB   rC   rD   5snowflake.snowpark._internal.analyzer.unary_plan_noderE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   &snowflake.snowpark._internal.ast.utilsrQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   *snowflake.snowpark._internal.error_messagera   +snowflake.snowpark._internal.open_telemetryrb   &snowflake.snowpark._internal.telemetryrc   rd   re   rf   rg   rh   'snowflake.snowpark._internal.type_utilsri   rj   rk   rl   rm   rn   ro   &snowflake.snowpark._internal.udf_utilsrp   "snowflake.snowpark._internal.utilsrq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   .snowflake.snowpark._internal.data_source.utilsr   snowflake.snowpark.async_jobr   r   snowflake.snowpark.columnr   r   r   )snowflake.snowpark.dataframe_ai_functionsr   0snowflake.snowpark.dataframe_analytics_functionsr   )snowflake.snowpark.dataframe_na_functionsr   +snowflake.snowpark.dataframe_stat_functionsr   #snowflake.snowpark.dataframe_writerr   snowflake.snowpark.exceptionsr   (snowflake.snowpark._internal.debug_utilsr   snowflake.snowpark.functionsr   r  r   r   r   r  r   r   rE  r   r   rD  r  r   r   r   r   )snowflake.snowpark.mock._select_statementr   snowflake.snowpark.rowr   !snowflake.snowpark.table_functionr   r   r   r   r   snowflake.snowpark.typesr   r   r   r   r   r   r   r   r   r   r   r   r   version_infor   collections.abcr  modinrs  r   r  r  r  r   compiler   r  r   r   r   r   r  r   r%  r  r#  r)  mapInPandasrQ  r   r   <module>r\     st       	 
  %        , , D D D I I    & S    O	 	 	         $ W V    T       : D Q Q J X J N ? D B    J &    & v(
H
 2::/2D1EWOP KS KS K	3 	$s) 	S T#Y 33
3 SM3 SM	3
 3i3> ?&	?&	?& ?& C=	?& ?& ?& #$?&Daf af^M 04;?7;FJ$(WW
W z"W
 "$s),W d5eCHo!5678W tE#z/234W W 5tL/A!ABCW W SMW~ GK;?7;$(tt
t *c/"t
 5tL/A!ABCt d5eCHo!5678t tE#z/234t t SMtn r   