
    ɯei                         d dl mZmZmZmZmZ d dlZd dlm	c m
c mc mc mZ d dlmZmZ d dlmZmZmZ d dlmZmZ d dlmZmZmZmZmZmZ d dlm Z m!Z!m"Z" d dl#m$Z$ g d	Z% G d
 d      Z&y)    )CallableDictListTupleUnionN)+build_expr_from_snowpark_column_or_col_namewith_src_position)experimental	publicapiwarning)Column_to_col_if_str)_call_functioncolfunctionlagleadmake_interval)IntegerTypeStructField
StructType)Window)smhdwmmyc                   P   e Zd ZdZd2dZdededefdZd Zd	 Zd
 Z	d Z
deeeef      dee   dee   dee   deeeegef   deeegef   deddfdZdedeeef   fdZ	 d3dedededeeef   fdZ	 	 	 d4dddddeeee   f   dee   deeeegef   d ed!eddfd"Zeedfdeeee   f   d#ee   dee   dee   deeeegef   d$eddfd%       Zeedfdeeee   f   dee   dee   d&edeeegef   d$eddfd'       Zeedfdeeeef      d(ee   dee   dee   deeeegef   d$eddfd)       Zeedfdeeeef      d*ee   dee   dee   deeeegef   d$eddfd+       Z ed,-      ededfd.edeeee   f   d/ee   dee   d0edeeeegef   d$eddfd1              Zy)5DataFrameAnalyticsFunctionsz~Provides data analytics functions for DataFrames.
    To access an object of this class, use :attr:`DataFrame.analytics`.
    returnNc                     || _         y N)
_dataframe)self	dataframes     r/var/www/html/glpi_dashboard/venv/lib/python3.12/site-packages/snowflake/snowpark/dataframe_analytics_functions.py__init__z$DataFrameAnalyticsFunctions.__init__#   s	    #    	input_col	operationc                 f    dj                  t        t        |            }|  d| }|r|d| z  }|S )N_)joinmapstr)r+   r,   argsargs_strformatted_names        r(   _default_col_formatterz2DataFrameAnalyticsFunctions._default_col_formatter&   s?    88CTN+%;a	{3(n,Nr*   c                     d}t        |t              st        d|       |st        d|       t	        d |j                         D              st        d|       y )NzThe 'aggs' argument must adhere to the following rules: 1) It must be a dictionary. 2) It must not be empty. 3) All keys must be strings. 4) All values must be non-empty lists of strings.zaggs must be a dictionary. zaggs must not be empty. c              3   n   K   | ]-  \  }}t        |t              xr t        |t              xr | / y wr$   )
isinstancer1   list).0keyvals      r(   	<genexpr>zFDataFrameAnalyticsFunctions._validate_aggs_argument.<locals>.<genexpr>:   s7      
S sC BZT%:BsB
s   35zIaggs must have strings as keys and non-empty lists of strings as values. )r8   dict	TypeError
ValueErrorallitems)r&   aggsargument_requirementss      r(   _validate_aggs_argumentz3DataFrameAnalyticsFunctions._validate_aggs_argument-   s    @ 	 $%9:O9PQRR78M7NOPP 
 JJL
 
 [\q[rs 	
r*   c                     d| d}t        |t              st        | d|       |st        | d|       t	        d |D              st        | d|       y )NThe 'z' argument must adhere to the following rules: 1) It must be a list. 2) It must not be empty. 3) All items in the list must be strings. must be a list.  must not be empty. c              3   <   K   | ]  }t        |t                y wr$   )r8   r1   r:   items     r(   r=   zMDataFrameAnalyticsFunctions._validate_string_list_argument.<locals>.<genexpr>O   s     :T:dC(:s   z must be a list of strings. r8   r9   r?   r@   rA   r&   dataargument_namerD   s       r(   _validate_string_list_argumentz:DataFrameAnalyticsFunctions._validate_string_list_argumentB   s    M? #8 8 	 $%}o->?T>UVWW /!56K5LM  :T:: /!=>S=TU  ;r*   c                     d| d}t        |t              st        | d|       |st        | d|       t	        d |D              st        | d|       y )NrG   z' argument must adhere to the following criteria: 1) It must be a list. 2) It must not be empty. 3) All items in the list must be positive integers.rH   rI   c              3   J   K   | ]  }t        |t              xr |d kD    yw)r   N)r8   intrK   s     r(   r=   zWDataFrameAnalyticsFunctions._validate_positive_integer_list_argument.<locals>.<genexpr>a   s#     G$:dC(5TAX5Gs   !#z! must be a list of integers > 0. rM   rN   s       r(   (_validate_positive_integer_list_argumentzDDataFrameAnalyticsFunctions._validate_positive_integer_list_argumentT   s    M? #B B 	 $%}o->?T>UVWW /!56K5LM  G$GG /!BCXBYZ  Hr*   c                 0    t        |      st        d      y )Nz%formatter must be a callable function)callabler?   )r&   	fromatters     r(   _validate_formatter_argumentz8DataFrameAnalyticsFunctions._validate_formatter_argumentf   s    	"CDD #r*   colsperiodsorder_bygroup_bycol_formatterwindow_func	func_namez&snowflake.snowpark.dataframe.DataFramec                 \   | j                  |d       | j                  |d       | j                  ||j                         dz          | j                  |       t	        j
                  |      j                  |      }| j                  }	g }
g }|D ]  }|D ]  }t        |d|j                                } |||      j                  |      } ||j                         j                  dd      ||      }|
j                  |       |j                  |         |	j                  |
|d      S )	z
        Generic function to create window function columns (lag or lead) for the DataFrame.
        Args:
            func_name: Should be either "LEAD" or "LAG".
        r\   r]   r   ztransform.compute_" F	_emit_ast)rQ   rU   lowerrY   r   partition_byr\   r%   r   overget_namereplaceappendwith_columns)r&   rZ   r[   r\   r]   r^   r_   r`   window_specdf	col_namesvaluescperiodcolumn
window_colformatted_col_names                    r(   _compute_window_functionz4DataFrameAnalyticsFunctions._compute_window_functionj   s(    	++HjA++HjA55gy?PSV?VW))-8))(3<<XF__	 	*A! *'-?	@Q?R+ST(8==kJ
%2OO%--c26	6&"   !34j)*	* y&EBBr*   time_strc                     t        |      }t        |      D ]  \  }}|j                         r|dvs|} n t        |d |       }||d  j	                         }||fS )N)+-)len	enumerateisdigitrT   rf   )r&   rw   indexichardurationunits          r(   _parse_time_stringz.DataFrameAnalyticsFunctions._parse_time_string   sm    H * 	GAt<<>d*&<	
 x'(%%'~r*   TrP   allow_negativec                     d| ddj                  t               d}|st        | d|       | j                  |      \  }}|s|dk  rt        | d|       |t        vrt        d| d	t         d
|       ||fS )NrG   z' argument must adhere to the following criteria: 1) It must not be an empty string. 2) The last character must be a supported time unit. Supported units are 'z, zy'. 3) The preceding characters must represent an integer. 4) The integer must not be negative if allow_negative is False.rI   r   z must not be negative. zUnsupported unit 'z'. Supported units are 'z. )r/   SUPPORTED_TIME_UNITSr@   r   )r&   rw   rP   r   rD   r   r   s          r(   _validate_and_extract_time_unitz;DataFrameAnalyticsFunctions._validate_and_extract_time_unit   s     M? #$ %)II.B$C#D ENN 	  /!56K5LM  00:$(Q, /!89N8OP  ++$TF*BCWBXXZ[pZqr  ~r*   rc   base_dfinput_dfrC   group_by_colswindowrename_suffixc           	      :   |j                         D ]  \  }}	|	D ]}  }
|r
 |||
|      n| d|
 | }t        |
t        ||z   d      d      j                  |d      }|j	                  |d      j                  |d      }|j                  ||dd      }  |S )a  
        Perform window-based aggregations on the given DataFrame.

        This function applies specified aggregation functions to columns of an input DataFrame,
        grouped by specified columns, and joins the results back to a base DataFrame.

        Parameters:
        - base_df: DataFrame to which the aggregated results will be joined.
        - input_df: DataFrame on which aggregations are to be performed.
        - aggs: A dictionary where keys are column names and values are lists of aggregation functions.
        - group_by_cols: List of column names to group by.
        - col_formatter: Optional callable to format column names of aggregated results.
        - window: Optional window specification for aggregations.
        - rename_suffix: Optional suffix to append to column names.

        Returns:
        - DataFrame with the aggregated data joined to the base DataFrame.
        r.   Frd   left)onhowre   )rB   r   r   aliasr]   aggr/   )r&   r   r   rC   r   r^   r   r   rs   funcsfuncagg_column_nameagg_expressionagg_dfs                 r(   _perform_window_aggregationsz8DataFrameAnalyticsFunctions._perform_window_aggregations   s    8 "ZZ\ 	MFE  % "&$7"81TF=/:  
 "0#f}4FRW"%5%9  "**=E*JNN"e O  ",,}&E ' 	" r*   window_sizesre   c           
         | j                  |       | j                  |d       | j                  |d       | j                  |d       | j                  |       d}d}|r-| j                  j
                  j                  j                         }t        |j                  j                  |      }| j                  j                  |j                         |j                         D ]V  \  }	}
t        j                         }|	|_        |j"                  j%                  |
       |j&                  j)                  |       X |j*                  j%                  |       |j,                  j%                  |       |j.                  j%                  |       | j                  }|j                         D ]  \  }}
|D ]  }|
D ]  }t1        j2                  |      j/                  |      j5                  | dz   d      }t7        |t9        |d      d      j;                  |d      } ||||      }|t<        j>                  k7  r|r|j@                  j)                  |       |jC                  ||d      }   |r|jD                  |_#        |S )	a  
        Applies moving aggregations to the specified columns of the DataFrame using defined window sizes,
        and grouping and ordering criteria.

        Args:
            aggs: A dictionary where keys are column names and values are lists of the desired aggregation functions.
                Supported aggregation are listed here https://docs.snowflake.com/en/sql-reference/functions-analytic#list-of-functions-that-support-windows.
            window_sizes: A list of positive integers, each representing the size of the window for which to
                        calculate the moving aggregate.
            order_by: A list of column names that specify the order in which rows are processed.
            group_by: A list of column names on which the DataFrame is partitioned for separate window calculations.
            col_formatter: An optional function for formatting output column names, defaulting to the format '<input_col>_<agg>_<window>'.
                        This function takes three arguments: 'input_col' (str) for the column name, 'operation' (str) for the applied operation,
                        and 'value' (int) for the window size, and returns a formatted string for the column name.

        Returns:
            A Snowpark DataFrame with additional columns corresponding to each specified moving aggregation.

        Raises:
            ValueError: If an unsupported value is specified in arguments.
            TypeError: If an unsupported type is specified in arguments.
            SnowparkSQLException: If an unsupported aggregration is specified.

        Example:
            >>> data = [
            ...     ["2023-01-01", 101, 200],
            ...     ["2023-01-02", 101, 100],
            ...     ["2023-01-03", 101, 300],
            ...     ["2023-01-04", 102, 250],
            ... ]
            >>> df = session.create_dataframe(data).to_df(
            ...     "ORDERDATE", "PRODUCTKEY", "SALESAMOUNT"
            ... )
            >>> result = df.analytics.moving_agg(
            ...     aggs={"SALESAMOUNT": ["SUM", "AVG"]},
            ...     window_sizes=[2, 3],
            ...     order_by=["ORDERDATE"],
            ...     group_by=["PRODUCTKEY"],
            ... ).sort("ORDERDATE")
            >>> result.show()
            --------------------------------------------------------------------------------------------------------------------------------------
            |"ORDERDATE"  |"PRODUCTKEY"  |"SALESAMOUNT"  |"SALESAMOUNT_SUM_2"  |"SALESAMOUNT_AVG_2"  |"SALESAMOUNT_SUM_3"  |"SALESAMOUNT_AVG_3"  |
            --------------------------------------------------------------------------------------------------------------------------------------
            |2023-01-01   |101           |200            |200                  |200.000              |200                  |200.000              |
            |2023-01-02   |101           |100            |300                  |150.000              |300                  |150.000              |
            |2023-01-03   |101           |300            |400                  |200.000              |600                  |200.000              |
            |2023-01-04   |102           |250            |250                  |250.000              |250                  |250.000              |
            --------------------------------------------------------------------------------------------------------------------------------------
            <BLANKLINE>
        r\   r]   r   N   r   Frd   )$rE   rQ   rU   rY   r%   _session
_ast_batchbindr	   exprdataframe_analytics_moving_agg_set_ast_refrn   rB   protoTuple_String_List_String_1_2extendrC   rk   r   r]   r\   r   rg   rows_betweenr   r   rh   r!   r5   formatted_col_nameswith_columnuid_ast_id)r&   rC   r   r\   r]   r^   re   stmtastcol_name	agg_funcsagg_func_tuple_astr   rs   window_sizeagg_funcrm   agg_colru   s                      r(   
moving_aggz&DataFrameAnalyticsFunctions.moving_agg   sE   z 	$$T*++HjA++HjA55lNS))-8 ??++66;;=D#DII$L$LdSCOO((0'+zz| 4#)%*%C%C%E"(0"%"%%,,Y7 23	4
 ##L1LL)LL) !% 	FI+  ) H++H5!(+%{lQ&6:   - #f">%d;%d8  *7vx)U&%6MMN%//667IJ#//*Gu 0 F)	4 !XXFNr*   
is_forwardc           	      
   | j                  |       | j                  |d       | j                  |d       | j                  |       d}d}|r| j                  j                  j
                  j                         }t        |j                  j                  |      }| j                  j                  |j                         |j                         D ]V  \  }	}
t        j                         }|	|_        |j                   j#                  |
       |j$                  j'                  |       X |j(                  j#                  |       |j*                  j#                  |       ||_        t/        j0                  |      j+                  |      }|r!|j3                  dt.        j4                        }n |j3                  t.        j6                  d      }| j                  }|j                         D ]  \  }}
|
D ]y  }t9        |t;        |d      d      j=                  |d      } |||      }|t>        j@                  k7  r|r|jB                  j'                  |       |jE                  ||d      }{  |r|jF                  |_$        |S )a  
        Applies cummulative aggregations to the specified columns of the DataFrame using defined window direction,
        and grouping and ordering criteria.

        Args:
            aggs: A dictionary where keys are column names and values are lists of the desired aggregation functions.
            order_by: A list of column names that specify the order in which rows are processed.
            group_by: A list of column names on which the DataFrame is partitioned for separate window calculations.
            is_forward: A boolean indicating the direction of accumulation. True for 'forward' and False for 'backward'.
            col_formatter: An optional function for formatting output column names, defaulting to the format '<input_col>_<agg>'.
                        This function takes two arguments: 'input_col' (str) for the column name, 'operation' (str) for the applied operation,
                        and returns a formatted string for the column name.

        Returns:
            A Snowflake DataFrame with additional columns corresponding to each specified cumulative aggregation.

        Raises:
            ValueError: If an unsupported value is specified in arguments.
            TypeError: If an unsupported type is specified in arguments.
            SnowparkSQLException: If an unsupported aggregration is specified.

        Example:
            >>> sample_data = [
            ...     ["2023-01-01", 101, 200],
            ...     ["2023-01-02", 101, 100],
            ...     ["2023-01-03", 101, 300],
            ...     ["2023-01-04", 102, 250],
            ... ]
            >>> df = session.create_dataframe(sample_data).to_df(
            ...     "ORDERDATE", "PRODUCTKEY", "SALESAMOUNT"
            ... )
            >>> res = df.analytics.cumulative_agg(
            ...     aggs={"SALESAMOUNT": ["SUM", "MIN", "MAX"]},
            ...     group_by=["PRODUCTKEY"],
            ...     order_by=["ORDERDATE"],
            ...     is_forward=True
            ... ).sort("ORDERDATE")
            >>> res.show()
            ----------------------------------------------------------------------------------------------------------
            |"ORDERDATE"  |"PRODUCTKEY"  |"SALESAMOUNT"  |"SALESAMOUNT_SUM"  |"SALESAMOUNT_MIN"  |"SALESAMOUNT_MAX"  |
            ----------------------------------------------------------------------------------------------------------
            |2023-01-01   |101           |200            |600                |100                |300                |
            |2023-01-02   |101           |100            |400                |100                |300                |
            |2023-01-03   |101           |300            |300                |300                |300                |
            |2023-01-04   |102           |250            |250                |250                |250                |
            ----------------------------------------------------------------------------------------------------------
            <BLANKLINE>
        r\   r]   Nr   Frd   )%rE   rQ   rY   r%   r   r   r   r	   r   "dataframe_analytics_cumulative_aggr   rn   rB   r   r   r   r   r   rC   rk   r]   r\   r   r   rg   r   UNBOUNDED_FOLLOWINGUNBOUNDED_PRECEDINGr   r   rh   r!   r5   r   r   r   r   )r&   rC   r]   r\   r   r^   re   r   r   r   r   r   rm   r   rs   r   r   ru   s                     r(   cumulative_aggz*DataFrameAnalyticsFunctions.cumulative_agg[  s3   v 	$$T*++HjA++HjA))-8 ??++66;;=D#DII$P$PRVWCOO((0'+zz| 4#)%*%C%C%E"(0"%"%%,,Y7 23	4
 LL)LL)'CN))(3<<XF%221f6P6PQK%2263M3MqQK !% 	FI% (c&E:e${e$4  &368%D" "%@%W%WW!++223EF++&5 , 	& !XXFNr*   lagsc           	      0   d}d}|r| j                   j                  j                  j                         }t	        |j
                  j                  |      }|D ]&  }	t        |j                  j                         |	       ( |j                  j                  |       |j                  j                  |       |j                  j                  |       | j                   j                  |j                         |t         j"                  k7  r_|r]|D ]X  }	|D ]Q  }
t%        |	d      } ||j'                         j)                  dd      d|
      }|j*                  j-                  |       S Z | j/                  |||||t0        d      }|r|j2                  |_        |S )a	  
        Creates lag columns to the specified columns of the DataFrame by grouping and ordering criteria.

        Args:
            cols: List of column names or Column objects to calculate lag features.
            lags: List of positive integers specifying periods to lag by.
            order_by: A list of column names that specify the order in which rows are processed.
            group_by: A list of column names on which the DataFrame is partitioned for separate window calculations.
            col_formatter: An optional function for formatting output column names, defaulting to the format '<input_col>LAG<lag>'.
                        This function takes three arguments: 'input_col' (str) for the column name, 'operation' (str) for the applied operation,
                        and 'value' (int) for lag value, and returns a formatted string for the column name.

        Returns:
            A Snowflake DataFrame with additional columns corresponding to each specified lag period.

        Example:
            >>> sample_data = [
            ...     ["2023-01-01", 101, 200],
            ...     ["2023-01-02", 101, 100],
            ...     ["2023-01-03", 101, 300],
            ...     ["2023-01-04", 102, 250],
            ... ]
            >>> df = session.create_dataframe(sample_data).to_df(
            ...     "ORDERDATE", "PRODUCTKEY", "SALESAMOUNT"
            ... )
            >>> res = df.analytics.compute_lag(
            ...     cols=["SALESAMOUNT"],
            ...     lags=[1, 2],
            ...     order_by=["ORDERDATE"],
            ...     group_by=["PRODUCTKEY"],
            ... ).sort("ORDERDATE")
            >>> res.show()
            ------------------------------------------------------------------------------------------
            |"ORDERDATE"  |"PRODUCTKEY"  |"SALESAMOUNT"  |"SALESAMOUNT_LAG_1"  |"SALESAMOUNT_LAG_2"  |
            ------------------------------------------------------------------------------------------
            |2023-01-01   |101           |200            |NULL                 |NULL                 |
            |2023-01-02   |101           |100            |200                  |NULL                 |
            |2023-01-03   |101           |300            |100                  |200                  |
            |2023-01-04   |102           |250            |NULL                 |NULL                 |
            ------------------------------------------------------------------------------------------
            <BLANKLINE>
        Nztransform.compute_lagrb   rc   LAG)r%   r   r   r   r	   r   dataframe_analytics_compute_lagr   rZ   addr   r   r]   r\   r   rn   r!   r5   r   ri   rj   r   rk   rv   r   r   r   )r&   rZ   r   r\   r]   r^   re   r   r   rq   _lagrs   ru   rn   s                 r(   compute_lagz'DataFrameAnalyticsFunctions.compute_lag  sk   j ??++66;;=D#DII$M$MtTC O;CHHLLNANOHHOOD!LL)LL)OO((0 8OOO G  GD+A/FGF)6)11#r:E4*& ++223EFGG **$(M3
 BJ	r*   leadsc           	      0   d}d}|r| j                   j                  j                  j                         }t	        |j
                  j                  |      }| j                   j                  |j                         |D ]&  }	t        |j                  j                         |	       ( |j                  j                  |       |j                  j                  |       |j                  j                  |       |t         j"                  k7  r_|r]|D ]X  }	|D ]Q  }
t%        |	d      } ||j'                         j)                  dd      d|
      }|j*                  j-                  |       S Z | j/                  |||||t0        d      }|r|j2                  |_        |S )a	  
        Creates lead columns to the specified columns of the DataFrame by grouping and ordering criteria.

        Args:
            cols: List of column names or Column objects to calculate lead features.
            leads: List of positive integers specifying periods to lead by.
            order_by: A list of column names that specify the order in which rows are processed.
            group_by: A list of column names on which the DataFrame is partitioned for separate window calculations.
            col_formatter: An optional function for formatting output column names, defaulting to the format '<input_col>LEAD<lead>'.
                        This function takes three arguments: 'input_col' (str) for the column name, 'operation' (str) for the applied operation,
                        and 'value' (int) for the lead value, and returns a formatted string for the column name.

        Returns:
            A Snowflake DataFrame with additional columns corresponding to each specified lead period.

        Example:
            >>> sample_data = [
            ...     ["2023-01-01", 101, 200],
            ...     ["2023-01-02", 101, 100],
            ...     ["2023-01-03", 101, 300],
            ...     ["2023-01-04", 102, 250],
            ... ]
            >>> df = session.create_dataframe(sample_data).to_df(
            ...     "ORDERDATE", "PRODUCTKEY", "SALESAMOUNT"
            ... )
            >>> res = df.analytics.compute_lead(
            ...     cols=["SALESAMOUNT"],
            ...     leads=[1, 2],
            ...     order_by=["ORDERDATE"],
            ...     group_by=["PRODUCTKEY"]
            ... ).sort("ORDERDATE")
            >>> res.show()
            --------------------------------------------------------------------------------------------
            |"ORDERDATE"  |"PRODUCTKEY"  |"SALESAMOUNT"  |"SALESAMOUNT_LEAD_1"  |"SALESAMOUNT_LEAD_2"  |
            --------------------------------------------------------------------------------------------
            |2023-01-01   |101           |200            |100                   |300                   |
            |2023-01-02   |101           |100            |300                   |NULL                  |
            |2023-01-03   |101           |300            |NULL                  |NULL                  |
            |2023-01-04   |102           |250            |NULL                  |NULL                  |
            --------------------------------------------------------------------------------------------
            <BLANKLINE>
        Nztransform.compute_leadrb   rc   LEAD)r%   r   r   r   r	   r    dataframe_analytics_compute_leadr   rn   r   rZ   r   r   r   r]   r\   r!   r5   r   ri   rj   r   rk   rv   r   r   r   )r&   rZ   r   r\   r]   r^   re   r   r   rq   _leadrs   ru   rn   s                 r(   compute_leadz(DataFrameAnalyticsFunctions.compute_lead!  sn   j ??++66;;=D#DII$N$NPTUCOO((0 O;CHHLLNANOIIU#LL)LL) 8OOO G" GE+A/GHF)6)11#r:FE*& ++223EFGG **%8]D&
 BJ	r*   z1.12.0)versiontime_colwindowssliding_intervalc                    | j                  |       | j                  |d       | j                  |       |st        d      |rt	        |t
              st        d      |rt        dd       d}|r| j                  j                  j                  j                         }t        |j                  j                  |      }	||	_        |j                         D ]V  \  }
}t!        j"                         }|
|_        |j&                  j)                  |       |	j*                  j-                  |       X |	j.                  j)                  |       |	j0                  j)                  |       ||	_        | j                  j5                  |	j6                         |t8        j:                  k7  rU|j                         D ]B  \  }}|D ]8  }|D ]1  }|r
 ||||      n| d| d}|	j<                  j-                  |       3 : D d	d
lm } t	        | j                  j                  jB                  |      r| j                  j                  jB                  jD                  rY| j                  j                  jG                  g tI        tK        dtM                     g      d      }|r|jN                  |_(        |S g }g }|D ]{  }| jS                  |d      \  }}|d	kD  rdnd}tU        |      }||i}tW        |jY                  d      |jY                  d      |jY                  d      |jY                  d      |jY                  d      |jY                  d      |jY                  d            }|d	kD  rtZ        j\                  |}}n| tZ        j\                  }}t[        j^                  |      ja                  |      jc                  ||      }|j                         D ]]  \  }}|D ]S  } |j-                   te        |       tg        |            ji                  |             |j-                   ||| |             U _ ~ | j                  jk                  ||      }!|r|jN                  |!_(        |!S )aQ  
        Applies aggregations to the specified columns of the DataFrame over specified time windows,
        and grouping criteria.

        Args:
            aggs: A dictionary where keys are column names and values are lists of the desired aggregation functions.
            windows: Time windows for aggregations using strings such as '7D' for 7 days, where the units are
                S: Seconds, M: Minutes, H: Hours, D: Days, W: Weeks, MM: Months, Y: Years. For future-oriented analysis, use positive numbers,
                and for past-oriented analysis, use negative numbers.
            sliding_interval: (Deprecated) Interval at which the window slides, specified in the same format as the windows.
            group_by: A list of column names on which the DataFrame is partitioned for separate window calculations.
            col_formatter: An optional function for formatting output column names, defaulting to the format '<input_col>_<agg>_<window>'.
                        This function takes three arguments: 'input_col' (str) for the column name, 'operation' (str) for the applied operation,
                        and 'value' (int) for the window size, and returns a formatted string for the column name.

        Returns:
            A Snowflake DataFrame with additional columns corresponding to each specified time window aggregation.

        Raises:
            ValueError: If an unsupported value is specified in arguments.
            TypeError: If an unsupported type is specified in arguments.
            SnowparkSQLException: If an unsupported aggregration is specified.

        Example:
            >>> sample_data = [
            ...     ["2023-01-01", 101, 200],
            ...     ["2023-01-02", 101, 100],
            ...     ["2023-01-03", 101, 300],
            ...     ["2023-01-04", 102, 250],
            ... ]
            >>> df = session.create_dataframe(sample_data).to_df(
            ...     "ORDERDATE", "PRODUCTKEY", "SALESAMOUNT"
            ... )
            >>> df = df.with_column("ORDERDATE", to_timestamp(df["ORDERDATE"]))
            >>> def custom_formatter(input_col, agg, window):
            ...     return f"{agg}_{input_col}_{window}"
            >>> res = df.analytics.time_series_agg(
            ...     time_col="ORDERDATE",
            ...     group_by=["PRODUCTKEY"],
            ...     aggs={"SALESAMOUNT": ["SUM", "MAX"]},
            ...     windows=["1D", "-1D"],
            ...     sliding_interval="12H",
            ...     col_formatter=custom_formatter,
            ... ).sort("ORDERDATE")
            >>> res.show()
            ----------------------------------------------------------------------------------------------------------------------------------------------------
            |"PRODUCTKEY"  |"SALESAMOUNT"  |"ORDERDATE"          |"SUM_SALESAMOUNT_1D"  |"MAX_SALESAMOUNT_1D"  |"SUM_SALESAMOUNT_-1D"  |"MAX_SALESAMOUNT_-1D"  |
            ----------------------------------------------------------------------------------------------------------------------------------------------------
            |101           |200            |2023-01-01 00:00:00  |300                   |200                   |200                    |200                    |
            |101           |100            |2023-01-02 00:00:00  |400                   |300                   |300                    |200                    |
            |101           |300            |2023-01-03 00:00:00  |300                   |300                   |400                    |300                    |
            |102           |250            |2023-01-04 00:00:00  |250                   |250                   |250                    |250                    |
            ----------------------------------------------------------------------------------------------------------------------------------------------------
            <BLANKLINE>
        r]   zwindows must not be emptyztime_col must be a stringz time_series_agg.sliding_intervalzFsliding_interval is deprecated since 1.31.0. Do not use in production.Nr.   Br   )MockServerConnectionrowF)schemare   r   r   r   r   r   r   r   r   r   )secondsminuteshoursdaysweeksmonthsyears)startend)6rE   rQ   rY   r@   r8   r1   r   r%   r   r   r   r	   r   #dataframe_analytics_time_series_aggr   rB   r   r   r   r   r   rC   rk   r   r]   r   r   rn   r!   r5   r   #snowflake.snowpark.mock._connectionr   _conn_suppress_not_implemented_errorcreateDataFramer   r   r   r   r   r   absr   getr   CURRENT_ROWrg   r\   range_betweenr   r   rh   rl   )"r&   r   rC   r   r]   r   r^   re   r   r   r   r   r   rs   r   r   r   r   r   ansagg_columnsagg_column_nameswindow_durationwindow_unitwindow_signwindow_duration_absinterval_argsintervalrange_start	range_endrm   aggregationsr   	result_dfs"                                     r(   time_series_aggz+DataFrameAnalyticsFunctions.time_series_aggx  s!   F 	$$T*++HjA))-8899z(C88992X ??++66;;=D#DII$Q$QSWXC#CL'+zz| 4#)%*%C%C%E"(0"%"%%,,Y7 23	4
 KKw'LL)#3C OO((0 ; R RR%)ZZ\ LMFE") L$) LD $1 !.fdF C(.xqa%8 ,
  33::?KLLL 	M t//557KL((..NN //**::!;ukm#D"EF ; C
 "hhJ "	UF+/+O+O,(O[  /2!K"%o"6(*=>M$%))#.%))#.#'',"&&s+#'',$((.#'',H Q)/););XY*2F4F4FY ##H-(#[i@  )-

 U$ , UH&&**3v;7<<[I %++M&(F,ST	UU;"	UH OO001A;O	 $Ir*   )r'   zsnowflake.snowpark.DataFramer"   N)T)NNrc   )__name__
__module____qualname____doc__r)   r1   r5   rE   rQ   rU   rY   r   r   r   rT   r   rv   r   r   boolr   r   r   r   r   r   r   r   r
   r    r*   r(   r!   r!      s   $# #  *$$E"C5f%&"C c"C s)	"C
 s)"C  c3 45"C vsmV34"C "C 
2"CH
3 
5c? 
 IM,/AE	sCxH 9=-9- ;- 3S	>"	-
 Cy-  c3 45- - - 
2-^  9Oq3S	>"q 3iq s)	q
 s)q  c3 45q q 
2q qf  4Jm3S	>"m s)m s)	m
 m  c
C0m m 
2m m^  9OS5f%&S 3iS s)	S
 s)S  c3 45S S 
2S Sj  9OT5f%&T CyT s)	T
 s)T  c3 45T T 
2T Tl (# !#8Nhh 3S	>"h c	h
 s)h h  c3 45h h 
2h  $hr*   r!   )'typingr   r   r   r   r   snowflake.snowpark	snowflake4snowflake.snowpark._internal.proto.generated.ast_pb2snowpark	_internalr   	generatedast_pb2&snowflake.snowpark._internal.ast.utilsr   r	   "snowflake.snowpark._internal.utilsr
   r   r   snowflake.snowpark.columnr   r   snowflake.snowpark.functionsr   r   r   r   r   r   snowflake.snowpark.typesr   r   r   snowflake.snowpark.windowr   r   r!   r   r*   r(   <module>r     sT   
 6 5  D D D P O <  J I , < D Dr*   