
    ɯeiu              	       z   d dl Z d dlZd dlmZ d dlmZmZmZ d dlZ	d dl
mZ d dlmZmZ d dlmZ d dlmZmZ d dlmZmZmZ d d	lmZmZ d d
lmZ d dlmZmZm Z  d dl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' ejP                  dk  rd dlm)Z) nd dl*m)Z)  ee+      Z,	 ddede"de-de-fdZ.dZ/deee0e)e0   f      ddfdZ1 G d d      Z2y)    N)	getLogger)DictOptionalUnion)	ColumnSum)build_expr_from_python_valwith_src_position)SnowparkClientExceptionMessages)add_api_calladjust_api_subcalls)$VALID_PYTHON_TYPES_FOR_LITERAL_VALUELiteralTypepython_type_to_snow_type)	publicapi
quote_name)Column)ifflitwhen)DataTypeDecimalType
DoubleType	FloatTypeIntegerTypeLongType)   	   )Iterablevaluedatatypeinclude_decimalreturnc           
      p   t         t        t        t        f}t        t        f}|r|t        f}|t        f}| d u xs} t        | t              xr t        | t               xr t        ||      xsJ t        | t              xr t        ||      xs* t        |t        t        t        |             d               S )Nr   )r   r   r   r   r   
isinstanceintboolfloattyper   )r   r    r!   	int_typesfloat_typess        k/var/www/html/glpi_dashboard/venv/lib/python3.12/site-packages/snowflake/snowpark/dataframe_na_functions.py'_is_value_type_matching_for_na_functionr,   /   s     h	:>Ij)K ,	"K0 	Puc" 0ud++0 8Y/	P ue$JHk)J	P h%=d5k%J1%M NO
    zDsubset should be a single column name, list or tuple of column namessubsetc                 v    | 7t        | t              s&t        | t        t        f      st	        t
              yyy)z<Produces exception when invalid subset parameter was passed.N)r$   strlisttuple	TypeError_SUBSET_CHECK_ERROR_MESSAGE)r.   s    r+   _check_subset_parameterr5   P   s=     	63'6D%=1344 2 ( 	r-   c                   @   e Zd ZdZddZe	 	 	 	 ddedee   dee	ee
e   f      d	eddf
d
       Ze	 	 dddde	eeeef   f   dee	ee
e   f      d	ededdf
d       Ze	 	 	 dddde	ee
e   eeef   f   dee	ee
e   f      dee	ee
e   f      d	ededdfd       Zy)DataFrameNaFunctionszGProvides functions for handling missing values in a :class:`DataFrame`.snowflake.snowpark.DataFramer"   Nc                     || _         y )N)
_dataframe)self	dataframes     r+   __init__zDataFrameNaFunctions.__init__]   s	    #r-   howthreshr.   	_emit_astc                    ||dvrt        d| d      t        |       d}|r3| j                  j                  j                  j                         }t        |j                  j                  |      }||_	        |||j                  _        t        |t              r@d|j                  _        t!        |j                  j"                  j%                         |       nVt        |t&              rFd|j                  _        |D ]0  }t!        |j                  j"                  j%                         |       2 | j                  j)                  |j*                         || j                  j,                  }nt        |t              r|g}||dk(  rt/        |      nd}|dk  st/        |      d	k(  rE| j                  j1                         }t3        |d
       |r|j4                  |_        | j                  S |t/        |      kD  rA| j                  j9                  d	|d      }t;        |d
d       |r|j4                  |_        |S | j                  j<                  j>                  D 	ci c]"  }	tA        |	jB                        |	jD                  $ }
}	|D ch c]  }tA        |       }}g }|D ]  }||
vr$tG        jH                  ||
jK                               | j                  jM                  |d      }t        |
|   tN        tP        f      r4tS        |tT        jV                  k(  |jY                  d      z  d	dd      }ntS        |jY                  d      d	dd      }|j[                  |        t]        t_        |D cg c]  }|j`                   c}      d      }| j                  jc                  ||k\  d      }t;        |d
d       |r|j4                  |_        |S c c}	w c c}w c c}w )a  
        Returns a new DataFrame that excludes all rows containing fewer than
        a specified number of non-null and non-NaN values in the specified
        columns.

        Args:
            how: An ``str`` with value either 'any' or 'all'. If 'any', drop a row if
                it contains any nulls. If 'all', drop a row only if all its values are null.
                The default value is 'any'. If ``thresh`` is provided, ``how`` will be ignored.
            thresh: The minimum number of non-null and non-NaN
                values that should be in the specified columns in order for the
                row to be included. It overwrites ``how``. In each case:

                    * If ``thresh`` is not provided or ``None``, the length of ``subset``
                      will be used when ``how`` is 'any' and 1 will be used when ``how``
                      is 'all'.

                    * If ``thresh`` is greater than the number of the specified columns,
                      the method returns an empty DataFrame.

                    * If ``thresh`` is less than 1, the method returns the original DataFrame.

            subset: A list of the names of columns to check for null and NaN values.
                In each case:

                    * If ``subset`` is not provided or ``None``, all columns will be included.

                    * If ``subset`` is empty, the method returns the original DataFrame.

        Examples::

            >>> df = session.create_dataframe([[1.0, 1], [float('nan'), 2], [None, 3], [4.0, None], [float('nan'), None]]).to_df("a", "b")
            >>> # drop a row if it contains any nulls, with checking all columns
            >>> df.na.drop().show()
            -------------
            |"A"  |"B"  |
            -------------
            |1.0  |1    |
            -------------
            <BLANKLINE>
            >>> # drop a row only if all its values are null, with checking all columns
            >>> df.na.drop(how='all').show()
            ---------------
            |"A"   |"B"   |
            ---------------
            |1.0   |1     |
            |nan   |2     |
            |NULL  |3     |
            |4.0   |NULL  |
            ---------------
            <BLANKLINE>
            >>> # drop a row if it contains at least one non-null and non-NaN values, with checking all columns
            >>> df.na.drop(thresh=1).show()
            ---------------
            |"A"   |"B"   |
            ---------------
            |1.0   |1     |
            |nan   |2     |
            |NULL  |3     |
            |4.0   |NULL  |
            ---------------
            <BLANKLINE>
            >>> # drop a row if it contains any nulls, with checking column "a"
            >>> df.na.drop(subset=["a"]).show()
            --------------
            |"A"  |"B"   |
            --------------
            |1.0  |1     |
            |4.0  |NULL  |
            --------------
            <BLANKLINE>
            >>> df.na.drop(subset="a").show()
            --------------
            |"A"  |"B"   |
            --------------
            |1.0  |1     |
            |4.0  |NULL  |
            --------------
            <BLANKLINE>

        See Also:
            :func:`DataFrame.dropna`
        N)anyallzhow ('z') should be 'any' or 'all'TFrB      r   zDataFrameNaFunctions.drop)	_ast_stmtr@   len_subcallsr@   )2
ValueErrorr5   r:   _session
_ast_batchbindr	   exprdataframe_na_drop__pythonr>   r?   r   r$   r0   r.   variadicr   argsaddr   _set_ast_refdfcolumnslen_copy_without_astr   uid_ast_idlimitr   schemafieldsr   namer    r
   DF_CANNOT_RESOLVE_COLUMN_NAMEkeyscolr   r   r   mathnanis_nullappendr   r   _expressionwhere)r;   r>   r?   r.   r@   stmtastr_   new_dffielddf_col_type_dictcol_namenormalized_col_name_setis_na_columnsnormalized_col_nameis_naccol_counters                     r+   dropzDataFrameNaFunctions.drop`   s   @ ?s.8vcU*EFGG' ??++66;;=D#DII$G$GNCCG!#)

 &#&&*

#*3::??+>+>+@&IFH-&+

#! KC.szz/B/B/DcJKOO((0 >__,,F$XF
 >$'5LS[aF A:V)__668F!<=!%??" c&k!__**1*NF(CRST!%M "__33::  5::&6    MS&Sz(';&S#&SM'> ,#&.>>9WW+-=-B-B-D  oo))*=)O$%89Iz;R  CKK%K,HH"'	E  e <aeTE$$U+',( !-@Q1==@AUK __**;&+@E*RF(CRST!%MG  'T. As   "'OO$<O)Fr!   r   r!   c                	   t        |       d}|r| j                  j                  j                  j	                         }t        |j                  j                  |      }| j                  j                  |j                         t        |t              ra|j                         D ]M  \  }}t        |t              s|j                  j                         }	||	_        t#        |	j$                  |       O nt#        |j&                  |       t        |t              r@d|j(                  _        t#        |j(                  j,                  j                         |       nVt        |t.              rFd|j(                  _        |D ]0  }
t#        |j(                  j,                  j                         |
       2 ||_        || j                  j2                  }nt        |t              r|g}t        |t              rCt5        |j7                         D cg c]  }t        |t               c}      st9        d      |}n|D ci c]  }|| }}|s;| j                  j;                         }t=        |d       |r|j>                  |_         |S t5        |jC                         D cg c]  }t        |tD               c}      st9        dtD         d      | j                  jF                  jH                  D ci c]"  }tK        |jL                        |jN                  $ }}i }|j                         D ]=  \  }}tK        |      }||vr$tQ        jR                  ||j7                               |||<   ? g }|j                         D ]*  \  }}| j                  jU                  |      }
||v r||   }tW        |||      rt        |tX        tZ        f      rN|j]                  t_        |
t`        jb                  k(  |
je                         z  ||
      jg                  |             |j]                  t_        |
je                  d	      ||
d	      jg                  |d	             th        jk                  d
| d| d| dtm        |              |j]                  |
       |j]                  |
       - | j                  jo                  ||      }tq        |dd       |S c c}w c c}w c c}w c c}w )a  
        Returns a new DataFrame that replaces all null and NaN values in the specified
        columns with the values provided.

        Args:
            value: A scalar value or a ``dict`` that associates the names of columns with the
                values that should be used to replace null and NaN values in those
                columns. If ``value`` is a ``dict``, ``subset`` is ignored. If ``value``
                is an empty ``dict``, the method returns the original DataFrame.
            subset: A list of the names of columns to check for null and NaN values.
                In each case:

                    * If ``subset`` is not provided or ``None``, all columns will be included.

                    * If ``subset`` is empty, the method returns the original DataFrame.
            include_decimal: Whether to allow ``Decimal`` values to fill in ``IntegerType``
                and ``FloatType`` columns.

        Examples::

            >>> df = session.create_dataframe([[1.0, 1], [float('nan'), 2], [None, 3], [4.0, None], [float('nan'), None]]).to_df("a", "b")
            >>> # fill null and NaN values in all columns
            >>> df.na.fill(3.14).show()
            ---------------
            |"A"   |"B"   |
            ---------------
            |1.0   |1     |
            |3.14  |2     |
            |3.14  |3     |
            |4.0   |NULL  |
            |3.14  |NULL  |
            ---------------
            <BLANKLINE>
            >>> # fill null and NaN values in column "a"
            >>> df.na.fill(3.14, subset="a").show()
            ---------------
            |"A"   |"B"   |
            ---------------
            |1.0   |1     |
            |3.14  |2     |
            |3.14  |3     |
            |4.0   |NULL  |
            |3.14  |NULL  |
            ---------------
            <BLANKLINE>
            >>> # fill null and NaN values in column "a"
            >>> df.na.fill({"a": 3.14}).show()
            ---------------
            |"A"   |"B"   |
            ---------------
            |1.0   |1     |
            |3.14  |2     |
            |3.14  |3     |
            |4.0   |NULL  |
            |3.14  |NULL  |
            ---------------
            <BLANKLINE>
            >>> # fill null and NaN values in column "a" and "b"
            >>> df.na.fill({"a": 3.14, "b": 15}).show()
            --------------
            |"A"   |"B"  |
            --------------
            |1.0   |1    |
            |3.14  |2    |
            |3.14  |3    |
            |4.0   |15   |
            |3.14  |15   |
            --------------
            <BLANKLINE>
            >>> df2 = session.create_dataframe([[1.0, True], [2.0, False], [3.0, False], [None, None]]).to_df("a", "b")
            >>> df2.na.fill(True).show()
            ----------------
            |"A"   |"B"    |
            ----------------
            |1.0   |True   |
            |2.0   |False  |
            |3.0   |False  |
            |NULL  |True   |
            ----------------
            <BLANKLINE>

        Note:
            If the type of a given value in ``value`` doesn't match the
            column data type (e.g. a ``float`` for :class:`~snowflake.snowpark.types.StringType`
            column), this replacement will be skipped in this column. Especially,

                * ``int`` can be filled in a column with
                  :class:`~snowflake.snowpark.types.FloatType` or
                  :class:`~snowflake.snowpark.types.DoubleType`, but ``float`` cannot
                  filled in a column with :class:`~snowflake.snowpark.types.IntegerType`
                  or :class:`~snowflake.snowpark.types.LongType`.

        See Also:
            :func:`DataFrame.fillna`
        NTFz.All keys in value should be column names (str)zDataFrameNaFunctions.fillz(All values in value should be in one of  typesrs   rH   zgInput value type doesn't match the target column data type, this replacement was skipped. Column Name: , Type: , Input Value: rE   rD   rF   )9r5   r:   rJ   rK   rL   r	   rM   dataframe_na_fillrR   rS   r$   dictitemsr0   	value_maprQ   _1r   _2r   r.   rO   rP   r   r!   rT   rC   r^   rI   rV   r   rW   rX   valuesr   rZ   r[   r   r\   r    r
   r]   r_   r,   r   r   rc   r   r`   ra   rb   as__loggerwarningr(   selectr   )r;   r   r.   r@   r!   rf   rg   kventryr_   
value_dictrk   rh   ri   rj   normalized_value_dictrn   res_columnsr    s                       r+   fillzDataFrameNaFunctions.fill  sr   Z 	 ' ??++66;;=D#DII$?$?FCOO((0%&!KKM @DAq!!S) # 1 1 3#$2588Q?@ +399e<&#&&*

#*3::??+>+>+@&IFH-&+

#! KC.szz/B/B/DcJK"1C>__,,F$XFeT"EJJLAq
1c*AB D  J:@Ah(E/AJA__668F!<=!%M $**, 1BC
 :78@  //66
 uzz"ENN2
 
 !#)//1 	?OHe",X"6"*::5SS')9)>)>)@  :?!"56	? "2"8"8": !	(Hh//%%h/C00-h7:8_ "(Y
,CD#**CKKM A5#NRR ( $** #e < % #*/	
 "c(ec< OOFFNZ P!!)
/%eW
  &&s+ ""3'C!	(F ''t'DF$?aPY B B
s   S;
SS .'S%
to_replacec                |   t        |       d}|r7| j                  j                  j                  j	                         }t        |j                  j                  |      }| j                  j                  |j                         t        |t              r_|j                         D ]K  \  }}	|j                  j                         }
t        |
j                   |       t        |
j"                  |	       M nTt        |t$              r.|D ](  }	|j&                  j                         }
t        |
|	       * nt        |j(                  |       t        |t$              r.|D ](  }	|j*                  j                         }
t        |
|	       * nt        |j,                  |       t        |t.              r@d|j0                  _        t        |j0                  j4                  j                         |       nVt        |t$              rFd|j0                  _        |D ]0  }t        |j0                  j4                  j                         |       2 ||_        || j                  j8                  }nt        |t.              r|g}t;        |      dk(  r;| j                  j=                         }t?        |d       |r|j@                  |_!        |S t        |t              r|}nt        |tD        tF        f      rt        |tD        tF        f      rZt;        |      t;        |      k7  r#tI        dt;        |       dt;        |             tK        ||      D 	ci c]  \  }}	||	
 }}}	n|D ci c]  }|| }}n||i}|s;| j                  j=                         }t?        |d       |r|j@                  |_!        |S tM        |j                         D 	cg c]'  \  }}	t        |tN              xr t        |	tN              ) c}	}      stI        dtN         d	      | j                  jP                  jR                  D ci c]"  }tU        |jV                        |jX                  $ }}|D ch c]  }tU        |       }}|D ]*  }||vst[        j\                  ||j_                                g }|j                         D ]G  \  }}| j                  ja                  |      }||v rd}|j                         D ]  \  }}tc        |||
      rgtc        |||
      rY||je                         n|tg        |      k(  }|tg        d      n
tg        |      }||ji                  ||      nti        ||      }{tj        jm                  d| d| d| dto        |       d| dto        |               |3|jq                  |      js                  |      }|ju                  |       $|ju                  |       7|ju                  |       J | j                  jw                  ||      }ty        |dd       |S c c}	}w c c}w c c}	}w c c}w c c}w )a+  
        Returns a new DataFrame that replaces values in the specified columns.

        Args:
            to_replace: A scalar value, or a list of values or a ``dict`` that associates
                the original values with the replacement values. If ``to_replace``
                is a ``dict``, ``value`` and ``subset`` are ignored. To replace a null
                value, use ``None`` in ``to_replace``. To replace a NaN value, use
                ``float("nan")`` in ``to_replace``. If ``to_replace`` is empty,
                the method returns the original DataFrame.
            value: A scalar value, or a list of values for the replacement. If
                ``value`` is a list, ``value`` should be of the same length as
                ``to_replace``. If ``value`` is a scalar and ``to_replace`` is a list,
                then ``value`` is used as a replacement for each item in ``to_replace``.
            subset: A list of the names of columns in which the values should be
                replaced. If ``cols`` is not provided or ``None``, the replacement
                will be applied to all columns. If ``cols`` is empty, the method
                returns the original DataFrame.
            include_decimal: Whether to allow ``Decimal`` values to replace ``IntegerType``
                and ``FloatType`` values.
        Examples::

            >>> df = session.create_dataframe([[1, 1.0, "1.0"], [2, 2.0, "2.0"]], schema=["a", "b", "c"])
            >>> # replace 1 with 3 in all columns
            >>> df.na.replace(1, 3).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |3    |3.0  |1.0  |
            |2    |2.0  |2.0  |
            -------------------
            <BLANKLINE>
            >>> # replace 1 with 3 and 2 with 4 in all columns
            >>> df.na.replace([1, 2], [3, 4]).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |3    |3.0  |1.0  |
            |4    |4.0  |2.0  |
            -------------------
            <BLANKLINE>
            >>> # replace 1 with 3 and 2 with 3 in all columns
            >>> df.na.replace([1, 2], 3).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |3    |3.0  |1.0  |
            |3    |3.0  |2.0  |
            -------------------
            <BLANKLINE>
            >>> # the following line intends to replaces 1 with 3 and 2 with 4 in all columns
            >>> # and will give [Row(3, 3.0, "1.0"), Row(4, 4.0, "2.0")]
            >>> df.na.replace({1: 3, 2: 4}).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |3    |3.0  |1.0  |
            |4    |4.0  |2.0  |
            -------------------
            <BLANKLINE>
            >>> # the following line intends to replace 1 with "3" in column "a",
            >>> # but will be ignored since "3" (str) doesn't match the original data type
            >>> df.na.replace({1: "3"}, ["a"]).show()
            -------------------
            |"A"  |"B"  |"C"  |
            -------------------
            |1    |1.0  |1.0  |
            |2    |2.0  |2.0  |
            -------------------
            <BLANKLINE>

        Note:
            If the type of a given value in ``to_replace`` or ``value`` doesn't match the
            column data type (e.g. a ``float`` for :class:`~snowflake.snowpark.types.StringType`
            column), this replacement will be skipped in this column. Especially,

                * ``int`` can replace or be replaced in a column with
                  :class:`~snowflake.snowpark.types.FloatType` or
                  :class:`~snowflake.snowpark.types.DoubleType`, but ``float`` cannot
                  replace or be replaced in a column with :class:`~snowflake.snowpark.types.IntegerType`
                  or :class:`~snowflake.snowpark.types.LongType`.

                * ``None`` can replace or be replaced in a column with any data type.

        See Also:
            :func:`DataFrame.replace`
        NTFr   zDataFrameNaFunctions.replacez<to_replace and value lists should be of the same length.Got z and z1All keys and values in value should be in one of ru   rs   znInput key or value type doesn't match the target column data type, this replacement was skipped. Column Name: rv   z, Input Key: rw   rx   rD   rF   )=r5   r:   rJ   rK   rL   r	   rM   dataframe_na_replacerR   rS   r$   rz   r{   replacement_maprQ   r   r}   r~   r   to_replace_listto_replace_valuer   r   r0   r.   rO   rP   r!   rT   rU   rV   r   rW   rX   r1   r2   rI   ziprC   r   rZ   r[   r   r\   r    r
   r]   r^   r_   r,   rb   r   r   r   r   r(   	otherwiser   rc   r   r   )r;   r   r   r.   r@   r!   rf   rg   r   r   r   r_   rh   replacementri   rj   rk   rl   rn   r   r    	case_whenkeycondreplace_values                            r+   replacezDataFrameNaFunctions.replace  s   N 	 ' ??++66;;=D#DII$B$BDICOO((0*d+&,,. <DAq//335E.uxx;.uxx;< J1# 9A//335E.ua89 +3+?+?L%* 9AJJNN,E.ua89 +399e<&#&&*

#*3::??+>+>+@&IFH-&+

#! KC.szz/B/B/DcJK"1C >__,,F$XFv;!__668F!?@!%Mj$'$K
T5M2%$/z?c%j0$":/uSZLB 
 58
E4J"KDAq1a4"KK"K1;<Aq%x<<%u-K__668F!?@!%M (--/ Aq 1BC Hq"FGH
 C78@  //66
 uzz"ENN2
 
 IO"OH:h#7"O"O#: 	"*::5SS')9)>)>)@ 	 "2"8"8": #	(Hh//%%h/C22 	"-"3"3"5 JC> (7 B (7
 14s{{}#S/5:]D	E
  )4 &NN4?!%dM!: "  JJR T%%-JmC5c T,,17(4;-I%0 ( ) 3 3C 8 < <X FI&&y1  &&s+""3'G#	(J ''t'DF$BQRSW #L<
 #Ps   X#
X) ,X.
)'X4X9)r<   r8   r"   N)rB   NNT)NT)NNT)__name__
__module____qualname____doc__r=   r   r0   r   r%   r   r   r&   rr   r   r   r   r    r-   r+   r7   r7   Z   s   Q$   $6:uu u sHSM123	u
 u 
(u un  7;	X !&X[$sK'7"889X sHSM123X 	X X 
(X Xt  FJ6:m !&m[!k)*,
m k8K+@@ABm sHSM123m m m 
(m mr-   r7   )F)3r`   sysloggingr   typingr   r   r   snowflake.snowpark	snowflake0snowflake.snowpark._internal.analyzer.expressionr   &snowflake.snowpark._internal.ast.utilsr   r	   *snowflake.snowpark._internal.error_messager
   &snowflake.snowpark._internal.telemetryr   r   'snowflake.snowpark._internal.type_utilsr   r   r   "snowflake.snowpark._internal.utilsr   r   snowflake.snowpark.columnr   snowflake.snowpark.functionsr   r   r   snowflake.snowpark.typesr   r   r   r   r   r   version_infor   collections.abcr   r   r&   r,   r4   r0   r5   r7   r   r-   r+   <module>r      s     
  ( (  F W T 
 E , 7 7  v(
H
 "  
	: K 
5HU33E-F$G 5D 5G
 G
r-   