
    eiT                    *   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ 	 d dlZd dlZd dlZd dlmZ d dlZd dlZd dlmZ d dlZd dlmZ d dlm Z m!Z!m"Z"m#Z#m$Z$ 	 d dl%Z&	 d dl'm(Z) 	 d dl*m+Z, ejZ                  jP                  Z. G d d      Z/d Z0d	 Z1d
 Z2ejf                  d        Z4ejf                  d        Z5ejf                  d        Z6 ejf                  d      d        Z7ejf                  d        Z( ejf                  ddgddg      d        Z8ejZ                  jV                  d        Z9d Z:ejZ                  jV                  d        Z;d Z<d Z=ejZ                  jV                  d        Z>ejZ                  jV                  d        Z?ejZ                  jV                  d        Z@ejZ                  jV                  d         ZAejZ                  jV                  d!        ZBejZ                  jV                  d"        ZCd# ZDd$ ZEd% ZFejZ                  j                  d&g d'      d(eHd)eId*eIfd+       ZJd, ZKd- ZLejZ                  jV                  d.        ZMejZ                  jV                  d/        ZNejZ                  jV                  d0        ZOd1 ZPd2 ZQejZ                  j                  d3 ej                  d4d5      d6d7gg      ejZ                  j                  d8ddg      ejZ                  jV                  d9                      ZSejZ                  jV                  d:        ZTejZ                  jV                  ejZ                  j                  d;               ZVd< ZWd= ZXejZ                  jV                  d>        ZYejZ                  jV                  dd?       ZZejZ                  jV                  d@        Z[ejZ                  jJ                  ejZ                  jV                  dA               Z\ejZ                  jV                  dB        Z]ejZ                  jV                  dC        Z^ejZ                  jV                  dD        Z_ejZ                  jJ                  ejZ                  jV                  dE               Z`ejZ                  jV                  dF        ZaejZ                  jV                  dG        ZbddHZcejZ                  jJ                  ejZ                  jV                  dI               ZdejZ                  jV                  dJ        ZeejZ                  jJ                  ejZ                  jV                  dK               ZfejZ                  jV                  dL        ZgejZ                  jV                  dM        ZhejZ                  jV                  dN        ZiejZ                  jV                  dO        ZjejZ                  jV                  dP        ZkejZ                  jV                  dQ        ZlejZ                  jJ                  ejZ                  jV                  dR               ZmejZ                  jV                  ejZ                  j                  dSdT dU g      dV               ZnejZ                  jV                  ejZ                  j                  dWddg      ejZ                  j                  dSdX dY g      dZ                      ZoejZ                  j                  dSd[ d\ g      d]        ZpejZ                  j                  dSd^ d_ g      d`        Zqda Zrdb ZsejZ                  jV                  ejZ                  jJ                  dc               Ztdd Zude Zvdf Zwdg Zxdh Zydi Zzdj Z{ejZ                  jV                  dk        Z|dl Z}ddmZ~dn Zdo Zdp ZejZ                  jV                  dq        ZejZ                  jV                  dr        ZejZ                  jV                  ds        ZejZ                  jV                  dt        ZejZ                  jV                  du        ZejZ                  jV                  dv        ZejZ                  jV                  dw        ZejZ                  jV                  dx        ZejZ                  jV                  dy        ZejZ                  jV                  dz        Zd{ Zd| Zd} Zd~ ZejZ                  j                  dddg      d        Zd ZejZ                  jV                  d        ZejZ                  jV                  d        ZejZ                  jV                  d        ZejZ                  jV                  d        Zd Zd ZejZ                  jV                  ejZ                  j                  dddg      ejZ                  j                  dddg      ejZ                  j                  dWddg      ejZ                  j                  dg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg      d                                    ZejZ                  jJ                  d        Zejf                  d        ZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  d        ZejZ                  jV                  d        ZejZ                  jJ                  d        ZejZ                  jV                  d        ZejZ                  jV                  d        ZejZ                  jV                  d        Zd Zd Zd Zd ZejZ                  jV                  d        ZejZ                  jV                  d        Zd ZejZ                  jX                  d        ZejZ                  jX                  d        Zd ZejZ                  jX                  d        ZejZ                  jJ                  d        ZejZ                  jJ                  ejZ                  j                  dg d      d               Zd Zd Zd ZejZ                  jJ                  d        ZejZ                  jJ                  d        ZejZ                  jJ                  d        Zd Zd Zd ZejZ                  jJ                  ejZ                  j                  dg d      d               Zd ZejZ                  jV                  ejZ                  jJ                  d               ZejZ                  jV                  ejZ                  jJ                  ejZ                  j                  ej                  dk(  d      d                      ZejZ                  jV                  ejZ                  jJ                  d               ZejZ                  jV                  dÄ        ZejZ                  jV                  ejZ                  jJ                  dĄ               Zdń ZdƄ ZejZ                  jV                  ejZ                  jJ                  dǄ               ZejZ                  jV                  ejZ                  jJ                  dȄ               ZejZ                  jV                  ejZ                  jJ                  dɄ               ZejZ                  jV                  ejZ                  jJ                  dʄ               ZejZ                  jV                  d˄        ZejZ                  jV                  ejZ                  jJ                  d̄               ZejZ                  jV                  ejZ                  jJ                  d̈́               Zd΄ ZejZ                  jJ                  ejZ                  jV                  dτ               ZejZ                  jV                  ejZ                  jJ                  dЄ               Zdф Z	 dd҄ZejZ                  jV                  dӄ        ZejZ                  jV                  ejZ                  jJ                  dԄ               ZdՄ Zdք Zdׄ ZejZ                  jV                  ejZ                  j                  d؄               Zdل ZejZ                  jJ                  dڄ        ZڐddۄZd܄ Zd݄ ZejZ                  jV                  dބ        ZejZ                  jV                  d߄        ZejZ                  jV                  d        ZejZ                  jV                  d        ZejZ                  jV                  ejZ                  jJ                  d               ZejZ                  jV                  ejZ                  jJ                  d               ZejZ                  jV                  ejZ                  jJ                  d               Zd Zd Zd Zd Zd ZejZ                  j                  ejZ                  jV                  d               Zd ZejZ                  jV                  d        ZejZ                  jV                  d        ZejZ                  jV                  ejZ                  jJ                  d               Zd ZejZ                  jV                  d        ZejZ                  jV                  ejZ                  j                  d               ZdZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  j                  dddg      d        ZejZ                  j                  dddg      d         Zd Zd ZejZ                  j                  dd      d        Z d ZejZ                  jV                  d        Zd Zd	 ZejZ                  j                  dddg      d
        Zd Zd Zd Zy# e$ r dZY w xY w# e$ r dZ&Y w xY w# e$ r dZ)Y w xY w# e$ r dZ,Y w xY w(      N)copytree)quote)is_threading_enabled)FSProtocolClassProxyHandler_configure_s3_limited_user_filesystem_uri
change_cwdc                       e Zd Zd ZddZy)TableStreamWrapperc                     || _         y Ntable)selfr   s     \/var/www/html/glpi_dashboard/venv/lib/python3.12/site-packages/pyarrow/tests/test_dataset.py__init__zTableStreamWrapper.__init__F   s	    
    Nc                 8    | j                   j                  |      S r   )r   __arrow_c_stream__)r   requested_schemas     r   r   z%TableStreamWrapper.__arrow_c_stream__I   s    zz,,-=>>r   r   )__name__
__module____qualname__r   r    r   r   r   r   E   s    ?r   r   c           	      4   dd l }dd l} |j                   ddd      } |j                  d      }|j                  g d      }g }t	        |       D ].  }|j                  ||t        |      t        |      f       ||z  }0 t        j                  |g d      S )	Nr   i        )days)greenblueyellowredorange)dateindexvaluecolorcolumns)
datetime	itertools	timedeltacyclerangeappendfloatnextpd	DataFrame)nr+   r,   dayintervalcolorsdatais           r   _generate_datar;   M   s    
(

D!Q
'C!x!!q)H__IJFD1X S!U1XtF|45x <<&IJJr   c           
         t        j                  t        j                  dt        j                               t        j                  dt        j                               t        j                  dt        j
                               t        j                  dt        j                               g      }t         j                  j                  | |d      }|j                         S )Nr%   r&   r'   r(   F)schemapreserve_index)
par=   fielddate32int64float64stringTablefrom_pandasreplace_schema_metadata)dfr=   r   s      r   _table_from_pandasrI   ]   s    YY
%
"((*%
"**,'
"))+&	 F HH  F5 IE((**r   c                 :   | j                         D ]|  }|j                         5 }t        |t        j                        sJ |j
                  rJ |j                         sJ |j                         sJ |j                         rJ 	 d d d        ~ y # 1 sw Y   xY wr   )	get_fragmentsopen
isinstancer?   
NativeFileclosedseekablereadablewritable)datasetfragmentnfs      r   +assert_dataset_fragment_convenience_methodsrV   h   s    ))+ %]]_ 	%b"--000yy =;;= =;;= ={{}$$}	% 	%%	% 	%s   A!BB	c                     t        j                         } ddg}t        |      D ]  \  }}| d| d}| j                  |       | j	                  |      5 }t        t        d            t        t        t        t        d                  t        t        t        t        d                  |gdz  t        d      D cg c]  }|dz  t        |dz        d c}g}t        j                  dt        j                         fd	t        j                         fd
t        j                         fdt        j                         fdt        j                  t        j                         t        j                         d      fg      }t        j                   ||      }	t        j"                  j%                  |	g      }
t'        j(                  |
|       d d d         | S c c}w # 1 sw Y   xY w)Nzsubdir/1/xxxzsubdir/2/yyyz/file.parquetr      abi64f64strconststructr=   )fs_MockFileSystem	enumerate
create_diropen_output_streamlistr/   mapr1   r_   r?   r=   rB   rC   rD   ra   record_batchrE   from_batchespqwrite_table)mockfsdirectoriesr:   	directorypathoutjr9   r=   batchr   s              r   rn   rn   s   s   !F 	K
 "+. '9E!H-)$&&t, 	'U1XSa)*SeAh'(a8=aA1q1u3q1u:.AD YY
#

%		$"((*%299288:BIIK%HIJ  F OOD8EHH))5'2ENN5#&%	' 	''. M B	' 	's    A(G!=GC7G!G!!G+	c            
      v   t        j                         } t        j                  t        j                  ddgt        j
                               t        j                  j                  t        j                  ddgt        j
                               ddg      d      }t        j                  t        j                  dd	gt        j                               t        j                  j                  t        j                  ddgt        j                               d
dg      d      }| j                  d       d}| j                  |      5 }t        j                  ||       ddd       d}| j                  |      5 }t        j                  ||       ddd       | ||fS # 1 sw Y   @xY w# 1 sw Y   xY w)z
    Creates a _MockFileSystem with two parquet files that have promotable schemas.
    - file1: value: int8, dictionary: dictionary<int8, string>
    - file2: value: uint16, dictionary: dictionary<int16, string>
    r      typer   r[   r\   r'   
dictionaryrY      dcz
subdir/zzzzsubdir/zzz/file1.parquetNzsubdir/zzz/file2.parquet)rc   rd   r?   r   arrayint8DictionaryArrayfrom_arraysuint16int16rf   rg   rl   rm   )rn   table1table2path1rr   path2s         r   promotable_mockfsr      sa    !FXX1a&rwwy1((44HHaV"''),#J
 F XX1a&ryy{3((44HHaV"((*-#J
 F l#&E		"	"5	) $S
vs#$ 'E		"	"5	) $S
vs#$ 5%$ $$ $s   F#?F/#F,/F8c                    	 ddl m}m} ddlm}  |       fdt               		fd}| j                  |d|        | |            }t        j                  	fd       }||fS )	Nr   )LocalFileSystemPyFileSystemr   )r   c                 ^    | D ch c]  }j                  t        |             c}S c c}w r   )normalize_pathr_   )pathsplocalfss     r   
normalizedz#open_logging_fs.<locals>.normalized   s&    8=>1&&s1v.>>>s   !*c                     j                  t        |            }j                  |       | j                  j	                  |      S r   )r   r_   add_fsopen_input_file)r   rq   r   openeds     r   r   z(open_logging_fs.<locals>.open_input_file   s8    %%c$i0

4xx''--r   r   c              3      K   j                          	 d          |       k(  sJ y #         |       k(  sJ w xY wwr   )clear)expected_openedr   r   s    r   assert_opensz%open_logging_fs.<locals>.assert_opens   sI     	Ef%O)DDDD:f%O)DDDDs   A- AAA)	
pyarrow.fsr   r   test_fsr   setsetattr
contextlibcontextmanager)
monkeypatchr   r   r   r   rc   r   r   r   r   s
          @@@r   open_logging_fsr      so    8%G? UF. &7I	l7+	,BE E |r   module)scopec           
         | j                   j                  j                  d       | j                   j                  j                  d       t        d      }t	        j
                         }t        |      }t        d||dz        D cg c]  }|j                  |||dz  z     c}\  }}}}|j                  d       t        |      }t        t        d||dz        D cg c]  }|j                  |||dz  z     c}      D ]D  \  }}	d| d	}
|j                  |
      5 }t        j                  t        |	      |       d d d        F |j                  d
       |j                  |j                   j"                  j$                  |j&                  g      D ]b  \  }}	d|d    d|d    }| d}
|j                  |       |j                  |
      5 }t        j                  t        |	      |       d d d        d |j                  d       |j                  |j                   j"                  j(                  |j                   j"                  j*                  g      D ]b  \  }}	d|d    d|d    }| d}
|j                  |       |j                  |
      5 }t        j                  t        |	      |       d d d        d |j                  d       |j                  d      D ]Y  \  }}	d| }| d}
|j                  |       |j                  |
      5 }t        j                  t        |	      |       d d d        [ |S c c}w c c}w # 1 sw Y   QxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)Npandasparquet  r   r{   plain
   zplain/chunk-rX   r=   zschema//r   z/chunk.parquethivez
hive/year=z/month=
hive_colorr(   zhive_color/color=)configpyarrowrequiresr;   rc   rd   lenr/   ilocrf   re   rg   rl   rm   rI   groupbyr%   dt	dayofweekr(   yearmonth)requestrH   rn   r5   r:   df_adf_bdf_cdf_dchunkrq   rr   partfolders                 r   multisourcefsr      sN   NN##H-NN##I.		B!F 	BA9>q!QT9JKAbgga!Q$/KD$d gD	AU1aB=OPtyy1QU73PQ ;5aS)&&t, 	;NN-e4c:	; 	;; h||TYY\\%;%;TZZ$HI ;e47)1T!WI.(&!&&t, 	;NN-e4c:	; 	;	; f||TYY\\%6%6		8J8J$KL ;ed1gYgd1gY7(&!&&t, 	;NN-e4c:	; 	;	; l#||G, ;e$TF+(&!&&t, 	;NN-e4c:	; 	;	; MM L Q	; 	;	; 	;	; 	;	; 	;s<   L>M M M M" M/M	M	"M,	/M8	c           
         t        j                         }t        j                  dd      }t        j                  d      }t        j
                  t        j                  t        j                  dt        j                               t        j                  dt        j                               g            |_        t        j                  | |||      }|j                         S )NsubdirT	recursivegroupkey)dsParquetFileFormatrc   FileSelectorFileSystemFactoryOptionsDirectoryPartitioningr?   r=   r@   int32rD   partitioningFileSystemDatasetFactoryfinish)rn   formatselectoroptionsfactorys        r   rS   rS     s    !!#Fx48H))(3G33
		HHWbhhj)HHUBIIK(
 	G ))&(FGLG>>r   TFthreadedserial)paramsidsc                 B    | j                    G fdd      } |       S )z]
    Fixture which allows dataset scanning operations to be
    run with/without threads
    c                   F    e Zd Z fdZ fdZd Zd Zd Zd Zd Z	d Z
y	)
dataset_reader.<locals>.readerc                     | _         y r   use_threads)r   r   s    r   r   z'dataset_reader.<locals>.reader.__init__+  s    *Dr   c                 .    d|v rt        d      |d<   y )Nr   z9Invalid use of dataset_reader, do not specify use_threads)	Exception)r   kwargsr   s     r   _patch_kwargsz,dataset_reader.<locals>.reader._patch_kwargs.  s)    &$& & %0F=!r   c                 H    | j                  |        |j                  di |S Nr   )r   to_tabler   rS   r   s      r   r   z'dataset_reader.<locals>.reader.to_table5  s&    v&#7##-f--r   c                 H    | j                  |        |j                  di |S r   )r   
to_batchesr   s      r   r   z)dataset_reader.<locals>.reader.to_batches9  &    v&%7%%///r   c                 H    | j                  |        |j                  di |S r   )r   scannerr   s      r   r   z&dataset_reader.<locals>.reader.scanner=  s$    v&"7??,V,,r   c                 J    | j                  |        |j                  |fi |S r   )r   head)r   rS   num_rowsr   s       r   r   z#dataset_reader.<locals>.reader.headA  s&    v&7<<3F33r   c                 J    | j                  |        |j                  |fi |S r   )r   take)r   rS   indicesr   s       r   r   z#dataset_reader.<locals>.reader.takeE  s&    v&7<<2622r   c                 H    | j                  |        |j                  di |S r   )r   
count_rowsr   s      r   r   z)dataset_reader.<locals>.reader.count_rowsI  r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   s   r   readerr   )  s+    	+	0	.	0	-	4	3	0r   r   )param)r   r   r   s     @r   dataset_readerr     s"     --K"0 "0H 8Or   c           	      *
   t        j                  t        j                  dt        j                               g      }t	        j
                         }ddg}t        dd      D cg c]  }t	        j                  d      |k(   }}t        ||      D cg c]  \  }}|j                  || |       }}}t	        j                  d      t	        j                  d      k(  }	t	        j                  |||| |		      }
t        j                  j                  |||| ||	
      }|
|fD ]"  }t        |t        j                        sJ t        |j                  t        j
                        sJ |j                  j                  |	      sJ t!        |j"                        t!        |      k(  sJ t%        |j'                               }t        |||      D ]2  \  }}}|j                  j                  |      sJ |j(                  |k(  sJ t        |j                  t        j
                        sJ t        |t        j*                        sJ |j,                  dgk(  sJ |j.                  dk(  sJ t%        |j1                               }|j.                  t3        |      cxk(  rdk(  sJ  J t        |d   t        j*                        sJ |d   j(                  |k(  sJ |d   j,                  dgk(  sJ |d   j.                  dk(  r3J  t%        |j'                  t	        j                  d      dk(              }t3        |      dk(  r#J  t	        j                  ||||       }|j                  j                  t	        j                  d            sJ t        j                  j                  ||||       }|j                  j                  t	        j                  d            sJ |j'                         D ]2  }|j                  j                  t	        j                  d            r2J  t5        j6                  t8        d      5  t	        j                  |||       d d d        t5        j6                  t8        d      5  t	        j                  |||d       d d d        t5        j6                  t8        d      5  t        j                  j                  ||       d d d        y c c}w c c}}w # 1 sw Y   xY w# 1 sw Y   fxY w# 1 sw Y   y xY w)Nr`   subdir/1/xxx/file0.parquetsubdir/2/yyy/file1.parquetr   rY   r   leveli9  )r=   r   
filesystemroot_partition)r=   r   r   
partitionsr   r   filterrv   r=   r   r   Tzincorrect typematch)r=   r   r   r   )r?   r=   r@   rB   r   r   r/   zipmake_fragmentscalarFileSystemDataset
from_pathsrM   r   partition_expressionequalsr   filesrh   rK   rq   ParquetFileFragment
row_groupsnum_row_groupssplit_by_row_groupr   pytestraises	TypeError)rn   r=   file_formatr   xr   rq   r   	fragmentsr   dataset_from_fragmentsdataset_from_pathsrS   rT   	partitionrow_group_fragmentss                   r   test_filesystem_datasetr  P  s   YY
"((*% F &&(K)+GHE16q!=A"((6"a'=J=#&uj#9;T4 **4> ;I ;XXg&"))D/9N11&. --88f[Vn 9 
 +,>? #'2#7#7888'.."*>*>???++22>BBB7==!SZ///..01	),Y
E)J 	>%Hi0077	BBB==D(((hoor/C/CDDDh(>(>???&&1#---**a///"&x'B'B'D"E**c2E.FK!KKKKK1!4b6L6LMMM&q)..$666&q)44;;;&q)88A===	> ..bhhw6G16L.MN	9~"""/#4 ""&G ''..ryy??? ""--f[V . G ''..ryy???))+ E,,33BIIdODDDE 
y(8	9 =
YV<= 
y(8	9 C
Yv$/	CC 
y(8	9 G
''	+'FG G{ >;j= =C CG Gs0   #S&S+S1>S=;"T	1S:=T	Tc                    t        j                  t        j                  dt        j                               g      }t	        j
                         }dg}t        j                  j                  |||t        j                               }|j                          t        j                  t              5  | j                  |       d d d        y # 1 sw Y   y xY w)Nf1znonexistingfile.arrowr   )r?   r=   r@   rB   r   IpcFileFormatr  r  rc   r   rK   r  r  FileNotFoundErrorr   )r   r=   r  r   rS   s        r   1test_filesystem_dataset_no_filesystem_interactionr    s    YY
rxxz" F ""$K$%E ""--f[%%' . G  
(	) )() ) )s   2CCc                    t        | t        j                        sJ t        | j                  t        j
                        sJ t	        j                  g dt	        j                               }t	        j                  g dt	        j                               }|j                  |       D ]b  }t        |t        j                        sJ |j                  d      j                  |      sJ |j                  d      j                  |      rbJ  |j                  |       j                         D ]D  }t        |t        j                        sJ t        |j                   t        j"                        rDJ  |j%                  |       }t        |t        j&                        sJ t)        |      dk(  sJ t        j*                  d      dk(  }| j%                  d|      }|j-                  d	      j/                         }|d   ddgk(  sJ |d
   ddgk(  sJ t1        |d	         ddgk(  sJ t1        |d         ddgk(  sJ t        j*                  d      dk(  }| j%                  d|      }|j-                  d	      j/                         }|d   g dk(  sJ |d
   g dk(  sJ |d	   g dk(  sJ |d   g dk(  sJ t        j*                  d      t        j*                  d
      t        j*                  d      dk(  d}| j%                  d|      }|j-                  d      j/                         }t3        |      g dk(  sJ |d   g dk(  sJ |d
   g dk(  sJ |d   g dk(  sJ t5        |        y )Nr   r   rv   rY   r{   rw   r   r   r   r]   T)r   r   r   r^         ?rv   r   xxxyyy)ra   r\   1)r   r{   r   r{   )r!        @r!  r%  )r   r   rv   rv   )r"  r"  r#  r#  )r]   r^   new)r   r*   )
r   r   r   r   rv   rv   rY   rY   r{   r{   )
        r'  r!  r!         @r(        @r)  r%  r%  r&  )
FFTTFFFFTT)rM   r   Datasetr=   r?   Schemar~   rB   rC   r   RecordBatchcolumnr	  r   scan_batchesTaggedRecordBatchrT   Fragmentr   rE   r   r@   sort_by	to_pydictsortedrh   rV   )	rS   r   expected_i64expected_f64rt   r   	conditionresult
projections	            r   test_datasetr9    s:   grzz***gnnbii000 88O"((*=L88O"**,?L**73 4%000||A%%l333||A%%l3334
  ''0==? 7%!5!5666%.."++6667 ##G,EeRXX&&&u:1$I$yAF^^G$..0F%=QF"""%=RH$$$&/"q!f,,,&- UEN222 )S0I$yAF^^G$..0F%=L(((%=0000'?l***%=8888 xxxxxx(C/J
 $
CF^^E",,.F<0000%=::::%= ; ; ; ;%= 7 7 7 7/8r   c                 F   | \  }}}t        j                  |||gt        j                               }t        j                  t
        j                  d      5  |j                          d d d        |j                  d      }t        j                  t        j                  dt        j                               t        j                  dt        j                  t        j                         t        j                                     g      }|j                  |      sJ |j                  |      }|j!                         }|j#                  d       t        j$                  t        j&                  d	d
gddggt        j                               t        j&                  t
        j(                  j+                  t        j,                  dd	gt        j                               ddg      t
        j(                  j+                  t        j,                  d	dgt        j                               ddg      g      d      }	|j                  |	      sJ |j                  dd	      }
|
j                  d      j.                  t        j                         k7  sJ y # 1 sw Y   0xY w)NzCUnable to merge: Field value has incompatible types: int8 vs uint16r   
permissivepromote_optionsr'   rz   Tfullr   rv   rY   r{   rw   r   r[   r\   r|   r}   ry   )r=  r  )r   r   r   r  r  r?   ArrowTypeErrorinspectr=   r@   r   rz   r   rD   r	  r   r   validater   chunked_arrayr   r   r~   rx   )r   rn   r   r   r   r=   expected_schemarS   r   expected_tableinspected_schema_one_frags              r   -test_dataset_factory_inspect_schema_promotionrG    s   ,FE5)) 4 4 6G 

S
  		 __\_:Fii
"((*%
r}}RXXZEF	! O ==)))nnV$GE	NNNXX!!Aq6Aq6"2D&&**!Qbhhj1c
 **!Qbhhj1c
	(
 	 N <<''' !($ !0 !3$**7388BHHJFFFG s   JJ c                    | \  }}}t        j                  |||gt        j                               }t        j                  t
        d      5  |j                  d       d d d        t        j                  t
        d      5  |j                  d       d d d        t        j                  t
        d      5  |j                  d	       d d d        y # 1 sw Y   vxY w# 1 sw Y   LxY w# 1 sw Y   y xY w)
Nz#Invalid promote_options: bad_optionr   
bad_optionr<  z<Fragment count must be a non-negative int or None; got 'one'one)r  z9Fragment count must be a non-negative int or None; got -1)r   r   r   r  r  
ValueErrorrA  )r   rn   r   r   r   s        r   'test_dataset_factory_inspect_bad_paramsrM    s    ,FE5)) 4 4 6G 
z)N	O 656 
X
 ) 	%()
 
U
 & 	"%& &6 6) )
& &s$   CC!9C-C!C*-C6c                 b    | j                  dd      }t        |      }|j                  dk(  sJ y )N      )fragment_readaheadbatch_readahead   )r   r2   num_columns)rS   r   rt   s      r   test_scanner_optionsrU  *  s4      B JGME!!!r   c                    |j                  | t        j                               }t        |t        j
                        sJ t        j                  t        j                        5  |j                  | dg       d d d        |j                  | dgt        j                               }|j                  | j                  k(  sJ |j                  t        j                  dt        j                         fg      k(  sJ t        |t        j
                        sJ |j                         }|j                         D ].  }|j                  |j                  k(  sJ |j                  dk(  r.J  ||j!                         j#                         k(  sJ |j                  |j                  k(  sJ t%        |j&                        D ]=  }t        j(                  |g      }|j+                  |      |j+                  |      k(  r=J  t        j                  t        j,                        5  |j+                  t        j(                  |j&                  g             d d d        |j&                  |j/                         k(  sJ |j                  | g dt        j                               }|j                         }g d}|j0                  |k(  sJ |j3                  d      }|d	   j5                         d
gdz  dgdz  z   k(  sJ |d   j5                         dgdz  dgdz  z   k(  sJ |d   j5                         dgdz  k(  sJ |d   j5                         dgdz  k(  sJ y # 1 sw Y   xY w# 1 sw Y   xY w)N)memory_poolunknownr)   r]   )r*   rW  r   )
__filename__fragment_index__batch_index__last_in_fragmentrZ  rY  r   r   r   r   r[  r   r\  T)r   r?   default_memory_poolrM   r   Scannerr  r  ArrowInvaliddataset_schemar=   projected_schemarB   r   r   rT  	to_readerread_allr/   r   r~   r   ArrowIndexErrorr   column_namesr1  	to_pylist)	rS   r   r   r   rt   r:   r   expected_namessorted_tables	            r   test_scannerri  1  s@   $$R335 % 7Ggrzz***	r	' =w<= $$Wug131G1G1I % KG!!W^^333##ryy5"((*2E1F'GGGGgrzz***E##% &||w77777  A%%%& G%%'002222<<7333335>>" <((A3-zz'"gll7&;;;;< 
r))	* 1RXXu~~./01 >>W//1111$$W 7M 241G1G1I	 % KG
 E=N///==!34L%//1	%&*	%&*	+, , , *+557QC!GqcAg<MNNN(224b@@@,-779dVb[HHHQ= =&1 1s   %M 0M!M!M+c                    t        j                         }t        j                         }t        j                  |       	 |j	                         }t
        j                  j                  |       }|j                         }|j	                         |kD  sJ 	 t        j                  |       y # t        j                  |       w xY wr   )	r?   r]  system_memory_poolset_memory_poolbytes_allocatedr   r^  from_datasetr   )rS   old_poolpoolallocated_beforer   _s         r   test_scanner_memory_poolrs  b  s     %%'H   "Dt%//1**))'2##%(8888
8$8$s   AB* *Cc                    |j                  | d      }|t        j                  j                  g | j                        k(  sJ |j                  | ddg      j                         }|ddgik(  sJ |j                  | ddgt        j                  d      dkD        j                         }|dddgik(  sJ |j                  | d	dg      j                         }|dt        t        d
            dz  ik(  sJ t        | j                               }|j                  ddg      j                         }|ddgik(  sJ |j                  d	dg      j                         }|dt        t        d
            ik(  sJ y )Nr   rb   r   r]   r)   rv   r*   r   rY      r   )r   r?   rE   rk   r=   r2  r   r@   rh   r/   r2   rK   )rS   r   r7  rT   s       r   	test_headrw  u  s{     !,FRXX**2gnn*EEEE  !eW =GGIFeaS\!!!  !eW(*!(; ! ==FY[ eaV_$$$  $ @JJLFeT%(^a/0000G))+,H]]1ug].88:FeaS\!!!]]4%]1;;=FeT%(^,,,,r   c                    t        | j                               }ddgt        j                  ddg      fD ]9  }|j	                  |      j                  |      }|j                  ||      |k(  r9J  t        j                  t              5  |j                  |t        j                  dg             d d d        ddgt        j                  ddg      fD ]7  }|j                  | |      |j	                  |       j                  |      k(  r7J  t        j                  t              5  |j                  | t        j                  dg             d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)Nr   rY   r   rS  r   )	r2   rK   r?   r~   r   r   r  r  
IndexError)rS   r   rT   r   expecteds        r   	test_taker{    sX   G))+,HFBHHaV,- B!**8499'B""8W5AAAB 
z	" 5Hbhhsm45 FBHHaV,- P""W!/!8!8!A!F!Fw!OP 	P PP 
z	" 5GRXXrd^45 55 55 5s   
'E''E#E #E,c                    t        | j                               }|j                  |      dk(  sJ |j                  |t        j                  d      dk(        dk(  sJ |j                  |       dk(  sJ |j                  | t        j                  d      dk(        dk(  sJ |j                  | t        j                  d      dk\        dk(  sJ |j                  | t        j                  d      d	k        d	k(  sJ y )
Nr   r]   r{   r   r   r   r   rY   r   )r2   rK   r   r   r@   )rS   r   rT   s      r   test_count_rowsr}    s   G))+,H$$X.!333$$%A- % /234 4 4 $$W-333$$)Q. % 0345 5 5 $$WRXXe_5I$JaOOO$$WRXXe_q5H$IQNNNr   c                      t         j                  t         j                  t         j                  g} | D ]+  }t	        j
                  t              5   |        d d d        - y # 1 sw Y   8xY wr   )r   
FileFormatr^  Partitioningr  r  r  )classesklasss     r   test_abstract_classesr    sZ    




G
  ]]9% 	G	 		 	s   A!!A*	c                  N   t        j                  t        j                  dt        j                               t        j                  dt        j                               g      } t
        j                  t
        j                  t
        j                  fD ]:  } ||       }t        |t
        j                        sJ | ||       k(  sJ |dk7  r:J  t        j                  t        j                  dt        j                               t        j                  dt        j                               g      } t        j                  |       }t        |j                        dk(  sJ t        d |j                  D              sJ |j                  d      }t        |t
        j                        sJ t        j                  d      d	k(  t        j                  d      d
k(  z  }|j!                  |      sJ t#        j$                  t         j&                        5  |j                  d       d d d        |j                  d      }t        j                  d      d	k(  }|j!                  |      sJ |t        j                  | d      k7  sJ t        j                  t        j                  dt        j                               t        j                  dt        j                               g      } t        j                  | d      }t        |j                        dk(  sJ t        d |j                  D              sJ |j                  d      }t        j                  d      t        j(                  d      k(  t        j                  d      t        j(                  d	      k(  z  }|j!                  |      sJ |j                  d      }t        j                  d      j+                         t        j                  d      t        j(                  d	      k(  z  }|j!                  |      sJ dD ]?  }t#        j$                  t         j&                        5  |j                  |       d d d        A |t        j                  | d      k7  sJ t        j                  t        j                  dt        j                               t        j                  dt        j                               g      } t        j                  |       }t        |j                        dk(  sJ t        d |j                  D              sJ |j                  d      }t        |t
        j                        sJ t        j                  d      d	k(  t        j                  d      d
k(  z  }|j!                  |      sJ t#        j$                  t         j&                        5  |j                  d       d d d        |t        j                  | d      k7  sJ t        j                  t        j                  dt        j                               t        j                  dt        j,                  t        j.                         t        j0                                     g      } t        j                  | dt        j2                  g d      i      }|j                  d   J |j                  d   j5                         g dk(  sJ |t        j                  | d       k7  sJ t        j                  t        j                  t        j                  dt        j                               t        j                  dt        j,                  t        j.                         t        j0                                     g      dt        j2                  g d      i      }|j                  d   J |j                  d   j5                         g dk(  sJ t        j6                  t        j2                  t9        d            t        j2                  d  t9        d      D              t        j2                  d!gd"z  d#gd"z  z         gg d$%      }t        j                  d&t        j0                         fg      }t
        j                  t
        j                  t
        j                  fD ]|  }t;        j<                         5 } ||      }t        j>                  ||d'|(       t        j@                  |d'|(      }	|	jC                         }
|
j!                  |      sJ 	 d d d        ~ t;        j<                         5 }t        j                  |      }t        j>                  ||d'|(       d }	t#        j$                  tD        d)*      5  t        j@                  |d'tG        d      (      }	d d d        |	J 	 d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   HxY w# 1 sw Y   KxY w# 1 sw Y   y xY w)+Nr]   r^   zother objectr   r   rv   c              3   $   K   | ]  }|d u  
 y wr   r   .0r  s     r   	<genexpr>z$test_partitioning.<locals>.<genexpr>       <QqDy<   z/3/3.14/rY   gQ	@z/prefix/3/aaaz/3/nonesegment_encodingalphabetaxyz)null_fallbackc              3   $   K   | ]  }|d u  
 y wr   r   r  s     r   r  z$test_partitioning.<locals>.<genexpr>  r  r  z/alpha=0/beta=3/r   z/alpha=xyz/beta=3/)z/alpha=one/beta=2/z/alpha=one/z
/beta=two/otherc              3   $   K   | ]  }|d u  
 y wr   r   r  s     r   r  z$test_partitioning.<locals>.<genexpr>  r  r  z3_3.14_prefix_3_aaa_)firstsecondthirddictionariesr      c              3   D   K   | ]  }t        j                            y wr   randomr  rr  s     r   r  z$test_partitioning.<locals>.<genexpr>       %I!fmmo%I    r[   r   r\   r  f2r   namesr   ipcr   r   z,Expected Partitioning or PartitioningFactoryr   )$r?   r=   r@   rB   rC   r   r   HivePartitioningFilenamePartitioningrM   r  r   r  allparse
Expressionr	  r  r  r_  r  is_nullrz   r   rD   r~   rf  r   r/   tempfileTemporaryDirectorywrite_datasetrS   r   rL  int)r=   r  r   exprrz  
shouldfailr   partitioning_schematempdir	load_backload_back_tables              r   test_partitioningr    s+   YY

#


% F **B,?,?))+ .V},888uV},,,~---. YY
"((*%


% F ++F3L|(()Q...<,";";<<<<j)DdBMM***!Q&288E?d+BCH;;x   	r	' ,?+, e$Dxx A%H;;x   233FVTTTTYY
"((*%
$ F &&vUCL|(()Q...<,";";<<<<01D	'	biil	*	&	RYYq\	)	+  ;;x   23D	'		"	"	$(8BIIaL(H	I  ;;x   I +
]]2??+ 	+z*	+ 	++ 2..vWMMMMYY
"((*%


% F **62L|(()Q...<,";";<<<<i(DdBMM***!Q&288E?d+BCH;;x   	r	' ,?+, 2226FSSSSYY
"((*%
bggi=> F ++eRXX.J%KLL $$Q'///$$Q'113 8$ $ $ $233FNNNN**
		HHWbhhj)HHUBMM"'')RYY[AB
 	
 28889
L $$Q'///$$Q'113 8$ $ $ $ HH
rRXX%IuRy%II
#sebj()+ #E
 ))fbiik%:$;<**B,?,?))+ 	1((* 	1g !45LUG$)F

750<>I'002O"))%000	1 	1	1 
	$	$	& !'//0CD
 %L	B	]]:!OQ 	O

75s1vNI	O    ! !A, ,<	+ 	+$, ,L	1 	1	O 	O! !sV   gg(
g5AhAh#"hhg%(g2	5g?h	h	hh$c           
      $   t        j                  t        j                  dt        j                               t        j                  dt        j                               g      }t        j                  |      t        j                  |      t        j                  |      t        j                  |d      t        j                  |d      t        j                  |dd      g}|D ]'  }| j                  | j                  |            |k(  r'J  y )Nr]   r^   r  r  r  )r  r  )r?   r=   r@   rB   rC   r   r   r  r  loadsdumps)pickle_moduler=   partsr   s       r   test_partitioning_picklingr  ;  s    YY

#


% F
 	  (
F#
'
  &A
@
FV5QE  F""=#6#6t#<=EEEFr   z@flavor, expected_defined_partition, expected_undefined_partition))r  )zfoo=A/bar=ant%20bee r  r  )r   )z	A/ant beer  r  )r  )r  z
A_ant bee_)r  rr  flavorexpected_defined_partitionexpected_undefined_partitionc                    t        j                  dt        j                         fdt        j                         fg      } t        t        |       |      }|j                  t        j                  d      dk(  t        j                  d      dk(  z        |k(  sJ |j                  dj                  |            j                  t        j                  d      dk(  t        j                  d      dk(  z        sJ |j                  t        j                  d      dk(  t        j                  d      dk(  z  t        j                  d      dk(  t        j                  d      dk(  z  z        |k(  sJ |j                  t        j                  d      dk(  t        j                  d      dk(  z  t        j                  d      dk(  t        j                  d      dk(  z  z        |k(  sJ | dk7  rVt        j                  t         j                  d	      5  |j                  t        j                  d      dk(         d d d        y |j                  t        j                  d      dk(        d
k(  sJ y # 1 sw Y   y xY w)Nfoobarrb   zant beeAr   r  zDNo partition key for foo but a key was provided subsequently for barr   )zbar=ant%20beer  )r?   r=   rD   getattrr   r   pcr@   r  joinr	  r  r  r_  )r  r  r  r  r   s        r    test_dataset_partitioning_formatr  M  sD    ))eRYY[%9E299;;O$PQ&72v&.ABL 	RXXe_	9bhhuoQT>TUV%	&	&
 chh'ABCJJ	%C	BHHUOy$@A   	hhuo*rxx#/EF9,%C1GHJ	
 &		&	& 	hhuo*rxx#/EF9,%C1GHJ	
 (		(	( ## ]]OO(
 	B "((5/Y">A	B 	B ""RXXe_	%AD I
 
 	
 
	B 	Bs   (I22I;c                     t        j                  t        j                  g dg dd            } t        j                  d      }t        j                  d      }| j                  |dz   ||z
  |dz  |j                  d      |z  d	
      }t        j                  g dg dg dg dd	      }|j                  |      sJ y )Nr   rv   rY   )rv   rv   rv   rZ   r[   r\   r   rv   rC   )za+1zb-aza*2za/br)   rv   rY   r{   )r   r   rK  )rv   r{      )      ?r!  g      ?)r   rS   r?   r   r@   r   castr	  )rS   r[   r\   r7  rz  s        r   $test_expression_arithmetic_operatorsr    s    jj		"BCDG
A
A1u1u1uvvi 1$	' F xx H =="""r   c                  |   dD  cg c]  } t        j                  |       | k(   c} \  }}}t        j                  |      ddik(  sJ t        j                  |      t        j                  |      k(  sJ t        j                  ||z  |z        dD  ci c]  } | |  c} k(  sJ t        j                  d      dk\  }t        j                  |      i k(  sJ t        j                  ||z        ddik(  sJ t        j                  d      j	                         }t        j                  |      dd ik(  sJ y c c} w c c} w )Nabcr[   r|   rY   )r   r@   get_partition_keys_get_partition_keysr  )fr[   r\   r}   nopenulls         r   test_partition_keysr    s    )./Arxx{a/GAq!  #Sz111  #r'='=a'@@@@  Q+e/D1/DDDD88C=AD  &",,,  T*sCj88888C=  "D  &3+555 0 0Es   D4
D9c                     t        j                         } t        j                  ddg      }t        j                  d      }t        j                  t        j                               }t        j                  t        j                        }| j
                  t               k(  sJ |j
                  ddhk(  sJ | j                  dk(  sJ |j                  dk(  sJ | j                  t        j                         k(  sJ |j                  t        j                         k(  sJ | j                  t        j                  u sJ |j                  t        j                  u sJ | | k(  sJ | |k7  sJ | |k7  sJ | |k7  sJ | |k7  sJ d |_        |j                  t        j                         k(  sJ | |k(  sJ t        j                         |_        |j                  t        j                         k(  sJ | |k7  sJ t        j                  |_
        |j                  t        j                  u sJ || k(  sJ t        j                  |_
        |j                  t        j                  u sJ || k7  sJ y )	Nr[   r\   dictionary_columnsmscoerce_int96_timestamp_unitbinary_type	list_typens)r   ParquetReadOptionsr?   binary_viewLargeListTyper  r   r  r  binaryr  ListTypelarge_binary)opts1opts2opts3opts4opts5s        r   test_parquet_read_optionsr    s   !!#E!!c3Z@E!!dCE!!bnn.>?E!!B,<,<=E##su,,,##Sz111,,444,,444		+++ 0000??bkk)))??b.....E>>E>>E>>E>>E>>E		+++E>>)E 1111E>>kkEO??bkk)))E>>&&EO??b.....E>>r   c                     t        j                         } t        j                  dh      }t        j                  d      }t        j                  t        j                               }t        j                  t        j                        }| j
                  t        j                         k(  sJ |j
                  t        j                  dg      k(  sJ |j
                  t        j                  d      k(  sJ |j
                  t        j                  t        j                               k(  sJ |j
                  t        j                  t        j                        k(  sJ y )Nr[   r  sr  r  r  )r   r   r?   r  r  read_optionsr  )pff1pff2pff3pff4pff5s        r   %test_parquet_file_format_read_optionsr    s"   !DC59DC@DBNN,<=D"*:*:;D 5 5 7777 5 5# OOOO 5 5$'!) ) ) ) 5 5NN$!& & & & 5 5""!$ $ $ $r   c                     t        j                         } t        j                  d      }t        j                  dd      }t        j                  dd      }t        j                  dd	
      }t        j                  d      }t        j                  ddd      }t        j                  d|      }| j                  du sJ | j
                  dk(  sJ t               r| j                  du sJ | j                  dk(  sJ | j                  dk(  sJ | j                  du sJ |j                  du sJ |j
                  dk(  sJ t               r|j                  du sJ |j                  du sJ |j
                  dk(  sJ t               r|j                  du sJ |j                  du sJ |j
                  dk(  sJ t               r|j                  du sJ |j                  dk(  sJ |j                  d	k(  sJ |j                  du sJ t               r|j                  du sJ |j                  |k(  sJ |j                  | j                  k7  sJ | | k(  sJ | |k7  sJ ||k7  sJ ||k7  sJ || k7  sJ || k7  sJ || k7  sJ y )N   buffer_sizei    T)r  use_buffered_streamF)r  
pre_bufferi@ i )thrift_string_size_limitthrift_container_size_limitpage_checksum_verificationrv  )hole_size_limitrange_size_limitlazy)r   cache_optionsi @B )r   ParquetFragmentScanOptionsr?   CacheOptionsr  r  r   r   r  r  r  r  )r  r  r  r  r  opts6
cache_optsopts7s           r   test_parquet_scan_optionsr    s   ))+E))d;E))t5E))eNE))!'$*-E ))#')EdDJ))TTE$$---%%%4'''))[888,,	999++u444$$---%%%4'''$$,,,%%%4'''$$---%%%5((())V333,,666++t3334'''*,,,%"5"5555E>>E>>E>>E>>E>>E>>E>>r   c                    t        j                         t        j                         t        j                  t        j                  j                  dd            t        j                  t        j                  j                  ddg            t        j                  t        j                  j                  dd	            t        j                         t        j                  t        j                  j                  dd
            t        j                  t        j                  j                  dd            g}	 |j                  t        j                                t        g|j                  t        j                         t        j                  dh      t        j                  d      t        j                  dddd      g       |D ]'  }| j                  | j!                  |            |k(  r'J  y # t        $ r Y w xY w)N	T)	delimiterignore_empty_linesrY   r  )	skip_rowsre  r  i   )r  
block_sizeignorenewlines_in_valuesunexpected_field_behavior)parse_optionsF   r   r  r[   r  )r  r  {   i  )r  r  r  r  )r   r  CsvFileFormatr?   csvParseOptionsReadOptionsJsonFileFormatjsonr0   OrcFileFormatImportErrorrl   extendr   r  r  )r  formatsr  s      r   test_file_format_picklingr)  %  s   


,,t@D - F 	G
bff&8&8ug '9 '/ 	0
bff&8&8E '9 '+ 	,

''..$IQ / S	T 	rww':':" (; (. 	/G r'')* 
~  "  SE:  T:  $( ),,/		

 
	  T""=#6#6{#CDSSST!  s   #G> >	H
	H
c                    t        j                         t        j                  t        j                  j	                  d            t        j                  t        j                  j                  d            t        j                         t        j                  t        j                  j                  dd	            t        j                  t        j                  j                  dd
            g}t        ;|j                  t        j                  d      t        j                  d      g       |D ]'  }| j                  | j                  |            |k(  r'J  y )NT)strings_can_be_nullconvert_options   r  r  Ferrorr  i   r  r  r  )r   )r   CsvFragmentScanOptionsr?   r   ConvertOptionsr"  JsonFragmentScanOptionsr$  r!  rl   r'  r
  r  r  )r  r   options      r   #test_fragment_scan_options_picklingr5  L  s'   
!!#
!!FF11d1K	M
!!++u+=	?
""$
""GG  E;B ! D	E 	"",,#,N	PG 
~))d;))T:
 	
  J""=#6#6v#>?6IIIJr   paths_or_selectorr   r   r   r   r   c                 x   t        j                  t        j                  dh      |      }t        j                  d      }t        j                  t        j                  t        j                  dt        j                               t        j                  dt        j                               g            |_
        |j                  dk(  sJ |j                  ddgk(  sJ |j                  d	u sJ t        j                  | |||      }|j                         }|j                         j!                  t        j                  t        j                  d
t        j"                               t        j                  dt        j$                               t        j                  dt        j&                  t        j                         t        j                                     t        j                  dt        j"                               t        j                  dt        j(                  t        j"                         t        j                         d            t        j                  dt        j                               t        j                  dt        j                               g      d	      sJ t+        |j-                         t.              sJ t+        |j1                  |      t         j2                        sJ |j4                  j!                  t        j6                  d            sJ |j1                         }t+        |t         j2                        sJ |j9                         }t        j:                  g dt        j"                               }	t        j:                  g dt        j$                               }
t
        j<                  j?                  t        j:                  g dt        j                               t        j:                  djA                         t        j                                     }t        j:                  tC        d      D cg c]  }|dz  tE        |dz        d c}      }|jG                         }tI        |ddgddg      D ]M  \  \  }}}}t        j:                  |gdz  t        j                               }t        j:                  |gdz  t        j                               }t        j:                  |dz
  gdz  t        j"                               }|jJ                  J |jL                  dk(  sJ |d   j!                  |	      sJ |d   j!                  |
      sJ |d   j!                  |      sJ |d   j!                  |      sJ |d   j!                  |      sJ |d   j!                  |      sJ |d   j!                  |      rNJ  |jO                         }t+        |t
        jP                        sJ tS        |      dk(  sJ |jL                  dk(  sJ y c c}w )Nr_   r  )r  r   r   r   r   .rr  Fr]   r^   r`   ra   rZ   check_metadataTr   rw   z	0 1 2 3 4r   rY   r   rv   r"  r#  rS  r   r{   r  r   )*r   r   r  r   r   r?   r=   r@   r   rD   r   partition_base_dirselector_ignore_prefixesexclude_invalid_filesr   rA  r	  rB   rC   rz   ra   rM   inspect_schemasrh   r   r  r   r  r   r~   r   r   splitr/   r_   r.  r  r  rT  r   rE   r   )rn   r6  r   r   r   r   inspected_schemarS   r   r4  r5  expected_strr:   expected_structiteratorrt   rT   r   r   expected_groupexpected_keyexpected_constr   s                          r   test_filesystem_factoryrG  e  s    !!**ugFF
 ))(3G33
		HHWbhhj)HHUBIIK(
 	G %%111++Sz999((E111))!67G (??##BII

#


%
bhhj"))+>?
"((*%
299288:+-99;&8 9 	:
"((*%
		$	/ 	%  $ 	 	 	 g--/666gnn%56**, , ,!!((4999nnGgr33444ooG88O"((*=L88O"**,?L%%11
rxxz2
""$299;7L hh).q 3$% '(!e#a!e* =  3 4O##%H),X1vu~)N -%5#5'A+BHHJ?xx			<519+/
C,,888  A%%%Qx|,,,Qx|,,,Qx|,,,Qx~...Qx///Qx~...Qx|,,,- EeRXX&&&u:!!!+ 3s   X7c                    t        j                         }t        j                  d| |      }|j                  D ]  }|j	                  ||       }|j
                  dgk(  sJ |j	                  || dg      }||fD ]P  }t        |t         j                        sJ |j                  |k(  sJ t        |j                  t        |             rPJ  |j
                  dgk(  rJ  y )N/plainr   r   r   r  )r   r   rS   r
  r  r  rM   r  rq   r   rx   )r   parquet_formatrS   rq   rT   row_group_fragmentr  s          r   test_make_fragmentrN    s    ))+Njjm .0G  
4!//mD""qc)))+99$FGS : J./ 	AAa!7!788866T>!>allD,?@@@	A ",,333
4r   c           	      r   | \  }}}}}}}}t        j                         }	|g}
|
D cg c]  }|	j                  ||       }}t        j                  ||	|j                  |      }|j                         }|j                  |      sJ |j                  D cg c]'  }|j                  j                  |      j                  ) }}t        |
|      D cg c]  \  }}|	j                  |||       }}}t        j                  ||	|j                  |      }|j                         }|j                  |      sJ |
D cg c]  }d }}t        |
|      D cg c]  \  }}|	j                  |||       }}}t        j                  ||	|j                  |      }t        j                  t        j                  j                   d      5  |j                         }ddd       |
D cg c]  }d }}t        |
|      D cg c]  \  }}|	j                  |||       }}}t        j                  ||	|j                  |      }t        j                  t"        d      5  |j                         }ddd       yc c}w c c}w c c}}w c c}w c c}}w # 1 sw Y   xY wc c}w c c}}w # 1 sw Y   yxY w)	z
    Test passing file_size to make_fragment. Not all FS implementations make use
    of the file size (by implementing an OpenInputFile that takes a FileInfo), but
    s3 does, which is why it's used here.
    )r   r=   r   )	file_sizer   zParquet file size is 1 bytesr   Nr	  zHTTP status 416)r   r   r  r  r=   r   r	  r
  r   get_file_infosizer  r  r  r   libr_  OSError)s3_example_simpler   rq   rc   urihostport
access_key
secret_keyr  r   r  rS   tblr  
sizes_truerR  fragments_with_sizedataset_with_sizesizes_toosmallsizes_toolarges                        r   test_make_fragment_with_sizera    s    @Q<E4S$j*&&(KFE #$ **44 $I $""+ellrG 


C::e ELMMRq'$$2215::RJR-0
-CE)tT '44T24N E E,,KQS 


C::e %**Da*N*-0-GI)tT '44T24N I I ,,KQS 
w{{//7U	V -!**,- +00$g0N0-0-GI)tT '44T24N I I ,,KQS
 
w&7	8 -!**,- -S$ SE +I- - 1I- -sA   I;,J J>	JJJ(	J"J'!J-J-J6c                 <   t        j                  d      }t        j                  |j	                  d            }t        j                         }|j                  |      }t        |j                         t        j                        sJ t        j                  g dg dg dgg d      }| j                  |      j                  |      sJ |j                  |j                  |            }| j                  |      j                  |j                               sJ y )NzT
        alpha,num,animal
        a,12,dog
        b,11,cat
        c,10,rabbit
    utf-8r[   r\   r}         r   dogcatrabbitr  numanimalr  )textwrapdedentr?   	py_bufferencoder   r  r  rM   rL   BufferReaderr   r   r	  r  r  )r   r  contentbuffer
csv_formatrT   rz  pickleds           r   "test_make_csv_fragment_from_bufferrx    s    oo  	G \\'..12F!!#J''/H hmmor777xx%13 9:H ""8,33H===!!-"5"5h"?@G""7+2283D3D3FGGGr   c                    d}t        j                  |j                  d            }t        j                         }|j                  |      }t        |j                         t         j                        sJ t        j                  g dg dg dgg d      }| j                  |      j                  |      sJ |j                  |j                  |            }| j                  |      j                  |j                               sJ y )Nz{"alpha" : "a", "num": 12, "animal" : "dog"}
{"alpha" : "b", "num": 11, "animal" : "cat"}
{"alpha" : "c", "num": 10, "animal" : "rabbit"}
rc  rd  re  rh  rl  r  )r?   rq  rr  r   r#  r  rM   rL   rs  r   r   r	  r  r  )r   r  rt  ru  json_formatrT   rz  rw  s           r   #test_make_json_fragment_from_bufferr{    s    <G \\'..12F##%K((0H hmmor777xx%13 9:H ""8,33H===!!-"5"5h"?@G""7+2283D3D3FGGGr   c                 \   t        j                  g d      t        j                  g d      t        j                  g d      g}|d   j                         |d   |d   j                         g}t        j                  t        j
                  ddg	      d
d      }|t        j                         f||fg}|D ]p  \  }}t        j                  |g d      }t        j                         }t        j                  ||       |j                         }	|j                  |	      }
| j                  |
      j                  |      sJ |j                  |j                  |
            }| j                  |      j                  |      sJ t        j                   |	      }|j                  |      }
|
j#                         }t%        |t         j&                        sJ |j(                  sJ t        j*                  |
j#                               j-                         j                  |      rqJ  y )Nrd  re  rh  r   r   rv   r  rn  r  Tr  )r  r  r  rl  r  )r?   r~   dictionary_encoder   r   r  r   BufferOutputStreamrl   rm   getvaluer  r   r	  r  r  rs  rL   rM   rN   rQ   ParquetFileread)r   r  arraysdictionary_arraysdictionary_formatcasesformat_r   rr   ru  rT   rw  	file_likeopened_files                 r   &test_make_parquet_fragment_from_bufferr  2  s    	!

)*F 	q	##%q	q	##%
 ,,** '2
 ! 
%%'(	-.E ! D'AB##%
uc"((0&&x077>>>%%m&9&9(&CD&&w/66u=== OOF+	((3mmo+r}}555####~~hmmo.335<<UCCC'Dr   c                     t        j                  t        d      dgdz  dgdz  dgdz  z   gg d      }t        | dz        }t	        j
                  ||d	g|
       t        j                  |dd|      }||fS )NrP  r   r[   r{   r\   r  r  test_parquet_datasetr   )partition_cols
chunk_sizer   r   )r   r   r   )r?   r   r/   r_   rl   write_to_datasetr   rS   )r  r  r   r   rq   rS   s         r   _create_dataset_for_fragmentsr  `  s    HH	qA37SEAI	12"E
 w//0Dt(.xJHjjYV
G '>r   c                    t        |       \  }}t        |j                               }t        |      dk(  sJ |d   }ddg}|j                  j
                  |k(  sJ |j                  j                  |j                  |j                        |j                  k(  sJ |j                  j                  t        j                  d      dk(        sJ |j                  |      }|j                  |k(  sJ |j                  |j!                  d      j#                  dd            sJ |j                  ||j$                        }|j                  g d	k(  sJ |j                  |j#                  dd            sJ |j                  |j$                  j'                  d      k(  sJ |j                  ||j$                  t        j                  d      dk  
      }|j                  g d	k(  sJ y )Nrv   r   r  r  r   r[   r{   rb   r  )r=   r   )r  rh   rK   r   physical_schemar  r   rA  rq   r   r  r	  r   r@   r   re  remove_columnslicer=   remove)r  r   r   rS   r  r  physical_namesr7  s           r   test_fragmentsr  r  s   27;NE7 W**,-Iy>Q!AD\N""n44488AFFALL1Q5F5FFFF!!((&)9S)@AAA $$Q'F.000==,,Q/55a;<<< $$Qw~~$>F"6666==Q*+++ 4 4Q 7777 $$	'..$!); % =F"6666r   c                    t        j                  t        d      dgdz  dgdz  z   gddg      }t        | dz        }t	        j
                  ||dg	       t        j                  t        j                  d
g      d      }t        j                  |d|      }|j                  t        j                  d      dk\        }t        t        |            dk(  sJ y )NrP  r   r{   rv   colr   r  r  r  )r   r   r   r  r   r  r   )r?   r   r/   r_   rl   r  r   r   r=   rS   rK   r@   r   rh   )r  r   rq   r   rS   r  s         r   test_fragments_implicit_castr    s     HHeAha1#' 125&/JEw//0DtVH=??299&6%78HDjjidCG%%RXXf-=-B%CItI1$$$r   c                 .  
 t        |       \  
}	 d
fd	}t        |j                               d   }|j                  }|j	                  |j                  |            }|j                  |      |j                  |      k(  sJ |j                  |j                  |j                  |j                        }|j                  |      j                  |j                  |            sJ  ||d       |j                  |j                  |j                  |j                        } ||dt        j                  d      dk         |j                  |j                  |j                  |j                        } ||ddgt        j                  d      d	k  
       |j                  |j                  |j                  |j                        } ||dt        j                  d      dk(         d|j                  j                  ddd      z   }	t!        j"                  t$        |	      5  |j                  |j                  |j                  |j                        }|j                  |t        j                  d      dk(         d d d        y # 1 sw Y   y xY w)Nc                     | j                  j                  ||      }|r|nj                  }|j                  |k(  sJ  j                  | j	                  |      }|j                  |      sJ y )Nr=   r*   r   )r   r=   re  r  selectr	  )rT   	row_slicer*   r   actualre  rz  r   s          r   assert_yields_projectedz;test_fragments_reconstruct.<locals>.assert_yields_projected  st    ""<< # A")wu/A/A""l2225;;	*11,?}}X&&&r   r   )r  )r   r{   )r   rv   r  rv   r   r(  ru  r   r[   z&No match for FieldRef.Name\(part\) in Fr   NN)r  rh   rK   r   r  r  r   r  rq   r   r  r	  r   r@   r  	to_stringr  r  rL  )r  r   r  rS   r  rT   rL  pickled_fragmentnew_fragmentpatternr   s             @r   test_fragments_reconstructr    s}   27;NE7 6:' G))+,Q/H__N %**=+>+>x+HI""+44X>? ? ? "//x**%:: 0 <L ""<077)+ + +L&1 "//x**%:: 0 <L L&$!9KL "//x**%:: 0 <L L&%)F288D>C3GI "//x**%:: 0 <L L&#%88F#3s#:<
 9''11%FGG	z	1 N%33MM8..!)!>!> 4 @ 	RXXf5E5LM	N N Ns   &AJJc                    t        | d      \  }}t        |j                               d   }t        |j                               }t	        |      |j
                  cxk(  rdk(  sJ  J |j                  |d   |j                        }|j                  g dk(  sJ t	        |      dk(  sJ |j                  |j                  dd            sJ |d   j                  J |d   j
                  dk(  sJ |d   j                  d   j                  dddddddk(  sJ t        |j                  t        j                  d	      dk  
            d   }t        |j                  t        j                  d	      dk              }t	        |      dk(  sJ |j                  |d   t        j                  d	      dk  
      }t	        |      dk(  sJ y )Nrv   r  r   rb   r  r   minmaxr  r  r  r   )r  rh   rK   r  r   r  r   r=   re  r	  r  r  
statisticsr   r@   )r  r   r   rS   rT   r  r7  s          r   !test_fragments_parquet_row_groupsr    s   27qINE7G))+,Q/H x::<="#x'>'>C!CCCCC$$Aw~~ % 7F"6666v;!==Q*+++q!,,888q!00A555q!,,Q/::""?   
 G))$!1C)DEaHHx::288D>A;MNO"#q((($$Arxx~'9 % ;Fv;!r   c                    t        j                  dt        d      i      }t        j                  || dz  d       t        j                  | dz  d      }t        |j                               d   }|j                  j                  |j                  |j                  d	d
g      }|j                  dk(  sJ |j                          |j                  dk(  sJ t        |j                         dk(  sJ y )Nr[   rP  test.parquetrv   row_group_sizer   r  r   r   rY   rK  )r?   r   r/   rl   rm   r   rS   rh   rK   r   r  rq   r   r  ensure_complete_metadatar   r  )r  r   rS   original_fragmentrT   s        r   %test_fragments_parquet_num_row_groupsr    s    HHc58_%ENN5'N21Ejj>1)DGW2245a8 !''55 1 < <q6 6 H ""a'''%%'""a'''x""#q(((r   c                    t        j                  t        ddgddg            }|d   j                  d      |d<   t	        j
                  t        j                  |      | dz         d	d lm	}  |j                  | dz        }|j                  | |j                  d      dk(  
      }|j                  d	   |j                         k(  j                         j                         sJ y )Nr[   r\   r   rv   )col1col2r  categoryztest_filter_dictionary.parquetr   r   )r3   r4   dictastyperl   rm   r?   r   pyarrow.datasetrS   r   r@   r   	to_pandasr  )r  r   rH   r   rS   r7  s         r   ,test_fragments_parquet_row_groups_dictionaryr    s     
dc
!Q8	9BF"":.BvJNN288B<+K!KL bjj#CCDG$$WXRXXf5E5L$MFGGAJ&**,,11377999r   c                    |\  }}t        | d|      \  }}t        |j                               d   } ||j                  g      5  |j	                          d d d        |j
                  ddgk(  sJ  |g       5  |j	                          d d d        t        |j                  t        j                        sJ |j                  j                  |j                  |j                  ddg      }|j
                  |j
                  k(  sJ |j	                          |j
                  d   }	|	j                  dk(  sJ |	j                  dk(  sJ |	j                  J |j!                  |j#                  |            }
 ||j                  g      5  |
j
                  ddgk(  sJ |
j
                  d   }	|	j                  dk(  sJ |	j                  J 	 d d d        y # 1 sw Y   xY w# 1 sw Y   ZxY w# 1 sw Y   y xY w)Nrv   r  r   r   r   rK  )r  rh   rK   rq   r  r  rM   metadatarl   FileMetaDatar   r  r   idr   r  r  r  )r  r   r  rc   r   rr  rS   rT   r  	row_groupr  s              r   &test_fragments_parquet_ensure_metadatar    s   &B.A"JAw G))+,Q/H 
x}}o	& ,))+,1a&((( 
b	 ,))+, h''999 ??00x**1v 1 L ""h&9&9999 ))+''*I<<1"""+++ %**=+>+>|+LM	x}}o	& 0**q!f444$//2	||q   ##///	0 03, ,
, ,(0 0s%   G:G#
AG0G #G-0G9c                 X   |\  }}t        | |      \  }}t        |j                               d   } |g       5  |j                  |j	                  |            }d d d         |j
                  g      5  |j                  }	d d d        	dgk(  sJ y # 1 sw Y   :xY w# 1 sw Y   xY w)Nr   r   r   )r  rh   rK   r  r  rq   r  )
r  r   r  rc   r   rr  rS   rT   r  r  s
             r   )test_fragments_parquet_pickle_no_metadatar  F  s     'B.w2FJAwG))+,Q/H 
b	 N(..}/B/B8/LMN 
',,-	. 1%00
1!N N1 1s   !B6B B B)c                 &   t        j                  t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j
                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                   d            t        j                  g dt        j                   d            t        j                  g dt        j                   d            t        j                  g dt        j"                               t        j                  g dt        j$                               t        j                  g dt        j&                  d            t        j                  g dt        j(                  d            gg d	
      }t+        | dz        }t-        j.                  |||       |t1        j2                  |dd      fS )N)TNF)r   r   *   )r!  g      $@      E@)r[   Nzr  r  us)r   r   l    jt )booleanr   uint8r   r   r   uint32rB   uint64r1   doubleutf8r  ts[s]ts[ms]ts[us]rA   date64time32time64r  test_parquet_dataset_all_typesr  r   r   r  )r?   r   r~   bool_r   r  r   r   r   r  rB   r  float32rC   r  r  	timestamprA   r  r  r  r_   rl   r  r   rS   )r  r  r   rq   s       r   _create_dataset_all_typesr  X  s   HHHH("((*5HH["''),HH["((*-HH["((*-HH["))+.HH["((*-HH["))+.HH["((*-HH["))+.HH&

5HH&

5HH%rwwy1HH%ryy{3HH[",,s"34HH[",,t"45HH[",,t"45HH["))+.HH("))+6HH["))C.1HH["))D/2)	
,
/-E^ w99:D t
;"**T)&IIIr   c                 D   t        |       \  }}t        |j                               d   }dd lfd}fd}fd}j                  }j
                  }t        |j                               }	|	d   j                  J |	d   j                  d   }
|
j                  dk(  sJ |
j                  dkD  sJ |
j                  i ddd	d
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
d |d       |d      d
d  |d       |d      d
d! |d       |d      d
d" |d#dd$       |d#d$d%      d
 |d#dd       |d#d$d&      d
 |ddd       |ddd      d
 |dddd       |dddd      d
d'k(  sJ y )(Nr   c                 2     j                   ddddd|       S N  r   r   r+   r  r+   s    r   dt_sz.test_parquet_fragment_statistics.<locals>.dt_s  s     )))$1aA>>r   c           
      :     j                   dddddd| dz        S )Nr  r   r   r   r  r  s    r   dt_msz/test_parquet_fragment_statistics.<locals>.dt_ms  s&    ***4Aq!Q$GGr   c           	      4     j                   dddddd|       S r  r  r  s    r   dt_usz/test_parquet_fragment_statistics.<locals>.dt_us  s"    ***4Aq!QBBr   rY   r   r  FTr  r   r   r  r  r   r   r   r  rB   r  r1   r!  r  r  r  r[   r  r     a   zr  r  r  rA   r  rv   rf     )r  r  r  )r  rh   rK   r+   r%   timer  r  r   total_byte_sizer  )r  r   rS   rT   r  r  r  r%   r  r  r  r+   s              @r    test_parquet_fragment_statisticsr    sn    /w7NE7G))+,Q/H>GB==D==D x::<=q!,,888#A&11!4I"""$$t+++ $5.$"%$ 	2&$ 	2&	$
 	!B'$ 	2&$ 	!B'$ 	2&$ 	!B'$ 	T*$ 	#d+$ 	C($ 	$t,$ 	ab2$ 	%(595$  	%(595!$" 	$tQ*4a3DE#$$ tQ*4a3DEq!Q-Q2?q!Q*41a3DE)$   r   c                 <   t        j                  g dg dd      }t        j                  || dz  d       t	        j
                  | dz  d      }t        |j                               d	   j                         }|d
   j                  d	   j                  i k(  sJ y )N)r   r   NN)r[   r\   NNrZ   r  rv   r  r   r  r   r   )r?   r   rl   rm   r   rS   rh   rK   r  r  r  )r  r   rS   r  s       r   &test_parquet_fragment_statistics_nullsr    s    HH-4JKLENN5'N21Ejj>1)DGW**,-a0CCEIQ<""1%00B666r   c                 8   t        j                  g dg dd      d d }|j                  | dz  d       t        j                  | dz  d	      }t        |j                               d   j                         }|d   j                  d   j                  i k(  sJ y )
N)r[   r\   r\   r{   r   r  rZ   r   r  r   enginer   r  )
r3   r4   
to_parquetr   rS   rh   rK   r  r  r  )r  rH   rS   r  s       r   'test_parquet_empty_row_group_statisticsr    s     
O)<	=bq	ABMM'N*9M=jj>1)DGW**,-a0CCEIQ<""1%00B666r   c                    t        | d      \  }}t        |j                               d   }|j                  j	                  t        j                  d      dk(        sJ t        |j                  t        j                  d      dk(  |j                              }t        |      dk(  sJ t        |j                  t        j                  d      dk(  |j                              }t        |      dk(  sJ y )Nrv   r  r   r   r[   r   r=   r\   )
r  rh   rK   r  r	  r   r@   r  r=   r   )r  r   rS   rT   r  s        r   +test_fragments_parquet_row_groups_predicater    s    27qINE7G))+,Q/H((//0@C0GHHH ##288F+;s+B+2>> 	$ 	;< "#q((( ##288F+;s+B+2>> 	$ 	;< "#q(((r   c                 
   t        | d      \  }}t        |j                               d   }|j                  }t        |j	                               }|j                  |j                  |            }|j                  |      |j                  |      k(  sJ |j                  |j                  |j                  |j                  dg      }	|j                  |	      }
|
j                  |j                  |d               sJ |j                  |j                  |j                  |j                  dh      }	|j                  |	|j                  ddgt        j                  d      dk  	      }
|
j                   ddgk(  sJ t#        |
      dk(  sJ |j                  |j                  |j                  |j                  dh      }	t%        j&                  t(        d
      5  |j                  |	       d d d        y # 1 sw Y   y xY w)Nrv   r  r   )r  r  r   r  r   rY   r  zreferences row group 2r   )r  rh   rK   r   r  r  r  r   r  rq   r   r  r	  r=   r   r@   re  r   r  r  ry  )r  r   r  r   rS   rT   rL  r  r  r  r7  s              r   -test_fragments_parquet_row_groups_reconstructr    s    37qINE7G))+,Q/H__Nx::<= %**=+>+>x+HI""+44X>? ? ? "//x**%::3 0 L $$\2F==001DQ1GHIII "//x**%::3 0 L $$U\\D&>xx~! % %F 4.000v;! "//x**%::3 0 L 
z)A	B .-. . .s   G99Hc                    |\  }}t        | d|      \  }}t        |j                               d   }|j                  ddg      } |g       5  |j                  dk(  sJ |j
                  ddgk(  sJ |j
                  d   j                  J 	 d d d        |j                  |      }	|	j                         ddgddgdk(  sJ |j                  g       }|j                  dk(  sJ |j
                  g k(  sJ |j                  ||j                        }	|	j                  dk(  sJ |	j                  |d d       sJ y # 1 sw Y   xY w)	Nr   r  r   rY   row_group_idsrv   r  rb   )r  rh   rK   subsetr  r  r  r   r2  r=   r   r	  
r  r   r   rc   r   r   rS   rT   subfragr7  s
             r   !test_fragments_parquet_subset_idsr    sk    'B27q>@BNE7G))+,Q/H ooQFo3G	b	 <%%***!!aV+++!!!$//;;;< $$W-FAq!f!==== ooBo/G!!Q&&&###$$WW^^$DF??a==r###< <s   A D>>Ec                 |   |\  }}t        | d|      \  }}t        |j                               d   }|j                  t	        j
                  d      dk\        } |g       5  |j                  dk(  sJ t        |j                        dk(  sJ |j                  d   j                  J 	 d d d        |j                  |      }	|	j                         g dg ddk(  sJ |j                  t	        j
                  d      d	kD        }|j                  dk(  sJ |j                  g k(  sJ |j                  ||j                  
      }	|	j                  dk(  sJ |	j                  |d d       sJ |j                  t	        j
                  d      dk(  |j                  
      }|j                  dk(  sJ y # 1 sw Y   	xY w)Nr   r  r   r  rY   r  )r   r   r   r  r   rb   r   r[   r{   )r  rh   rK   r  r   r@   r  r   r  r  r   r2  r=   r   r	  r  s
             r   $test_fragments_parquet_subset_filterr  0  s    'B27q>@BNE7G))+,Q/H oobhhtn12G	b	 <%%***7%%&!+++!!!$//;;;< $$W-F	!CCCC oobhhtnq01G!!Q&&&###$$WW^^$DF??a==r### oobhhv.#5gnnoMG!!Q&&&'< <s   "AF11F;c                    t        | d      \  }}t        |j                               d   }t        j                  t
              5  |j                  t        j                  d      dk\  ddg       d d d        t        j                  t
              5  |j                          d d d        y # 1 sw Y   <xY w# 1 sw Y   y xY w)Nr   r  r   r  rv   r   )	r  rh   rK   r  r  rL  r  r   r@   )r  rr  rS   rT   s       r   %test_fragments_parquet_subset_invalidr	  P  s    .w1EJAwG))+,Q/H 
z	" C!+Aq6BC 
z	"  C C s   ,B-B9-B69Cc                 T   t        j                  g d      }t        j                  g d      }t        j                  g d      }t         j                  j                  ||gddg      }t         j                  j                  ||gddg      }t        j                  d	|i      }t        j                  || d
z  d       t        j                  | d
z  d      }t        |j                               d   }|j                  dk(  sJ |j                  t        j                  d	d      dkD        }	|	j                  dk(  sJ |j                  t        j                  d	d      dkD        }	|	j                  dk(  sJ |j                  t        j                  d	dd      dkD        }	|	j                  dk(  sJ |j                  t        j                  d	dd      dk        }	|	j                  dk(  sJ t        j                  t         j                   d      5  |j                  t        j                  d	d      dkD         d d d        t        j                  t"        d      5  |j                  t        j                  d	d      dkD         d d d        y # 1 sw Y   VxY w# 1 sw Y   y xY w)N)r   r   rv   rY   )皙?皙?333333?皙?r   rv   rY   r{   f21f22r  r  r  r  zdata_struct.parquetrv   r  r   r  r   r   r   zNo match for FieldRef.Nestedr   f3z)Function 'greater' has no kernel matching)r?   r~   StructArrayr   r   rl   rm   r   rS   rh   rK   r  r  r@   r  r  r_  NotImplementedError)
r  r  r  r  r  
struct_colr   rS   rT   r  s
             r   0test_fragments_parquet_subset_with_nested_fieldsr  ]  s:    
,	B
(('
(C
((<
 C		#	#S#Juen	#	EB++RHT4L+IJHHeZ()ENN5'$99!Ljj#88KGG))+,Q/H""a'''oobhhud3a78G!!Q&&&oobhhud3a78G!!Q&&&oobhhudE:Q>?G!!Q&&&oobhhudE:a?@G!!Q&&& 
r.L	M 3-123 
#N
 3 	-123 3	3 33 3s   )J )JJJ'c                    t        |j                               d   }t        |      dk(  st        |      dk(  sJ t        |       \  }}t	        j
                  |d      }t        |j                               d   }t        |      d|j                  j                  t        |             dk(  sJ | dz  }t        j                  j                  ||       t	        j
                  |d	      }t        |j                               d   }t        |      d
|j                  j                  t        |             dk(  sJ y )Nr   zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[key=xxx, group=1]>zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[group=1, key=xxx]>r   r  z*<pyarrow.dataset.ParquetFileFragment path=>data.featherfeatherz,<pyarrow.dataset.FileFragment type=ipc path=)rh   rK   repr_create_single_filer   rS   r   r   r_   r?   r  write_feather)r  rS   rT   r   rq   s        r   test_fragments_reprr    sB    G))+,Q/H 	X	(	( 	X	(	(	( &g.KE4jji0GG))+,Q/HX4,,SY7
8	;	;	; ^#DJJUD)jji0GG))+,Q/HX6,,SY7
8	;	;	;r   rw  c                     | S r   r   r  ms     r   <lambda>r"        Q r   c                 B    |j                  |j                  |             S r   r  r  r   s     r   r"  r"        QWWQWWQZ-@ r   c                    t        j                  dd      }t        j                         }t        j                  d      }t        j
                  j                  ddg      } |||      }t        |t        j                        sJ ||_	        t        j                  | |||      }|j                         }t        j                  dt        j                         fdt        j                         fdt        j                          fd	t        j                         fd
t        j"                  t        j                         t        j                          d      fdt        j$                         fdt        j                          fg      }	|j'                  |	      sJ t        j(                  j                         }
t        |
t        j                        sJ y )Nr   Tr   r   r   r]   r^   r_   r`   ra   rZ   )rc   r   r   r   r   r   discoverrM   PartitioningFactorypartitioning_factoryr   rA  r?   r=   rB   rC   rD   ra   r   r	  r  )rn   rw  r  r6  r   r   r*  r   r@  rD  hive_partitioning_factorys              r   test_partitioning_factoryr,    sp    DA!!#F))(3G33<<gu=MN"#7G*B,B,BCCC#7G ))!67G (ii	
	

				"((*	299288:BIIK@AB	"((*			! O ""?333 " 3 3 < < >/1G1GHHHr   infer_dictionaryc                     | S r   r   r   s     r   r"  r"    r#  r   c                 B    |j                  |j                  |             S r   r%  r   s     r   r"  r"    r&  r   c                 f   t        j                  dd      }t        j                         }t        j                  d      }t        j
                  j                  ddg|      } |||      |_        t        j                  | |||      }|j                         }	|rct        j                  t        j                         t        j                               }
|	j                  d      j                  |
k(  sJ |j!                         j#                         j%                         }|j'                  d      j)                  d      }t        j*                  dgd	z  d
gd	z  z         j-                         }|j/                  |      sJ |j!                         j#                  t        j                  d      dk(        }|j'                  d      j)                  d      }|j1                  dd	      }|j/                  |      sJ y |	j                  d      j                  t        j                         k(  sJ y )Nr   Tr   r   r   r-  r   r"  r   r#  r   )rc   r   r   r   r   r   r(  r*  r   rA  r?   rz   r   rD   r@   rx   r   r   combine_chunksr-  r   r~   r}  r	  r  )rn   r-  rw  r  r6  r   r   r*  r   inferred_schemaexpected_typer   r  rz  s                 r   $test_partitioning_factory_dictionaryr5    s    DA!!#F))(3G33<<	%+; = =#*+?#OG ))!674G oo'Obhhj"))+>$$U+00MAAA ))+::<e$**1-88UGaK5'A+56HHJ}}X&&&  ))%E1I)Je$**1->>!Q'}}X&&&$$U+00BIIK???r   c                     | S r   r   r   s     r   r"  r"    r#  r   c                 B    |j                  |j                  |             S r   r%  r   s     r   r"  r"    r&  r   c                 r   t        j                         }t        j                         }t	        j
                  dt	        j                         fg      }t	        j                  t	        j                  t        d            g|      }t	        j
                  dt	        j                  d      fdt	        j                         fg      }t	        j
                  dt	        j                         fdt	        j                         fg      }t	        j
                  t        |      t        |      z         }dD ]z  }	|j                  |	       |j                  |	dz         5 }
t        j                  j!                  |
|      5 }|j#                  |       |j%                          d d d        d d d        | t        j&                  d	d
      }t        j(                  d	      }t        j*                  j-                  |      } | ||      |_        t        j0                  ||||      }|j3                         }||k(  sJ |j5                         j7                  dt        j8                  d      j;                  t	        j                               i      }|d   d   j=                         dk(  sJ t        j*                  j-                  ddgd      } | ||      |_        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  |d      } | ||      |_"        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  j-                  |d      } | ||      |_        t        j0                  ||||      }tG        jH                  t        jJ                  d      5  |j3                         }d d d        t        j&                  dd
      }t        j(                  d      }t        jL                  j-                  |      } | ||      |_        t        j0                  ||||      }|j3                         }||k(  sJ |j5                         j7                  dt        j8                  d      j;                  t	        j                               i      }|d   d   j=                         dk(  sJ t        jL                  j-                  d      } | ||      |_        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        jL                  |d      |_"        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        jL                  j-                  |d      } | ||      |_        t        j0                  ||||      }tG        jH                  t        jJ                  d      5  |j3                         }d d d        y # 1 sw Y   xY w# 1 sw Y   kxY w# 1 sw Y   
xY w# 1 sw Y   y xY w)Nr]   r   rb   r%   r  rD   )z%directory/2021-05-04 00%3A00%3A00/%24z,hive/date=2021-05-04 00%3A00%3A00/string=%24
/0.featherrp   Tr   date_intr)   r   逎`r  r  2021-05-04 00%3A00%3A00%24r=   r  +Could not cast segments for partition fieldr   r   )'rc   rd   r   r  r?   r=   rB   r   r~   r/   r  rD   rh   rf   rg   r  new_filerm   closer   r   r   r(  r*  r   rA  r   r   r@   r  as_pyrK   r  r	  r   r  r  r_  r  )rw  r  rn   r   r=   r   partition_schemastring_partition_schemafull_schemarp   sinkwriterr   r   r*  r   r3  r  r  r   s                       r   *test_partitioning_factory_segment_encodingrH    s    !FFYY
+,-FHHbhhuRy)*6:Eyy
",,s#	$x&=>@ ii
"))+	299; 78:))DL40@+AABK 	 	)$&&y<'?@ 	Dv. &""5)	 	 {d;H))+6G33<< = !#*+?#OG ))&(FGLGoo'Ok)))^^&&BHHV$))"((*50& F !9Q<:---33<<	V = 5#*+?#OG ))&(FGLGW^^%3356IQ<,,33	&	6	6	(	u	$	&' ' ' ++&:L"<?G))&(FGLGW^^%3356IQ<,,33	&	6	6	(	u	$	&' ' ' 33<<& = :#*+?#OG ))&(FGLG	rJ
L ,!//+,
 v6H))&1G..77 8 !#*+?#OG ))&(FGLGoo'Ok)))^^&&BHHV$))"((*50& F !9Q<:---..77 8 !#*+?#OG ))&(FGLGW^^%3356IQ<,,33	&	6	6	(	u	$	&' ' ' ..&:G))&(FGLGW^^%3356IQ<,,33	&	6	6	(	u	$	&' ' ' ..77& 8 :#*+?#OG ))&(FGLG	rJ
L ,!//+, ,_ 	 	R, ,N, ,s<   !^;"^^^ ,^-^^^	 ^*-^6c                     | S r   r   r   s     r   r"  r"  R  r#  r   c                 B    |j                  |j                  |             S r   r%  r   s     r   r"  r"  R  r&  r   c                 Z   t        j                         }t        j                         }t	        j
                  dt	        j                         fg      }t	        j                  t	        j                  t        d            g|      }t	        j
                  dt	        j                  d      fdt	        j                         fg      }t	        j
                  dt	        j                         fdt	        j                         fg      }t	        j
                  t        |      t        |      z         }t	        j
                  dt	        j                  d      fdt	        j                         fg      }	t	        j
                  dt	        j                         fdt	        j                         fg      }
d	}|j                  |       |j                  |d
z         5 }t        j                  j!                  ||      5 }|j#                  |       |j%                          d d d        d d d        t        j&                  dd      }t        j(                  d      }t        j*                  j-                  |      } | ||      |_        t        j0                  ||||      }|j3                         }||k(  sJ |j5                         j7                  dt        j8                  d      j;                  t	        j                               i      }|d   d   j=                         dk(  sJ t        j*                  j-                  d      } | ||      |_        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  |d      } | ||      |_"        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  j-                  d      } | ||      |_        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  |
d      } | ||      |_"        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  j-                  |	d      } | ||      |_        t        j0                  ||||      }tG        jH                  t        jJ                  d      5  |j3                         }d d d        y # 1 sw Y   pxY w# 1 sw Y   uxY w# 1 sw Y   y xY w)Nr]   r   rb   ztest'; dater  ztest';[ string'ztest%27%3B%20dateztest%27%3B%5B%20string%27zLhive/test%27%3B%20date=2021-05-04 00%3A00%3A00/test%27%3B%5B%20string%27=%24r9  r   Tr   r:  r)   r   r;  rV  r  z2021-05-04 00:00:00$r  r<  r=  r>  r?  r   )&rc   rd   r   r  r?   r=   rB   r   r~   r/   r  rD   rh   rf   rg   r  r@  rm   rA  r   r   r  r(  r*  r   rA  r   r   r@   r  rB  rK   r  r	  r   r  r  r_  )rw  r  rn   r   r=   r   rC  rD  rE  partition_schema_enstring_partition_schema_enrp   rF  rG  r   r   r*  r   r3  r  r  r   s                         r   ;test_partitioning_factory_hive_segment_encoding_key_encodedrO  Q  s    !FFYY
+,-FHHbhhuRy)*6:Eyy
c*	+.?-MNP ii
	%(9299;'GHJ))DL40@+AABK))
r||C0	1
%ryy{	3	56 "$
ryy{	+
%ryy{	3	5"61I
i 		"	"9|#;	< VV__T6* 	fu%LLN	 v6H))&1G..77 8 !#*+?#OG ))&(FGLGoo'Ok)))^^&&BHH]+00<0& F !9Q<:---..77 8  #*+?#OG ))&(FGLGW^^%3356IQ<,,33	-	 $9	9	#	$	+	-. . . &&%9L"<?G))&(FGLGW^^%3356IQ<,,33	-	 $9	9	#	$	+	-. . . ..77 8 !#*+?#OG ))&(FGLGW^^%3356IQ<,,33	%	&*C	C	-	.%	7	9: : : &&"V=L"<?G))&(FGLGW^^%3356IQ<,,33	%	&*C	C	-	.%	7	9: : : ..77"V 8 =#*+?#OG ))&(FGLG	rJ
L ,!//+, ,u	 	 v, ,s0   !Z7"ZZ-Z!Z	ZZ!Z*c           
         t        j                  g dg dd      }t        j                  t        j                  t        j
                  dt        j                               t        j
                  dt        j                               g            }t        j                  t         j                        5  t        j                  || d|       d d d        y # 1 sw Y   y xY w)Nr  yNr  rR  r  rZ   r[   r\   r  r  )r?   r   r   r   r=   r@   rD   r  r  r_  r  r  r   r   s      r   /test_dictionary_partitioning_outer_nulls_raisesrU    s    HH+/BCE??
		288C-rxxRYY[/IJKMD	r	' J
DIJ J Js   6CC"c                     t        j                  g dg dd      }t        j                  t              5  t        j                  || d       d d d        y # 1 sw Y   y xY w)NrQ  rS  rZ   zbasename-{i}.arrow)r?   r   r  r  r  r   r  )r  r   s     r   test_positional_keywords_raisesrW    sJ    HH+/BCE	y	! ?
)=>? ? ?s   AA c                 0   d}t        j                  t        j                  d|dz         t        j                  |dz         d      }t        j                  |d | | dz  dg       t        j                  |d |dz    | dz  dg       t        j                  | dz  dg	      }|d   j                  dk(  sJ t        j                  | dz  dd
g	      }|d   j                  dk(  sJ t        j                  | dz  dg	      }|d   j                  dk(  sJ y )Ni   r   r   )r   r'   rJ  r   r  twor)   r'   rv   )	r?   r   repeatnparangerl   r  
read_table
num_chunks)r  
BATCH_SIZEr   s      r   test_read_partition_keys_onlyr`    s    J HHyyJN+:>*, -E kz%1 ozA~%1 MM'E/E7;E<""a'''MM'E/E73CDE<""a'''MM'E/E7;E<""a'''r   c                     t        j                  |       }t        |D cg c]?  }t         j                  j	                  t         j                  j                  | |            A c}      S c c}w r   )oslistdiranyrq   isdirr  )basedirelementsels      r   _has_subdirsri    sE    zz'"H8LRbggll7B78LMMLs   AA*c                 8   t        j                  |       D ]  }t         j                  j                  | |      }t         j                  j	                  |      sCt        j                  ||      }t        |      rt        |||       r|j                  |        y r   )	rb  rc  rq   r  re  	posixpathri  _do_list_all_dirsr0   )rf  path_so_farr7  r  true_nestednorm_nesteds         r   rl  rl    sn    ZZ  +ggll7A.77==%#..a8KK(!+{FCk*+r   c                 $    g }t        | d|       |S )Nr  )rl  )rf  r7  s     r   _list_all_dirsrq    s    Fgr6*Mr   c                 L    t        t        |             }|t        |      k(  sJ y r   )r   rq  )r  expected_directoriesactual_directoriess      r   _check_dataset_directoriesru    s&    ^G45%9!::::r   c           
      v   t        j                  g dg dd      }t        j                  t        j                  t        j
                  dt        j                               t        j
                  dt        j                               g            }t        j                  || d|       t        | g d       y )	NrS  rQ  rZ   r[   r\   r  r  )zx/xzy/yr  )	r?   r   r   r   r=   r@   rD   r  ru  rT  s      r   (test_dictionary_partitioning_inner_nullsrw    sw    HH?1ABCE??
		288C-rxxRYY[/IJKMDUGEEw(;<r   c           
      z   t        j                  g dg dd      }t        j                  t        j                  t        j
                  dt        j                               t        j
                  dt        j                               g      d d      }t        j                  || d|       t        | g d	       y )
N)r  Nr  rQ  rZ   r[   r\   r  r  r  )za=x/b=xz	a=xyz/b=yz	a=z/b=xyz)	r?   r   r   r  r=   r@   rD   r  ru  rT  s      r   test_hive_partitioning_nullsry    s    HH+2BCDEryy	#ryy{	#RXXc299;%?@ BCGPDUGEEw(MNr   c                  .   t        j                  dt        j                         fdt        j                         fg      } ddg}t	        j
                  |       }t        |t        j                        sJ t	        j
                  | d      }t        |t        j                        sJ t	        j
                  |      }t        |t        j                        sJ t        j                  t              5  t	        j
                          d d d        t        j                  t        d      5  t	        j
                  |        d d d        t        j                  t        d      5  t	        j
                  | |        d d d        t	        j
                  | d	
      }t        |t        j                        sJ t	        j
                  | dd	      }t        |t        j                        sJ t	        j
                  d	
      }t        |t        j                        sJ t        j                  t              5  t	        j
                  |d	
       d d d        t        j                  t        d      5  t	        j
                  |d	       d d d        t        j                  t              5  t	        j
                  | d
       d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   kxY w# 1 sw Y   xY w# 1 sw Y   vxY w# 1 sw Y   y xY w)Nr   r   inferr  )field_nameszExpected listr   zCannot specify bothr   r  )r  r  zCannot specify 'field_names')r|  r  unsupported)r?   r=   r   r   r   r   rM   r   r)  r  r  rL  r  )r=   r  r   s      r   test_partitioning_functionr~    s<   YY,w	.BCDFWE ??6"DdB44555??68DdB22333??u-DdB22333	z	" 
	z	9 ,
F+,	z)>	? 4
F34 ??6&1DdB//000??6GDdB22333??&)DdB22333	z	" .
f-.	z)G	H :
E&9: 
z	" 6
}56 6+ , ,4 4. .: :6 6sH   3K+K%K&7K32K?+LKK#&K03K<?LLc                    t        j                  t        j                  dt        j                  t        j                         t        j
                                     t        j                  dt        j                  t        j                         t        j                                     g      }t        j                  j                  |      }t        j                  dd| |      }|j                  j                  |k(  sJ |j                         }|j                  d      j                  j                  |j                   d         sJ |j                  d      j#                         dgd	z  d
gd	z  z   k(  sJ |j                  d      j                  j                  |j                   d         sJ |j                  d      j#                         dgd	z  dgd	z  z   k(  sJ y )Nr   r   rb   r   r   r   r   r   r   r   r   rv   r"  r#  )r?   r=   r@   rz   r   r   rD   r   r   r(  rS   r   r   r-  rx   r	  typesrf  )rn   r=   r   rS   r   s        r   *test_directory_partitioning_dictionary_keyr  !	  su    YY
"--	288:>?
bggi=> F ##,,F,;DjjvDG &&&000E<< %%,,V\\!_===<< **,a1#'0AAAA<<##**6<<?;;;<<((*ugkUGaK.GGGGr   c                    t        j                  t        j                  dt        j                  t        j                         t        j
                                     t        j                  dt        j                  t        j                         t        j
                                     g      }t        j                  j                  |      }t        j                  dd| |      }|j                  j                  |k(  sJ |j                         }t        t        dd            }t        t        d	d
            }|j                  d      j                  j!                  |j"                  d         sJ |j                  d      j$                  D ]3  }|j                  j'                         }|j)                          ||k(  r3J  |j                  d      j                  j!                  |j"                  d	         sJ |j                  d      j$                  D ]3  }|j                  j'                         }|j)                          ||k(  r3J  y )Nr   r   rb   r   r   r  i  i  r      r   )r?   r=   r@   rz   r   r   r   r  r(  rS   r   r   rh   r/   r-  rx   r	  r  chunksrf  sort)	r   r=   r   rS   r   year_dictionarymonth_dictionaryr   r  s	            r   %test_hive_partitioning_dictionary_keyr  6	  s   YY
rwwy"((*=>
"--	288:>? F ''v'6Djjy]G &&&000E5t,-OE!RL)<<$$++FLLO<<<f%,, )!!++-((() << %%,,V\\!_===g&-- *!!++-))))*r   c                     |,t        j                  t        d      dgdz  dgdz  z   d      }| dz  }t        j                  |||       ||fS )	N	   r'  r{   r!  r   rZ   r  r  r?   r   r/   rl   rm   )base_dirr   r  rq   s       r   r  r  R	  sS    }uQxrdQh".ABCn$DNN5$~>$;r   c                 0   t        j                  t        d      dgdz  dgdz  z   d      }| dz  }t        j                  ||       t        j                  t        dd      dgdz  dgdz  z   d      }| d	z  }t        j                  ||       ||f||ffS )
Nr  r'  r{   r!  r   rZ   ztest1.parquetr  ztest2.parquetr  )r  r   r   r   r   s        r   _create_directory_of_filesr  Z	  s    XXE!HB4!8rdQh+>?@F&ENN65!XXE!RLtax2$(/BCDF&ENN65!FeU^++r   c                     | |j                  |j                  |             fD ]K  }| j                  j                  |j                        sJ |j	                  |       j                  |      rKJ  y r   )r  r  r=   r	  r   )rS   r   r   picklerr|   s        r   _check_datasetr  d	  sc    w}}W]]7%;<= >~~$$U\\222&&w/66u===>r   c                    t        | t        j                        sJ | t        |       | gt        |       gfD ]B  }t	        j
                  | fi |}t        |t        j                        sJ t        ||||       D t        | j                        5  t	        j
                  | j                  fi |}t        |t        j                        sJ t        ||||       d d d        y # 1 sw Y   y xY wr   )rM   pathlibPathr_   r   rS   r  r  r
   parentname)rq   r   r   r  r   r   rS   s          r   _check_dataset_from_pathr  k	  s    dGLL))) CIvD	{3 @**T,V,'2#7#7888w~w?@ 
DKK	  @**TYY1&1'2#7#7888w~w?@ @ @s   AC%%C.c                 <    t        |       \  }}t        ||||       y r   r  r  r  r   r  r   rq   s        r   test_open_dataset_single_filer  |	  s    %g.KE4T5.-Hr   c                 @    t        | d      \  }}t        ||||       y )Nr   r  r  r  s        r   test_deterministic_row_orderr  	  s"    
 &ga@KE4T5.-Hr   c                 f    t        |       \  }}t        j                  |      }t        | |||       y r   )r  r?   concat_tablesr  )r  r   r  tablesrr  r   s         r   test_open_dataset_directoryr  	  s.    *73IFAV$EWe^]Kr   c           	         t        |       \  }\  }}t        j                  |      }t        j                  ||g      t        j                  t        |      t        |      g      g}||D cg c]"  }|j                  |j                  |            $ c}z  }|D ]M  }	|	j                  j                  |j                        sJ |j                  |	      }
|
j                  |      rMJ  y c c}w r   )r  r?   r  r   rS   r_   r  r  r=   r	  r   )r  r   r  r  r   r   r   datasetsr|   rS   r7  s              r   test_open_dataset_list_of_filesr  	  s    7@FNUEV$E 	

E5>"


CJE
+,H =E89M//23 H  $~~$$U\\222((1}}U###$	s   ,'C+c                    t        |       \  }}t        |      }t        j                  |      }|j                  j                  |j                        sJ t        j                  |t        j                               }|j                  j                  |j                        sJ t        j                  t              5  t        j                  |t        j                                d d d        y # 1 sw Y   y xY w)Nr  )r  r   r   rS   r=   r	  rc   r   r  r  r  rd   )r  r   rq   fspathdataset1dataset2s         r   #test_open_dataset_filesystem_fspathr  	  s     &g.KE4T"F zz&!H??!!%,,/// zz&R-?-?-ABH??!!%,,/// 
y	! <


6b&8&8&:;< < <s   ?*C22C;c                    | dz  }|j                          t        |      \  }}|j                  |      }t        j                  |      }t        j                  |t        j                               }t        j                  t        |      t        |            }	|j                  |j                  |            }
|j                  |      |j                  |      cxk(  r*|j                  |	      cxk(  r|j                  |
      k(  sJ  J y )Nsingle-filer  )mkdirr  relative_tor   rS   rc   r   r_   r	   r  r  r   )r  r   r  rp   r   rq   relative_pathd1d2d3d4s              r   test_construct_from_single_filer  	  s    -'IOO%i0KE4$$Y/M 
D	B	DR%7%7%9	:B	C&?93M	NB			]004	5B""2&.*A*A
+ J&&r*J.<.E.Eb.IJ J J J Jr   c                     | dz  }|j                          t        |      \  }}t        j                  |      }t        j                  |t	        j
                               }t        j                  |j                  t        |             }|j                  |      }	|j                  |      }
|j                  |      }|	|
cxk(  r|k(  sJ  J |||fD ]8  }|j                  |j                  |            }|j                  |      |	k(  r8J  y )Nsingle-directoryr  )r  r  r   rS   rc   r   r  r	   r   r  r  )r  r   r  rp   r  r   r  r  r  t1t2t3r|   restoreds                 r   $test_construct_from_single_directoryr  	  s    ,,IOO.y9MFE	I	B	I"*<*<*>	?B	INNw/G	HB		 	 	$B		 	 	$B		 	 	$B>r>>> "b\ 7 &&}':':1'=>&&x0B6667r   c                    | dz  }|j                          t        |      \  }}|D cg c]  }|j                  |        }}t        |       5  t	        j
                  |      }|j                  |      }t        |      t        t        t        |            k(  sJ 	 d d d        t	        j
                  |t        |             }	|j                  |	      }
t	        j
                  |      }|j                  |      }t	        j
                  |t        j                               }|j                  |      }|
cxk(  r|cxk(  r|k(  sJ  J y c c}w # 1 sw Y   xY w)Nzlist-of-filesr  )r  r  r  r
   r   rS   r   r   sumri   r	   rc   r   )r  r   rp   r  r   r   relative_pathsr  r  r  r  r  r  r  t4s                  r   !test_construct_from_list_of_filesr  	  s,    /)IOO.y9MFE6;<ammG,<N<	G	 0ZZ'$$R(2w#c#v.////0
 
Nw/G	HB		 	 	$B	E	B		 	 	$B	Eb&8&8&:	;B		 	 	$BrR =0 0s   EAEEc                     ddg}t        j                  t        d      5  t        j                  ||        d d d        y # 1 sw Y   y xY w)Nr   z!subdir/1/xxx/doesnt-exist.parquetzdoesnt-existr   r  )r  r  r  r   rS   )rn   r
  s     r   -test_construct_from_list_of_mixed_paths_failsr  	  sE     	%+E 
(	? -


5V,- - -s   AA
c                    t        j                  ddg|       }t        j                  d|       }t        j                  ||g      }t        |t         j                        sJ t	        t        |j                                     dk(  sJ |j                         }t	        |      dk(  sJ |j                  dk(  sJ t	        |j                        dk(  sJ |j                  D ]  }|j                  ddgk(  rJ  y )	Nr   r   r  r   r{   r  r   rv   )r   rS   rM   UnionDatasetr   rh   rK   r   rT  childrenr
  )rn   r[   r\   rS   r   childs         r   (test_construct_from_mixed_child_datasetsr  
  s     	

002>D	FA


8/Ajj!Q Ggr///tG))+,-222Eu:!!!w A%%%!! ={{;;= = 	= ==r   c                      t        j                  g d      } | j                         }|j                  dk(  sJ |j                  dk(  sJ y )Nr  r  r   )r   rS   r   r   rT  )emptyr   s     r   test_construct_empty_datasetr  
  sD    JJr%(ENNE>>Q!!!r   c            
      .   t        j                  g dt        j                  dt        j                         fdt        j
                         fg            } t        j                  t        d      5  | j                          d d d        y # 1 sw Y   y xY w)Nr  r[   r   r=   zMultiple matches for .*a.* in r   )
r   rS   r?   r=   rB   rD   r  r  rL  r   )r  s    r   *test_construct_dataset_with_invalid_schemar   
  sp    JJr%			bhhj	biik; 1 E 
z)I	J   s   1BBc                    t        j                  | t        j                  d      t        j                               }t        j                  | t        j                  d      t        j                               }t
        j                  j                  t        j                  t        d            gdg      t
        j                  j                  t        j                  t        d            gdg      }t        j                  t        d	      5  t        j                  ||g       d d d        d
}t        j                  t        |	      5  t        j                  g d       d d d        d}t        j                  t        |	      5  t        j                  d        d d d        d}t        j                  t        |	      5  t        j                  fdt        d      D               d d d        d}t        j                  t        |	      5  t        j                  g        d d d        d}t        j                  t        |	      5  t        j                  |g       d d d        d}t        j                  t        |	      5  t        j                  dg       d d d        d}t        j                  t        |	      5  t        j                  dg       d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   dxY w# 1 sw Y   $xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NrI  r  /schemar   r[   r  r\   z"Expected.*FileSystemDatasetFactoryr   zExpected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types: intr  zbExpected a path-like, list of path-likes or a list of Datasets instead of the given type: NoneTypezcExpected a path-like, list of path-likes or a list of Datasets instead of the given type: generatorc              3   "   K   | ]  }  y wr   r   )r  rr  batch1s     r   r  z<test_construct_from_invalid_sources_raise.<locals>.<genexpr>N
  s     -qF-   rY   zEMust provide schema to construct in-memory dataset from an empty listzFItem has schema
b: int64
which does not match expected schema
a: int64z}Expected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types:r   zCExpected a list of tables or batches. The given list contains a int)r   r   rc   r   r   r?   r,  r   r~   r/   r  r  r  rS   rL  InMemoryDataset)r   child1child2batch2rz  r  s        @r   )test_construct_from_invalid_sources_raiser  )
  s   ((
!##%F
 ((
	"##%F
 ^^''%))<(=cU'KF^^''%))<(=cU'KF	y(L	M %


FF#$%	 
 
y	1 


9	.  
y	1 


4	/  
y	1 /


-E!H-./ 	P  
z	2 
2	  
y	1 %


FF#$%	J  
y	1  


FA;  	N  
y	1 (
FA;'( (a% %  / / % %   ( (s`   K-K:L
(LL!L-L9M-K7:LLL!L*-L69MMc                 L   t         j                  j                  t        j                  t	        d            gdg      }t         j
                  j                  |j                  |g      }t         j                  j                  |g      }t        j                  g dt        j                  g             j                         }|t        j                  g       k(  sJ |||g|g|fD ]  }t        j                  |      }| j                  |      |k(  sJ t        t        |j                                     dk(  sJ t!        |j                               j                         |k(  sJ t         j                  j                  t        |j#                                     |k(  rJ  y )Nr   r[   r  r  r  r   )r?   r,  r   r~   r/   RecordBatchReaderrk   r=   rE   r   rS   r   r   r   rh   rK   r2   r   )r   rt   r   r   dataset_tablesourcerS   s          r   test_construct_in_memoryr  k
  sU   NN&&r(;'<SE&JE!!..u||eWEFHH!!5'*EJJr%		" !!) BHHRL(((%%5'6: J**V$&&w/58884--/01Q666G))+,5575@@@xx$$T'*<*<*>%?@EIIIJr   r   c                 "   t         j                  j                  t        j                  t	        d            gdg      t         j
                  j                  g      d}fdd ffdd ffdj                  ffD ]  \  }}t        j                  j                   |       ||       }|j                         k(  sJ t        j                  t         j                  |	      5  |j                          d d d         y # 1 sw Y   xY w)
Nr   r[   r  z#OneShotFragment was already scannedc                  Z    t         j                  j                   j                   g      S r   )r?   r  rk   r=   rt   s   r   r"  z$test_scan_iterator.<locals>.<lambda>
  s#    R))66ug' r   c                      t               S r   )r   r   s   r   r"  z$test_scan_iterator.<locals>.<lambda>
  s    '. r   c                  ,     fdt        d      D        S )Nc              3   "   K   | ]  }  y wr   r   )r  rr  rt   s     r   r  z7test_scan_iterator.<locals>.<lambda>.<locals>.<genexpr>
  s     .e.r  r   )r/   r  s   r   r"  z$test_scan_iterator.<locals>.<lambda>
  s    .U1X. r   r=   r   r   )r?   r,  r   r~   r/   rE   rk   r=   r   r^  r   r  r  r_  )r   r  r   r=   r   rt   r   s        @@r   test_scan_iteratorr  |
  s    NN&&r(;'<SE&JEHH!!5'*E1E'(,..5.=	  **))If+ * ?!U***]]2??%8 		 		 	s   )DD	c                    t        j                  t        d      dgdz  dgdz  z   d      }| dz  }|j                          t        d      D ]F  }|d	| z  }|j                          t	        j
                  |j                  d|z  d      |d
z         H |j                  dt        j                  dgdz  dgdz  z   dgdz  z   t        j                                     }||fS )Nr  r'  r{   r!  r   rZ   zdataset-partitionedrY   zpart=r  r   r   r   rv   rw   )
r?   r   r/   r  rl   rm   r  append_columnr~   r   )rf  r   rq   r:   r   
full_tables         r   _create_partitioned_datasetr  
  s    HH582$(bTAX*=>?E**DJJL1X CaSk!


u{{1Q3*D>,ABC
 $$!qA37*aS1W4288:FHJ tr   c           
         t        |       \  }}|j                  ddg      }t        ||||       t        j                  t        |      t        j                  d            }|j                  j                  |j                        sJ t        |       5  t        j                  dt        j                  d            }|j                  j                  |j                        sJ 	 d d d        t        j                  t        |      d      }|j                  j                  |j                        sJ t        j                  t        |      t        j                  t        j                  dt        j                         fg      d            }|j                  j                  t        j                  dt        j                                     }|j                  j                  |      sJ |j                         }|j                  dt        j                   dgd	z  d
gd	z  z   dgd	z  z   t        j                                     }	|j                  |	      sJ y # 1 sw Y   {xY w)Nr[   r\   r   r  r   zdataset-partitioned/r   r   rY   r   rv   rw   )r  r  r  r   rS   r_   r   r=   r	  r
   r?   r   r0   r@   r   r  r~   )
r  r   r  r  rq   r   rS   rD  r7  rz  s
             r   'test_open_dataset_partitioned_directoryr  
  s   27;J sCj)ET5.-H jjD	v >@G>>  !2!2333 
G	 8**3*,//*HJ~~$$Z%6%67778 jjT8G>>  !2!2333 jjD	__II	*+,V=>G ll))"((62779*EFO>>  111F""!qA37*aS1W42779EGH==""")8 8s   AI##I-c                    t        |       \  }}t        j                  t        |            }|j                  j                  |j                        sJ t        j                  t        |      t        j                               }|j                  j                  |j                        sJ t        |       5  t        j                  dt        j                               }d d d        j                  j                  |j                        sJ t        j                  t              5  t        j                  t        |      t        j                                d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)Nr  r  )r  r   rS   r_   r=   r	  rc   r   r
   r  r  r  rd   )r  r   rq   r  r  dataset3s         r   test_open_dataset_filesystemr  
  s    &g.KE4 zz#d)$H??!!%,,/// zz#d)0B0B0DEH??!!%,,/// 
G	 O::n9K9K9MNO??!!%,,/// 
(	) ?


3t9););)=>? ?O O
? ?s   8*E&*3E2&E/2E;c                     t        |       \  }}t        j                  t        d      5  t	        j
                  |gd       d d d        y # 1 sw Y   y xY w)Nz format 'blabla' is not supportedr   blablar  )r  r  r  rL  r   rS   )r  rr  rq   s      r   $test_open_dataset_unsupported_formatr  
  sD    !'*GAt	z)K	L ,


D6(+, , ,s   AAc                 :   t        |       \  }}t        j                  |      }t        j                  ||g      }t        |t        j                        sJ |j                  |j                  |            }|j                  |      |j                  |      k(  sJ y r   )r  r   rS   rM   r  r  r  r   )r  r   r  rr  rq   rS   unionrw  s           r   test_open_union_datasetr  
  s    !'*GAtjjGJJ)*EeR__---!!-"5"5e"<=G""7+~/F/Fu/MMMMr   c                     t        j                  d| d      }t        j                  t        d      5  t        j                  |gd       d d d        y # 1 sw Y   y xY w)NrI  r   rJ  zcannot pass any additionalr   r  )r   rS   r  r  rL  )r   r  s     r   .test_open_union_dataset_with_additional_kwargsr  
  sH    JJxM)LE	z)E	F .


E79-. . .s   AAc                  0   t        j                  t              5  t        j                  dd       d d d        t        j                  t
        j                  d      5  t        j                  dd       d d d        y # 1 sw Y   OxY w# 1 sw Y   y xY w)Nzi-am-not-existing.arrowr  r  zcannot be relativer   zfile:i-am-not-existing.arrow)r  r  r  r   rS   r?   r_  r   r   r   #test_open_dataset_non_existing_filer  
  sx     
(	) <


,U;< 
r.B	C A


1%@A A< <A As   B B B	Br   rp   r   r  r  partition_keysr  BCr  )DEFr  )r   NrY   )r  Nr  )Nrv   rY   c           	         t        j                  t        d      dgdz  dgdz  z   d      }d |d   v xs d |d   v }|d	k(  r|ry |d	k(  r(t        j                  j                  d
dg      }d}d }nM|r"t        j                  j                  |      }n t        j                  j                        }d}|r|}nd}| dz  }	|	j                          |\  }
}|
D ]Q  }|D ]J  }|	|j                  |xs ||xs |      z  }|j                  d       t        j                  ||dz         L S t        j                  t        |	      |      }fd}|j                  j                  t        j                  d
 ||
d                     j                  t        j                  d ||d                     }|j                  j!                  |      sJ y )Nr  r'  r{   r!  r   rZ   r   r   rp   part1part2r1  z{0}/{1})r-  r  zpart1={0}/part2={1}__HIVE_DEFAULT_PARTITION__rS   T)parentsr  r  c                 8   r`t        | t              rt        j                         nt        j                         }t        j
                  t        j                         |      S t        | t              rt        j                         S t        j                         S r   )rM   r_   r?   rD   r   rz   )r   
value_typer-  s     r   r4  z/test_partition_discovery.<locals>.expected_type8  sY    (23(<"((*J==Z88",S#"6299;FBHHJFr   )r?   r   r/   r   r   r(  r  r  r   rl   rm   rS   r_   r=   r0   r@   r	  )r  r   r  r-  r  r   has_nullfmt
null_valuebasepath
part_keys1
part_keys2r  r  rq   rS   r4  rD  s      `              r   test_partition_discoveryr     s   " HH583%!)seai*?@AE~a((EDN14E,EH{"x{"//88g1A 9 C
..77!1 8 L ..77!1 8 3L#&J5J"HNN+J
 9 	9E

5.J0CDEDJJtJ$NN5$"78		99 jjX\BGG ll))
-
167f
-
167 
 >>  111r   c                    t        j                  t        j                  ddgd      t	        d      d      }t        j                  |j                  dg      j                  d      }t        j                  || |d	
       t        j                  | d	t
        j                  j                  d            }t        j                  |d   |d   j                         d      }|j                         j                  |      sJ t!        |j#                               d   }|j                  |j                        j                  |d d       sJ |j$                  }|j'                  |j)                  |            }|j                         j                  |      sJ |j'                  |j)                  |            }|j                  |j                        j                  |d d       sJ |j                  |j                        j+                         j                  |d d j+                               sJ |j$                  j                  |      sJ y )Nr  r  r   r   r   r  r   r   r  r  r   r   Tr1  r  r  )r  r   r   rb   )r?   r   r[  rZ  r/   r   r   r  r=   r  rS   r  r(  r}  r   r	  rh   rK   r  r  r  r  )	r  r  r   r   rS   rz  rT   	part_exprr  s	            r   4test_dataset_partitioned_dictionary_type_reconstructr  F  s     HHbiic
A6uRyIJE??5<<188HDUG$yIjj	((1141HG xxeeFm&E&E&GHH $$X...G))+,Q/HGNN3::8BQ<HHH--I""=#6#6w#?@H%%h///""=#6#6x#@AHGNN3::8BQ<HHHGNN3==?FF!    ((//	:::r   c           	      L   ddl m} | d   \  }}}}d| d| d| d| d	}|j                  |      \  }} |j                  d       t	        j
                  d	g d
i      }	 |j                  d      5 }
t        j                  |	|
       d d d        |	|||||||fS # 1 sw Y   xY w)Nr   
FileSystem
connections3://:z5@mybucket/data.parquet?scheme=http&endpoint_override=z&allow_bucket_creation=Truemybucketr[   r  zmybucket/data.parquet)	r   r  from_urirf   r?   r   rg   rl   rm   )	s3_serverr  rW  rX  rY  rZ  rV  rc   rq   r   rr   s              r   rU  rU  e  s    %)2<)@&D$
J

|1ZL )"V1TF*E	G 
 ""3'HBBMM*HHc9%&E			6	7 #3
uc"# $CtZCC# #s   1BB#c                     | \  }}}}}}}}t        j                  |d      }|j                  |      j                  |      sJ t        j                  |d|      }|j                  |      j                  |      sJ y )Nr   r  r   r   )r   rS   r   r	  )rU  r   r   rq   rc   rV  rr  rS   s           r   test_open_dataset_from_uri_s3r  y  s~     (9$E4S!Q1 jjY/G""7+225999 jjiB?G""7+225999r   c                     | \  }}}}}}}}t        j                  d      }|j                  |      }t        j                  |d|      }	|j                  |	      j                  |      sJ y )Nr  r   r  )rc   r   rQ  r   rS   r   r	  )
rU  r   r   rq   r   rV  rr  r   finfosrS   s
             r    test_open_dataset_from_fileinfosr!    sj     0A,E4S!Q1z*H%%h/Fjj	jIG""7+225999r   c           	         | \  }}}}}}}}t        j                  d      }ddlm}	m}
 |j                  ||dd| d| i      }t        j                  |d|	      }|j                         j                  |      sJ  |
 |	|            }t        j                  |d|	      }|j                         j                  |      sJ y )
Ns3fsr   )FSSpecHandlerr   endpoint_urlzhttp://r  )r   secretclient_kwargsr   r  )
r  importorskipr   r$  r   S3FileSystemr   rS   r   r	  )rU  r   rq   rr  rW  rX  rY  rZ  r#  r$  r   rc   rS   s                r   $test_open_dataset_from_uri_s3_fsspecr*    s     =N9E4AtT:zv&D6			gdV1TF3
 
 
B jjiB?G$$U+++ 
mB'	(BjjiB?G$$U+++r   c                 |   ddl m} | d   \  }}}}d}d}d| d| d| d	| d
| d| d}|j                  |      \  }	}|dk(  sJ  |	j                  |       t	        j
                  dg di      }
 |	j                  |      5 }t        j                  |
|       d d d        t        j                  |d      }|j                         j                  |
      sJ dj                  ||||      }g d}|D ]O  \  }}|j                  |      }t        j                  ||d      }|j                         j                  |
      rOJ  t        j                  t        j                   d      5  |j                  d	      }t        j                  d|       d d d        d}d}|j                  |      }t        j                  t"              5 }t        j                  d|       d d d        t%        j&                        |j                  d||      k(  sJ d}|j                  |      }t        j                  t"              5 }t        j                  d|       d d d        t%        |j&                        |j                  d||      k(  sJ y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   [xY w)Nr   r  r  theirbucketnested/folder/data.parquetr  r  @r   z?scheme=http&endpoint_override=z&allow_bucket_creation=truez&theirbucket/nested/folder/data.parquetr[   r  r   r  3s3://{}:{}@{{}}?scheme=http&endpoint_override={}:{}))ztheirbucket/nested/folder/z/data.parquet)ztheirbucket/nested/folderdata.parquet)ztheirbucket/nested/folder/data.parquet)ztheirbucket/nestedr1  )r,  z/nested/folder/data.parquet)r,  r-  rJ  zMissing bucket namer   z'/theirbucket/nested/folder/data.parquetr  zThe path component of the filesystem URI must point to a directory but it has a type: `{}`. The path component is `{}` and the given filesystem URI is `{}`ztheirbucket/doesnt/existr0  NotFoundFile)r   r  r  rf   r?   r   rg   rl   rm   r   rS   r   r	  r   r  r  r_  rL  r_   r'   )r  r  rW  rX  rY  rZ  bucketrq   rV  rc   r   rr   rS   templater  prefixr0  excs                     r   -test_open_dataset_from_s3_with_filesystem_urir8    s    &)2<)@&D$
JF'D

|1ZL&4& 9"V1TF*E	G 
 ""3'HB;;;;BMM&HHc9%&E			t	$ #
uc"# jjY/G$$U+++ 	>DD
D$	
 
E  0oof%**Tc)D!((///0
 
r.C	D Nooc"


<MN
	! 
 &D
//$
C	z	" 3c


>c23syy>U\\*dC@@@@3D
//$
C	z	" 3c


>c23syy>U\\&$<<<<]# #4N N3 33 3s0   J*)J	J& J2JJ#&J/2J;c                     t        |       \  }}t        j                  d      }|j                  d      }t	        j
                  ||      }|j                  j                  |j                        sJ y )Nfsspecfiler  )r  r  r(  r   r   rS   r=   r	  )r  r   rq   r:  r   rS   s         r   test_open_dataset_from_fsspecr<    s\    %g.KE4  *F'Gjj'2G>>  ...r   c                 J   t        j                  d      }t        j                  dg di      }| dz  }t	        j
                  ||       |j                  d      }|j                  |       d   j                  d      sJ t        j                         }t        j                  t        j                  |            }|j                  ||      }|j                  |j                         sJ |j#                  ||      }|j$                  j                  |j                         sJ y )Nr:  r[   r  r0  r;  r   )r  r(  r?   r   rl   rm   r   lsendswithr   r   rc   r   r$  rA  r	  r=   r  r  )	r  r:  r   rq   	fsspec_fsr   r   r=   rT   s	            r   test_file_format_inspect_fsspecrA    s       *F HHc9%&E^#DNN5$ !!&)I<< #,,^<<< !!#F !1!1)!<=J^^D*-F==&&&##D*5H##**5<<888r   c                 4   | dz  }t        j                  ddgdz  t        d      d      }t        j                  |j                  dg      j                  d	      }t        j                  |||d
       t        j                  t        j                  dt        j                  d      fg      d	      }t        j                  |d
|      }t        j                  d      t        j                  d      kD  }|j                  ||      }|j                  d      j                         g dk(  sJ dd l}t        j                  d       |j                   ddd      kD  }|j                  ||      }|j                  d      j                         g dk(  sJ y )Ntest_partition_timestamps
2012-01-01z
2012-01-02r   r   )datesr  rE  r   r  r  r  r  r  r   r  )r   rY   r   rS  r  r   i  r   )r?   r   r/   r   r   r  r=   r  r  rS   r@   r3   	Timestampr   r-  rf  r+   )r  r   rq   r   r   rS   r6  r+   s           r   test_filter_timestamprG    s\    00DHH-1Bi E
 ??5<<	299&IDUDtIF ??299wS0A&B%CD"(*DjjidCG!BLL$>>I##GI#>E<<'')_<<<!$5H$5$5dAq$AAI##GI#>E<<'')_<<<r   c                 P   t        j                  dt        j                  g dt        j                               i      }t	        | |      \  }}t        j                  t        |            }t        j                  d      dkD  }t        |j                  ||            dk(  sJ y )Nr[   )r   r   rv   rY   r{   r   rw   rv   r   rY   )r?   r   r~   r   r  r   rS   r_   r@   r   r   )r  r   r   rr  rq   rS   filter_s          r   test_filter_implicit_castrJ  1  s     HHc288$6RWWYGHIE!'51GAtjjT#GhhsmaG~&&ww&?@AEEEr   c                 *   t        j                  dg di      }t        | |      \  }}t        j                  t        |            }|j                  |t        j                  d      t        j                  d       k(        }|j                  dk(  sJ y )Nr  )r[   r\   Nr   r   )
r?   r   r  r   rS   r_   r   r@   r  r   )r  r   r   rr  rq   rS   s         r   test_filter_equal_nullrL  <  s}     HHc+,-E!'51GAtjjT#G##48 $ E >>Qr   c                    t        j                  g dt        d      D cg c]  }t        j                  ddd|       c}t        dd      D cg c]  }t        j                  dd|       c}d      }t	        | |      \  }}t        j                  t        |            }t        j                  t        j                  d      t        j                  dd	g            }|j                  ||
      j                  dk(  sJ t        j                  t        j                  d            dk\  }|j                  ||
      j                  dk(  sJ t        j                  t        j                  d      t        j                  d            }|j                  |d|i      }	|	d   j!                         g dk(  sJ y c c}w c c}w )N)r[   r\   Nr[   r}   r   i  r   r  r  r  r[   r\   r   rY   r  rv   r  r   r)   r   )r?   r   r/   r+   r  r   rS   r_   r  is_inr@   r~   r   r   hourdays_betweenrf  )
r  r   r:   r   rr  rq   rS   rI  r   r7  s
             r   test_filter_compute_expressionrQ  I  sg   HH'8=aA1haA.A5:1a[Aha+A E
 "'51GAtjjT#Ghhrxx}bhhSz&:;G""77";DDIIIggbhhsm$)G""77";DDIII??288C="((3-8D$$Wvtn$EF&>##%888 BAs   F<
Gc                 @   t        j                  | t        j                  d      t        j                               }t        j
                  |g      }t        |j                               dk(  sJ t        d |j                         D              sJ |j                         d   j                  |j                               sJ |j                         j                  |j                               sJ t        |j                         t         j                        sJ y )NrI  r  r   c              3   P   K   | ]  }t        |t        j                           y wr   )rM   r?   r+  )r  r  s     r   r  z%test_dataset_union.<locals>.<genexpr>g  s     KAz!RYY'Ks   $&r   )r   r   rc   r   r   UnionDatasetFactoryr   r>  r  r	  rA  rM   r   r*  )r   r  r   s      r   test_dataset_unionrU  ^  s    ''rx0##%E $$eW-G w&&()Q...K1H1H1JKKKK""$Q'..u}}?????##EMMO444gnn&

333r   c                 h	   t        j                  d|d      }t        j                  d|dddg      }t        j                  d|dd	      }|j                  |j                  cxk7  r|j                  k7  sJ  J t        j                  |||g      }t        |t         j                        sJ d
}t        j                  t        |      5  t        j                  ||g|       d d d        t        j                  dt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fg      }|j                  j                  |      sJ |j                         j                  j                  |      sJ t        j                  ||g      }t        j                  dt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fg      }|j                  j                  |      sJ |j                         j                  j                  |      sJ t        j                  dt        j                         fdt        j                         fdt        j                         fg      }t        j                  ||g|      }|j                         j                  j                  |      sJ t        j                  dt        j                         fdt        j                         fdt        j                         fg      }t        j                  ||g|      }|j                         j                  j                  |      sJ t        j                   t#        d      dgdz  dgdz  z   dgg d      }t%        | |      \  }	}
t        j                  |
      }t        j                  t        j&                  d      5  t        j                  ||g       d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)NrI  r   rJ  r  weekr(   r   r   r   /hiver   z$cannot pass any additional argumentsr   r  r%   r&   r'   r   r   rb   rX  r  r'  r{   r!  r   	abcdefghj)r%   r'   r&   r  r   zUnable to merge)r   rS   r=   rM   r  r  r  rL  r?   rA   rB   rC   rD   r   r	  r   r   r/   r  r@  )r  r   r  r  child3	assembledmsgrD  r   rr  rq   child4s               r   &test_union_dataset_from_other_datasetsr_  m  s   ZZ]9MFZZ	mI&,g%68FZZM)%+-F ==FMM:V]]:::::

FFF34Ii111
0C	z	- ?


FF#>? ii		"((*	"**,	"))+			"((*! O ""?333&&--o>>>

FF+,Iii		"((*	"**,	"))+		"((*! O ""?333&&--o>>>ii	"((*	"))+	! O
 

FF+ODI&&--o>>>ii	"((*	"))+	BIIK ! O
 

FF+ODI&&--o>>> HHeAhqB4!8 3[A57E!'7GAtZZF	r((0A	B %


FF#$% %c? ?b% %s   R:R(R%(R1c                     d}t        j                  t        |      5  t        j                  g d|        d d d        y # 1 sw Y   y xY w)Nz8points to a directory, but only file paths are supportedr   )rI  r  rY  r  )r  r  IsADirectoryErrorr   rS   )r   r]  s     r   4test_dataset_from_a_list_of_local_directories_raisesrb    s>    
DC	(	4 M


1mLM M Ms   AA
c           
         t        j                  t        j                  d|       t        j                  d|       t        j                  d|       g      }t        j                  dt        j                         fdt        j
                         fdt        j                         fdt        j                         fg      }|j                  j                  |      sJ t        j                  t        j                  d|       t        j                  d|       t        j                  d| d	
      g      }t        j                  dt        j                         fdt        j
                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fg      }|j                  j                  |      sJ y )NrI  r  r  rY  r%   r&   r'   r(   r   )r   r   r   r   )
r   rS   r?   r=   rA   rB   rC   rD   r	  r   )r   rS   rD  s      r   &test_union_dataset_filesystem_datasetsrd    sj   jj


86


97


7}5 G
 ii		"((*	"**,	"))+	! O >>  111 jj


86


97


7}6J G
 ii		"((*	"**,	"))+		"((*! O >>  111r   c                     t        j                  g dg dd      }t        j                  | dz         d fd	}d }|} ||||j                         |j                  }|} |||       t        j                  ddg      }t        j                  g dg dgd	d
g      } |||       t        j                  dg      }t        j                  g dgd
g      } |||       t        j                  ddg      }t        j                  g dt        j
                  g dd      gd
dg      } |||       t        j                  ddg      }t        j                  t         dz        |      }t        j                  |d
   j                  d      |d	   gd
d	g      } |||       t        j                  d
t        j                  t        j                               fdg      }t        j                  t         dz        |      }|j                  j                  |      sJ t        j                  t        d      5  j!                  |       d d d        y # 1 sw Y   y xY w)Nr  r  r  r  rZ   r0  c                    t        j                  t        dz        |       }||j                  j	                  |      sJ |j                  j	                  |       sJ j                  |      }|j	                  |      sJ y )Nr0  rb   )r   rS   r_   r=   r	  r   )r=   rz  rD  rS   r7  r   r  s        r   r  z-test_specified_schema.<locals>._check_dataset  su    **S>!9:6J&>>((999>>((000((1}}X&&&r   )rD  )r\   rC   )r[   rB   r\   r[   r  )r}   r   NNNr   rw   r}   )r[   r   rb   z#Unsupported cast from int64 to listr   r   )r?   r   rl   rm   r=   r~   r   rS   r_   r  list_r   r	  r  r  r  r   )r  r   r   r  r=   rz  rS   s   ``     r   test_specified_schemarj    s   HH9<89ENN5'N23' FH68U\\B \\FH68$ YY(.9:Fxxy1#sDH68$ YY'(FxxC51H68$ YY78Fxx"47CE"Cj*H 68$ YY(89:FjjW~56vFGxxs1s%"Cj*H 68$ YYbhhrxxz235EFGFjjW~56vFG>>  (((	*B
D )() ) )s   I##I,c                    | dz  }t        j                  dg di      }t        j                  ||       t        j                  dt        j
                         fg      }t        j                  t        |      gdz  |      }|j                  j                  |      sJ |j                  |      }t        j                  t        d      5  |j                         }|j                          d d d        y # 1 sw Y   y xY w)Nr0  r[   r  d   rb   z#Unsupported cast from int64 to nullr   )r?   r   rl   rm   r=   r  r   rS   r_   r	  r   r  r  r  rb  rc  )r  r   fnr   r=   rS   r   r   s           r   test_incompatible_schema_hangrn    s     
>	!BHHc9%&ENN5"YYbggi()*Fjj#b'S8G>>  ((($$W-G	*B
D ""$  s   !C66C?c                    t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }t        j                  |      5 }t        j
                  ||j                        }|j                  |j                         d          |j                          d d d        t        j                  |t        j                         	      }|j                  |      }|j                  |      sJ t        |       d
D ]=  }t        j                  ||	      }|j                  |      }|j                  |      r=J  y # 1 sw Y   xY w)Nr  r   rw   rf  rC   rZ   z
test.arrowr   r  )r  arrow)r?   r   r~   r_   output_streamRecordBatchFileWriterr=   write_batchr   rA  r   rS   r  r   r	  rV   )	r  r   r   rq   rF  rG  rS   r7  
format_strs	            r   test_ipc_formatru  !  s)   HH288IF;88LyAC DE w%&D			$	 4))$=5++-a01
 jjb&6&6&89G$$W-F==/8& $
**T*5((1}}U###$ s   *AE  E)c           	         ddl m} t        j                  t        j                  g dd      t        j                  g dd      d      }t        | d	z        }|j                  ||       t        j                  |t        j                         
      }t        |j                               }t        |d   t        j                        sJ |j                  |      }|j                  d       |j!                  |      sJ t#        |       t        j                  |d
      }|j                  |      }|j                  d       |j!                  |      sJ |j                  |dg      }|j                  d       |j!                  |j%                  dg            sJ |j                  |dt        j&                  d      dz  i      }|j                  d       |j!                  t        j                  dt        j                  g dd      i            sJ |j)                  |      dk(  sJ |j)                  |t        j&                  d      dkD        dk(  sJ y )Nr   orcr  r   rw   rf  rC   rZ   test.orcr  Tr>  rx  r\   r)   b2rv   )r  r  g333333?rY   r[   r   r   )r   rx  r?   r   r~   r_   rm   r   rS   r%  rh   rK   rM   FileFragmentr   rB  r	  rV   r  r@   r   )r  r   rx  r   rq   rS   r  r7  s           r   test_orc_formatr|  7  s   HH288IF;88LyAC DE w#$DOOE4 jjb&6&6&89GW**,-IilBOO444$$W-F
OOO==/8jje,G$$W-F
OOO==$$Wse$<F
OOO==se,---$$$ 12 % F OOO==
$I>?@   $$W-222$$WRXXc]Q5F$G1LLLr   c                    ddl m} t        j                  t        j                  g dd      t        j                  g dd      d      }t        | d	z        }|j                  ||       t        j                  |d
      }t        |j                  |            }t        |      dk(  sJ |d   j                  dk(  sJ |d   j                  |j                         d         sJ y )Nr   rw  r  r   rw   rf  rC   rZ   ry  rx  r  r   rY   )r   rx  r?   r   r~   r_   rm   r   rS   rh   r   r   r   r	  )r  r   rx  r   rq   rS   r7  s          r   test_orc_scan_optionsr~  ^  s    HH288IF;88LyAC DE w#$DOOE4 jje,G.++G45Fv;!!9"""!9E,,.q1222r   c                      	 ddl m}  y # t        $ rK t        j                  t
        d      5  t        j                  dd       d d d        Y y # 1 sw Y   Y y xY ww xY w)Nr   r%  z'not built with support for the ORC filer   r8  rx  r  )r  r%  r&  r  r  rL  r   rS   r  s    r   test_orc_format_not_supportedr  u  sP    *1 *]]G
 	* JJs5)	* 	* 	**s&   	 $AAAA	AAc            	      x   t        j                  t        d      5  t        j                  t        j                  dt        d      i      dd       d d d        t        j                         } t        j                  t        d      5  | j                          d d d        y # 1 sw Y   RxY w# 1 sw Y   y xY w)Nz9Writing datasets not yet implemented for this file formatr   r[   r   rx  z/tmp)r   r  )
r  r  r  r   r  r?   r   r/   r%  make_write_options)ofs    r   +test_orc_writer_not_implemented_for_datasetr    s    	I
 
 	HHc59%&uv	
	
 
			B	I
   		   
 
   s   7B$
B0$B-0B9c                    t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }|j	                         j                  |d	       t        j                  |t        j                         
      }|j                  |      }|j                  |      sJ t        |       t        j                  |d
      }|j                  |      }|j                  |      sJ y )Nr  rB   rw   rf  rC   rZ   test.csvFr&   r  r   )r?   r   r~   r_   r  to_csvr   rS   r  r   r	  rV   )r  r   r   rq   rS   r7  s         r   test_csv_formatr    s    HH288IG<88LyAC DE w#$D	OOT/jjb&6&6&89G$$W-F==/8jje,G$$W-F==r   compression)bz2gziplz4zstdc                    t         j                  j                  |      st        j                  | d       t        j                  t        j                  g dd      t        j                  g dd      d      }t        j                         }|dk7  r|nd	}t        | d
| z        }|j                  ||      5 }|j                         j                  d      }|j                  |j                  d             d d d        t!        j"                  |t!        j$                               }	|j'                  |	      }
|
j)                  |      sJ y # 1 sw Y   WxY w)Nz support is not builtr  rB   rw   rf  rC   rZ   r  gzz	test.csv.r  Fr  rc  r  )r   Codecis_availabler  skipr?   r   r~   rc   r   r_   rg   r  r  writerr  r   rS   r  r   r	  )r  r  r   r   r   suffixrq   rF  csv_strrS   r7  s              r   test_csv_format_compressedr    s$    ==%%k2{m#89:HH288IG<88LyAC DE##%J'61[tFw9VH--.D		&	&t	&	E , //#***7

7>>'*+, jjb&6&6&89G$$W-F==, ,s   ?AEEc           	         t        | dz        }t        |d      5 }|j                  d       d d d        t        j                  |d      }|j                  |      }|j                  t        j                  dt        j                  g d      i            sJ t        j                  |t        j                  t        j                  j                  d	      
            }|j                  |      }|j                  t        j                  dt        j                  ddg      i            sJ t        j                  |t        j                  t        j                  j                  dg      
            }|j                  |      }|j                  t        j                  dt        j                  g d      i            sJ y # 1 sw Y   xY w)Nr  wzskipped
col0
foo
bar
r   r  skipped)col0r  r  r   )r  r  r  r  r  re  )r  r  r  r  )r_   rL   r  r   rS   r   r	  r?   r   r~   r  r   r"  )r  r   rq   rF  rS   r7  s         r   test_csv_format_optionsr    s|   w#$D	dC 0D

./0jje,G$$W-F==
)RXX&<=>?A A A jjb&6&6VV''!'4'6 7G$$W-F==6288UEN+C"DEFFFjjb&6&6VV''eW'='? @G$$W-F==
%"CDEFH H H0 0s   G

Gc           
      X   t        | dz        }t        |d      5 }|j                  d       d d d        t        j                  |t        j
                  t        j                  j                  d                  }|j                  |      }g d}|j                  |k(  sJ |j                  t        j                  t        j                  d	g      t        j                  d
g      t        j                  dg      t        j                  d	g      d            sJ y # 1 sw Y   xY w)Nr  r  z1,a,true,1
T)autogenerate_column_namesr  r  )f0r  r  r  r   r[   )r_   rL   r  r   rS   r  r?   r   r"  r   re  r	  r   r~   )r  r   rq   rF  rS   r7  expected_column_namess          r   (test_csv_format_options_generate_columnsr    s    w#$D	dC #D

>"# jjb&6&6VV''$'G'I JG$$W-F4"7777==1#)+3%)+4&)9)+1##8 9 : : :# #s   D  D)c           	         t        | dz        }t        |d      5 }|j                  d       d d d        t        j                  |d      }t
        j                  j                  dgd      }t        j                  |t        j                  j                  d	
            }|j                  ||      }|j                  t        j                  dt        j                  g d      i            sJ t        j                  |      }t        j                  ||      }|j                  |      }|j                  t        j                  dt        j                  g d      i            sJ t        j                         }|j                  ||      }|j                  t        j                  dt        j                  g d      i            sJ y # 1 sw Y   xY w)Nr  r  zcol0
foo
spam
MYNULL
r   r  MYNULLT)null_valuesr+  r.  r/  )r-  r  )fragment_scan_optionsr  )r  spamNr,  )r  r  r  )r_   rL   r  r   rS   r   r   r2  r1  r?   r"  r   r	  r   r~   r  )	r  r   rq   rF  rS   r-  r   r7  rv  s	            r   test_csv_fragment_optionsr    s   w#$D	dC 0D

./0jje,Gkk00hZEI 1 KO'''VV''5'9;G $$WG$LF==62884I+J"KLMMM!!/BJjjj1G$$W-F==62884I+J"KLMMM'')G$$WG$LF==
&"((#<=>?A A A%0 0s   GGc                    t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }|j	                         j                  d	      d
d j                  dd      }t        |d      5 }|j                  |       d d d        t        j                  |t        j                               }|j                  |      }|j                  |      sJ t        |       t        j                  |d      }|j                  |      }|j                  |      sJ y # 1 sw Y   xY w)Nr  rB   rw   rf  rC   rZ   	test.jsonrecordsorientr   rK  },{}
{r  r  r$  )r?   r   r~   r_   r  to_jsonreplacerL   r  r   rS   r#  r   r	  rV   r  r   r   rq   rr   r  rS   r7  s           r   test_json_formatr    s   HH288IG<88LyAC DE w$%D
//

#
#9
#
5a
;
C
CE6
RC	dC A	 jjb&7&7&9:G$$W-F==/8jjf-G$$W-F== s   EEc           	      B   t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }|j	                         j                  d	      d
d j                  dd      }t        |d      5 }|j                  |       d d d        t        j                  t        d      5  t        j                  |t        j                  t         j                  j!                  d                  }d d d        t        j                  |t        j                  t         j                  j!                  d                  }|j#                  |      }|j%                  |      sJ y # 1 sw Y   xY w# 1 sw Y   xY wNr  rB   rw   rf  rC   rZ   r  r  r  r   rK  r  r  r  ztry to increase block sizer   r{   r/  r  r  @   )r?   r   r~   r_   r  r  r  rL   r  r  r  rL  r   rS   r#  r$  r"  r   r	  r  s           r   test_json_format_optionsr    sL   HH288IG<88LyAC DE w$%D
//

#
#9
#
5a
;
C
CE6
RC	dC A	 
z9
; =**T"*;*;,,,:+< ==
 jjb&7&7WW((B(7'9 :G$$W-F== = =s   F		A
F	FFc                    t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }|j	                         j                  d	      d
d j                  dd      }t        |d      5 }|j                  |       d d d        t        j                  t        d      5  t        j                  t         j                  j                  d            }t        j                   |t        j"                  |            }d d d        t        j                  t         j                  j                  d            }t        j                   |t        j"                  |            }|j%                  |      }|j'                  |      sJ y # 1 sw Y   xY w# 1 sw Y   xY wr  )r?   r   r~   r_   r  r  r  rL   r  r  r  rL  r   r3  r$  r"  rS   r#  r   r	  )	r  r   r   rq   rr   r  r   rS   r7  s	            r   test_json_fragment_optionsr  '  sl   HH288IG<88LyAC DE w$%D
//

#
#9
#
5a
;
C
CE6
RC	dC A	 
z9
; F,,,,,:<**T"*;*;G*DE	F ((WW((B(79Gjjb&7&7&@AG$$W-F== F Fs   F3	AG 3F= G	c                 h   t        | dz        }dD ]  \  }}t        |d      5 }|j                  |       d d d        t        j                  dt        j
                         fdt        j
                         fg      }t        j                  dgdgd|	      }t        j                  j                  |
      }t        j                  |      }	t        j                  ||	      }
|
j                  j                  |      sJ |
j                         j                  |      rJ  y # 1 sw Y   xY w)Nr  ))latin-1s   a,b
un,lphant)utf16s    a , b 
 u n ,  l  p h a n t wbr[   r\   un
   éléphantrZ   rb   encodingr  r  )r_   rL   r  r?   r=   rD   r   r   r"  r   r  rS   r	  r   )r  r   rq   r  
input_rowsrF  rD  rE  r  r  dataset_transcodeds              r   test_encodingr  >  s   w#$D! D* $ 	#JJz"	# ))c299;%7#ryy{9K$LM)5#8@OQ vv))8)<&&LAZZ[A!((//@@@!**,33NCCC%D	# 	#s   D((D1	c                 ,   t        | dz        }t        |d      5 }|j                  d       d d d        t        j                  dt        j
                         fdt        j
                         fg      }t        j                  dgdgd|	      }t        j                  |d
|      }t        j                  t        j                  j                  d      5  |j                  |       d d d        t        j                  j!                  d      }t        j"                  |      }t        j                  ||      }	|	j                  j%                  |      sJ |	j                         j%                  |      sJ y # 1 sw Y   RxY w# 1 sw Y   xY w)Nr  r  s   ,b
un,lphant   ér\   r  r  )r  r\   rb   r   r  zinvalid UTF8r   r  r  r  r  )r_   rL   r  r?   r=   rD   r   r   rS   r  r  r   rS  r_  r   r   r"  r  r	  )
r  r   rq   rF  rD  rE  rS   r  r  r  s
             r   test_column_names_encodingr  W  sJ   w#$D	dD	 1T

/01 ii$		!4sBIIK6H IJOXXdV%1N4<KMN jjeODG	w{{//~	F )() 66%%y%9L""=KD=$$++O<<<&&(//???%1 1) )s   E=F
=F
Fc                    ddl m} t        j                  t        j                  g dd      t        j                  g dd      d      }| d	z  }|j                           ||t        |d
z               t        j                  |t        j                               }|j                  |      }|j                  |      sJ t        |       t        j                  |d      }|j                  |      }|j                  |      sJ |j                  |ddg      }|j                  ddgk(  sJ |j                  |ddg      }|j                  ddgk(  sJ  ||t        |dz        d       t        j                  t               5  |j                  t        j                  |d             d d d        y # 1 sw Y   y xY w)Nr   )r  r  r   rw   rf  rC   rZ   feather_datasetr  r  r  r\   r[   r)   zdata1.featherr   version)pyarrow.featherr  r?   r   r~   r  r_   r   rS   r  r   r	  rV   re  r  r  rL  )r  r   r  r   rf  rS   r7  s          r   test_feather_formatr  o  s   -HH288IF;88LyAC DE ))GMMO%W~567jj)9)9);<G$$W-F==/8jj3G$$W-F== $$WsCj$AF3*,,,$$WsCj$AF3*,,, %W67C	z	" G

79 EFG G Gs   
'F::G)r  r  brotlic                    t        j                  t        j                  dgdz  d      t        j                  g ddz  d      d      }t         j                  j	                  |      st        j                          | d	z  }|j                          t        j                         }| d
z  }|j                          t        j                  |t        |dz        ||j                  d              |dk(  rt        j                  t        d      5  |j                  |      }d d d        t        j                  t        d      5  t        j                  |      }|j                  |      }d d d        y |j                  |      }t        j                  |t        |dz        ||       t        j                  |t        j                               }	|j!                  |	      }
|
j#                  |      sJ |dz  dz  }|j%                         j&                  }|dz  dz  }|j%                         j&                  }||k  sJ y # 1 sw Y   &xY w# 1 sw Y   y xY w)Nr   ,  r   rw   rf  rl  rC   rZ   feather_dataset_compressedfeather_dataset_uncompressedz
data.arrowr  r   file_optionsr  zCompression typer   r  part-0.arrow)r?   r   r~   r  r  r  r  r  r   r  r  r_   r  r  rL  rS   r   r	  statst_size)r  r  r   r   rf  r  uncompressed_basedirwrite_optionscodecrS   r7  compressed_filecompressed_sizeuncompressed_fileuncompressed_sizes                  r   test_feather_format_compressedr    s'    HH288QCG&988L$49EG HE88  -44GMMO""$K"%CC  </0 333E	 h]]:-?@ 	)'::' ; )M	) ]]:-?@ 	NHH[)E'::u:MM	N 	22{2KMGl"#"	 jj)9)9);<G$$W-F==,~=O%**,44O,|;nL)..088....1	) 	)	N 	s   I	(I	IIc           	      d   g }t        d      D ]h  }t        j                  |gdz  t        d      D cg c]  }t        j                          c}d      }t	        j
                  |t        |       |       j t        | dz        }t	        j                  j                  ||       ||fS c c}w )zO
    Creates a simple (flat files, no nested partitioning) Parquet dataset
    r{   r   r  metadata_collector	_metadata)	r/   r?   r   r  rl   r  r_   write_metadatar=   )	root_pathr  r:   rr  r   metadata_paths         r   _create_parquet_dataset_simpler    s    
 1X 
b%PR)0TQ0TUV
3y>6H	

 	K/0Mm- % 1Us   B-c                    | dz  }t        |      \  }}t        j                  |      }|j                  j	                  |j                        sJ t        |j                        dk(  sJ |j                         }|j                  dk(  sJ y )Nr  r{   (   )	r  r   parquet_datasetr=   r	  r   r
  r   r   )r  r  r  r   rS   r7  s         r   test_parquet_dataset_factoryr    s     00I9)DM5  /G>>  ...w}}"""F??b   r   win32z'Results in FileNotFoundError on Windows)reasonc                    t        j                  d      }| dz  }t        |      \  }}|j                  d      }t	        j
                  t	        j                  |            }t        j                  ||      }|j                  j                  |j                        sJ t        |j                        dk(  sJ |j                         }|j                  dk(  sJ y )Nr:  r  r;  r  r{   r  )r  r(  r  r   rc   r   r$  r   r  r=   r	  r   r
  r   r   )	r  r:  r  r  r   r@  r   rS   r7  s	            r   #test_parquet_dataset_factory_fsspecr    s       *F 00I9)DM5 !!&)I !1!1)!<=J  :FG>>  ...w}}"""F??b   r   c                    | dz  }t        j                  dgdz  t        j                  j	                  d      d      }g }t        j                  |t        |      |       t        |dz        }t        j                  |j                  ||       t        j                  |      }|j                  j                  |j                        sJ |j                         }|j                  dk(  sJ y )Nr  r   r   r  r  r  )r?   r   r[  r  randnrl   r  r_   r  r=   r   r  r	  r   r   )r  r  r   r  r  rS   r7  s          r   &test_parquet_dataset_factory_roundtripr    s     00IHHQC"HBIIOOB,?@AEs9~2D 	K/0Mm-   /G>>  ...F??b   r   c                    g }t        d      D ]l  }t        j                  dt        t        |dz  |dz   dz              i      }| | dz  }t	        j
                  |||       |d   j                  | d       n t        | dz        }t	        j                  j                  ||       t        j                  |      }|j                         }|j                  d      j                         }|t        t        dd	            k(  sJ y )
Nr   r  r   rX   r  rK  r  r   rl  )r/   r?   r   rh   rl   rm   set_file_pathr_   r  r=   r   r  r   r-  rf  )	r  	metadatasr:   r   
table_pathr  rS   scanned_tablescanned_cols	            r   "test_parquet_dataset_factory_orderr    s     I 2Y 44adQqS"H-./1!H~-

ujYG"##qcN34 +-.MellM9=  /G$$&M&&t,668K$uQ}----r   c                    | dz  }t        |      \  }}t        |j                  d            d   j                          t	        j
                  |      }|j                  j                  |j                        sJ t        |j                        dk(  sJ t        j                  t              5  |j                          d d d        y # 1 sw Y   y xY w)Ntest_parquet_dataset_invalid	*.parquetr   r{   )r  rh   globunlinkr   r  r=   r	  r   r
  r  r  r  r   )r  r  r  r   rS   s        r   $test_parquet_dataset_factory_invalidr  /  s     88I9)DM5	$%a(//1  /G>>  ...w}}"""	(	)   s   .CCc                    t        t        | j                  d                  }t        j                  |d         j
                  j                         }g }|D ][  }t        j                  |      j                  }|j                  t        |j                  |                    |j                  |       ] | dz  }t        j                  |||       |S )Nr  r   r  r  )rh   r3  rglobrl   r  r=   to_arrow_schemar  r  r_   r  r0   r  )r  parquet_pathsr=   r  rq   r  r  s          r   _create_metadata_filer  =  s    	 <=>M^^M!,-44DDFF ,>>$'00s4#3#3I#>?@!!(+,
 +M2D r   c           
         t        j                  t        j                  t        d            t        j                  t        j
                  j                  d            t        j                  t	        j                  ddgd            gg d      }|j                  ddi      }t        j                  |t        |       d	g
       t        |       |fS )Nr  r[   r\   r   r  r  r   r'   r   r  )r?   r   r~   r/   r[  r  r  rZ  rG   rl   r  r_   r  )r  r   s     r   #_create_parquet_dataset_partitionedr  O  s    HH
rRXXbiioob&9:
C:r*+- #E
 ))5'*:;Es9~vhG +U22r   c                    | dz  }t        |      \  }}t        j                  d      }t        j                  ||      }|j                  j                  |j                        sJ t        |j                        dk(  sJ |j                         }|j                  dk(  sJ |j                         j                  d      j                  d	      }|j                         }t        j                  j                  ||       y )
N(test_parquet_dataset_factory_partitionedr   r  r  rv   r  r  Tdrop)r  r   r   r  r=   r	  r   r
  r   r   r  sort_valuesreset_indexr3   testingassert_frame_equal)r  r  r  r   r   rS   r7  rz  s           r   r
  r
  Z  s     DDI>yIM5??&1L  \JG>>  ...w}}"""F??b    ++D1==4=HF HJJ!!&(3r   c                 F   | dz  }t        |      \  }}t        j                  |d      }|j                  j	                  |j                        sJ d|j                  j
                  v sJ t        |j                               }d|d   j                  j
                  v sJ y )N%test_parquet_dataset_factory_metadatar   r     keyr   )	r  r   r  r=   r	  r  rh   rK   r  )r  r  r  r   rS   r  s         r   r  r  n  s     AAI>yIM5  VDG>>  ...W^^,,,,,W**,-IYq\11:::::r   c                    |\  }}| dz  }t        |      \  }} ||g      5  t        j                  |t        j                  d      |      }d d d         |g       5  t	        j                               }d d d         |g       5  t	        j                  t        j                  d      dkD               d d d         |g       5  d   j                  t        j                  d      dkD         d d d         |g       5  d   j                         }	|	d   j                          d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   exY w# 1 sw Y   y xY w)N#test_parquet_dataset_lazy_filteringr   r  )r   r   r     r   )	r  r   r  r   rh   rK   r@   r  r  )
r  r   rc   r   r  r  rr  rS   r  rg_fragmentss
             r   r  r  }  sb    'B
 ??I5i@M1 
}o	& $$7 
b	 2..01	2 
b	 9W""288D>B#6789 
b	 =!''(;<= 
b	 3 |668Q0023 3' 2 29 9= =3 3s;   -D5E	1E
+E'E%5D>E
EE"%E.c                 H   t        j                  dg di      }| dz  }|j                  |       t        j                  |      }|j                  |      j                  }|j                  |dg      j                  }d|j                  v sJ |j                  |d      sJ y )Nr[   r  r  r)   s   pandasTr9  )	r3   r4   r  r   rS   r   r=   r  r	  )r  r   rH   rq   rS   r=   ra  s          r   test_dataset_schema_metadatar    s     
sI&	'B^#DMM$jjG$$W-44F%..w.FMM '''==)$=???r   c                    t        j                  dt        j                  g dd      i      }t        j                  |t        | dz               t        j                  dt        j                         fg      }t        j                  | dz  d|      }|j                  |t        j                  d      dkD  	      }|d   j                  |d   j                  d
      j                  d            sJ t        |j!                               d   }|j                  |t        j                  d      dkD  |      }|d   j                  |d   j                  d
      j                  d            sJ y )Nr  r  r   rw   r0  r   r  rv   r   rB   r   r  )r?   r   r~   rl   rm   r_   r=   rB   r   rS   r   r@   r	  r  r  rh   rK   )r  r   r   r=   rS   filteredrT   s          r   test_filter_mismatching_schemar    s<    HHeRXXlABCENN5#g678 YY
+,-Fjj. 6CG
 &&wrxx7J&KHE?!!%,"3"3G"<"B"B1"EFFFG))+,Q/H&&%1,V ' =HE?!!%,"3"3G"<"B"B1"EFFFr   c                    t        j                  dj                         t        t	        d            d      }t        | dz        }t        j                  ||dg       t        j                  |d      }|j                  |      }|j                  |dg	      }|j                  d      j                  |j                  d            sJ y )
Nza a b br{   r  r9  r   r  r   r  r)   )r?   r   r?  rh   r/   r_   rl   r  r   rS   r   r-  r	  )r  r   r   rq   rS   all_cols	part_onlys          r   +test_dataset_project_only_partition_columnsr     s     HHioo/U1XGHEw'(DtVH=jjF3G&&w/H''&'BI??6")))*:*:6*BCCCr   c           	         t        j                  dt        j                  g dd      i      }| dz  }|j	                  |d       t        j                  |dt        j                  dt        j                         fg      	      }t        j                  dt        j                  g dt        j                               i      }|j                  |      j                  |      sJ y )
Nr  rh  objectdtypez(test_dataset_project_null_column.parquetr   r  r   r  )r3   r4   r[  r~   r  r   rS   r?   r=   rB   r   r   r	  )r  r   rH   r  rS   rz  s         r    test_dataset_project_null_columnr%    s     
ubhh'9JK	LB<<AMM!IM&jj9 "		E288:+>*? @BGxx);RXXZ HIJH""7+228<<<r   c                    ddl m} t        j                  g dg dg dd      }|j	                  || dz         t        j                  | dz  d	      }|j                  |t        j                  d
      t        j                  d      j                  dd      t        j                  d      dk(  d      }t        j                  g dt        j                  g dd      g dd      }|j                  |      sJ t        j                  t        d      5  |j                  |d
d
i       d d d        y # 1 sw Y   y xY w)Nr   r  r  )r!  r(  r)  rd  r  r  r  r  r  r  r   Fsafer  r[   )	A_renamedB_as_intC_is_ar)   rw   )TFFzExpected an Expressionr   )r   r  r?   r   r  r   rS   r   r@   r  r~   r	  r  r  r  )r  r   r  r   rS   r7  rz  s          r   test_dataset_project_columnsr-    s   HH9<oNOE%>!9:jj>1)DG$$WXXc]HHSM&&wU&;((3-3&7$ F
 xxHHYW5& H
 ==""" 
y(@	A =#s<= = =s   D::Ec                    t        |       \  }}t        j                  |      }t        |j                  t        j
                        sJ t        |       \  }}t        j                  |      }t        |j                  t        j
                        sJ t        j                  |d      }|j                  }|J t        |t        j                        sJ |j                  t        j                  dt        j                         fg      k(  sJ t        |j                        dk(  sJ |j                  d   t        j                  g dt        j                               k(  sJ t        j                  t        j                  dt        j                         fg      d      }t        |t        j                        sJ t        |j                        dk(  sJ t        d |j                  D              sJ t        j                  ||      }|j                  }t        |t        j                        sJ |j                  t        j                  dt        j                         fg      k(  sJ t        |j                        dk(  sJ t        d	 |j                  D              sJ t        j                  |d      }t        j                  t!        |j#                               |j                  |j$                  |j&                  
      }|j                  J | dz  }t)        |      \  }}t        j*                  |d      }|j                  }|J t        |t        j                        sJ |j                  t        j                  dt        j,                         fg      k(  sJ t        |j                        dk(  sJ t/        |j                  d   j1                               ddhk(  sJ y )Nr   r  r   r   r   )r   r   rv   r  c              3   $   K   | ]  }|d u  
 y wr   r   r  s     r   r  z6test_dataset_preserved_partitioning.<locals>.<genexpr>$       4QqDy4r  c              3   $   K   | ]  }|d u  
 y wr   r   r  s     r   r  z6test_dataset_preserved_partitioning.<locals>.<genexpr>+  r0  r  r   zdata-partitioned-metadatar[   r\   )r  r   rS   rM   r   r   r  r  r=   r?   r   r   r  r~   r  r  rh   rK   r   r   r  r  rD   r   rf  )	r  rr  rq   rS   r  r   r  r  r  s	            r   #test_dataset_preserved_partitioningr2    sG    "'*GAtjjGg**B,D,DEEE 37;JjjGg**B,D,DEEE jjF3GDdB//000;;"))fbhhj%9$:;;;;t  !Q&&&Q288Irxxz#BBBB ??299vrxxz&:%;<VLDdB//000t  !Q&&&4$"3"34444jjD1GDdB//000;;"))fbhhj%9$:;;;;t  !Q&&&4$"3"34444 jjF3G##W""$%gnn~~'*<*<H   ((( 55I:9EM1  VDGDdB//000;;"))fbiik%:$;<<<<t  !Q&&& t  #--/0S#J>>>r   c                    t        j                  t        j                  dt        j                               t        j                  dt        j                  t        j
                         t        j                                     g      }t        j                  g dt        t        d            d|      }t        | dz        }t        j                  ||dg       t        j                  | dz        }|j                  d      j                         |j                  d      j                         k(  sJ |j                  d      j!                  |j                  d            sJ y )	Nr  r   )NNr[   r[   r{   r  rb   r9  r  )r?   r=   r@   rB   rz   r   rD   r   rh   r/   r_   rl   r  r]  r-  rf  r	  )r  r=   r   rq   actual_tables        r   +test_write_to_dataset_given_null_just_worksr5  C  s    YY

#
rxxz299;?@ F HH4!%(^-5;=E w'(DtVH===>!9:L v&00 	f		'	'	)* * *u%,,U\\%-@AAAr   c                     dd l m}  |j                  |  |j                  |dfg            } |j                  | |      S )Nr   	ascending)r   )pyarrow.computecomputesort_indicesSortOptionsr   )tabsort_colr  sorted_indicess       r   _sort_tabler?  X  sA     $R__^R^^h%<$=>@N2773''r   c                 `   |xs |}t        j                  | |d|d       t        |j                  d            }t	        |      t	        |      k(  sJ t        j
                  |d|      }t        |j                         |      j                  t        | j                         |            sJ y )Nrp  Fr   r   r   *r  )	r   r  rh   r  r   rS   r?  r   r	  )rS   r  expected_filesr=  base_dir_pathr   
file_pathsr  s           r   _check_dataset_roundtriprF  _  s    !-XMWhw".EC m))#./Jz?c.1111 zzgLBH x((*H5<<G$$&13 3 3r   c                    | dz  }|j                          t        |      }t        j                  |      }| dz  }|dz  g}t	        |t        |      |d|       | dz  }|dz  g}t	        |||d|       | dz  }|j                          t        |      }t        j                  |      }| dz  }|dz  g}t	        |t        |      |d|       y )Nr  zsingle-file-targetr  r[   zsingle-file-target2r  zsingle-directory-target)r  r  r   rS   rF  r_   r  )r  rp   rr  rS   targetrC  s         r   test_write_datasetrI  r  s     -'IOOI&Ajj#G ++F~-.NWc&k>3O ,,F~-.NWfnc6J ,,IOO"9-Ajj#G00F~-.NWc&k>3Or   c                 R   | dz  }t        |      }t        j                  d      }t        j                  ||      }| dz  }|dz  |dz  dz  |dz  |dz  dz  g}t        j                  t	        j
                  d	t	        j                         fg      d      }t        |t        |      |d
||       | dz  }|dz  |dz  dz  |dz  |dz  dz  g}t        j                  t	        j
                  d	t	        j                         fg            }t        |t        |      |d
||       y )Npartitionedr   r  r  zpartitioned-hive-targetpart=ar  part=br   r  partitioned-dir-targetr[   r\   )	r  r   r   rS   r?   r=   rD   rF  r_   )r  rp   rr  r   rS   rH  expected_pathsr  s           r   test_write_dataset_partitionedrP    s6    -'I+I6A??&1Ljj>G 00F6H,~=6H,~=N //
		FBIIK()*6;VndF(*
 //Ffsl^3fsl^3N //
		FBIIK()*,VndF(*r   c                    t        j                  g dg dd      }t        j                  || ddg       t        j                  | ddg      }|j
                  }|D ch c]9  }t        t        j                  |      j                  |       j                        ; }}|h dk(  sJ |j                         }|j                  |      sJ y c c}w )NrQ  rS  rZ   r  r\   r  >   r  rR  r  r?   r   r   r  rS   r
  r_   r  r  r  r  r   r	  r  r   r  r
  r  partitioning_dirsr  s          r   #test_write_dataset_with_field_namesrU    s    HH+/BCEUGE#&%) 

75uEIOOEBG=>GLLO''0778  ///((*O!!%(((    >Cc                    t        j                  g dg dd      }t        j                  || ddgd       t        j                  | dd      }|j
                  }|D ch c]9  }t        t        j                  |      j                  |       j                        ; }}|h d	k(  sJ |j                         }|j                  |      sJ y c c}w )
NrQ  rS  rZ   r  r\   r   )r   r   partitioning_flavorr  >   b=xb=yb=zrR  rS  s          r   (test_write_dataset_with_field_names_hiver\    s    HH+/BCEUGE#&%VE 

75vFIOOEBG=>GLLO''0778   5555((*O!!%(((rV  c                    t        j                  g dg dg dd      }t        j                  || ddg       t        j                  | ddg      }t        j                         5 }t        j                  |j                  ddg	      |ddg       t        j                  |ddg      }|j                         }t        |j                               |j                  d
      j                         k(  sJ 	 d d d        y # 1 sw Y   y xY w)NrQ  rS  r  rd  r  r\   r  r}   r)   r[   )r?   r   r   r  rS   r  r  r   r   r  r2  drop_columnsr  r   rS   tempdir2r  r  s         r   test_write_dataset_with_scannerra    s    HH+/$& 'E UGE#&%) jjcUCG		$	$	& =(
#s<!%se	E JJxSEJ	#,,.O--/ ++C0::<= 	= == = =s   'BDDc           	         	
 t        j                          G fddt              }t        j                   |t        j
                                     t        j                  t        j                  dt        j                               g      }t        j                  t        j                  t        t        d                  g|      	d
d}dd		
fd
}t        j                  j!                   |       |d	      t        j"                   fd      }|j%                          	 t'        j&                         fd}d}d} |       dk  r/
|kD  r

|k(  rd	}n"
}t'        j(                  d        |       dk  r/|sJ 	 dj+                          |j-                          y # dj+                          |j-                          w xY w)Nc                       e Zd Z fdZy)6test_write_dataset_with_backpressure.<locals>.GatingFsc                 ^    j                          | j                  j                  ||      S )Nr  )waitr   rg   )r   rq   r  consumer_gates      r   rg   zItest_write_dataset_with_backpressure.<locals>.GatingFs.open_output_stream  s)     88..th.GGr   N)r   r   r   rg   )rh  s   r   GatingFsrd    s	    	Hr   ri  r9   r	  rb   r          Tc               3   h   K   k  r(sy t        j                  d       dz    k  r'y y w)Ng{Gz?r   )r  sleep)rt   batches_readend
keep_goings   r   counting_generatorz@test_write_dataset_with_backpressure.<locals>.counting_generator  s:     S JJtALK S s   ,22r  c                  H    t        j                  t              d       S )Nr   r  )r   r  r_   )	gating_fsr   r  s   r   r"  z6test_write_dataset_with_backpressure.<locals>.<lambda>  s     r''S\)	K r   )rH  c                  2    t        j                           z
  S r   )r  )starts   r   durationz6test_write_dataset_with_backpressure.<locals>.duration  s    99;&&r   Fr   r  )	threadingEventr   rc   r   r   r?   r=   r@   r   rj   r~   rh   r/   r   r^  rk   Threadru  r  rm  r   r  )r  ri  r=   min_backpressurerq  write_threadrv  
last_valuebackpressure_probably_hitrt   rn  rh  ro  rs  rp  r   ru  s   `        @@@@@@@@r   $test_write_dataset_with_backpressurer~    s    OO%M
H< H
 ););)= >?IYY456F OORXXd5+;&<=>vNEL
CJ jj%%V & ?G ##KLL !			' 
$)!j2o..:-04-)
JJsO j2o )((
 
 
s   8AF6 F6 6$Gc                    t        j                  g dg dd      }t        j                  || ddg       t        j                  | ddg      }t        j                         5 }t        j                  ||ddg       t        j                  |ddg      }|j                         }t        |j                               |j                         k(  sJ 	 d d d        y # 1 sw Y   y xY w)NrS  r  r\   r}   r  r\   r  )
r?   r   r   r  rS   r  r  r   r  r2  r_  s         r   test_write_dataset_with_datasetr  9  s    HH?;<EUGE#&%) jjcUCG		$	$	& F(
( %SE	; JJxSEJ	#,,.O--/0EOO4EEEEF F Fs   $A0CC'c           	      X   | dz  }t        j                  g dg dd      }t        j                  t        j                  t        j
                  dt        j                               g      d      }d }t        j                  |||d	
       t        j                  g dg dd      }t        j                  t         j                        5  t        j                  |||d	
       d d d        t        j                  ddgi      }|dz  dz  }t        j                  j                  ||       t        j                  |||d	d       t        j                  g dg dd      }t        j                  | d	|      j                         } |||       |j!                         sJ t        j                  |||d	d       t        j                  g dg dd      }t        j                  | d	|      j                         } |||       |j!                         rJ y # 1 sw Y   9xY w)Nr   rS  r  r  r}   r   )r=   r  c                     | j                         j                  d      j                  d      }|j                         j                  d      j                  d      }|j                  |      sJ y )Nr\   Tr  )r  r  r  r	  )r  r  df1df2s       r   compare_tables_ignoring_orderzGtest_write_dataset_existing_data.<locals>.compare_tables_ignoring_orderQ  sZ    lln((-99t9Dlln((-99t9Dzz#r   r  r  rd  r  r\   ezc=2z	foo.arrowoverwrite_or_ignore)r   r   existing_data_behavior)r  r  r[   r\   r}   )rv   r   rv   rY   r{   r  delete_matching)r  r[   r\   r}   r  )r?   r   r   r   r=   r@   rB   r  r  r  r_  r   r  r  rS   r   exists)	r  rp   r   r   r  extra_table
extra_fileoverwrittenreadbacks	            r    test_write_dataset_existing_datar  J  s   $IHH?;<E??"))	#rxxz	"#+%-35L UILOHH?;<E 
r	' B
	&25	BB ((C#<(KU"[0JOO!!+z: UIL!,AC (('o>@Kzz'%'355=XZ !(K8 UIL!:KM ((!5LIJKzz'%'355=XZ !(K8  """"9B Bs   HH)c                 f    t        |       D cg c]  }t        j                  ||       c}S c c}w r   )r/   r  randint)rR  r  r  rr  s       r   _generate_random_int_arrayr  {  s%    .3Dk:FNN3$:::s   .c                     g }g }t        |       D ]<  }|j                  t        |d|             |j                  dt        |      z          > t	        j
                  ||      }|S )Nr   )rR  r  r  r}   r9   r  )r/   r0   r  r_   r?   rj   )num_of_columnsnum_of_recordsr9   re  r:   rj   s         r   _generate_data_and_columnsr    sn    DL>" *.N343AC 	D 	C#a&L)	*
 ??LALr   c                 t    t        t        t        j                  |       j	                  d|                   S )Nz**/*.)r   rh   r  r  r  base_directoryr  s     r   _get_num_of_files_generatedr    s-    tGLL055k]6KLMNNr   c                    | dz  }dd}d}d}t        ||      }t        j                  ||d|       t        j                  |      }|z  dz   }t        |      |k(  sJ g }t        |      D ]V  \  }	}
|t        |
      z  }t        j                  |d      }|j                  |j                         j                  d	          X |t        |      k(  sJ |t        |      k(  sJ t        fd
|D              sJ y )Nr   r   rv   #   r   )r   max_rows_per_filemax_rows_per_groupr   r  r   c              3   (   K   | ]	  }|k    y wr   r   )r  file_rowcountr  s     r   r  z7test_write_dataset_max_rows_per_file.<locals>.<genexpr>  s      <   11 <s   )r  r   r  rb  rc  r   re   r_   rS   r0   r   shaper  r  )r  rp   r  r  r  rj   files_in_direxpected_partitionsresult_row_combinationrr  f_filef_pathrS   r  s                @r   $test_write_dataset_max_rows_per_filer    s1   $INN-n.<>L \9Y'8(:< ::i(L ),==A | 3333  |, C	6S[(**VI6%%g&6&6&8&>&>q&ABC #&<"====S!78888 <$:< < < <r   c                    | dz  }d}d}d}g d}|D cg c]  }t        ||       }}|dz  }t        j                  ||||d       t        j                  |      }	t        |	      D ]  \  }
}|t        |      z  }t        j                  |d	      }|j                         }|j                         }t        |      D ]6  \  }}|j                  }|t        |      d
z
  k  r||k\  r||k  r.J ||k  r6J   y c c}w )Nr   r  rP  rv   )
r   r   r   r   r   r{   r{   r{   r{   r{   min_rows_groupr   )r9   r  min_rows_per_groupr  r   r  r   )r  r   r  rb  rc  re   r_   rS   r   r   r   r   )r  rp   r  r  r  record_sizesr  record_batchesdata_sourcer  rr  r  r  rS   r   batchesr  rt   rows_per_batchs                      r   %test_write_dataset_min_rows_per_groupr    s9   $IN1L -9:( 11?A :N : ..K.;(:(:%'
 ::k*L|, <	6s6{***VI6  """$"7+ 	<IB"^^NCL1$$%);;"&889 9 &);;;;	<<:s   C:c                    | dz  }d}d}d}t        ||      }|dz  }t        j                  |||d       t        j                  |      }g }|D ]i  }	|t        |	      z  }
t        j                  |
d      }|j                         }|j                         }|D ]  }|j                  |j                          k |dd	gk(  sJ y )
Nr   r  rv      max_rows_groupr   )r9   r  r  r   r  rf  )r  r   r  rb  rc  r_   rS   r   r   r0   r   )r  rp   r  r  r  rj   r  r  batched_datar  r  rS   r   r  rt   s                  r   %test_write_dataset_max_rows_per_groupr    s    $INN-n.<>L ..K,(:%' ::k*LL 0s6{***VI6  """$ 	0E/	00 B8###r   c                    | dz  }d}d}ddg}t        j                  g dg dg|      }t        j                  g d	g d
g|      }t        j                  g dg dg|      }t        j                  g dg dg|      }t         j                  j                  ||||g      }	t	        j
                  t        j                  ||   t        j                         fg      d      }
|dz  }t	        j                  |	||
|       d } |||||      \  }}||k(  sJ |dz  }d}t	        j                  |	||
||d        |||||      \  }}||kD  sJ y )Nr   r   r   c1c2)r   rv   rY   r{   r   r   )r[   r\   r}   r|   r  r[   r  )r   r  rS  rP  r   r   )r[   r\   r}   r|   r  r}   )r  r   rg  rf  r   r   )r[   r\   r}   r|   r  r|   )r  r  r  rO  r   r   )r[   r\   r}   r|   r  r\   r   r  default)r9   r  r   r   c                 z    t        | |      }t        t        j                  j	                  ||               }||fS )Nr  )r  r   r?   r9  unique)r  rj   r  col_idnum_of_files_generatednumber_of_partitionss         r   _get_compare_pairz<test_write_dataset_max_open_files.<locals>._get_compare_pair  s>    !<&K"A"2::#4#4\&5I#JK%';;;r   max_1rY   F)r9   r  r   r   max_open_filesr   )	r?   rj   rE   rk   r   r   r=   rD   r  )r  rp   r  partition_column_idre  record_batch_1record_batch_2record_batch_3record_batch_4r   r   data_source_1r  r  r  data_source_2r  s                    r   !test_write_dataset_max_open_filesr    s   $IK$<L__+>+I+K+79N __+=+I+K+79N __+@+I+K+79N __+A+I+K+79N HH!!>>#1>#C DE ??
		L!45ryy{CDEL 	)M%-".{D< M>;/1 10 "%9999 'MN%-".{$2G
 M>;/1 10 "$8888r   c                    | dz  }t        |      }t        j                  |t        j                  j	                  d            }| dz  }|dz  |dz  dz  |dz  |dz  dz  g}t        j
                  t        j                  |j                  j                  d	      g      d	t        j                  ddg      i
      }t        |t        |      |d||       y )NrK  Tr1  r  rN  r[   r  r\   r   r  r  )r  r   rS   r  r(  r   r?   r=   r@   r~   rF  r_   )r  rp   rr  rS   rH  rO  r   s          r   #test_write_dataset_partitioned_dictr  :  s     -'I+I6A jj((1141HJG //Ffsl^3fsl^3N ??299V$.& $'bhhSz235L VndF!#r   c                    | dz  }t        |      }t        j                  |d      }t        j                  t	        j
                  dt	        j                         fg      d      }| dz  }g fd}t        j                  ||d|d	|
       |dz  dz  |dz  dz  h}t        t        t        j                              }||k(  sJ | dz  }	t        j                  ||	d|d       t        j                  |d|      }
t        j                  |	d|      }|
j                         j                  |j                               sJ y )NrK  r   r  r   r  partitioned1c                 <    j                  | j                         y r   )r0   rq   )written_filepaths_writtens    r   file_visitorz4test_write_dataset_use_threads.<locals>.file_visitora  s    \../r   r  Tr   r   r   r  rL  part-0.featherrM  partitioned2FrA  r  )r  r   rS   r   r?   r=   rD   r  r   ri   r  r  r   r	  )r  rp   rr  rS   r   target1r  rO  paths_written_settarget2result1result2r  s               @r   test_write_dataset_use_threadsr  T  sB    -'I+I6Ajj8G??
		FBIIK()*6;L &GM0 | 	(--(--N Cm<=...&G jjNGjjNG$$W%5%5%7888r   c                 R   t        j                  dt        d      i      }|j                  d      }t	        j
                  || ddd       t	        j                  |       j                  d	      d   j                         }d
}|D ]  }t        |      }||kD  s
J d|        |}  y )Nr[   rv  rv   )max_chunksizer   T)r   r   preserve_orderFr   rK  z!Sequence expected to be ordered: )
r?   r   r/   r   r   r  rS   r   to_numpyr  )r  r   r  seqprevitemcurrs          r   -test_write_dataset_use_threads_preserve_orderr  |  s     HHc5;'(EQ/GWgi!%d<
**W

&
&5
&
9#
>
G
G
ICD 4yd{E?uEE{r   c           	         t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }| dz  }t	        j
                  ||d	d
       t        |j                  d            }|dz  g}t        |      t        |      k(  sJ t	        j                  |d      j                         }|j                  |      sJ | dz  }|dz  |dz  dz  |dz  |dz  dz  g}g g fd}t	        j                  t        j                  dt        j                         fg      d      }t	        j
                  ||d
d	||       t        |j                  d            }t        |      t        |      k(  sJ D cg c]!  }t        j                   j#                  |      # }	}|	k(  sJ t	        j                  |d|      }|j                         j                  |      sJ t%              dk(  sJ D ]  }
t'        j(                  |
      |v rJ  y c c}w )Nr  c              3   D   K   | ]  }t        j                            y wr   r  r  s     r   r  z#test_write_table.<locals>.<genexpr>  r  r  r[   r   r\   r  r  singledat_{i}.arrowr  basename_templater   rB  zdat_0.arrowr  r  rK  rL  rM  c                 r    j                  | j                         j                  | j                         y r   )r0   rq   rR  )r  visited_pathsvisited_sizess    r   r  z&test_write_table.<locals>.file_visitor  s+    \../\../r   r   r   r  )r   r  r   r  r  rv   )r?   r   r~   r/   r   r  rh   r  r   rS   r   r	  r   r=   rD   rb  rq   getsizer   r  r  )r  r   r  rE  rO  r7  r  r   rq   actual_sizesvisited_pathr  r  s              @@r   test_write_tabler    s@   HH
rRXX%IuRy%II
#sebj() "#E
 !HUH'6yJ hnnS)*J./Nz?c.1111ZZ/88:F== &H8X0=@8X0=@N
 MM0 ??
		FBIIK()*6;LUHY'6".\K hnnS)*Jz?c.11116CDdBGGOOD)DLDL(((ZZ\JF??##E***}"""% <||L)^;;;< Es   &Ic           	      p   t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }t        j                  |gdz        }| d	z  }t        j                  ||d
       t        |j                  d            t        |dz  g      k(  sJ t        j                  |d      j                         j                  |      sJ | dz  }t        j                  |g|d
       t        |j                  d            t        |dz  g      k(  sJ t        j                  |d      j                         j                  |      sJ | dz  }t        j                  |j                         |d
       t        |j                  d            t        |dz  g      k(  sJ t        j                  |d      j                         j                  |      sJ | dz  }t        j                  ||g|d
       t        |j                  d            t        |dz  g      k(  sJ t        j                  |d      j                         j                  t        j                  |gdz              sJ y )Nr   c              3   D   K   | ]  }t        j                            y wr   r  r  s     r   r  z6test_write_table_multiple_fragments.<locals>.<genexpr>  r  r  r[   r   r\   r  r  rv   r  r  r  rB  r  r  zsingle-listmultiplezmultiple-table)r?   r   r~   r/   r  r   r  r   r  rS   r   r	  r   )r  r   r  s      r   #test_write_table_multiple_fragmentsr    sn   HH
rRXX%IuRy%II
#cUQY&' "#E eWQY'E !HUHY7x~~c"#sH7G,G+H'IIII::hu-668??FFF &HeWhy9x~~c"#sH7G,G+H'IIII::hu-668??FFF #HU%%')Dx~~c"#s	$	$%(' ' ' '::hu-668??FFF ))HeU^Xi@x~~c"#s	$	$%(' ' ' '::hu-668??
%#  r   c           	         t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }| dz  }t	        j
                  d	 |j                         D        ||j                  d
d       t	        j                  |d      j                         }|j                  |      sJ | dz  }t         j                  j                  |j                  |j                               }t	        j
                  ||d
d       t	        j                  |d      j                         }|j                  |      sJ | dz  }t        |      }t	        j
                  ||d
d       t	        j                  |d      j                         }|j                  |      sJ y )Nr  c              3   D   K   | ]  }t        j                            y wr   r  r  s     r   r  z&test_write_iterable.<locals>.<genexpr>  r  r  r[   r   r\   r  r  inmemory_iterablec              3       K   | ]  }|  y wr   r   )r  rt   s     r   r  z&test_write_iterable.<locals>.<genexpr>  s     <e<s   r  r  )r=   r  r   r  inmemory_readerr  inmemory_pycapsule)r?   r   r~   r/   r   r  r   r=   rS   r   r	  r  rk   r   )r  r   r  r7  r   streams         r   test_write_iterabler    s   HH
rRXX%IuRy%II
#sebj() "#E
 ,,H<)9)9);<h!LL'6uF ZZ/88:F==**H!!..u||/4/?/?/ACFVXQVWZZ/88:F==--H&FVXQVWZZ/88:F==r   c           	         t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }t	        j
                  |      }| dz  }t	        j                  |j                  |      |d	
       |j                  t	        j
                  |d
            }|j                  |      sJ | dz  }t	        j                  |j                  |dg      |d	
       |j                  t	        j
                  |d
            }|j                  |j                  dg            sJ t        j                  t        d      5  t	        j                  |j                  |      ||j                  d	       d d d        y # 1 sw Y   y xY w)Nr  c              3   D   K   | ]  }t        j                            y wr   r  r  s     r   r  z%test_write_scanner.<locals>.<genexpr>  r  r  r[   r   r\   r  r  dataset_from_scannerr  r  r  dataset_from_scanner2r  r)   zCannot specify a schemar   )r=   r   )r?   r   r~   r/   r   rS   r  r   r   r	  r  r  r  rL  r=   )r  r   r   rS   r  r7  s         r   test_write_scannerr    s   HH
rRXX%IuRy%II
#sebj() "#E jjG//H^++9.$$RZZ%GHF== 00H^++GdV+Di1$$RZZ%GHF==tf-... 
z)B	C @
//8( %Y	@@ @ @s   3GGc                    t        j                  t        j                  t        d            t        j                  dgdz  dgdz  z         j	                         gddg      }t        j                  |j                  dg      j                        }| dz  }t        j                  ||d	|
       t
        j                  j                  dgd      }t        j                  |d|
      j                         }|j                  |      sJ y )Nr  r[   r   r\   r  r   r  rS   r  r  Tr1  r  )r?   r   r~   r/   r}  r   r   r  r=   r  r   r(  rS   r   r	  )r  r   r   r  partitioning_readr7  s         r   !test_write_table_partitioned_dictr    s     HH
r
#sebj();;= V_E
 ??5<<#9#@#@AL"Hx	
 0099	4 : )ZZ->hj  ==r   c           
      >   t        j                  t        j                  t        d      d      t        j                  t	        j
                  ddd      j                  d            t        j                  t	        j                  dd	gd
            gg d      }| dz  }t        j                  ||d       t        |j                  d            }|dz  g}t        |      t        |      k(  sJ t        j                  |d      j                         }|j                  |      sJ dD ]n  }t        j                          }|j#                  |      }dt%        |      v sJ | d| z  }t        j                  ||||       t'        j(                  |dz        }	|dk(  rdnd}
|	j*                  |
k(  sJ t        j                  |d      j                         }|j,                  }|dk(  rB|j                  d|j/                  d      j1                  t        j2                                     }|dv rC|j                  d|j/                  d      j1                  t        j4                  d                  }|j7                  |      }|j                  |      roJ  y )Nr  r  rw   rD  zdatetime64[D]r#  zdatetime64[ns]r[   r\   r   r  r  r  r   r  rB  part-0.parquet)1.02.42.6r  z(<pyarrow.dataset.ParquetFileWriteOptionsparquet_dataset_versionr  r	  r  r   )r	  r
  r   r  )r?   r   r~   r/   r[  r\  r  rZ  r   r  rh   r  r   rS   r   r	  r   r  r  rl   read_metadataformat_versionr=   r@   	with_typerB   r  r  )r  r   r  rE  rO  r7  r  r   optsmetaexpected_versionr=   rz  s                r   test_write_dataset_parquetr  ,  sA    HH
r*
<?CJJ 	
C:r*+	
 "#E **HUHY7hnnS)*J!112Nz?c.1111ZZ3<<>F== ) '%%'(((99T$ZGGG6wi@@
dK+; ;<$+u$45%""&6666 HY7@@BeZZ6<<?#<#<RXXZ#HIFn$ZZ6<<?#<#<R\\$=O#PQF::f%}}X&&&''r   c           	      r   t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }| dz  }t	        j
                  ||d	
       t        |j                  d            }|dz  g}t        |      t        |      k(  sJ t	        j                  |d	
      j                         }|j                  |      sJ t	        j                  t        j                  j                  |j                   j"                              }|j%                  d      }| dz  }t	        j
                  ||||       t	        j                  ||
      j                         }|j                  |      sJ y )Nr  c              3   D   K   | ]  }t        j                            y wr   r  r  s     r   r  z)test_write_dataset_csv.<locals>.<genexpr>[  r  r  r[   r   r\   )r  r  chr1r  csv_datasetr   r  rB  z
part-0.csvr  r  F)include_headercsv_dataset_noheaderr  )r?   r   r~   r/   r   r  rh   r  r   rS   r   r	  r  r   r   r"  r=   r  r  )r  r   r  rE  rO  r7  r   r  s           r   test_write_dataset_csvr  Y  so   HH
rRXX%IuRy%II
#sebj() "#E
 &HUHU3hnnS)*J-.Nz?c.1111ZZ/88:F== 7;;+B+B\\'' ,C ,) *F$$E$:D//HUHV$GZZ099;F==r   c           	      J   t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }dfd	}| d
z  }t	        j
                  ||d|       sJ y )Nr  c              3   D   K   | ]  }t        j                            y wr   r  r  s     r   r  z:test_write_dataset_parquet_file_visitor.<locals>.<genexpr>v  r  r  r[   r   r\   r  r  Fc                 X    | j                   | j                   j                  dk(  rdy y y )NrY   T)r  rT  )r  visitor_calleds    r   r  z=test_write_dataset_parquet_file_visitor.<locals>.file_visitor|  s2    !!-%%11Q6!N 7 .r   r  r   )r   r  )r?   r   r~   r/   r   r  )r  r   r  r  r  s       @r   'test_write_dataset_parquet_file_visitorr  s  s    HH
rRXX%IuRy%II
#sebj() "#E
 N" **HUHY".0 >r   c                 r   t        d      D cg c]  }|gdz  D ]  }|  }}}t        d      D cg c]  }|gdz  D ]  }|dz  	  }}}t        j                  ||dgdz  dgdz  z   d      }| dz  }t        j                  t        j
                  dt        j                         fg      d	
      }g d fd}t        j                  ||d|d|       |dz  dz  |dz  dz  h}	t        t        t        j                              }
|
|	k(  sJ J j                  dk(  sJ y c c}}w c c}}w )Nr{   r   r[   r  r\   r  rK  r   r   r  c                 l    | j                   r| j                   j                  | j                         y r   )r  r0   rq   )r  r  sample_metadatas    r   r  zAtest_partition_dataset_parquet_file_visitor.<locals>.file_visitor  s+      *33O\../r   r   Tr  rL  r  rM  rv   )r/   r?   r   r   r   r=   rD   r  r   ri   r  r  rT  )r  r   r  f1_valsf2_valsr   r  r   r  rO  r  r  r"  s              @@r   +test_partition_dataset_parquet_file_visitorr%    s]   !&qCuglCdtCtCGC$)!HF5%2FtBwFwFGFHHG7"ebjC52:57 8E -'I??
		FBIIK()*6;L MO0 y| 	H//H//N Cm<=...&&&&&!+++? DFs
   D-D3c                 6   t        j                  dt        j                  dd      gi      }|d   j                  j
                  dk(  sJ t        j                  || d       t        j                  | dz        }|d   j                  j
                  dk(  sJ y )Nr[   rD  zEurope/Brussels)tzr   r  r  )
r?   r   r3   rF  rx   r'  r   r  rl   r]  )r  r   r7  s      r   (test_write_dataset_arrow_schema_metadatar(    s    
 HHcBLL:KLMNOE:??!2222UGI6]]7%556F#;"3333r   c                     ddl m} t        j                  dg di      }|j	                  ddi      }t        j                  || d       |j                  | d	z        j                  }|j                  ddik(  sJ y )
Nr   r'  r[   r  r     valuer  r  r  )
r   r  r?   r   rG   r   r  r]  r=   r  )r  r  r   r=   s       r   "test_write_dataset_schema_metadatar+    sr    HHc9%&E))68*<=EUGI6*: :;BBF??vx0000r   c                     t        j                  dg di      }|j                  ddi      }t        j                  || d       t        j                  | dz        j                  }|j                  ddik(  sJ y )Nr[   r  r  r*  r   r  r  )	r?   r   rG   r   r  rl   r]  r=   r  )r  r   r=   s      r   *test_write_dataset_schema_metadata_parquetr-    so     HHc9%&E))68*<=EUGI6]]7%556==F??vx0000r   c           	         | \  }}}}}}}}dj                  ||||      }t        j                  t        j                  t	        d            t        j                  d t	        d      D              t        j                  dgdz  dgdz  z         gg d      }t        j                  t        j                  d	t        j                         fg      d
      }	t        j                  |d|d|	       t        j                  d|dd
      j                         }
|
j                  |      sJ |j                  d      }t        j                  ||d|	       t        j                  d|dd
      j                         }
|
j                  |      sJ |j                  d      }t        j                  |d|d|	       t        j                  d|dd
      j                         }
|
j                  |      sJ y )Nr/  r  c              3   D   K   | ]  }t        j                            y wr   r  r  s     r   r  z(test_write_dataset_s3.<locals>.<genexpr>  r  r  r[   r   r\   r  r  r   r   r  zmybucket/datasetr  rX  r  zmybucket/dataset2r  r  r  zmybucket/dataset3)r   r?   r   r~   r/   r   r   r=   rD   r  rS   r   r	  )rU  rr  rc   rW  rX  rY  rZ  uri_templater   r   r7  rV  s               r   test_write_dataset_s3r1    s    7H3Aq"atZ=DD
D$	0 
 HH
rRXX%IuRy%II
#sebj()+ #E
 ??299vryy{&;%<=fMD !b
 ZZr%fhj  == 

1
2CUC	EZZ5vhj  == 

j
)Czc)$ ZZ5vhj  ==r   aC  {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:PutObject",
                "s3:ListBucket",
                "s3:GetObjectVersion"
            ],
            "Resource": [
                "arn:aws:s3:::*"
            ]
        }
    ]
}c           	         ddl m} | d   \  }}}}t        | t        dd        |dd| d| d      }t	        j
                  t	        j                  t        d	            t	        j                  d
 t        d	      D              t	        j                  dgdz  dgdz  z         gg d      }t        j                  t	        j                  dt	        j                         fg      d      }t        j                  |d|dd|d       t        j                  d|dd      j                         }|j                  |      sJ t        j                  |d|dd|d       t        j                  d|dd      j                         }|j                  |      sJ t!        j"                  t$        d      5  t        j                  |d|ddd       d d d         |dd| d| dd       }t!        j"                  t$        d!      5  t        j                  |d|ddd       d d d        y # 1 sw Y   [xY w# 1 sw Y   y xY w)"Nr   )r)  r  test_dataset_limited_user
limited123r  http)rY  rZ  endpoint_overrideschemer  c              3   D   K   | ]  }t        j                            y wr   r  r  s     r   r  z1test_write_dataset_s3_put_only.<locals>.<genexpr>+  r  r  r[   r   r\   r  r  r   r   r  zexisting-bucketr  Fr  )r   r   rf   r   r  r  rX  Tz&Bucket 'non-existing-bucket' not foundr   znon-existing-bucket)r   r   rf   r  limited)rY  rZ  r6  r7  allow_bucket_creationz(Access Denied|ACCESS_DENIED))r   r)  r   _minio_put_only_policyr?   r   r~   r/   r   r   r=   rD   r  rS   r   r	  r  r  rT  )	r  r)  rW  rX  rr  rc   r   r   r7  s	            r   test_write_dataset_s3_put_onlyr<    s=    ( !.D$1y*@:LJ	.!F!D6*	
B HH
rRXX%IuRy%II
#rSEBJ&') #E
 ??299vryy{&;%<=fMD  RU4 ZZbVhj  ==  RT4 ZZbVhj  == 
wE
G 

(R#8	

 
!F!D6*"
B 
w&E	F 

(R#8	

 
!
 
 
 
s   3H)H5)H25H>c           
         t        j                  dd d gi      }t        j                  || dz         t        j                  t        j
                  dt        j                  t        j                         t        j                                     g      }t        j                  j                  | dz  g|t        j                         t        j                               }|j                  |      }|j                  |k(  sJ y )Nr[   r  )r   r=   r   r   )r?   r   rl   rm   r=   r@   rz   r   rD   r   r  r  r   rc   r   r   )r  r   r   r=   fsdss        r   $test_dataset_null_to_dictionary_castr?  b  s     HHcD$<()ENN5'N23YY
bmmBHHJ		<= F **'(##%%%'	 + D ##D)E<<6!!!r   c                 ~   t        j                  g dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg dd	      }t        j                  || d
z  d       t        j                  | d
z  d      }|j                  |dd      }|j                         t        j                  g dg dg dd      k(  sJ |j                  |ddd      }|j                         j                  d      t        j                  g dg dg dd      k(  sJ y )Nr   rv   r  r[   r\   r  colAr  r  r  r  c   rv   r   Zr  r  )colBcol3r  rD  rI  r  r  NrD  r  rJ  
full outer)	join_typer   rv   r  rF  r[   r\   r  Nr  r  NrH  r?   r   r   r  rS   r  r   r1  r  r  ds1r  ds2r7  s         r   test_dataset_joinrV  u  s%   	 
B R46
**Wt^E
2C	 
B R46
**Wt^E
2CXXc66*F?? * !    XXc66\XBF??$$V,%%: 1   r   c                 |   t        j                  g dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg dd	      }t        j                  || d
z  d       t        j                  | d
z  d      }|j                  |d      }|j                         t        j                  g dg dg dd      k(  sJ |j                  |ddd      }|j                         j                  d      t        j                  g dg dg dd      k(  sJ y )NrA  rB  rC  r  r  r  rE  rG  )rD  rJ  r  rD  rK  rL  rM  _rrN  right_suffixrO  rP  rQ  rR  rS  s         r   test_dataset_join_unique_keyr[    s#   	 
B R46
**Wt^E
2C	 
B R46
**Wt^E
2CXXc6"F?? * !    XXc6\XMF??$$V,%%: 1   r   c           	         t        j                  g dg dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg d	g d
d      }t        j                  || dz  d       t        j                  | dz  d      }|j                  |ddd      }|j                         j                  d      t        j                  g dg dg dg dg dgg d      k(  sJ y )NrA  r   r  <   rB  )rD  rI  colValsr  r  r  rE  rF  r  r   rG  r  rD  rM  rX  rY  rO  )r   r  r^  NrP  )r   r  NrF  rQ  )rD  rI  r_  colB_r	colVals_rr  rR  rS  s         r   test_dataset_join_collisionsrc    s    	" 
B
 R46
**Wt^E
2C	" 
B
 R46
**Wt^E
2CXXc6\XMF??$$V,: @1A A A Ar   c                 &   t         j                  j                  g dg dd      }t        j                  || dz  d       t        j
                  | dz  d      }t         j                  j                  g dg dg d	d
      }t        j                  || dz  d       t        j
                  | dz  d      }|j                  |ddddd      }|j                         j                  d      t        j                  g dg dg dd      k(  sJ y )N)r   r   r   r  rS  )r[   r\   r[   r\   r  rC  r  r  r  )rv   r  r  )r[   r\   g)r!  r)  g      @)rI  rJ  colCr  rD  r  r   rI  rJ  onby	toleranceright_onright_by)r!  NNNN)rD  r  rf  )
r?   rE   from_pydictr   r  rS   	join_asofr   r1  r   rS  s         r   test_dataset_join_asofro    s    			) 
B R46
**Wt^E
2C			 
B
 R46
**Wt^E
2C]]6Q&  F ??$$V,),: 1   r   c                    t        j                  g dg dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg d	g d
g dd      }t        j                  || dz  d       t        j                  | dz  d      }|j                  |dddgd      }|j                         j                  d      t        j                  g dg dg dg dd      k(  sJ y )NrA  r]  r  )rD  rI  rh  r  r  r  r`  rG  rE  r  )rI  r_  rD  rh  r  rh  rD  rI  r   rh  ri  rj  )Nr  NrD  rI  rh  r_  )r?   r   r   r  rS   rn  r   r1  rS  s         r   "test_dataset_join_asof_multiple_byrs    s    	 
B
 R46
**Wt^E
2C	"	 
B R46
**Wt^E
2C]]&&)Q  F ??$$V,$	: 1   r   c                    t        j                  dg di      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg dd      }t        j                  || d	z  d       t        j                  | d	z  d      }|j                  |dg d
      }|j                         t        j                  g dg dd      k(  sJ y )Nrh  r  r  r  r  rG  r  )r_  rh  r  r   rq  )rH  rH  r  )rh  r_  )r?   r   r   r  rS   rn  r   rS  s         r   test_dataset_join_asof_empty_byru  	  s    	i 
B R46
**Wt^E
2C	" 
B R46
**Wt^E
2C]]q  F ??"* !   r   c           
          t        j                  g dg dg dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g d	g d
g dg dg dd      }t        j                  || dz  d       t        j                  | dz  d      }d}t        j                  t        |      5  |j                  |dddgddddg       d d d        y # 1 sw Y   y xY w)NrA  r]  r  rB  rr  r  r  r  r`  rG  )rl  rk  r  rE  r  )rI  r_  colUniqrD  rh  r  zXColumns {'colVals'} present in both tables. AsofJoin does not support column collisions.r   rh  rD  rI  r   rg  )	r?   r   r   r  rS   r  r  rL  rn  )r  r  rT  r  rU  r]  s         r   !test_dataset_join_asof_collisionsrx  !  s    	"	 
B R46
**Wt^E
2C	"" 
B R46
**Wt^E
2C	7  
z	- 
Dff-VV$4 	 	

 
 
s   C44C=dstyperc   memc           	      R   t        j                  g dg dd      }|dk(  r6t        j                  || dz  d       t        j                  | dz  d      }n!|dk(  rt        j                  |      }nt
        |j                  t        j                  d	      d
k        j                  t        j                  d      dk(        }|dk(  rt        j                  nt        j                  }t        ||      sJ |j                         t        j                  dgdgd      k(  sJ |j                  d      t        j                  dgdgd      k(  sJ |j                  t        j                  d	      dk        j                  t        j                  d	      dkD        j                  t        j                  d	      dk7        }|j                         t        j                  dgdgd      k(  sJ t        j                  || dz  d       t        j                  | dz  d      }|j                         t        j                  dgdgd      k(  sJ |j                  t        j                  t        j                  ddgddgd            dd      }|j                         j!                  d      t        j                  dd gddgddgd      k(  sJ t#        j$                  t&              5  |j                  d        d d d        t#        j$                  t(              5  |j+                          d d d        |j,                  j/                  d      }	|j                  t        j                  d	      d
k        j1                  |	      }
|
j                         t        j                  d	ddgi      k(  sJ t#        j$                  t         j2                        5  |j1                  |	      j                          d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nr   rv   r  rP  r[   r\   r  re  rC  rc   r  r  r  rz  rD  rY   r  r[   r   r   rP  r  r   rv   r\   r  r   r  rI  r  zright outerkeysrN  rI  )rD  rI  r  )r?   r   r   r  rS   r  r   r  r@   r  r  rM   r   r   r   r  r1  r  r  r  rL  rK   r=   r  replace_schemar_  )r  ry  r  rT  r7  rz  r2r  joinedschema_without_col2	newschemas              r   test_dataset_filterr  A  s    
$ 
B ~
Wt^E:jj46	5jjn!! ZZ(1,-44RXXf5E5LMF'-~r##2;M;MHfh'''??* !   
 ;;q>RXX'     
BHHV$q(	)	0	0
1
%gRXXf-=-BgC ;;=BHH&     VWz1%@zz'J.u=H"((, #    [[BHHRc
. %    /F ??$$V,D	Rc
: 1    
y	! 

4 
z	"  **++A.


1n()  288A- $    
r	' > 	12;;=	> >  > >s$   /P"P PPPP&c           	         t        j                  g dg dd      }t        j                  g dg dd      }|dk(  rkt        j                  || dz  d	       t        j                  | dz  d	      }t        j                  || d
z  d	       t        j                  | d
z  d	      }n6|dk(  r+t        j                  |      }t        j                  |      }nt
        t        j                  ||f      j                  t        j                  d      dk  t        j                  d      dk(  z        }|j                         t        j                  g dg dd      k(  sJ |j                  t        j                  t        j                  ddgddgd            dd      }|j                         j                  d      t        j                  g dg dg dd      k(  sJ |j                  t        j                  d      dk        }|j                  t        j                  d      dk        }	t        j                  t        d      5  t        j                  ||	f       d d d        y # 1 sw Y   y xY w)Nr|  r}  rC  )r  r   rg  )hr:   lrc   r  r  r  r  rz  rD  rY   r  )r   rv   r  )r[   r\   r  r   r  r[   r\   r~  r  z
left outerr  )r   r  N)rD  r  rI  zcurrently not supportedr   )r?   r   r   r  rS   r  r   r  r@   r   r  r1  r  r  rL  )
r  ry  r  r  rT  rU  filtered_union_dsr  filtered_ds1filtered_ds2s
             r   test_union_dataset_filterr    s&    
$ 
B 
 
B ~
Wt^E:jj46
Wt^E:jj46	5jjnjjn!!

C:.55	&	A	"((6"2a"78 %%'2885 ,   
 ##BJJrxxRc
9 0 %  $ .F ??$$V,: 1    ::bhhv.23L::bhhv.34L	z)B	C 1


L,/01 1 1s   I//I8c                    | dz  }t        |      \  }}t        j                  |      }|j                         }|j                  dk(  sJ |j                  t        j                  d      dk        }|j                         j                  dk(  sJ t        j                  t              5  |j                          d d d        y # 1 sw Y   y xY w)Ntest_parquet_dataset_filterr  r  rv   r  )r  r   r  r   r   r   r  r@   r  r  rL  rK   )r  r  r  rr  rS   r7  filtered_dss          r   r  r    s    77I5i@M1  /GF??b   ..$!!34K!**b000	z	" $!!#$ $ $s   )CCc                    t        j                  t        j                  t        d            gdg      }t	        j
                  |      }dt	        j                  d      i}|j                  |      }t	        j                  || dgd       t        j                  t        d	      5  t	        j                  || dgd       d
d
d
       y
# 1 sw Y   y
xY w)z
    Ensure the projected schema is used to validate partitions for scanner

    https://issues.apache.org/jira/browse/ARROW-17228
    r  original_columnr  renamed_columnr)   r  r  z0'Column original_column does not exist in schemar   N)r?   r   r~   r/   r   rS   r@   r   r  r  r  KeyError)r  r   table_datasetr*   r   s        r   4test_write_dataset_with_scanner_use_projected_schemar    s     HHbhhuRy)*3D2EFEJJu%M"((#45G ##G#4G(8'9%I 	O	


 	W,=+>u	

 
 
s   )CCr   )r  r   c           
         |dk(  rt        j                  d       t        j                  ddgddgd dddgdd	id gd
ddg dddigd
gd      }t	        j
                  || dz  |       t	        j                  | dz  |      }|j                  g d      }|j                         dd ddgd d	dd gddddg ddd dgddgk(  sJ y )Nr   zpyarrow.parquetabc123qrs456r   rv   buttonr  r  )rx   elementvaluesstructsscrollwindow)NrY   r{   fizzbuzz)user_ida.dotted.fieldinteractionr   r  )r  zinteraction.typezinteraction.valueszinteraction.structsr  r)   )r  r  )r  rx   r  r  r  )	r  r(  r?   r   r   r  rS   r   rf  )r  r   r   rT  s       r   test_read_table_nested_columnsr    s   -.HH(H!5)*A(q6u~t&<	>hff-=,>	@& E UGg-f=
**Ww&v
6C LL:  ;E ??dq!f"5148A	Oh,$T23q	J!   r   c                 x   ddl m} | dz  }t        j                  j	                  t        j
                  g dt        j                               t        j
                  g dt        j                               gddg      } |j                  ||ddgd	
        |j                  |dd	t        j                  t        j                  dt        j                               t        j                  dt        j                               g            j                         j                         }||j                  d      k(  sJ |j                  d      j                         }t!        t#        |            }|D cg c]  }dt%        |d      z    }}t!        t'        j(                              }||k(  sJ y c c}w )Nr   )rS   zslash-writer-xr   rv   rY   r{   r   )experiment/A/f.csvzexperiment/B/f.csvr  zexperiment/C/k.csvzexperiment/M/i.csvexp_idexp_metar  r   )r9   r  r   r   rX  )r  r   r   r=   r   z	exp_meta=r  r(  )r   rS   r?   rE   r   r~   r   r  r  r=   r@   r   r2  r1  r-  rf  r3  r   r   rb  rc  )tmpdirr   rq   dt_tabler]  r  encoded_pathsrE  s           r   !test_dataset_partition_with_slashr    sr   %$$Dxx##
"((*-
 ()+	4%5 8@6L	NH B \" yy"((8RXXZ8((:rwwy9; <	 hj!  z))(3333q!++-Hc(m$HDLMD[5B#77MMM

4()JJ&&& Ns   8F7c                 d   t        j                  t        j                  dt        j                         d      t        j                  dt        j                         d      g      }g dg dg}t         j                  j                  ||      }t        j                  || d	z         t        j                  | d	z  d
      }|j                         j                  j                  |      sJ t        j                  || dz  d
       t        j                  | dz  d
      }|j                         j                  j                  |      sJ t        j                  ||g| dz  d
       t        j                  | dz  d
      }|j                         j                  j                  |      sJ y )Nr  F)nullablerR  Tr  Nr   Nrb   	nulltest1r   r  	nulltest2	nulltest3)r?   r=   r@   rB   rE   r   rl   r  r   rS   r   r	  r  )r  schema_nullabler  r   rS   s        r   'test_write_dataset_preserve_nullabilityr  +  sW    ii
bhhj51
bhhj40!2 3O )FHH   @Ew45jj;.yAG$$++O<<<UGk1)Djj;.yAG$$++O<<<eU^W{%:9Mjj;.yAG$$++O<<<r   c                    t        j                  t        j                  dt        j                         ddi      t        j                  dt        j                               g      }t        j                  t        j                  dt        j                               t        j                  dt        j                               g      }g dg dg}t         j                  j                  ||      }t         j                  j                  ||      }t        j                  ||g| d	z  d
       t        j                  | d	z  d
      }|j                         j                  j                  |d      sJ t        j                  ||g| dz  d
       t        j                  | dz  d
      }|j                         j                  j                  |d      sJ t        j                  ||g| dz  d
|       t        j                  | dz  d
      }|j                         j                  j                  |d      sJ y )Nr  s   foos   barrf  rR  r  r  rb   test1r   r  Tr9  test2test3r  )r?   r=   r@   rB   rE   r   r   r  rS   r   r	  )r  schema_metadataschema_no_metar  r   table_no_metarS   s          r   *test_write_dataset_preserve_field_metadatar  C  s   ii
bhhjFF+;<
bhhj!!# $O YY
bhhj!
bhhj! # $N )FHH   @EHH(((GM e]+Ww->yQjj7*9=G$$++OD+QQQmU+Ww->yQjj7*9=G$$++N4+PPP mU+Ww->y+-jj7*9=G$$++OD+QQQr   c                    dD ]_  }dD ]V  }t        j                  t        j                  dt        j                               t        j                  dt        j                               g      }g dg dg}t         j                  j                  ||      }t        j                         }| d| z  }t        j                  ||d|j                  ||	      d
       t        j                  |d      }|j                  D ]Z  }	t        j                  |	      }
|
j                  d      j                  d      }|j                   |u sJ |j"                  ||z  u rZJ  Y b y )N)TFr  rR  r  r  rb   write_page_index_r   )write_statisticswrite_page_indexr  )r   r  r  r  r   )r?   r=   r@   rB   rE   r   r   r   r  r  rS   r
  rl   r  r  r-  has_offset_indexhas_column_index)r  r  r  r=   r  r   r  r  rT  r;  r  ccs               r   #test_write_dataset_write_page_indexr  `  s_   ) R - 	RYYbhhj)bhhj) + ,F  1FHH(((?E..0K#45E4F!GGH (;;%5%5 <  (=	 **Xi8C		 R++D1''*11!4**.>>>>**.>AQ.QQQQR-	RRr   c                    t        j                  t        j                  g d      t        j                  g d      gddg      }|dk(  r6t        j                  || dz  d	       t        j
                  | dz  d	      }n!|d
k(  rt        j
                  |      }nt        |j                  d      j                         j                         g dg ddk(  sJ |j                  dg      j                         j                         g dg ddk(  sJ |j                  t        j                  d      dk        j                  d      j                         j                         g dg ddk(  sJ t         j                  j                  t        j                  g dt        j                               t        j                  g d      gddg      }t        j
                  |      }|j                  dg      }|j                         j                         }|d   g dk(  sJ |d   g dk(  sJ |j                  dg      }|j                         j                         }|d   g dk(  sJ |d   g dk(  sJ y )N)rY   r   r{   rv   r   )r\   r[   r\   r[   r}   r  r  r  rc   r  r  r  rz  )r[   r[   r\   r\   r}   r  )r  r  )r  
descending)r}   r\   r\   r[   r[   )r   r{   rY   rv   r   r{   )r[   r[   r\   r  )r   rS  rS  r  rw   )r  carr  foobarr[   r\   )r[   r  )r  rS  rS  r   )r  r  r  r  )r[   r7  )r?   r   r~   r   r  rS   r  r1  r   r2  r   r  r@   rE   r   rB   )r  ry  r   r   
sorted_tabsorted_tab_dicts         r   test_dataset_sort_byr    sJ    HH
!
*+ !E
 ~
$u=ZZ$u5	5ZZ!!::h((*446)!;   
 ::/01::<FFH)!M   
 99bhhx(1,.66hj!   HH  
RXXZ0
01" 3Z ! E 
E	B012J ))+557O3=0003#BBBB/01J ))+557O3=0003#BBBBr   c                 N   t        j                  dg di      }t         j                  j                         }|j	                  d      }| dz  }t        j                  ||||       t        j                  d      }t         j                  j                  |      }t        j                  ||	      j                         }||k(  sJ | d
z  }t        ||       t        |j                               }	t        |	      dk(  sJ |	d   }
t        |
j                               }|d   |d   k7  sJ |d   |d   c|d<   |d<   |
j                  |       t        j                  d      }t         j                  j                  |      }t        j                  ||	      j                         }||k7  sJ |t        j                  dg di      k(  sJ t!        j"                  t$        d      5  t        j                  ||	      j                         }ddd       y# 1 sw Y   yxY w)zwCheck that checksum verification works for datasets created with
    ds.write_dataset and read with ds.dataset.to_tabler[   r  T)write_page_checksumcorrect_dir)r9   r  r   r  r  )default_fragment_scan_optionsr  corrupted_dirr   r      $   F)r   rY   rv   r{   zCRC checksum verificationr   N)r?   r   rS   r   r  r   r  r
  r   r   rh   iterdirr   	bytearray
read_byteswrite_bytesr  r  rT  )r  
table_origpq_write_formatr  original_dir_pathpq_scan_opts_crcpq_read_format_crctable_checkcorrupted_dir_pathcorrupted_file_path_listcorrupted_file_pathbin_datapq_scan_opts_no_crcpq_read_format_no_crctable_corruptrr  s                   r   1test_checksum_write_dataset_read_dataset_to_tabler    s7    3-.J jj224O#66  7 "M  -/""	 44#')55&6 6 8**! hj  $$$ !?2 23
  $$6$>$>$@A'(A---215,779:H B<8B<'''!)"x|HRL(2, ##H- 77#(*JJ88&9 9 ;JJ#8::B(* 
 J&&&BHHc<%89999 
w&A	B JJ%
 (* 	
  s   ,&HH$c                     d} d}t        j                  t              5 }t        j                  j
                  j                  d       d d d        | t        j                        v s|t        |j                        v sJ t        j                  j                         }d}t        j                  t        |      5  |j                  d       d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)NzImake_write_options() should be called on an instance of ParquetFileFormatzqdescriptor 'make_write_options' for 'pyarrow._dataset_parquet.ParquetFileFormat' objects doesn't apply to a 'int'+   z;make_write_options\(\) takes exactly 0 positional argumentsr   )	r  r  r  r?   rS   r   r  r_   r'   )msg_1msg_2excinfopformatr]  s        r   test_make_write_options_errorr    s    -E(E 
y	! <W


$$77;<C&&%3w}}3E*EEEjj**,G
IC	y	, '""2&' '< <' 's   *C;C"C"C+c                 :   	 dd l m} d}d}| j                  j                  j                  |      |j                  j                  |            j                         }|j                         dddgik(  sJ y # t        $ r t        j                  d       Y w xY w)Nr   zsubstrait NOT enableds   
SOhttps://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml	
u64
	u32

str"i
i64
f64
str
const
struct
a
b
group
key7
:
Z
b
:

:
b
*
bs3  
/functions_comparison.yaml
SOhttps://github.com/apache/arrow/blob/main/format/substrait/extension_types.yamlequal:any1_any1	
u64
	u32

"
 "
("i
i64
f64
str
const
struct
a
b
group
key7
:
Z
b
:

:
b
*
bru  r_   4)
pyarrow.substrait	substraitr&  r  r  r   BoundExpressionsfrom_substraitr   r2  )rS   psr8  	filteringr7  s        r   test_scanner_from_substraitr    s    -&
WJ
CI __##22:>""11)<   hj  %#s!44449  -+,-s   A9 9BBr  r   )r{   r   r   (	  r   r+   rb  r  rk  r  sysr  ro  rw  r  shutilr   urllib.parser   numpyr[  r&  r  r   r?   r8  r9  r  pyarrow.csvr  r   rc   pyarrow.jsonpyarrow.libr   pyarrow.tests.utilr   r   r   r	   r
   r   r3   r  rS   r   pyarrow.parquetr   rl   mark
pytestmarkr   r;   rI   rV   fixturern   r   r   r   r   r  r  r9  rG  rM  rU  ri  rs  rw  r{  r}  r  r  r  parametrizer_   tupler  r  r  r  r  r  r)  r5  r   rG  rN  s3ra  rx  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r  r,  r5  rH  rO  rU  rW  r`  ri  rl  rq  ru  rw  ry  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rU  r  r!  r*  r8  r<  rA  rG  rJ  rL  rQ  rU  r_  rb  rd  rj  rn  ru  rx  r|  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  skipifplatformr  r  r  r  r  r  r
  r  r  r  r  r   r%  r-  r2  r5  r?  rF  rI  rP  rU  r\  ra  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r%  r(  r+  r-  r1  r;  r<  r?  rV  r[  rc  ro  rs  ru  rx  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   <module>r     s  $   	    
              ,, ,   [[  
? ?K +%  D      F  @ h0  0f   	
 	H+	+\ DG DGN), 89 89v)GX&( " " -I -I` % %$ - -, 5 5 O OA!HF$ F8
8
 %8
 #(8
8
v#"6 & &R $ $" 6 6r$TNJ2 ,BOOH-$$/  t}5E"  6E"P 4 4$ 5-  5-pH2H, *D *DZ  " 7 7@ 	%  	% 9N 9Nx  : ) )" 
:  
: %0 %0P  "5Jp '  'T 7 7 7  7 ) ), &. &.R $ $6 ' '> 	 	  3  3F     F  @ACIC I< +eT]; @AC@C < @@  @ACa,Ca,H  @ACR,CR,jJ? (  (2N
+;
=O"6J H H(*8,>@" I I
 I I L L $ $$ < <& J J& 7 7&    , - - = =*"?(DJ" 6 7&" !# !#H ? ?, , , N N.A +v)>?5$-8+eT];)i  o&	?# !< , 62 < 9 @ 62r ; ;< D D& 
:  
: :  : ,  ,0 ?=  ?=D / / 9 92 = =6 F F 	 	 9 9(4?%DM2@ 6) 6)r  "$, #M #ML 3 3,*    "    $  )    (H*: A0    (    (    ,D2@0G@  ) 
,/ ,/^ , !  ! CLLG+D  F!F  !( !  !2 . .* 	  	$3 4  4$ 
;  
; 3  3N @  @  G G* D  D 	=  	==0 :?  :?z B  B&( ?C3&  P  PF *  *>)")"=& H  HVF" -# -#`;	O !< !<H  <  <F $ $8 A9 A9H #  #0 #9  #9L   ,<^!H 6@4 0 ('  ('V 4  *  ,  ,F 4  4	1 1 1 ,   , ^ $ I
  I
X " "$  <  < A A6  6  :  . 
 
> %$ K>K>\ %$ *1*1Z$
2 #56 76"'J = =.R:R@ %$ -C-C`FR'&5[w  	B"  	B
  	B
  	Bs[   AT 3AT- 8AT; ?AU	 TAT*T)AT*T-AT8T7AT8T;AUUAUU	AUUAU