
    Why                     <   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlZddlZddlZddlZddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlm Z m!Z!m"Z" dZ#dZ$dZ% G d d      Z& eed      Zd Z'ejP                  jS                  ddddidddfdddddddfdddidddfdddddddfd dd id!d"dfd d#ddd!d"dfd$dd$id%d&d'fd(dd(id)d*dfd(d+d,id)d*dfd-dd-id"d.dfd-d+d/id"d.dfd0dd0id1d%dfg      ejP                  jS                  d2d3d4g      ejP                  jS                  d5dd6g      d7                      Z*ejP                  jS                  ddddidddfdddddddfdddidddfdddddddfd dd id!d"dfd d#ddd!d"dfd$dd$id%d&d'fd(dd(id)d*dfd(d+d,id)d*dfd-dd-id"d.dfd-d+d/id"d.dfg      ejP                  jS                  d2d3d4g      d8               Z+ejP                  jS                  dg d9      d:        Z,ejP                  jS                  d2d3d4g      d;        Z-ejP                  jS                  d2d3d4g      d<        Z.ejP                  jS                  d2d3d4g      ejP                  jS                  d=d>d>d?gg      d@               Z/ejP                  jS                  dg dA      ejP                  jS                  d2d3d4g      dB               Z0ejP                  jS                  dg dC      ejP                  jS                  d2d3d4g      dD               Z1dE Z2 ejf                  dFG      dH        Z4 ejf                  dFG      dI        Z5ejP                  jS                  dJg dK      ejP                  jS                  d5dd6g      dL               Z6ejP                  jS                  dMd2dNidOfdPdNidQfg      dR        Z7ejP                  jS                  dSddTdUdTdTdUd6d4dUd6dTdUg      dV        Z8ejP                  js                  dW      ejP                  jS                  dMd2d4idXfdPdidYfd4ddZdYfg      d[               Z:ejP                  js                  dW      ejP                  jS                  d\d]d^g      d_               Z;d` Z<ejP                  jS                  d5dd6g      da        Z=ejP                  jS                  d5dd6g      db        Z>ejP                  jS                  d5dd6g      ejP                  jS                  d2d3d4g      dc               Z?ejP                  jS                  d5dd6g      ejP                  jS                  ddddeiddfddgg      dh               Z@ejP                  jS                  d5dd6g      ejP                  jS                  dided+dfieAdjfdddkdlgdmeAdnfd0d0d6doeAdpfdddqddreAdsfdddqd6dreAdsfdddtdmeBdufdddtdlgdmeBdufg      ejP                  jS                  d2d3d4g      dv                      ZCejP                  jS                  dwdxddydgeAdzfdxd{d|eAd}fdxd{dydgeAd~fi eAdfg      d        ZDejP                  jS                  d5dd6g      d        ZEejP                  jS                  d5dd6g      d        ZFejP                  jS                  d5dd6g      d        ZGd ZHejP                  jS                  d5dd6g      d        ZIejP                  jS                  ddd6g      d        ZJd ZKd ZLejP                  jS                  d5dd6g      d        ZMejP                  jS                  dg d      d        ZNd ZOejP                  jS                  d5dd6g      ejP                  jS                  d2d      d               ZPd ZQd ZRd ZSy)zTest the openml loader.    N)partial)	resources)BytesIO)	HTTPError)config_context)fetch_openml)_get_local_path_open_openml_url_retry_with_clean_cache)Bunch)check_pandas_support)SkipTestassert_allcloseassert_array_equalz"sklearn.datasets.tests.data.openmlTzdata/v1/download/{}c                   8    e Zd Zd Zd	dZd Zd Zd Zd Zd Z	y)
_MockHTTPResponsec                      || _         || _        y N)datais_gzip)selfr   r   s      \/var/www/html/jupyter_env/lib/python3.12/site-packages/sklearn/datasets/tests/test_openml.py__init__z_MockHTTPResponse.__init__'   s    	    c                 8    | j                   j                  |      S r   )r   read)r   amts     r   r   z_MockHTTPResponse.read+   s    yy~~c""r   c                 8    | j                   j                          y r   )r   closer   s    r   r   z_MockHTTPResponse.close.   s    		r   c                 &    | j                   rddiS i S )NzContent-Encodinggzipr   r    s    r   infoz_MockHTTPResponse.info1   s    <<&//	r   c                 ,    t        | j                        S r   )iterr   r    s    r   __iter__z_MockHTTPResponse.__iter__6   s    DIIr   c                     | S r    r    s    r   	__enter__z_MockHTTPResponse.__enter__9   s    r   c                      y)NFr)   )r   exc_typeexc_valexc_tbs       r   __exit__z_MockHTTPResponse.__exit__<   s    r   N))
__name__
__module____qualname__r   r   r   r$   r'   r*   r/   r)   r   r   r   r   &   s%    #
r   r   )	data_homec                 <  	
 dddddt         j                  t        dz   d| z   
fd
fd			fd
	fd	fd
fdfd}t        r,| j	                  t
        j                  j                  d|       y y )Nz(https://api.openml.org/api/v1/json/data/z1https://api.openml.org/api/v1/json/data/features/z'https://www.openml.org/data/v1/downloadz-https://api.openml.org/api/v1/json/data/list/z.gz.id_c                    t        j                  dd| t        d      d        |z   z   }|j                  dd      j                  dd      j                  dd	      j                  d
d      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      S )Nz\W-zhttps://api.openml.org/z-json-data-listz-jdlz-json-data-featuresz-jdfz-json-data-qualitiesz-jdqz
-json-dataz-jdz
-data_namez-dnz	-downloadz-dlz-limitz-lz-data_versionz-dvz-statusz-sz-deactivatedz-dactz-activez-act)resublenreplace)urlsuffixoutputpath_suffixs      r   
_file_namez4_monkey_patch_webbased_functions.<locals>._file_nameU   s    FF5#s3'@#A#CDE 	 NN,f5W*F3W+V4W\5)W\5)W[%(WXt$W_e,WY%W^W-WY'	
r   c                    | j                  |      sJ |d|         	| |      }t        j                  
      |z  }|j                  d      5 }|r0r.t	        |j                               }t        |d      cd d d        S  |d      }t	        |j                               }t        |d      cd d d        S # 1 sw Y   y xY w)N does not match rbTF)
startswithr   filesopenr   r   r   )r>   has_gzip_headerexpected_prefixr?   data_file_namedata_file_pathffpdecompressed_frB   data_modulegzip_responseread_fns            r   _mock_urlopen_sharedz>_monkey_patch_webbased_functions.<locals>._mock_urlopen_sharedk   s    ~~o. 	
!!1#9	
. $C0"5F  & 	4!=QVVX&(T2	4 	4
 ")D!1^0023(U3	4 	4 	4s   )B;-B;;Cc                      | |d      S N.jsonr>   rI   rJ   r?   r)   )r>   rI   rS   url_prefix_data_descriptions     r   _mock_urlopen_data_descriptionzH_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_description|   s    #+7	
 	
r   c                      | |d      S rU   r)   )r>   rI   rS   url_prefix_data_featuress     r   _mock_urlopen_data_featureszE_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_features   s    #+4	
 	
r   c                 F    | j                  dd      d   } ||d      S )N/   r   z.arffrW   )rsplit)r>   rI   url_without_filenamerS   url_prefix_download_datas      r   _mock_urlopen_download_datazE_monkey_patch_webbased_functions.<locals>._mock_urlopen_download_data   s4      #zz#q1!4#$+4	
 	
r   c                    | j                        sJ d|         	| d      }t        j                  
      |z  }|j                  d      5 } |d      }|j	                         j                  d      }t        j                  |      }d d d        dv rt        d ddd t                     |j                  d      5 }|r.t        |j	                               }t        |d	      cd d d        S  |d      }t        |j	                               }t        |d
      cd d d        S # 1 sw Y   xY w# 1 sw Y   y xY w)NrD   rV   rE   zutf-8error  Simulated mock errorr>   codemsghdrsrN   TF)rF   r   rG   rH   r   decodejsonloadsr   r   r   )r>   rI   rK   rL   rM   rO   	decoded_s	json_datarN   rB   rP   rR   url_prefix_data_lists            r   _mock_urlopen_data_listzA_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_list   sK   ~~23 	
#&&6sg>	
3 $C1"5F   & 	.!$Q-N&++-44W=I

9-I	. is(>Tgi    & 	4!QVVX&(T2	4 	4
 ")D!1^0023(U3	4 	4	. 	.	4 	4s   >D,'D85-D8,D58Ec                 8   | j                         }| j                  d      dk(  }|j                        r	 ||      S |j                  
      r	 ||      S |j                        r	 ||      S |j                  	      r	 ||      S t        d|z        )NzAccept-encodingr"   zUnknown mocking URL pattern: %s)get_full_url
get_headerrF   
ValueError)requestargskwargsr>   rI   rY   r\   rr   rc   rX   r[   rq   rb   s        r   _mock_urlopenz7_monkey_patch_webbased_functions.<locals>._mock_urlopen   s    ""$!,,->?6I>>./*3@@^^45.sODD^^45.sODD^^781#GG>DEEr   urlopen)r"   rH   OPENML_TEST_DATA_MODULEtest_offlinesetattrsklearndatasets_openml)contextdata_idrQ   rz   rB   rY   r\   rr   rc   rS   rP   rA   rR   rX   r[   rq   rb   s     ` @@@@@@@@@@@@@r    _monkey_patch_webbased_functionsr   G   s     #MRHJKiiG)C/Cy/AK
,4"


$46F F ((00)]K r   z9data_id, dataset_params, n_samples, n_features, n_targets=   r         r_   iris)nameversion      &   anneal1        cpu鍞     H      _  
      r   zadult-census  M   MiceProtein  i  parser	liac-arffpandasrQ   Fc                    t        j                  d      }t        | ||       t        d	dd|d|}	t	        |	j
                  d         |k(  sJ t        |	t              sJ t        |	j                  |j                        sJ |	j                  j                  |||z   fk(  sJ t        |	j                  |j                        sJ |	j                  j                  ||fk(  sJ |dk(  r>t        |	j                  |j                        sJ |	j                  j                  |fk(  sAJ t        |	j                  |j                        sJ |	j                  j                  ||fk(  sJ |	j                  J y)
zCheck the behaviour of `fetch_openml` with `as_frame=True`.

    Fetch by ID and/or name (depending if the file was previously cached).
    r   rQ   TFas_framecacher   idr_   Nr)   )pytestimportorskipr   r   intdetails
isinstancer   frame	DataFrameshaper   targetSeries
categories)
monkeypatchr   dataset_params	n_samples
n_features	n_targetsr   rQ   pdbunchs
             r   test_fetch_openml_as_frame_truer      s`   P 
		X	&B$['W  	E u}}T"#w...eU###ekk2<<000;;J,B CCCCejj",,///::	:6666A~%,,		222||!!i\111%,,555||!!i%;;;;###r   c                 h   t        j                  d       t        | |d       t        d	dd|d|}t	        |j
                  d         |k(  sJ t        |t              sJ |j                  J t        |j                  t        j                        sJ |j                  j                  ||fk(  sJ t        |j                  t        j                        sJ |dk(  r|j                  j                  |fk(  sJ |j                  j                  ||fk(  sJ t        |j                  t              sJ y)
znCheck the behaviour of `fetch_openml` with `as_frame=False`.

    Fetch both by ID and/or name + version.
    r   Tr   Fr   r   Nr_   r)   )r   r   r   r   r   r   r   r   r   r   npndarrayr   r   r   dict)r   r   r   r   r   r   r   r   s           r    test_fetch_openml_as_frame_falser     s    H !$['N  	E u}}T"#w...eU###;;ejj"**---::	:6666ellBJJ///A~||!!i\111||!!i%;;;;e&&---r   )r   r   r   c                   
 t        j                  d      t        | |d       t        |ddd      }t        |ddd      }|j                  |j                  c}

fd}|j                  |      }j                  j                  |
       |j                  |j                  c}j                  j                  |j                     
       fd}|j                  |      }	j                  j                  |	       y	)
z:Check the consistency of the LIAC-ARFF and pandas parsers.r   Tr   Fr   r   r   r   r   c                     | j                      }j                  j                  j                  |      r| j	                  |j
                        S | S r   )r   apitypesis_numeric_dtypeastypedtype)seriespandas_seriesdata_pandasr   s     r   convert_numerical_dtypeszFtest_fetch_openml_consistency_parser.<locals>.convert_numerical_dtypesk  sA    #FKK066<<((7==!4!455Mr   c                 D   | j                      }j                  j                  j                  |      r| j	                  |j
                        S t        |j
                  j                        r/| j                  j                  |j                  j                        S | S r   )r   r   r   r   r   r   r   CategoricalDtypecatrename_categoriesr   )r   r   frame_pandasr   s     r   (convert_numerical_and_categorical_dtypeszVtest_fetch_openml_consistency_parser.<locals>.convert_numerical_and_categorical_dtypes  sz    $V[[166<<((7==!4!455++R-@-@A :://0A0A0L0LMMMr   N)
r   r   r   r   r   applytestingassert_frame_equalr   feature_names)r   r   
bunch_liacbunch_pandas	data_liacr   data_liac_with_fixed_dtypes
frame_liacr   frame_liac_with_fixed_dtypesr   r   r   s             @@@r   $test_fetch_openml_consistency_parserr   S  s     
		X	&B$['N	J  	L (__l.?.?I{ #,//2J"KJJ!!"={K  *//1C1CJ JJ!!,|/I/I"JKX $.#3#30$  JJ!!">Mr   c                 
   t        j                  d       d}t        | |d       t        |dd|      }t        |dd|      }t	        |j
                  |j
                         t        |j                  |j                         y)z^Check the equivalence of the dataset when using `as_frame=False` and
    `as_frame=True`.
    r   r   Tr   Fr   N)r   r   r   r   r   r   r   r   )r   r   r   bunch_as_frame_truebunch_as_frame_falses        r   -test_fetch_openml_equivalence_array_dataframer     s    
 !G$['N&	 (	 (--/B/G/GH+224G4N4NOr   c                 ,   t        j                  d      }|j                  j                  j                  }d}d}d}d} |g d      }t
        j                  gdz  }	g d}
d	}t        | |d
       t        |d
d|      }|j                  }|j                  }|j                  }t        ||j                        sJ t        j                  |j                  |	k(        sJ |j                   |k(  sJ t        j                  |j"                  |
k(        sJ t        j                  |j$                  |
k(        sJ |j&                  |gk(  sJ t        ||j(                        sJ |j*                  |k(  sJ |j                   |k(  sJ |j,                  |k(  sJ |j.                  j0                  sJ t        ||j                        sJ |j                   |k(  sJ t        j                  |j                  |	|gz   k(        sJ |j.                  j0                  sJ y)z>Check fetching on a numerical only dataset with string labels.r   r   r   r   )r   )r      )zIris-setosazIris-versicolorzIris-virginicar   )sepallength
sepalwidthpetallength
petalwidthclassTFr   N)r   r   r   r   r   r   float64r   r   r   r   r   r   r   alldtypesr   columnsr   target_namesr   r   r   index	is_unique)r   r   r   r   r   
data_shapetarget_shapeframe_shapetarget_dtypedata_dtypes
data_namestarget_namer   r   r   r   s                   r   test_fetch_openml_iris_pandasr     s    
		X	&Bvv||44GJLK#<L ::,"KKJK$['4@	E ::D\\FKKEdBLL)))66$++,---::###66$,,*,---66%%%3444+...fbii(((<<<'''<<<''';;+%%%<<!!!!eR\\***;;+%%%66%,,+">>???;;    r   target_columnr   r   c                 ,   t        j                  d      }d}t        | |d       t        |dd||      }t        |dd|      }|j                  j                  |j                  |j                         t        |t              r[|j                  j                  |j                  j                  |j                  |             |j                  j                  dk(  sJ y	|j                  j                  |k(  sJ |j                  j                  dk(  sJ y	)
z@Check that we can force the target to not be the default target.r   r   TF)r   r   r   r   r   r   )r      r   N)r   r   r   r   r   r   r   r   listassert_index_equalr   r   Indexr   r   r   )r   r   r   r   r   bunch_forcing_targetbunch_defaults          r   !test_fetch_openml_forcing_targetsr     s     
		X	&BG$['4@'# !	M JJ!!"6"<"<m>Q>QR-&


%% ''//-1H	
 $((..(:::#**//=@@@#((..(:::r   )r   r   r   r   r   c                    t        j                  d      }t        | |d       t        |ddd|      }t        |ddd|      \  }}|j                  j                  |j                  |       t        ||j                        r'|j                  j                  |j                  |       y|j                  j                  |j                  |       y)z>Check the behaviour of `return_X_y=True` when `as_frame=True`.r   Tr   Fr   r   r   
return_X_yr   N)r   r   r   r   r   r   r   r   r   assert_series_equalr   )r   r   r   r   r   Xys          r   .test_fetch_openml_equivalence_frame_return_X_yr    s     
		X	&B$['NE DAq JJ!!%**a0!RYY


&&u||Q7


%%ellA6r   )r   r   r   r   c                     t        j                  d       t        | |d       t        |ddd|      }t        |ddd|      \  }}t	        |j
                  |       t	        |j                  |       y)z?Check the behaviour of `return_X_y=True` when `as_frame=False`.r   Tr   Fr  N)r   r   r   r   r   r   r   )r   r   r   r   r  r  s         r   .test_fetch_openml_equivalence_array_return_X_yr    st     !$['NE DAq uzz1%u||Q'r   c                    t        j                  d       d}t        | |d       d}t        ||dd      }t        ||dd      }|j                  j
                  j                  dk(  sJ |j                  j
                  d	k(  sJ y
)z9Check the difference between liac-arff and pandas parser.r   r   Tr   Fr   r   rM   ON)r   r   r   r   r   r   kind)r   r   r   bunch_liac_arffr   s        r   $test_fetch_openml_difference_parsersr  6  s    
!G$['N H"	O  	L %%**c111""c)))r   module)scopec                  2    g dg dg dg dg dg dg ddS )	z+Returns the columns names for each dataset.)r   r   r   r   r   )'familyzproduct-typesteelcarbonhardnesstemper_rolling	conditionformabilitystrength
non-ageingsurface-finishzsurface-qualityenamelabilitybcbfbtbw%2Fmeblmchromphoscbondmarviexptlferrocorrblue%2Fbright%2Fvarn%2Fcleanlustrejurofmspr   thickwidthr<   oilborepackingr   )vendorMYCTMMINMMAXCACHCHMINCHMAXr   )N Mean_Acc1298_Mean_Mem40_CentroidMean_Acc1298_Mean_Mem40_RolloffMean_Acc1298_Mean_Mem40_FluxMean_Acc1298_Mean_Mem40_MFCC_0Mean_Acc1298_Mean_Mem40_MFCC_1Mean_Acc1298_Mean_Mem40_MFCC_2Mean_Acc1298_Mean_Mem40_MFCC_3Mean_Acc1298_Mean_Mem40_MFCC_4Mean_Acc1298_Mean_Mem40_MFCC_5Mean_Acc1298_Mean_Mem40_MFCC_6Mean_Acc1298_Mean_Mem40_MFCC_7Mean_Acc1298_Mean_Mem40_MFCC_8Mean_Acc1298_Mean_Mem40_MFCC_9Mean_Acc1298_Mean_Mem40_MFCC_10Mean_Acc1298_Mean_Mem40_MFCC_11Mean_Acc1298_Mean_Mem40_MFCC_12Mean_Acc1298_Std_Mem40_CentroidMean_Acc1298_Std_Mem40_RolloffMean_Acc1298_Std_Mem40_FluxMean_Acc1298_Std_Mem40_MFCC_0Mean_Acc1298_Std_Mem40_MFCC_1Mean_Acc1298_Std_Mem40_MFCC_2Mean_Acc1298_Std_Mem40_MFCC_3Mean_Acc1298_Std_Mem40_MFCC_4Mean_Acc1298_Std_Mem40_MFCC_5Mean_Acc1298_Std_Mem40_MFCC_6Mean_Acc1298_Std_Mem40_MFCC_7Mean_Acc1298_Std_Mem40_MFCC_8Mean_Acc1298_Std_Mem40_MFCC_9Mean_Acc1298_Std_Mem40_MFCC_10Mean_Acc1298_Std_Mem40_MFCC_11Mean_Acc1298_Std_Mem40_MFCC_12Std_Acc1298_Mean_Mem40_CentroidStd_Acc1298_Mean_Mem40_RolloffStd_Acc1298_Mean_Mem40_FluxStd_Acc1298_Mean_Mem40_MFCC_0Std_Acc1298_Mean_Mem40_MFCC_1Std_Acc1298_Mean_Mem40_MFCC_2Std_Acc1298_Mean_Mem40_MFCC_3Std_Acc1298_Mean_Mem40_MFCC_4Std_Acc1298_Mean_Mem40_MFCC_5Std_Acc1298_Mean_Mem40_MFCC_6Std_Acc1298_Mean_Mem40_MFCC_7Std_Acc1298_Mean_Mem40_MFCC_8Std_Acc1298_Mean_Mem40_MFCC_9Std_Acc1298_Mean_Mem40_MFCC_10Std_Acc1298_Mean_Mem40_MFCC_11Std_Acc1298_Mean_Mem40_MFCC_12Std_Acc1298_Std_Mem40_CentroidStd_Acc1298_Std_Mem40_RolloffStd_Acc1298_Std_Mem40_FluxStd_Acc1298_Std_Mem40_MFCC_0Std_Acc1298_Std_Mem40_MFCC_1Std_Acc1298_Std_Mem40_MFCC_2Std_Acc1298_Std_Mem40_MFCC_3Std_Acc1298_Std_Mem40_MFCC_4Std_Acc1298_Std_Mem40_MFCC_5Std_Acc1298_Std_Mem40_MFCC_6Std_Acc1298_Std_Mem40_MFCC_7Std_Acc1298_Std_Mem40_MFCC_8Std_Acc1298_Std_Mem40_MFCC_9Std_Acc1298_Std_Mem40_MFCC_10Std_Acc1298_Std_Mem40_MFCC_11Std_Acc1298_Std_Mem40_MFCC_12BH_LowPeakAmpBH_LowPeakBPMBH_HighPeakAmpBH_HighPeakBPMBH_HighLowRatioBHSUM1BHSUM2BHSUM3zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresive)age	workclasszfnlwgt:z
education:zeducation-num:zmarital-status:zoccupation:zrelationship:zrace:zsex:zcapital-gain:zcapital-loss:zhours-per-week:znative-country:r   )NDYRK1A_NITSN1_NBDNF_NNR1_NNR2A_NpAKT_NpBRAF_N	pCAMKII_NpCREB_NpELK_NpERK_NpJNK_NPKCA_NpMEK_NpNR1_NpNR2A_NpNR2B_NpPKCAB_NpRSK_NAKT_NBRAF_NCAMKII_NCREB_NELK_NERK_NGSK3B_NJNK_NMEK_NTRKA_NRSK_NAPP_N
Bcatenin_NSOD1_NMTOR_NP38_NpMTOR_NDSCR1_NAMPKA_NNR2B_NpNUMB_NRAPTOR_NTIAM1_NpP70S6_NNUMB_NP70S6_NpGSK3B_NpPKCG_NCDK5_NS6_NADARB1_NAcetylH3K9_NRRP1_NBAX_NARC_NERBB4_NnNOS_NTau_NGFAP_NGluR3_NGluR4_NIL1B_NP3525_NpCASP9_NPSD95_NSNCA_NUbiquitin_NpGSK3B_Tyr216_NSHH_NBAD_NBCL2_NpS6_NpCFOS_NSYP_N	H3AcK18_NEGR1_NH3MeK4_NCaNA_Nr   )pclasssurvivedr   sexr  sibspparchticketfarecabinembarkedboatbody	home.destr   r   r   r   r   r   r   r)   r)   r   r   datasets_column_namesr  U  s9     P(
R SO
`
"O
`
{m mr   c                      i i ddddddddd	d
dddddddd
dddddddddddddd
ddddddddddd
dd
i i i ddiddddddd d!d"S )#Nr  r   r  	   r  r   r  r   r  r   r  r  r  r  r  r     r   r!  r"  r#  r$  r%  )
r&  r'  r(  r)  r*  r+  r,  r-  r0  r2  r  r   i  r_   i  i7  i  i4  )r  r  r  r  r  r  r  r  r)   r)   r   r   datasets_missing_valuesr  H  s0    
b
a
 
 1	

 "
 b
 R
 "
 "
 "
 q
 !
 
 R
 B
  R!
" R#
$ ,.7
: !}
G, ,r   zJdata_id, parser, expected_n_categories, expected_n_floats, expected_n_ints))r   r   r_   r   r   )r   r   r_   r   r   )r   r   !   r   r   )r   r   r  r   r   )r   r   r_   r   r   )r   r   r_   r   r   )r   r   r   r   r   )r   r   r   E   r   )r   r   r  r   r   )r   r   r  r   r   )r   r   r_   r   r   )r   r   r_   r   r   )r   r   r   r   r   )r   r   r   r   r   c	           	      8   t        j                  d      }	|	j                  j                  j                  }
t        | ||       t        |dd|      }|j                  }t        |j                  D cg c]  }t        ||
      s| c}      }t        |j                  D cg c]  }|j                  dk(  s| c}      }t        |j                  D cg c]  }|j                  dk(  s| c}      }||k(  sJ ||k(  sJ ||k(  sJ |j                  j                         ||   k(  sJ |j                         j                         j!                         }|j#                         D ]!  \  }}||   j%                  |d      }||k(  r!J  y	c c}w c c}w c c}w )
zYCheck that `fetch_openml` infer the right number of categories, integers, and
    floats.r   r   TFr   rM   ir   N)r   r   r   r   r   r   r   r   r<   r   r   r  r   tolistisnasumto_dictitemsget)r   r   r   expected_n_categoriesexpected_n_floatsexpected_n_intsrQ   r  r  r   r   r   r   r   n_categoriesn_floatsn_intsframe_feature_to_n_nanr   	n_missingexpected_missings                        r   !test_fetch_openml_types_inferencer  y  s   P 
		X	&Bvv||44$['W	E KKE!LLP5Ju>N,OPL u||IeuzzS7HEIJHU\\GEUZZ35F%GHF0000((((_$$$==!%:7%CCCC"ZZ\--/7791779 -i27;??aH,,,,- 	QIGs$   2FF"F7FF*Fzparams, err_msgunknownz:The 'parser' parameter of fetch_openml must be a str amongr   z<The 'as_frame' parameter of fetch_openml must be an instancec                     d}t        | |d       t        j                  t        |      5  t	        dd|i| d d d        y # 1 sw Y   y xY w)Nr   Tmatchr   r)   )r   r   raisesrv   r   r   paramserr_msgr   s       r   &test_fetch_openml_validation_parameterr    sH     G$['4@	z	1 0/W//0 0 0s   AAr  auto)r   r   c                     d}	 t        d       t        d      # t        $ rP t        | |d       d}t	        j
                  t        |      5  t        d	d|i| ddd       Y y# 1 sw Y   Y yxY ww xY w)
z=Check that we raise the proper errors when we require pandas.r   !test_fetch_openml_requires_pandasz.This test requires pandas to not be installed.Tz:requires pandas to be installed. Alternatively, explicitlyr  r   Nr)   )r   r   ImportErrorr   r   r  r   )r   r  r   r  s       r   'test_fetch_openml_requires_pandas_errorr    s{     GI@A GHH  4(gtDN]];g6 	433F3	4 	4 	44s'    3A3A%A3%A/	*A3/A3z2ignore:Version 1 of dataset Australian is inactivez:Sparse ARFF datasets cannot be loaded with parser='pandas'z9Sparse ARFF datasets cannot be loaded with as_frame=True.)r   r   c                     t        j                  d       d}t        | |d       t        j                  t        |      5  t        d|dd| ddd       y# 1 sw Y   yxY w)	ztCheck that we raise the expected error for sparse ARFF datasets and
    a wrong set of incompatible parameters.
    r   $  Tr  F)r   r   Nr)   )r   r   r   r  rv   r   r  s       r   #test_fetch_openml_sparse_arff_errorr    sc    , !G$['4@	z	1 
 	
	
 	

 
 
s    AA!zdata_id, data_type)r   	dataframe)r  sparsec                     t        j                  d      }t        | |d       t        |dd      }|dk(  r|j                  nt
        j                  j                  }t        |j                  |      sJ y)z&Check the auto mode of `fetch_openml`.r   Tr   F)r   r   r   r  N)
r   r   r   r   r   scipyr	  
csr_matrixr   r   )r   r   	data_typer   r   klasss         r   test_fetch_openml_auto_moder    s^     
		X	&B$['4@&FD%4BLL%,,:Q:QEdii'''r   c                    t        j                  d       d}t        | |d       d}t        j                  t        |      5  t        d      5  t        |ddd	
       ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w)z[Check that we raise a warning regarding the working memory when using
    LIAC-ARFF parser.r   r   Tz*Could not adhere to working_memory config.r  gư>)working_memoryFr   r   N)r   r   r   warnsUserWarningr   r   )r   r   rj   s      r   :test_convert_arff_data_dataframe_warning_low_memory_pandasr  #  s     !G$['4@
6C	k	- 40 	"		 	 	 s$   A<A0A<0A9	5A<<Bc                     d}d}t        | ||       t        j                  d      }t        j                  t
        |      5  t        |ddd       ddd       y# 1 sw Y   yxY w)	z\Check that a warning is raised when multiple versions exist and no version is
    requested.r   r   a;  Multiple active versions of the dataset matching the name iris exist. Versions may be fundamentally different, returning version 1. Available versions:
- version 1, status: active
  url: https://www.openml.org/search?type=data&id=61
- version 3, status: active
  url: https://www.openml.org/search?type=data&id=969
r  Fr   )r   r   r   r   N)r   r:   escaper   r  r  r   )r   rQ   r   	data_namerj   s        r   ,test_fetch_openml_iris_warn_multiple_versionr  6  sg     GI$['=I
))	BC 
k	- 
		

 
 
s   AA$c                     d}d}d}d}t        | ||       t        ||ddd      }|j                  j                  ||fk(  sJ |j                  J y)z/Check that we can get a dataset without target.r   Nr   r   Fr   r   r   r   r   r   )r   r   r   r   r   )r   rQ   r   r   expected_observationsexpected_featuresr   s          r   test_fetch_openml_no_targetr  Q  sn     GM$['=I#D 99??46GHHHH;;r   c                 (   t        j                  d       d}t        | ||       t        |dd|      }|j                  j
                  d   }|j                  d   j                         j                         sJ t        |j                  g d       y	)
zRcheck that missing values in categories are compatible with pandas
    categoricalr   iY  r   FTr   r   r   r   r  )FEMALEMALE_N)
r   r   r   r   r   r   r  anyr   r   )r   rQ   r   r   penguins	cat_dtypes         r   test_missing_values_pandasr&  e  s    
 !G$['W	H $$U+I==$$&**,,,y++-DEr   r     glass2)r   r   r   c                    d}t        | ||       d}t        j                  t        |      5  t	        ddddd|}ddd       j
                  j                  dk(  sJ |j                  d	   d
k(  sJ y# 1 sw Y   9xY w)z;Check that we raise a warning when the dataset is inactive.r'  z(Version 1 of dataset glass2 is inactive,r  Fr   )r   r   r   N)   r  r   40675r)   )r   r   r  r  r   r   r   r   )r   rQ   r   r   rj   r(  s         r   test_fetch_openml_inactiver,  {  s     G$['=I
4C	k	- 
 
%
?M

 ;;(((>>$7***
 
s   A55A>z"data_id, params, err_type, err_msgzNo active dataset glass2 foundr   r   )r   r   z1Can only handle homogeneous multi-target datasets)r   r   zOSTRING attributes are not supported for array representation. Try as_frame=Truer  )r   r   r   zTarget column 'family'	undefinedz(Could not find target_column='undefined'c                     t        | ||       |j                  dd      s|dk(  rt        j                  d       t        j                  ||      5  t        dd|d| d d d        y # 1 sw Y   y xY w)Nr   Tr   r  F)r   r   r)   )r   r  r   r   r  r   )r   rQ   r   r  err_typer  r   s          r   test_fetch_openml_errorr0    si    d %['=Izz*d#v'9H%	xw	/ ;:5:6:; ; ;s   A))A2zparams, err_type, err_msgr0   r   zCThe 'version' parameter of fetch_openml must be an int in the rangenAmE)r   r   zCThe 'data_id' parameter of fetch_openml must be an int in the rangez6The 'version' parameter of fetch_openml must be an intzFNeither name nor data_id are provided. Please provide name or data_id.c                 r    t        j                  ||      5  t        di |  d d d        y # 1 sw Y   y xY w)Nr  r)   )r   r  r   )r  r/  r  s      r   )test_fetch_openml_raises_illegal_argumentr3    s2    4 
xw	/ v  s   -6c                    d}d}d}t        | ||       d}|j                  |      }t        j                  t        |      5  t        ||ddd       d d d        d	}|j                  |      }t        j                  t        |      5  t        ||ddd       d d d        d}|j                  |      }t        j                  t        |      5  t        ||d
gddd       d d d        d	}|j                  |      }t        j                  t        |      5  t        ||d
gddd       d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   kxY w# 1 sw Y   y xY w)Nr   z.target_column='{}' has flag is_row_identifier.z&target_column='{}' has flag is_ignore.MouseIDr  Fr   r  Genotyper   )r   formatr   r  r  r   )r   rQ   r   expected_row_id_msgexpected_ignore_msg
target_colrj   s          r   test_warn_ignore_attributer;    sp   GJB$['=IJ

$
$Z
0C	k	- 
$	

 J

$
$Z
0C	k	- 
$	

 J

$
$Z
0C	k	- 
%w/	

 J

$
$Z
0C	k	- 
%w/	

 
?
 

 

 

 
s0   D5	EEE5D>E
EE"c                     d}t        | ||       d}t        j                  t        |      5  t	        |ddd       d d d        y # 1 sw Y   y xY w)Nr_   zJOpenML registered a problem with the dataset. It might be unusable. Error:r  Fr   r  r   r   r  r  r   r   rQ   r   rj   s       r   test_dataset_with_openml_errorr?    sM    G$['=I
VC	k	- WWEE+VW W W   AAc                     d}t        | ||       d}t        j                  t        |      5  t	        |ddd       d d d        y # 1 sw Y   y xY w)Nr   zFOpenML raised a warning on the dataset. It might be unusable. Warning:r  Fr   r  r=  r>  s       r    test_dataset_with_openml_warningrB  "  sM    G$['=I
RC	k	- WWEE+VW W Wr@  c                 h   t        j                  d       d}t        | |d       |dddd}t        di |}t        di |dddii}t	        d	 |j
                  d
   j                  j                  D              sJ t        d |j
                  d
   j                  j                  D              rJ y)zACheck that we can overwrite the default parameters of `read_csv`.r   6  Fr   rQ   Tr   read_csv_kwargsskipinitialspacec              3   >   K   | ]  }|j                  d         yw NrF   .0r   s     r   	<genexpr>zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>>  s       #s   r   c              3   >   K   | ]  }|j                  d         ywrI  rK  rL  s     r   rN  zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>A  s!       	srO  Nr)   )	r   r   r   r   r   r   r   r   r#  )r   r   common_paramsadult_without_spacesadult_with_spacess        r   3test_fetch_openml_overwrite_default_params_read_csvrT  +  s    
!G$['QVW 	M (8-8$ 
*<e)D  '8'>'>w'G'K'K'V'V     '--g6::EE    r   c                 `   d}t        | ||       t        j                  |      dz   }d| }t        |j	                  d            }t        ||      }t        ||      }t        j                  j                  |      sJ t        ||      }	|j                         |	j                         k(  sJ y )Nr   /filename.arffhttps://www.openml.org/scikit_learn_data)r   _MONKEY_PATCH_LOCAL_OPENML_PATHr7  strmkdirr
   r	   ospathisfiler   )
r   rQ   tmpdirr   openml_pathr>   cache_directory	response1location	response2s
             r   test_open_openml_url_cachere  K  s    G$['=I188ADTTK#K=
1C&,,':;<O o6I{O<H77>>(### o6I>>y~~////r   write_to_diskc                    d}t         j                  |      dz   }d| }t        |j                  d            }t	        ||      fd}| j                  t        j                  j                  d|       t        j                  t        d      5  t        ||       d d d        t        j                  j                        rJ y # 1 sw Y   +xY w)	Nr   rV  rW  rX  c                     r1t        d      5 }|j                  d       d d d        t        d      t        d      # 1 sw Y   t        d      xY w)Nw Invalid request)rH   writerv   )rw   rx   ry   rM   rc  rf  s       r   rz   z>test_open_openml_url_unlinks_local_path.<locals>._mock_urlopene  sP    h$ *++j*++*++s   A  Ar{   rk  r  )rY  r7  rZ  r[  r	   r~   r   r   r   r   r  rv   r
   r\  r]  exists)	r   r_  rf  r   r`  r>   ra  rz   rc  s	     `     @r   'test_open_openml_url_unlinks_local_pathrn  ]  s    G188ADTTK#K=
1C&,,':;<O{O<H, ((00)]K	z):	; /o./ ww~~h''''/ /s   CCc                    d}t         j                  |      }t        | j                  d            }t	        ||      t        j                  t
        j                  j                               t        d      5 }|j                  d       d d d        t        ||      fd       }d}t        j                  t        |      5   |       }d d d        dk(  sJ y # 1 sw Y   RxY w# 1 sw Y   xY w)	Nr   rX  ri  rj  c                  Z    t         j                  j                         rt        d      y)NzFile exist!r_   )r\  r]  rm  	Exception)rc  s   r   
_load_dataz/test_retry_with_clean_cache.<locals>._load_data}  s#     77>>(#M**r   z!Invalid cache, redownloading filer  r_   )rY  r7  rZ  r[  r	   r\  makedirsr]  dirnamerH   rl  r   r   r  RuntimeWarning)	r_  r   r`  ra  rM   rr  warn_msgresultrc  s	           @r   test_retry_with_clean_cacherx  s  s    G188AK&,,':;<O{O<HKK)*	h	 	 [/: ; 3H	nH	5 Q;;  s   =C C, C),C5c                     d}t         j                  |      }t        | j                  d            }t	        ||      d        }d}t        j                  t        |      5   |        d d d        y # 1 sw Y   y xY w)Nr   rX  c                  2    t        d ddd t                     )Nrf   rg   rh   r   r   r)   r   r   rr  z:test_retry_with_clean_cache_http_error.<locals>._load_data  s    3$:')
 	
r   rg   r  )rY  r7  rZ  r[  r   r   r  r   )r_  r   r`  ra  rr  	error_msgs         r   &test_retry_with_clean_cache_http_errorr}    ss    G188AK&,,':;<O[/:
 ;

 'I	y		2   s   !A22A;c                    d }d}t        |j                  d            }t        | ||       t        |d|ddd      \  }}| j	                  t
        j                  j                  d|       t        |d|ddd      \  }}	t        j                  j                  ||       t        j                  j                  ||	       y )	Nc                 :    t        d| j                         z        )NzhThis mechanism intends to test correct cachehandling. As such, urlopen should never be accessed. URL: %s)rv   rt   rw   rx   ry   s      r   _mock_urlopen_raisez4test_fetch_openml_cache.<locals>._mock_urlopen_raise  s%     ")"6"6"89
 	
r   r   rX  TFr   )r   r   r4   r  r   r   r{   )rZ  r[  r   r   r~   r   r   r   r   r   r   )
r   rQ   r_  r  r   ra  	X_fetched	y_fetchedX_cachedy_cacheds
             r   test_fetch_openml_cacher    s    
 G&,,':;<O$['=I'!Iy ((00)=PQ%!Hh JJ!!)X6JJ!!)X6r   zas_frame, parser))Tr   )Fr   )Tr   )Fr   c                 p   |s|dk(  rt        j                  d       d}t        | |d       t        dz   d| z   }d}t	        j
                  |      |z  }|dz  |j                  d      5 }t        j                  |d      }	t        |	j                               }
d	|
t        |
      d
z
  <   ddd       t        j                  d      5 }|j                  
       ddd       t        j                  j                  j                   fd}| j#                  t        j                  j                  d|       t        j$                  t&              5 }t        j                  j)                  |d||       ddd       j+                  d      sJ y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   5xY w)z/Check that the checksum is working as expected.r   r   Tr6   r7   zdata-v1-dl-1666876.arff.gzztest_invalid_checksum.arffrE   %   r_   Nwbc                     | j                         }|j                  d      r;t        d      5 }|j                         }d d d        t	        t              d      S  |       S # 1 sw Y   'xY w)Nz$data/v1/download/1666876/anneal.arffrE   Tr#   )rt   endswithrH   r   r   r   )rw   rx   ry   r>   rM   corrupted_datacorrupt_copy_pathmocked_openml_urls         r   swap_file_mockz9test_fetch_openml_verify_checksum.<locals>.swap_file_mock  sh    ""$<<>?'. *!!"*$W^%<dKK$W--	* *s   A&&A/r{   Fr  1666876)r   r   r   r|   r   rG   rH   r"   	bytearrayr   r<   GzipFilerl  r   r   r   r{   r~   r  rv   r   r  )r   r   r_  r   r   original_data_moduleoriginal_data_file_nameoriginal_data_path	orig_file	orig_gzipr   modified_gzipr  excr  r  s                 @@r   !test_fetch_openml_verify_checksumr    s    6X%H%G$['4@ 3S8S	?J:")=>AXX!==		 	 	& !)IIi.	)* SY]!
 
($	/ "=D!"  ((0088. ((00)^L 
z	" 
c%%58F 	& 	


 99Y=! !
" "(
 
s%   -AFF $F,F F),F5c                    d }| j                  t        j                  j                  d|       d}t	        j
                  t        t        j                  d| d            5 }t	        j                  t        d      5 }t        |d d	       d d d        t        |      d
k(  sJ j                  j                          d d d        y # 1 sw Y   <xY w# 1 sw Y   y xY w)Nc                 2    t        d ddd t                     )Ni  Simulated network errorrh   r{  r  s      r   _mock_urlopen_network_errorzPtest_open_openml_url_retry_on_network_error.<locals>._mock_urlopen_network_error  s    3$=DWY
 	
r   r{   z"https://api.openml.org/invalid-urlz+A network error occurred while downloading z. Retrying...r  r  r   )delayr   )r~   r   r   r   r   r  r  r:   r  r  r   r
   r<   valuer   )r   r  invalid_openml_urlrecordexc_infos        r   +test_open_openml_url_retry_on_network_errorr    s    

   )-H >	ii"#=2

  
]]9,EF 	@(/Q?	@6{a 	@ 	@ s$   #C?C	2C	C	CC)r   r   c                     |dk(  rt        j                  d       d}t        | ||       t        j                  j                  |dd|      }|J |d   j                  dk(  sJ d|d	   vsJ y)
zCheck that we can load the "zoo" dataset.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14340
    r   >   Fr  Nr   )e      animalr   )r   r   r   r   r   r   r   )r   rQ   r   r   datasets        r   &test_fetch_openml_with_ignored_featurer    s     H%G$['=I++uuV , G  6?  I---7?3333r   c                 F   t        j                  d      }d}t        | |d       dd|d}t        dddi|}t        dddi|}|j                  j                  |j                  |j                         |j                  j                  j                  d	      j                         rJ |j                  j                  j                  d	      j                         rJ t        ddd
d|}t        ddd
d|}|j                  j                  |j                  d   |j                  d          |j                  d   j                  j                  d	      j                         rJ |j                  d   j                  j                  d	      j                         rJ y)zCheck that we strip the single quotes when used as a string delimiter.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/23381
    r   r   FrE  Tr   r   r   r   r   'r  )r   r   r   Nr)   )r   r   r   r   r   r  r   rZ  rF   r#  r  r   )r   r   r   rQ  mice_pandasmice_liac_arffs         r   test_fetch_openml_strip_quotesr  (  s    
		X	&BG$['QVW!%'JM@h@-@K!FFFNJJ"";#5#5~7L7LM!!%%00599;;;!!%%..s377999 XhhX-XK! (6CN JJ""'"N$8$8$A   )--88=AACCC  )--66s;??AAAAr   c                     t        j                  d      }d}t        | |d       dd|d}t        dddi|}t        dddi|}|j                  j                  |j                  d	   |j                  d	          y
)zCheck that we can strip leading whitespace in pandas parser.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25311
    r   rD  FrE  Tr  r   r   r   Nr)   )r   r   r   r   r   r  r   r   r   r   rQ  adult_pandasadult_liac_arffs         r   $test_fetch_openml_leading_whitespacer  E  s     
		X	&BG$['QVW!%'JMAxA=AL"G+GGOJJ""7#_%:%:7%Cr   c                     t        j                  d      }d}t        | |d       dd|d}t        d
ddi|}t        d
ddi|}|j                  j                  |j                  |j                         y	)zCheck that we can handle escapechar and single/double quotechar.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25478
    r   iZ  FrE  Tr  r   r   Nr)   )r   r   r   r   r   r   r   r  s         r   &test_fetch_openml_quotechar_escapecharr  W  sv     
		X	&BG$['QVW!%'JMAxA=AL"G+GGOJJ!!,"4"4o6K6KLr   )T__doc__r"   rm   r\  r:   	functoolsr   	importlibr   ior   urllib.errorr   numpyr   r   scipy.sparser  r   r   sklearn.datasetsr   fetch_openml_origsklearn.datasets._openmlr	   r
   r   sklearn.utilsr   $sklearn.utils._optional_dependenciesr   sklearn.utils._testingr   r   r   r|   r}   rY  r   r   markparametrizer   r   r   r   r   r   r  r  r  fixturer  r  r  r  r  filterwarningsr  r  r  r  r  r&  r,  rv   KeyErrorr0  r3  r;  r?  rB  rT  re  rn  rx  r}  r  r  r  r  r  r  r  r)   r   r   <module>r     sY
      	 	    "     " > 
   E  ? "7  < (D9BLR ? 
i_c1a(	f+S!Q7	
YNBA&	
X!,b"a8	y#Q*	u+S!Q7	E"BA.		4 "b!,	'R3	E"Ar1-	'B2	E"D"a0'0 K#:;4-8($ 9 <14($V ? 
i_c1a(	f+S!Q7	
YNBA&	
X!,b"a8	y#Q*	u+S!Q7	E"BA.		4 "b!,	'R3	E"Ar1-	'B2#, K#:;$. <-.$.N $56<N 7<N~ K#:;P <P4 K#:;,! <,!^ K#:;<,9V*WX; Y <;> $=>K#:;7 < ?76 $:;K#:;( < <(0*> ho  od h-  -` P4 4-8)- 956)-`  y!H	

 #J	
	00 V,v.h/f-	II PQ x H	

 G	

  T2G	
"
# R$
  PQ( R(& 4-8
 9
4 4-8 9& 4-8K#:;F < 9F( 4-8	E(q9
+ 9
+ 4-8(	"J0PQlG-DE?		
 51!	
 H$G$		
 H%H$		
 [96		
 k7-CD6		
G),Z K#:;; <[, 9^;  DY?Q	
 F+Q	
 FyAD	
 T	
!232
 4-8.
 9.
b 4-8W 9W 4-8W 9W@ 4-80 90" 4-8( 9(*.  4-87 97D + + \< 4-8#:;4 < 94(B:$Mr   