2424
2525
2626@pytest .fixture
27- def dtype (string_storage ):
28- """Fixture giving StringDtype from parametrized 'string_storage'"""
29- return pd .StringDtype (storage = string_storage )
27+ def dtype (string_dtype_arguments ):
28+ """Fixture giving StringDtype from parametrized storage and na_value arguments"""
29+ storage , na_value = string_dtype_arguments
30+ return pd .StringDtype (storage = storage , na_value = na_value )
3031
3132
3233@pytest .fixture
@@ -521,50 +522,34 @@ def test_arrow_array(dtype):
521522 assert arr .equals (expected )
522523
523524
524- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
525+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
525526@pytest .mark .filterwarnings ("ignore:Passing a BlockManager:DeprecationWarning" )
526- def test_arrow_roundtrip (dtype , string_storage2 , request , using_infer_string ):
527+ def test_arrow_roundtrip (dtype , string_storage , using_infer_string ):
527528 # roundtrip possible from arrow 1.0.0
528529 pa = pytest .importorskip ("pyarrow" )
529530
530- if using_infer_string and string_storage2 not in ("python_numpy" , "pyarrow_numpy" ):
531- request .applymarker (
532- pytest .mark .xfail (
533- reason = "infer_string takes precedence over string storage"
534- )
535- )
536-
537531 data = pd .array (["a" , "b" , None ], dtype = dtype )
538532 df = pd .DataFrame ({"a" : data })
539533 table = pa .table (df )
540534 if dtype .storage == "python" :
541535 assert table .field ("a" ).type == "string"
542536 else :
543537 assert table .field ("a" ).type == "large_string"
544- with pd .option_context ("string_storage" , string_storage2 ):
538+ with pd .option_context ("string_storage" , string_storage ):
545539 result = table .to_pandas ()
546540 assert isinstance (result ["a" ].dtype , pd .StringDtype )
547- expected = df .astype (f"string[{ string_storage2 } ]" )
541+ expected = df .astype (f"string[{ string_storage } ]" )
548542 tm .assert_frame_equal (result , expected )
549543 # ensure the missing value is represented by NA and not np.nan or None
550544 assert result .loc [2 , "a" ] is result ["a" ].dtype .na_value
551545
552546
553- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
547+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
554548@pytest .mark .filterwarnings ("ignore:Passing a BlockManager:DeprecationWarning" )
555- def test_arrow_load_from_zero_chunks (
556- dtype , string_storage2 , request , using_infer_string
557- ):
549+ def test_arrow_load_from_zero_chunks (dtype , string_storage , using_infer_string ):
558550 # GH-41040
559551 pa = pytest .importorskip ("pyarrow" )
560552
561- if using_infer_string and string_storage2 != "pyarrow_numpy" :
562- request .applymarker (
563- pytest .mark .xfail (
564- reason = "infer_string takes precedence over string storage"
565- )
566- )
567-
568553 data = pd .array ([], dtype = dtype )
569554 df = pd .DataFrame ({"a" : data })
570555 table = pa .table (df )
@@ -574,10 +559,10 @@ def test_arrow_load_from_zero_chunks(
574559 assert table .field ("a" ).type == "large_string"
575560 # Instantiate the same table with no chunks at all
576561 table = pa .table ([pa .chunked_array ([], type = pa .string ())], schema = table .schema )
577- with pd .option_context ("string_storage" , string_storage2 ):
562+ with pd .option_context ("string_storage" , string_storage ):
578563 result = table .to_pandas ()
579564 assert isinstance (result ["a" ].dtype , pd .StringDtype )
580- expected = df .astype (f"string[{ string_storage2 } ]" )
565+ expected = df .astype (f"string[{ string_storage } ]" )
581566 tm .assert_frame_equal (result , expected )
582567
583568
0 commit comments