@@ -414,13 +414,15 @@ def test_createDataFrame_with_map_type(self):
    def test_createDataFrame_with_string_dtype(self):
        # SPARK-34521: spark.createDataFrame does not support Pandas StringDtype extension type
        with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": True}):
-            pandas_df = pd.DataFrame([["abc"], ["def"], [None], ["ghi"], [None]], columns=["col"], dtype="string")
+            data = [["abc"], ["def"], [None], ["ghi"], [None]]
+            pandas_df = pd.DataFrame(data, columns=["col"], dtype="string")
            schema = StructType([StructField("col", StringType(), True)])
            df = self.spark.createDataFrame(pandas_df, schema=schema)

-            # dtypes won't match. Pandas has two different ways to store string columns: using ndarray (when dtype isn't
-            # specified) or using a StringArray when dtype="string". When calling dataframe#toPandas() it will use the
-            # ndarray version. Changing that to use a StringArray would be backwards incompatible.
+            # dtypes won't match. Pandas has two different ways to store string columns:
+            # using ndarray (when dtype isn't specified) or using a StringArray when dtype="string".
+            # When calling dataframe#toPandas() it will use the ndarray version.
+            # Changing that to use a StringArray would be backwards incompatible.
            assert_frame_equal(pandas_df, df.toPandas(), check_dtype=False)

    def test_toPandas_with_map_type(self):
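For context on the distinction the reworded comment draws, here is a minimal standalone pandas sketch (not part of the patch, and assuming pandas >= 1.0 where `StringDtype` exists) showing the two storage modes and why the test compares with `check_dtype=False`:

```python
import pandas as pd

# Default: string data (and None) land in an object-dtype ndarray.
obj_df = pd.DataFrame([["abc"], [None]], columns=["col"])
print(obj_df["col"].dtype)   # object

# With dtype="string": values are backed by a StringArray extension
# array, and missing values are represented as pd.NA.
str_df = pd.DataFrame([["abc"], [None]], columns=["col"], dtype="string")
print(str_df["col"].dtype)   # string

# The two frames hold equal values but different dtypes. Since
# toPandas() produces the object-dtype (ndarray) form, the test
# above has to pass check_dtype=False to assert_frame_equal.
```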