@@ -1409,7 +1409,7 @@ def _maybe_convert_to_int_keys(convert_dates, varlist):
1409
1409
return new_dict
1410
1410
1411
1411
1412
- def _dtype_to_stata_type (dtype ):
1412
+ def _dtype_to_stata_type (dtype , column ):
1413
1413
"""
1414
1414
Converts dtype types to stata types. Returns the byte of the given ordinal.
1415
1415
See TYPE_MAP and comments for an explanation. This is also explained in
@@ -1425,13 +1425,14 @@ def _dtype_to_stata_type(dtype):
1425
1425
If there are dates to convert, then dtype will already have the correct
1426
1426
type inserted.
1427
1427
"""
1428
- #TODO: expand to handle datetime to integer conversion
1428
+ # TODO: expand to handle datetime to integer conversion
1429
1429
if dtype .type == np .string_ :
1430
1430
return chr (dtype .itemsize )
1431
1431
elif dtype .type == np .object_ : # try to coerce it to the biggest string
1432
1432
# not memory efficient, what else could we
1433
1433
# do?
1434
- return chr (244 )
1434
+ itemsize = max_len_string_array (column .values )
1435
+ return chr (max (itemsize , 1 ))
1435
1436
elif dtype == np .float64 :
1436
1437
return chr (255 )
1437
1438
elif dtype == np .float32 :
@@ -1461,6 +1462,7 @@ def _dtype_to_default_stata_fmt(dtype, column):
1461
1462
int16 -> "%8.0g"
1462
1463
int8 -> "%8.0g"
1463
1464
"""
1465
+ # TODO: Refactor to combine type with format
1464
1466
# TODO: expand this to handle a default datetime format?
1465
1467
if dtype .type == np .object_ :
1466
1468
inferred_dtype = infer_dtype (column .dropna ())
@@ -1470,8 +1472,7 @@ def _dtype_to_default_stata_fmt(dtype, column):
1470
1472
itemsize = max_len_string_array (column .values )
1471
1473
if itemsize > 244 :
1472
1474
raise ValueError (excessive_string_length_error % column .name )
1473
-
1474
- return "%" + str (itemsize ) + "s"
1475
+ return "%" + str (max (itemsize , 1 )) + "s"
1475
1476
elif dtype == np .float64 :
1476
1477
return "%10.0g"
1477
1478
elif dtype == np .float32 :
@@ -1718,10 +1719,11 @@ def _prepare_pandas(self, data):
1718
1719
self ._convert_dates [key ]
1719
1720
)
1720
1721
dtypes [key ] = np .dtype (new_type )
1721
- self .typlist = [_dtype_to_stata_type ( dt ) for dt in dtypes ]
1722
+ self .typlist = []
1722
1723
self .fmtlist = []
1723
1724
for col , dtype in dtypes .iteritems ():
1724
1725
self .fmtlist .append (_dtype_to_default_stata_fmt (dtype , data [col ]))
1726
+ self .typlist .append (_dtype_to_stata_type (dtype , data [col ]))
1725
1727
1726
1728
# set the given format for the datetime cols
1727
1729
if self ._convert_dates is not None :
0 commit comments