@@ -1409,7 +1409,7 @@ def _maybe_convert_to_int_keys(convert_dates, varlist):
14091409 return new_dict
14101410
14111411
1412- def _dtype_to_stata_type (dtype ):
1412+ def _dtype_to_stata_type (dtype , column ):
14131413 """
14141414 Converts dtype types to stata types. Returns the byte of the given ordinal.
14151415 See TYPE_MAP and comments for an explanation. This is also explained in
@@ -1425,13 +1425,14 @@ def _dtype_to_stata_type(dtype):
14251425 If there are dates to convert, then dtype will already have the correct
14261426 type inserted.
14271427 """
1428- #TODO: expand to handle datetime to integer conversion
1428+ # TODO: expand to handle datetime to integer conversion
14291429 if dtype .type == np .string_ :
14301430 return chr (dtype .itemsize )
14311431 elif dtype .type == np .object_ : # try to coerce it to the biggest string
14321432 # previously always the maximum width (244), which was not memory
14331433 # efficient; now sized to the longest value in the column (minimum 1)
1434- return chr (244 )
1434+ itemsize = max_len_string_array (column .values )
1435+ return chr (max (itemsize , 1 ))
14351436 elif dtype == np .float64 :
14361437 return chr (255 )
14371438 elif dtype == np .float32 :
@@ -1461,6 +1462,7 @@ def _dtype_to_default_stata_fmt(dtype, column):
14611462 int16 -> "%8.0g"
14621463 int8 -> "%8.0g"
14631464 """
1465+ # TODO: Refactor to combine type with format
14641466 # TODO: expand this to handle a default datetime format?
14651467 if dtype .type == np .object_ :
14661468 inferred_dtype = infer_dtype (column .dropna ())
@@ -1470,8 +1472,7 @@ def _dtype_to_default_stata_fmt(dtype, column):
14701472 itemsize = max_len_string_array (column .values )
14711473 if itemsize > 244 :
14721474 raise ValueError (excessive_string_length_error % column .name )
1473-
1474- return "%" + str (itemsize ) + "s"
1475+ return "%" + str (max (itemsize , 1 )) + "s"
14751476 elif dtype == np .float64 :
14761477 return "%10.0g"
14771478 elif dtype == np .float32 :
@@ -1718,10 +1719,11 @@ def _prepare_pandas(self, data):
17181719 self ._convert_dates [key ]
17191720 )
17201721 dtypes [key ] = np .dtype (new_type )
1721- self .typlist = [_dtype_to_stata_type ( dt ) for dt in dtypes ]
1722+ self .typlist = []
17221723 self .fmtlist = []
17231724 for col , dtype in dtypes .iteritems ():
17241725 self .fmtlist .append (_dtype_to_default_stata_fmt (dtype , data [col ]))
1726+ self .typlist .append (_dtype_to_stata_type (dtype , data [col ]))
17251727
17261728 # set the given format for the datetime cols
17271729 if self ._convert_dates is not None :
0 commit comments