@@ -969,7 +969,7 @@ def read(self) -> DataFrame | Series:
969969 else :
970970 return obj
971971
972- def _get_object_parser (self , json ) -> DataFrame | Series :
972+ def _get_object_parser (self , json : str ) -> DataFrame | Series :
973973 """
974974 Parses a json document into a pandas object.
975975 """
@@ -985,16 +985,14 @@ def _get_object_parser(self, json) -> DataFrame | Series:
985985 "date_unit" : self .date_unit ,
986986 "dtype_backend" : self .dtype_backend ,
987987 }
988- obj = None
989988 if typ == "frame" :
990- obj = FrameParser (json , ** kwargs ).parse ()
991-
992- if typ == "series" or obj is None :
989+ return FrameParser (json , ** kwargs ).parse ()
990+ elif typ == "series" :
993991 if not isinstance (dtype , bool ):
994992 kwargs ["dtype" ] = dtype
995- obj = SeriesParser (json , ** kwargs ).parse ()
996-
997- return obj
993+ return SeriesParser (json , ** kwargs ).parse ()
994+ else :
995+ raise ValueError ( f" { typ = } must be 'frame' or 'series'." )
998996
999997 def close (self ) -> None :
1000998 """
@@ -1107,7 +1105,6 @@ def __init__(
11071105 self .convert_dates = convert_dates
11081106 self .date_unit = date_unit
11091107 self .keep_default_dates = keep_default_dates
1110- self .obj : DataFrame | Series | None = None
11111108 self .dtype_backend = dtype_backend
11121109
11131110 @final
@@ -1121,26 +1118,22 @@ def check_keys_split(self, decoded: dict) -> None:
11211118 raise ValueError (f"JSON data had unexpected key(s): { bad_keys_joined } " )
11221119
11231120 @final
1124- def parse (self ):
1125- self ._parse ()
1121+ def parse (self ) -> DataFrame | Series :
1122+ obj = self ._parse ()
11261123
1127- if self .obj is None :
1128- return None
11291124 if self .convert_axes :
1130- self ._convert_axes ()
1131- self ._try_convert_types ()
1132- return self . obj
1125+ obj = self ._convert_axes (obj )
1126+ obj = self ._try_convert_types (obj )
1127+ return obj
11331128
1134- def _parse (self ) -> None :
1129+ def _parse (self ) -> DataFrame | Series :
11351130 raise AbstractMethodError (self )
11361131
11371132 @final
1138- def _convert_axes (self ) -> None :
1133+ def _convert_axes (self , obj : DataFrame | Series ) -> DataFrame | Series :
11391134 """
11401135 Try to convert axes.
11411136 """
1142- obj = self .obj
1143- assert obj is not None # for mypy
11441137 for axis_name in obj ._AXIS_ORDERS :
11451138 ax = obj ._get_axis (axis_name )
11461139 ser = Series (ax , dtype = ax .dtype , copy = False )
@@ -1153,9 +1146,10 @@ def _convert_axes(self) -> None:
11531146 )
11541147 if result :
11551148 new_axis = Index (new_ser , dtype = new_ser .dtype , copy = False )
1156- setattr (self .obj , axis_name , new_axis )
1149+ setattr (obj , axis_name , new_axis )
1150+ return obj
11571151
1158- def _try_convert_types (self ) -> None :
1152+ def _try_convert_types (self , obj ) :
11591153 raise AbstractMethodError (self )
11601154
11611155 @final
@@ -1182,8 +1176,10 @@ def _try_convert_data(
11821176
11831177 elif self .dtype is True :
11841178 pass
1185- else :
1186- # dtype to force
1179+ elif not _should_convert_dates (
1180+ convert_dates , self .keep_default_dates , name
1181+ ):
1182+ # convert_dates takes precedence over columns listed in dtypes
11871183 dtype = (
11881184 self .dtype .get (name ) if isinstance (self .dtype , dict ) else self .dtype
11891185 )
@@ -1194,8 +1190,8 @@ def _try_convert_data(
11941190 return data , False
11951191
11961192 if convert_dates :
1197- new_data , result = self ._try_convert_to_date (data )
1198- if result :
1193+ new_data = self ._try_convert_to_date (data )
1194+ if new_data is not data :
11991195 return new_data , True
12001196
12011197 converted = False
@@ -1245,16 +1241,16 @@ def _try_convert_data(
12451241 return data , converted
12461242
12471243 @final
1248- def _try_convert_to_date (self , data : Series ) -> tuple [ Series , bool ] :
1244+ def _try_convert_to_date (self , data : Series ) -> Series :
12491245 """
12501246 Try to parse a ndarray like into a date column.
12511247
12521248 Try to coerce object in epoch/iso formats and integer/float in epoch
1253- formats. Return a boolean if parsing was successful.
1249+ formats.
12541250 """
12551251 # no conversion on empty
12561252 if not len (data ):
1257- return data , False
1253+ return data
12581254
12591255 new_data = data
12601256
@@ -1265,7 +1261,7 @@ def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
12651261 try :
12661262 new_data = data .astype ("int64" )
12671263 except OverflowError :
1268- return data , False
1264+ return data
12691265 except (TypeError , ValueError ):
12701266 pass
12711267
@@ -1277,57 +1273,45 @@ def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
12771273 | (new_data ._values == iNaT )
12781274 )
12791275 if not in_range .all ():
1280- return data , False
1276+ return data
12811277
12821278 date_units = (self .date_unit ,) if self .date_unit else self ._STAMP_UNITS
12831279 for date_unit in date_units :
12841280 try :
1285- new_data = to_datetime (new_data , errors = "raise" , unit = date_unit )
1281+ return to_datetime (new_data , errors = "raise" , unit = date_unit )
12861282 except (ValueError , OverflowError , TypeError ):
12871283 continue
1288- return new_data , True
1289- return data , False
1284+ return data
12901285
12911286
class SeriesParser(Parser):
    """
    Parser that turns a JSON document into a Series.
    """

    _default_orient = "index"
    _split_keys = ("name", "index", "data")

    def _parse(self) -> Series:
        """
        Decode ``self.json`` and construct the Series.

        With ``orient == "split"`` the decoded mapping has its keys
        stringified, is validated by ``check_keys_split`` and is then
        expanded into ``Series(**decoded)``. Any other orient passes the
        decoded data straight to the ``Series`` constructor.
        """
        data = ujson_loads(self.json, precise_float=self.precise_float)

        if self.orient == "split":
            decoded = {str(k): v for k, v in data.items()}
            self.check_keys_split(decoded)
            return Series(**decoded)
        return Series(data)

    def _try_convert_types(self, obj: Series) -> Series:
        """
        Run ``_try_convert_data`` over the whole Series under the name "data".

        The "was converted" flag returned alongside the data is not needed:
        the returned data is used either way.
        """
        obj, _ = self._try_convert_data("data", obj, convert_dates=self.convert_dates)
        return obj
13151304
13161305
13171306class FrameParser (Parser ):
13181307 _default_orient = "columns"
13191308 _split_keys = ("columns" , "index" , "data" )
1320- obj : DataFrame | None
13211309
1322- def _parse (self ) -> None :
1310+ def _parse (self ) -> DataFrame :
13231311 json = self .json
13241312 orient = self .orient
13251313
1326- if orient == "columns" :
1327- self .obj = DataFrame (
1328- ujson_loads (json , precise_float = self .precise_float ), dtype = None
1329- )
1330- elif orient == "split" :
1314+ if orient == "split" :
13311315 decoded = {
13321316 str (k ): v
13331317 for k , v in ujson_loads (json , precise_float = self .precise_float ).items ()
@@ -1341,90 +1325,61 @@ def _parse(self) -> None:
13411325 orig_names ,
13421326 is_potential_multi_index (orig_names , None ),
13431327 )
1344- self . obj = DataFrame (dtype = None , ** decoded )
1328+ return DataFrame (dtype = None , ** decoded )
13451329 elif orient == "index" :
1346- self . obj = DataFrame .from_dict (
1330+ return DataFrame .from_dict (
13471331 ujson_loads (json , precise_float = self .precise_float ),
13481332 dtype = None ,
13491333 orient = "index" ,
13501334 )
13511335 elif orient == "table" :
1352- self . obj = parse_table_schema (json , precise_float = self .precise_float )
1336+ return parse_table_schema (json , precise_float = self .precise_float )
13531337 else :
1354- self .obj = DataFrame (
1338+ # includes orient == "columns"
1339+ return DataFrame (
13551340 ujson_loads (json , precise_float = self .precise_float ), dtype = None
13561341 )
13571342
1358- def _process_converter (
1359- self ,
1360- f : Callable [[Hashable , Series ], tuple [Series , bool ]],
1361- filt : Callable [[Hashable ], bool ] | None = None ,
1362- ) -> None :
1363- """
1364- Take a conversion function and possibly recreate the frame.
1365- """
1366- if filt is None :
1367- filt = lambda col : True
1368-
1369- obj = self .obj
1370- assert obj is not None # for mypy
1371-
1372- needs_new_obj = False
1373- new_obj = {}
1374- for i , (col , c ) in enumerate (obj .items ()):
1375- if filt (col ):
1376- new_data , result = f (col , c )
1377- if result :
1378- c = new_data
1379- needs_new_obj = True
1380- new_obj [i ] = c
1381-
1382- if needs_new_obj :
1383- # possibly handle dup columns
1384- new_frame = DataFrame (new_obj , index = obj .index )
1385- new_frame .columns = obj .columns
1386- self .obj = new_frame
1387-
def _try_convert_types(self, obj: DataFrame) -> DataFrame:
    """
    Convert each column of ``obj`` and assemble a new DataFrame.

    Every column is passed through ``_try_convert_data``; whether date
    conversion is attempted for a column is decided per column label by
    ``_should_convert_dates``. The resulting arrays are put back together
    with the original column labels and index.
    """
    arrays = []
    for col_label, series in obj.items():
        # The second element (the "converted" flag) is irrelevant here:
        # the returned data is used whether or not it changed.
        result, _ = self._try_convert_data(
            col_label,
            series,
            convert_dates=_should_convert_dates(
                self.convert_dates,
                keep_default_dates=self.keep_default_dates,
                col=col_label,
            ),
        )
        arrays.append(result.array)
    # Columns are supplied positionally as arrays, with obj.columns reused
    # unchanged as the labels.
    return DataFrame._from_arrays(
        arrays, obj.columns, obj.index, verify_integrity=False
    )
1398- def _try_convert_dates (self ) -> None :
1399- if self .obj is None :
1400- return
1401-
1402- # our columns to parse
1403- convert_dates_list_bool = self .convert_dates
1404- if isinstance (convert_dates_list_bool , bool ):
1405- convert_dates_list_bool = []
1406- convert_dates = set (convert_dates_list_bool )
1407-
1408- def is_ok (col ) -> bool :
1409- """
1410- Return if this col is ok to try for a date parse.
1411- """
1412- if col in convert_dates :
1413- return True
1414- if not self .keep_default_dates :
1415- return False
1416- if not isinstance (col , str ):
1417- return False
1418-
1419- col_lower = col .lower ()
1420- if (
1421- col_lower .endswith (("_at" , "_time" ))
1422- or col_lower == "modified"
1423- or col_lower == "date"
1424- or col_lower == "datetime"
1425- or col_lower .startswith ("timestamp" )
1426- ):
1427- return True
1428- return False
14291360
1430- self ._process_converter (lambda col , c : self ._try_convert_to_date (c ), filt = is_ok )
1361+ def _should_convert_dates (
1362+ convert_dates : bool | list [str ],
1363+ keep_default_dates : bool ,
1364+ col : Hashable ,
1365+ ) -> bool :
1366+ """
1367+ Return bool whether a DataFrame column should be cast to datetime.
1368+ """
1369+ if convert_dates is False :
1370+ # convert_dates=True means follow keep_default_dates
1371+ return False
1372+ elif not isinstance (convert_dates , bool ) and col in set (convert_dates ):
1373+ return True
1374+ elif not keep_default_dates :
1375+ return False
1376+ elif not isinstance (col , str ):
1377+ return False
1378+ col_lower = col .lower ()
1379+ if (
1380+ col_lower .endswith (("_at" , "_time" ))
1381+ or col_lower in {"modified" , "date" , "datetime" }
1382+ or col_lower .startswith ("timestamp" )
1383+ ):
1384+ return True
1385+ return False
0 commit comments