Skip to content

Commit 723cba8

Browse files
committed
Merge branch 'main' into td-string-parse
2 parents 8f9fca0 + fb517ba commit 723cba8

File tree

23 files changed

+195
-134
lines changed

23 files changed

+195
-134
lines changed

pandas/_libs/lib.pyx

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,6 @@ from pandas._libs.tslibs.nattype cimport (
106106
)
107107
from pandas._libs.tslibs.offsets cimport is_offset_object
108108
from pandas._libs.tslibs.period cimport is_period_object
109-
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
110109
from pandas._libs.tslibs.timezones cimport tz_compare
111110

112111
# constants that will be compared to potentially arbitrarily large
@@ -2674,11 +2673,6 @@ def maybe_convert_objects(ndarray[object] objects,
26742673
elif is_timedelta(val):
26752674
if convert_non_numeric:
26762675
seen.timedelta_ = True
2677-
try:
2678-
convert_to_timedelta64(val, "ns")
2679-
except OutOfBoundsTimedelta:
2680-
seen.object_ = True
2681-
break
26822676
break
26832677
else:
26842678
seen.object_ = True

pandas/_libs/tslibs/conversion.pxd

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,6 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1
4545

4646
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
4747
cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
48-
cdef (int64_t, int) precision_from_unit(
49-
NPY_DATETIMEUNIT in_reso, NPY_DATETIMEUNIT out_reso=*
50-
)
5148

5249
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
5350

pandas/_libs/tslibs/nattype.pyx

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1260,7 +1260,9 @@ class NaTType(_NaT):
12601260
Return new Timestamp object representing current time local to tz.
12611261
12621262
This method returns a new `Timestamp` object that represents the current time.
1263-
If a timezone is provided, the current time will be localized to that timezone.
1263+
If a timezone is provided, either through a timezone object or an IANA
1264+
standard timezone identifier, the current time will be localized to that
1265+
timezone.
12641266
Otherwise, it returns the current local time.
12651267
12661268
Parameters
@@ -1279,6 +1281,11 @@ class NaTType(_NaT):
12791281
>>> pd.Timestamp.now() # doctest: +SKIP
12801282
Timestamp('2020-11-16 22:06:16.378782')
12811283
1284+
If you want a specific timezone, in this case 'Brazil/East':
1285+
1286+
>>> pd.Timestamp.now('Brazil/East') # doctest: +SKIP
1287+
Timestamp('2025-11-11 22:17:59.609943-0300', tz='Brazil/East')
1288+
12821289
Analogous for ``pd.NaT``:
12831290
12841291
>>> pd.NaT.now()

pandas/_libs/tslibs/timedeltas.pxd

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? -1
99
cpdef int64_t delta_to_nanoseconds(
1010
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*
1111
) except? -1
12-
cdef convert_to_timedelta64(object ts, str unit)
1312
cdef bint is_any_td_scalar(object obj)
1413

1514

pandas/_libs/tslibs/timedeltas.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def array_to_timedelta64(
7171
values: npt.NDArray[np.object_],
7272
unit: str | None = ...,
7373
errors: str = ...,
74+
creso: int = ...,
7475
) -> np.ndarray: ... # np.ndarray[m8[unit]]; unit is inferred from the items unless creso is given
7576
def parse_timedelta_unit(unit: str | None) -> UnitChoices: ...
7677
def delta_to_nanoseconds(

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 114 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ from pandas._libs.missing cimport checknull_with_nat_and_na
4242
from pandas._libs.tslibs.base cimport ABCTimestamp
4343
from pandas._libs.tslibs.conversion cimport (
4444
cast_from_unit,
45-
precision_from_unit,
4645
)
4746
from pandas._libs.tslibs.dtypes cimport (
4847
c_DEPR_UNITS,
@@ -291,68 +290,6 @@ cpdef int64_t delta_to_nanoseconds(
291290
) from err
292291

293292

294-
@cython.overflowcheck(True)
295-
cdef object ensure_td64ns(object ts):
296-
"""
297-
Overflow-safe implementation of td64.astype("m8[ns]")
298-
299-
Parameters
300-
----------
301-
ts : np.timedelta64
302-
303-
Returns
304-
-------
305-
np.timedelta64[ns]
306-
"""
307-
cdef:
308-
NPY_DATETIMEUNIT td64_unit
309-
int64_t td64_value, mult
310-
311-
td64_unit = get_datetime64_unit(ts)
312-
if (
313-
td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
314-
and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
315-
):
316-
317-
td64_value = cnp.get_timedelta64_value(ts)
318-
319-
mult = precision_from_unit(td64_unit)[0]
320-
try:
321-
# NB: cython#1381 this cannot be *=
322-
td64_value = td64_value * mult
323-
except OverflowError as err:
324-
raise OutOfBoundsTimedelta(ts) from err
325-
326-
return np.timedelta64(td64_value, "ns")
327-
328-
return ts
329-
330-
331-
cdef convert_to_timedelta64(object ts, str unit):
332-
"""
333-
Convert an incoming object to a timedelta64 if possible.
334-
Before calling, unit must be standardized to avoid repeated unit conversion
335-
336-
Handle these types of objects:
337-
- timedelta/Timedelta
338-
339-
Return a timedelta64[ns] object
340-
"""
341-
# Caller is responsible for checking unit not in ["Y", "y", "M"]
342-
if isinstance(ts, _Timedelta):
343-
# already in the proper format
344-
if ts._creso != NPY_FR_ns:
345-
ts = ts.as_unit("ns").asm8
346-
else:
347-
ts = np.timedelta64(ts._value, "ns")
348-
349-
elif PyDelta_Check(ts):
350-
ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
351-
elif not cnp.is_timedelta64_object(ts):
352-
raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}")
353-
return ts.astype("timedelta64[ns]")
354-
355-
356293
cdef _numeric_to_td64ns(object item, str unit):
357294
# caller is responsible for checking
358295
# assert unit not in ["Y", "y", "M"]
@@ -371,10 +308,34 @@ cdef _numeric_to_td64ns(object item, str unit):
371308
return ts
372309

373310

311+
# TODO: de-duplicate with DatetimeParseState
312+
cdef class ResoState:
313+
cdef:
314+
NPY_DATETIMEUNIT creso
315+
bint creso_ever_changed
316+
317+
def __cinit__(self, NPY_DATETIMEUNIT creso):
318+
self.creso = creso
319+
self.creso_ever_changed = False
320+
321+
cdef bint update_creso(self, NPY_DATETIMEUNIT item_reso) noexcept:
322+
# Return a bool indicating whether we bumped to a higher resolution
323+
if self.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
324+
self.creso = item_reso
325+
elif item_reso > self.creso:
326+
self.creso = item_reso
327+
self.creso_ever_changed = True
328+
return True
329+
return False
330+
331+
374332
@cython.boundscheck(False)
375333
@cython.wraparound(False)
376334
def array_to_timedelta64(
377-
ndarray values, str unit=None, str errors="raise"
335+
ndarray values,
336+
str unit=None,
337+
str errors="raise",
338+
NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_GENERIC,
378339
) -> ndarray:
379340
# values is object-dtype, may be 2D
380341
"""
@@ -396,6 +357,10 @@ def array_to_timedelta64(
396357
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
397358
cnp.flatiter it
398359
str parsed_unit = parse_timedelta_unit(unit or "ns")
360+
NPY_DATETIMEUNIT item_reso
361+
ResoState state = ResoState(creso)
362+
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
363+
ndarray iresult = result.view("i8")
399364

400365
if values.descr.type_num != cnp.NPY_OBJECT:
401366
# raise here otherwise we segfault below
@@ -423,18 +388,58 @@ def array_to_timedelta64(
423388
ival = NPY_NAT
424389

425390
elif cnp.is_timedelta64_object(item):
426-
td64ns_obj = ensure_td64ns(item)
427-
ival = cnp.get_timedelta64_value(td64ns_obj)
391+
# TODO: de-duplicate this with Timedelta.__new__
392+
ival = cnp.get_timedelta64_value(item)
393+
dt64_reso = get_datetime64_unit(item)
394+
if not (
395+
is_supported_unit(dt64_reso) or
396+
dt64_reso in [
397+
NPY_DATETIMEUNIT.NPY_FR_m,
398+
NPY_DATETIMEUNIT.NPY_FR_h,
399+
NPY_DATETIMEUNIT.NPY_FR_D,
400+
NPY_DATETIMEUNIT.NPY_FR_W,
401+
NPY_DATETIMEUNIT.NPY_FR_GENERIC
402+
]
403+
):
404+
err = npy_unit_to_abbrev(dt64_reso)
405+
raise ValueError(
406+
f"Unit {err} is not supported. "
407+
"Only unambiguous timedelta values durations are supported. "
408+
"Allowed units are 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns'")
409+
410+
item_reso = get_supported_reso(dt64_reso)
411+
state.update_creso(item_reso)
412+
if infer_reso:
413+
creso = state.creso
414+
if dt64_reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC:
415+
try:
416+
ival = convert_reso(
417+
ival,
418+
dt64_reso,
419+
creso,
420+
round_ok=True,
421+
)
422+
except (OverflowError, OutOfBoundsDatetime) as err:
423+
raise OutOfBoundsTimedelta(item) from err
424+
else:
425+
# e.g. NaT
426+
pass
428427

429428
elif isinstance(item, _Timedelta):
430-
if item._creso != NPY_FR_ns:
431-
ival = item.as_unit("ns")._value
432-
else:
433-
ival = item._value
429+
item_reso = item._creso
430+
state.update_creso(item_reso)
431+
if infer_reso:
432+
creso = state.creso
433+
434+
ival = (<_Timedelta>item)._as_creso(creso)._value
434435

435436
elif PyDelta_Check(item):
436437
# i.e. isinstance(item, timedelta)
437-
ival = delta_to_nanoseconds(item)
438+
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
439+
state.update_creso(item_reso)
440+
if infer_reso:
441+
creso = state.creso
442+
ival = delta_to_nanoseconds(item, reso=creso)
438443

439444
elif isinstance(item, str):
440445
if (
@@ -445,13 +450,27 @@ def array_to_timedelta64(
445450
else:
446451
ival = parse_timedelta_string(item)
447452

453+
item_reso = NPY_FR_ns
454+
state.update_creso(item_reso)
455+
if infer_reso:
456+
creso = state.creso
457+
448458
elif is_tick_object(item):
449-
ival = item.nanos
459+
item_reso = get_supported_reso(item._creso)
460+
state.update_creso(item_reso)
461+
if infer_reso:
462+
creso = state.creso
463+
ival = delta_to_nanoseconds(item, reso=creso)
450464

451465
elif is_integer_object(item) or is_float_object(item):
452466
td64ns_obj = _numeric_to_td64ns(item, parsed_unit)
453467
ival = cnp.get_timedelta64_value(td64ns_obj)
454468

469+
item_reso = NPY_FR_ns
470+
state.update_creso(item_reso)
471+
if infer_reso:
472+
creso = state.creso
473+
455474
else:
456475
raise TypeError(f"Invalid type for timedelta scalar: {type(item)}")
457476

@@ -469,6 +488,29 @@ def array_to_timedelta64(
469488

470489
cnp.PyArray_MultiIter_NEXT(mi)
471490

491+
if infer_reso:
492+
if state.creso_ever_changed:
493+
# We encountered mismatched resolutions, need to re-parse with
494+
# the correct one.
495+
return array_to_timedelta64(
496+
values,
497+
unit=unit,
498+
errors=errors,
499+
creso=state.creso,
500+
)
501+
elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
502+
# i.e. we never encountered anything non-NaT, default to "s". This
503+
# ensures that insert and concat-like operations with NaT
504+
# do not upcast units
505+
result = iresult.view("m8[s]")
506+
else:
507+
# Otherwise we can use the single reso that we encountered and avoid
508+
# a second pass.
509+
abbrev = npy_unit_to_abbrev(state.creso)
510+
result = iresult.view(f"m8[{abbrev}]")
511+
else:
512+
abbrev = npy_unit_to_abbrev(creso)
513+
result = result.view(f"m8[{abbrev}]")
472514
return result
473515

474516

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1917,7 +1917,9 @@ class Timestamp(_Timestamp):
19171917
Return new Timestamp object representing current time local to tz.
19181918

19191919
This method returns a new `Timestamp` object that represents the current time.
1920-
If a timezone is provided, the current time will be localized to that timezone.
1920+
If a timezone is provided, either through a timezone object or an IANA
1921+
standard timezone identifier, the current time will be localized to that
1922+
timezone.
19211923
Otherwise, it returns the current local time.
19221924

19231925
Parameters
@@ -1936,6 +1938,11 @@ class Timestamp(_Timestamp):
19361938
>>> pd.Timestamp.now() # doctest: +SKIP
19371939
Timestamp('2020-11-16 22:06:16.378782')
19381940

1941+
If you want a specific timezone, in this case 'Brazil/East':
1942+
1943+
>>> pd.Timestamp.now('Brazil/East') # doctest: +SKIP
1944+
Timestamp('2025-11-11 22:17:59.609943-0300', tz='Brazil/East')
1945+
19391946
Analogous for ``pd.NaT``:
19401947

19411948
>>> pd.NaT.now()

pandas/core/arrays/timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ def _objects_to_td64ns(
12511251
values = np.asarray(data, dtype=np.object_)
12521252

12531253
result = array_to_timedelta64(values, unit=unit, errors=errors)
1254-
return result.view("timedelta64[ns]")
1254+
return result
12551255

12561256

12571257
def _validate_td64_dtype(dtype) -> DtypeObj:

pandas/tests/arithmetic/test_timedelta64.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ def test_tdi_add_overflow(self):
736736
)
737737

738738
# These should not overflow!
739-
exp = TimedeltaIndex([NaT])
739+
exp = TimedeltaIndex([NaT], dtype="m8[ns]")
740740
result = pd.to_timedelta([NaT]) - Timedelta("1 days")
741741
tm.assert_index_equal(result, exp)
742742

@@ -2235,7 +2235,7 @@ def test_float_series_rdiv_td64arr(self, box_with_array, names):
22352235

22362236
def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array):
22372237
# GH#39750 make sure we infer the result as td64
2238-
tdi = TimedeltaIndex([NaT, NaT])
2238+
tdi = TimedeltaIndex([NaT, NaT], dtype="m8[ns]")
22392239

22402240
left = tm.box_expected(tdi, box_with_array)
22412241
right = np.array([2, 2.0], dtype=object)

pandas/tests/dtypes/test_inference.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,7 @@ def test_maybe_convert_objects_datetime(self):
808808
tm.assert_numpy_array_equal(out, exp)
809809

810810
arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object)
811-
exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]")
811+
exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[s]")
812812
out = lib.maybe_convert_objects(arr, convert_non_numeric=True)
813813
tm.assert_numpy_array_equal(out, exp)
814814

@@ -863,7 +863,7 @@ def test_maybe_convert_objects_datetime_overflow_safe(self, dtype):
863863
if dtype == "datetime64[ns]":
864864
expected = np.array(["2363-10-04"], dtype="M8[us]")
865865
else:
866-
expected = arr
866+
expected = arr.astype("m8[us]")
867867
tm.assert_numpy_array_equal(out, expected)
868868

869869
def test_maybe_convert_objects_mixed_datetimes(self):

0 commit comments

Comments
 (0)