|
19 | 19 | import pandas.core.algorithms as algos |
20 | 20 | import pandas.core.common as com |
21 | 21 | from pandas.core.common import(_possibly_downcast_to_dtype, isnull, |
22 | | - notnull, _DATELIKE_DTYPES) |
| 22 | + notnull, _DATELIKE_DTYPES, is_numeric_dtype, |
| 23 | + is_timedelta64_dtype, is_datetime64_dtype) |
23 | 24 |
|
24 | 25 | import pandas.lib as lib |
| 26 | +from pandas.lib import Timestamp |
25 | 27 | import pandas.algos as _algos |
26 | 28 | import pandas.hashtable as _hash |
27 | 29 |
|
@@ -257,6 +259,16 @@ def indices(self): |
257 | 259 | """ dict {group name -> group indices} """ |
258 | 260 | return self.grouper.indices |
259 | 261 |
|
| 262 | + def _get_index(self, name): |
| 263 | + """ return self.indices[name]; if that lookup fails and name is a Timestamp, retry the lookup with name.value, otherwise re-raise the original error """ |
| 264 | + try: |
| 265 | + return self.indices[name] |
| 266 | + except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must propagate instead of triggering the retry |
| 267 | + if isinstance(name, Timestamp): |
| 268 | + name = name.value |
| 269 | + return self.indices[name] |
| 270 | + raise |
| 271 | + |
260 | 272 | @property |
261 | 273 | def name(self): |
262 | 274 | if self._selection is None: |
@@ -350,7 +362,7 @@ def get_group(self, name, obj=None): |
350 | 362 | if obj is None: |
351 | 363 | obj = self.obj |
352 | 364 |
|
353 | | - inds = self.indices[name] |
| 365 | + inds = self._get_index(name) |
354 | 366 | return obj.take(inds, axis=self.axis, convert=False) |
355 | 367 |
|
356 | 368 | def __iter__(self): |
@@ -676,7 +688,7 @@ def _try_cast(self, result, obj): |
676 | 688 | def _cython_agg_general(self, how, numeric_only=True): |
677 | 689 | output = {} |
678 | 690 | for name, obj in self._iterate_slices(): |
679 | | - is_numeric = _is_numeric_dtype(obj.dtype) |
| 691 | + is_numeric = is_numeric_dtype(obj.dtype) |
680 | 692 | if numeric_only and not is_numeric: |
681 | 693 | continue |
682 | 694 |
|
@@ -714,7 +726,7 @@ def _python_agg_general(self, func, *args, **kwargs): |
714 | 726 |
|
715 | 727 | # since we are masking, make sure that we have a float object |
716 | 728 | values = result |
717 | | - if _is_numeric_dtype(values.dtype): |
| 729 | + if is_numeric_dtype(values.dtype): |
718 | 730 | values = com.ensure_float(values) |
719 | 731 |
|
720 | 732 | output[name] = self._try_cast(values[mask], result) |
@@ -1080,7 +1092,7 @@ def aggregate(self, values, how, axis=0): |
1080 | 1092 | raise NotImplementedError |
1081 | 1093 | out_shape = (self.ngroups,) + values.shape[1:] |
1082 | 1094 |
|
1083 | | - if _is_numeric_dtype(values.dtype): |
| 1095 | + if is_numeric_dtype(values.dtype): |
1084 | 1096 | values = com.ensure_float(values) |
1085 | 1097 | is_numeric = True |
1086 | 1098 | else: |
@@ -1474,6 +1486,15 @@ def __init__(self, index, grouper=None, name=None, level=None, |
1474 | 1486 | self.grouper = None # Try for sanity |
1475 | 1487 | raise AssertionError(errmsg) |
1476 | 1488 |
|
| 1489 | + # if we have a datetime64 or timedelta64 grouper, normalize it via to_datetime / to_timedelta |
| 1490 | + if getattr(self.grouper,'dtype',None) is not None: |
| 1491 | + if is_datetime64_dtype(self.grouper): |
| 1492 | + from pandas import to_datetime |
| 1493 | + self.grouper = to_datetime(self.grouper) |
| 1494 | + elif is_timedelta64_dtype(self.grouper): |
| 1495 | + from pandas import to_timedelta |
| 1496 | + self.grouper = to_timedelta(self.grouper) |
| 1497 | + |
1477 | 1498 | def __repr__(self): |
1478 | 1499 | return 'Grouping(%s)' % self.name  # renders as "Grouping(<self.name>)" |
1479 | 1500 |
|
@@ -1821,7 +1842,7 @@ def transform(self, func, *args, **kwargs): |
1821 | 1842 | # need to do a safe put here, as the dtype may be different |
1822 | 1843 | # this needs to be an ndarray |
1823 | 1844 | result = Series(result) |
1824 | | - result.iloc[self.indices[name]] = res |
| 1845 | + result.iloc[self._get_index(name)] = res |
1825 | 1846 | result = result.values |
1826 | 1847 |
|
1827 | 1848 | # downcast if we can (and need) |
@@ -1860,7 +1881,7 @@ def true_and_notnull(x, *args, **kwargs): |
1860 | 1881 | return b and notnull(b) |
1861 | 1882 |
|
1862 | 1883 | try: |
1863 | | - indices = [self.indices[name] if true_and_notnull(group) else [] |
| 1884 | + indices = [self._get_index(name) if true_and_notnull(group) else [] |
1864 | 1885 | for name, group in self] |
1865 | 1886 | except ValueError: |
1866 | 1887 | raise TypeError("the filter must return a boolean result") |
@@ -1921,7 +1942,7 @@ def _cython_agg_blocks(self, how, numeric_only=True): |
1921 | 1942 | for block in data.blocks: |
1922 | 1943 | values = block.values |
1923 | 1944 |
|
1924 | | - is_numeric = _is_numeric_dtype(values.dtype) |
| 1945 | + is_numeric = is_numeric_dtype(values.dtype) |
1925 | 1946 |
|
1926 | 1947 | if numeric_only and not is_numeric: |
1927 | 1948 | continue |
@@ -2412,7 +2433,7 @@ def filter(self, func, dropna=True, *args, **kwargs): |
2412 | 2433 | res = path(group) |
2413 | 2434 |
|
2414 | 2435 | def add_indices(): |
2415 | | - indices.append(self.indices[name]) |
| 2436 | + indices.append(self._get_index(name)) |
2416 | 2437 |
|
2417 | 2438 | # interpret the result of the filter |
2418 | 2439 | if isinstance(res, (bool, np.bool_)): |
@@ -2973,12 +2994,6 @@ def _reorder_by_uniques(uniques, labels): |
2973 | 2994 | } |
2974 | 2995 |
|
2975 | 2996 |
|
2976 | | -def _is_numeric_dtype(dt): |
2977 | | - typ = dt.type |
2978 | | - return (issubclass(typ, (np.number, np.bool_)) |
2979 | | - and not issubclass(typ, (np.datetime64, np.timedelta64))) |
2980 | | - |
2981 | | - |
2982 | 2997 | def _intercept_function(func): |
2983 | 2998 | return _func_table.get(func, func)  # looks func up in _func_table, passing func itself as the .get default |
2984 | 2999 |
|
|
0 commit comments