Skip to content

Commit 489286c

Browse files
committed
BUG: Bug in groupby.get_group where a datelike wasn't always accepted (5267)
1 parent c70b4ae commit 489286c

File tree

3 files changed

+63
-11
lines changed

3 files changed

+63
-11
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ Bug Fixes
365365
would only replace the first occurrence of a value (:issue:`6689`)
366366
- Better error message when passing a frequency of 'MS' in ``Period`` construction (GH5332)
367367
- Bug in `Series.__unicode__` when `max_rows` is `None` and the Series has more than 1000 rows. (:issue:`6863`)
368+
- Bug in ``groupby.get_group`` where a datelike wasn't always accepted (:issue:`5267`)
368369

369370
pandas 0.13.1
370371
-------------

pandas/core/groupby.py

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import types
22
from functools import wraps
33
import numpy as np
4+
import datetime
45

56
from pandas.compat import(
6-
zip, builtins, range, long, lrange, lzip, OrderedDict, callable
7+
zip, builtins, range, long, lrange, lzip,
8+
OrderedDict, callable
79
)
810
from pandas import compat
911

@@ -402,14 +404,32 @@ def indices(self):
402404
return self.grouper.indices
403405

404406
def _get_index(self, name):
405-
""" safe get index """
406-
try:
407-
return self.indices[name]
408-
except:
409-
if isinstance(name, Timestamp):
410-
name = name.value
411-
return self.indices[name]
412-
raise
407+
""" safe get index, translate keys for datelike to underlying repr """
408+
409+
def convert(key, s):
410+
# possibly convert to the actual key types
411+
# in the indices, could be a Timestamp or a np.datetime64
412+
413+
if isinstance(s, (Timestamp,datetime.datetime)):
414+
return Timestamp(key)
415+
elif isinstance(s, np.datetime64):
416+
return Timestamp(key).asm8
417+
return key
418+
419+
sample = list(self.indices)[0]
420+
if isinstance(sample, tuple):
421+
if not isinstance(name, tuple):
422+
raise ValueError("must supply a tuple to get_group with multiple grouping keys")
423+
if not len(name) == len(sample):
424+
raise ValueError("must supply a a same-length tuple to get_group with multiple grouping keys")
425+
426+
name = tuple([ convert(n, k) for n, k in zip(name,sample) ])
427+
428+
else:
429+
430+
name = convert(name, sample)
431+
432+
return self.indices[name]
413433

414434
@property
415435
def name(self):
@@ -554,7 +574,7 @@ def apply(self, func, *args, **kwargs):
554574
path. This can lead to unexpected behavior if func has
555575
side-effects, as they will take effect twice for the first
556576
group.
557-
577+
558578
559579
See also
560580
--------

pandas/tests/test_groupby.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
assert_series_equal, assert_almost_equal,
1818
assert_index_equal)
1919
from pandas.compat import(
20-
range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict
20+
range, long, lrange, StringIO, lmap, lzip, map,
21+
zip, builtins, OrderedDict
2122
)
2223
from pandas import compat
2324
from pandas.core.panel import Panel
@@ -479,6 +480,36 @@ def test_get_group(self):
479480
expected = wp.reindex(major=[x for x in wp.major_axis if x.month == 1])
480481
assert_panel_equal(gp, expected)
481482

483+
484+
# GH 5267
485+
# be datelike friendly
486+
df = DataFrame({'DATE' : pd.to_datetime(['10-Oct-2013', '10-Oct-2013', '10-Oct-2013',
487+
'11-Oct-2013', '11-Oct-2013', '11-Oct-2013']),
488+
'label' : ['foo','foo','bar','foo','foo','bar'],
489+
'VAL' : [1,2,3,4,5,6]})
490+
491+
g = df.groupby('DATE')
492+
key = list(g.groups)[0]
493+
result1 = g.get_group(key)
494+
result2 = g.get_group(Timestamp(key).to_datetime())
495+
result3 = g.get_group(str(Timestamp(key)))
496+
assert_frame_equal(result1,result2)
497+
assert_frame_equal(result1,result3)
498+
499+
g = df.groupby(['DATE','label'])
500+
501+
key = list(g.groups)[0]
502+
result1 = g.get_group(key)
503+
result2 = g.get_group((Timestamp(key[0]).to_datetime(),key[1]))
504+
result3 = g.get_group((str(Timestamp(key[0])),key[1]))
505+
assert_frame_equal(result1,result2)
506+
assert_frame_equal(result1,result3)
507+
508+
# must pass a same-length tuple with multiple keys
509+
self.assertRaises(ValueError, lambda : g.get_group('foo'))
510+
self.assertRaises(ValueError, lambda : g.get_group(('foo')))
511+
self.assertRaises(ValueError, lambda : g.get_group(('foo','bar','baz')))
512+
482513
def test_agg_apply_corner(self):
483514
# nothing to group, all NA
484515
grouped = self.ts.groupby(self.ts * np.nan)

0 commit comments

Comments
 (0)