Skip to content

Commit 739950a

Browse files
committed
BUG: HDFStore - missing implementation of bool columns in selection
1 parent b0ee363 commit 739950a

File tree

2 files changed: 53 additions and 25 deletions.

pandas/io/pytables.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1076,6 +1076,8 @@ def set_kind(self):
10761076
self.kind = 'integer'
10771077
elif self.dtype.startswith('date'):
10781078
self.kind = 'datetime'
1079+
elif self.dtype.startswith('bool'):
1080+
self.kind = 'bool'
10791081

10801082
def set_atom(self, block, existing_col, min_itemsize, nan_rep, **kwargs):
10811083
""" create and setup my atom from the block b """
@@ -3057,6 +3059,9 @@ def convert_value(self, v):
30573059
elif self.kind == 'float':
30583060
v = float(v)
30593061
return [v, v]
3062+
elif self.kind == 'bool':
3063+
v = bool(v)
3064+
return [v, v]
30603065
elif not isinstance(v, basestring):
30613066
return [str(v), None]
30623067

pandas/io/tests/test_pytables.py

Lines changed: 48 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -953,19 +953,21 @@ def test_table_values_dtypes_roundtrip(self):
953953
assert df1.dtypes == store['df_f4'].dtypes
954954
assert df1.dtypes[0] == 'float32'
955955

956-
# check with mixed dtypes (but not multi float types)
957-
df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
956+
# check with mixed dtypes
957+
df1 = DataFrame(dict([ (c,Series(np.random.randn(5),dtype=c)) for c in
958+
['float32','float64','int32','int64','int16','int8'] ]))
958959
df1['string'] = 'foo'
959-
store.append('df_mixed_dtypes1', df1)
960-
assert (df1.dtypes == store['df_mixed_dtypes1'].dtypes).all() == True
961-
assert df1.dtypes[0] == 'float32'
962-
assert df1.dtypes[1] == 'object'
960+
df1['float322'] = 1.
961+
df1['float322'] = df1['float322'].astype('float32')
962+
df1['bool'] = df1['float32'] > 0
963963

964-
### this is not supported, e.g. mixed float32/float64 blocks ###
965-
#df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
966-
#df1['float64'] = 1.0
967-
#store.append('df_mixed_dtypes2', df1)
968-
#assert df1.dtypes == store['df_mixed_dtypes2'].dtypes).all() == True
964+
store.append('df_mixed_dtypes1', df1)
965+
result = store.select('df_mixed_dtypes1').get_dtype_counts()
966+
expected = Series({ 'float32' : 2, 'float64' : 1,'int32' : 1, 'bool' : 1,
967+
'int16' : 1, 'int8' : 1, 'int64' : 1, 'object' : 1 })
968+
result.sort()
969+
expected.sort()
970+
tm.assert_series_equal(result,expected)
969971

970972
def test_table_mixed_dtypes(self):
971973

@@ -1628,6 +1630,10 @@ def test_select(self):
16281630
expected = df[df.A > 0].reindex(columns=['C', 'D'])
16291631
tm.assert_frame_equal(expected, result)
16301632

1633+
def test_select_dtypes(self):
1634+
1635+
with ensure_clean(self.path) as store:
1636+
16311637
# with a Timestamp data column (GH #2637)
16321638
df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300)))
16331639
store.remove('df')
@@ -1636,6 +1642,37 @@ def test_select(self):
16361642
expected = df[df.ts >= Timestamp('2012-02-01')]
16371643
tm.assert_frame_equal(expected, result)
16381644

1645+
# bool columns
1646+
df = DataFrame(np.random.randn(5,2), columns =['A','B'])
1647+
df['object'] = 'foo'
1648+
df.ix[4:5,'object'] = 'bar'
1649+
df['bool'] = df['A'] > 0
1650+
store.remove('df')
1651+
store.append('df', df, data_columns = True)
1652+
result = store.select('df', Term('bool == True'), columns = ['A','bool'])
1653+
expected = df[df.bool == True].reindex(columns=['A','bool'])
1654+
tm.assert_frame_equal(expected, result)
1655+
1656+
result = store.select('df', Term('bool == 1'), columns = ['A','bool'])
1657+
tm.assert_frame_equal(expected, result)
1658+
1659+
# integer index
1660+
df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
1661+
store.append('df_int', df)
1662+
result = store.select(
1663+
'df_int', [Term("index<10"), Term("columns", "=", ["A"])])
1664+
expected = df.reindex(index=list(df.index)[0:10],columns=['A'])
1665+
tm.assert_frame_equal(expected, result)
1666+
1667+
# float index
1668+
df = DataFrame(dict(A=np.random.rand(
1669+
20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
1670+
store.append('df_float', df)
1671+
result = store.select(
1672+
'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])])
1673+
expected = df.reindex(index=list(df.index)[0:10],columns=['A'])
1674+
tm.assert_frame_equal(expected, result)
1675+
16391676
def test_panel_select(self):
16401677

16411678
wp = tm.makePanel()
@@ -1676,20 +1713,6 @@ def test_frame_select(self):
16761713
expected = df.ix[:, ['A']]
16771714
tm.assert_frame_equal(result, expected)
16781715

1679-
# other indicies for a frame
1680-
1681-
# integer
1682-
df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
1683-
store.append('df_int', df)
1684-
store.select(
1685-
'df_int', [Term("index<10"), Term("columns", "=", ["A"])])
1686-
1687-
df = DataFrame(dict(A=np.random.rand(
1688-
20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
1689-
store.append('df_float', df)
1690-
store.select(
1691-
'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])])
1692-
16931716
# invalid terms
16941717
df = tm.makeTimeDataFrame()
16951718
store.append('df_time', df)

0 commit comments

Comments
 (0)