Skip to content

Commit e4f3fa8

Browse files
committed
BUG: address #590 and more concat tests with hierarchical index
1 parent 4b6b49f commit e4f3fa8

File tree

5 files changed

+110
-46
lines changed

5 files changed

+110
-46
lines changed

pandas/core/common.py

Lines changed: 36 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -512,7 +512,7 @@ def _stringify(col):
512512
else:
513513
return '%s' % col
514514

515-
def _float_format_default(v, width = None):
515+
def _float_format_default(v, width=None):
516516
"""
517517
Take a float and its formatted representation and if it needs extra space
518518
to fit the width, reformat it to that width.
@@ -565,30 +565,41 @@ def _float_format_default(v, width = None):
565565

566566
return fmt_str % v
567567

568-
def _format(s, space=None, na_rep=None, float_format=None, col_width=None):
568+
def _format(s, dtype, space=None, na_rep=None, float_format=None,
569+
col_width=None):
569570
def _just_help(x):
570571
if space is None:
571572
return x
572573
return x[:space].ljust(space)
573574

574-
if isinstance(s, float):
575-
if na_rep is not None and isnull(s):
576-
if np.isnan(s):
577-
s = na_rep
578-
return _just_help('%s' % s)
575+
def _make_float_format(x):
576+
if na_rep is not None and isnull(x):
577+
if np.isnan(x):
578+
x = ' ' + na_rep
579+
return _just_help('%s' % x)
579580

580581
if float_format:
581-
formatted = float_format(s)
582+
formatted = float_format(x)
582583
elif _float_format:
583-
formatted = _float_format(s)
584+
formatted = _float_format(x)
584585
else:
585-
formatted = _float_format_default(s, col_width)
586+
formatted = _float_format_default(x, col_width)
586587

587588
return _just_help(formatted)
588-
elif isinstance(s, int):
589-
return _just_help('% d' % s)
589+
590+
def _make_int_format(x):
591+
return _just_help('% d' % x)
592+
593+
if is_float_dtype(dtype):
594+
return _make_float_format(s)
595+
elif is_integer_dtype(dtype):
596+
return _make_int_format(s)
590597
else:
591-
return _just_help('%s' % _stringify(s))
598+
if na_rep is not None and lib.checknull(s):
599+
return na_rep
600+
else:
601+
# object dtype
602+
return _just_help('%s' % _stringify(s))
592603

593604
#------------------------------------------------------------------------------
594605
# miscellaneous python tools
@@ -727,11 +738,19 @@ def is_integer(obj):
727738
def is_float(obj):
728739
return isinstance(obj, (float, np.floating))
729740

730-
def is_integer_dtype(arr):
731-
return issubclass(arr.dtype.type, np.integer)
741+
def is_integer_dtype(arr_or_dtype):
742+
if isinstance(arr_or_dtype, np.dtype):
743+
tipo = arr_or_dtype.type
744+
else:
745+
tipo = arr_or_dtype.dtype.type
746+
return issubclass(tipo, np.integer)
732747

733-
def is_float_dtype(arr):
734-
return issubclass(arr.dtype.type, np.floating)
748+
def is_float_dtype(arr_or_dtype):
749+
if isinstance(arr_or_dtype, np.dtype):
750+
tipo = arr_or_dtype.type
751+
else:
752+
tipo = arr_or_dtype.dtype.type
753+
return issubclass(tipo, np.floating)
735754

736755
def save(obj, path):
737756
"""

pandas/core/format.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -99,11 +99,15 @@ def to_string(self):
9999

100100
self.buf.writelines(to_write)
101101

102-
def _default_col_formatter(self, v, col_width=None):
102+
def _get_col_formatter(self, dtype):
103103
from pandas.core.common import _format
104104

105-
return _format(v, space=self.col_space, na_rep=self.na_rep,
106-
float_format=self.float_format, col_width=col_width)
105+
def formatter(x, col_width=None):
106+
return _format(x, dtype, space=self.col_space,
107+
na_rep=self.na_rep,
108+
float_format=self.float_format,
109+
col_width=col_width)
110+
return formatter
107111

108112
def _format_col(self, col, i=None):
109113
if self.formatters is None:
@@ -117,7 +121,8 @@ def _format_col(self, col, i=None):
117121
else:
118122
return formatter(self.frame[col][i])
119123
else:
120-
formatter = self._default_col_formatter
124+
dtype = self.frame[col].dtype
125+
formatter = self._get_col_formatter(dtype)
121126

122127
if i is not None:
123128
return formatter(self.frame[col][i])

pandas/tests/test_frame.py

Lines changed: 30 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1567,18 +1567,18 @@ def test_repr(self):
15671567
self.frame.reindex(columns=['A', 'B']).info(verbose=False, buf=buf)
15681568

15691569
# big one
1570-
biggie = DataFrame(np.zeros((1000, 4)), columns=range(4),
1571-
index=range(1000))
1570+
biggie = DataFrame(np.zeros((200, 4)), columns=range(4),
1571+
index=range(200))
15721572
foo = repr(biggie)
15731573

15741574
# mixed
15751575
foo = repr(self.mixed_frame)
15761576
self.mixed_frame.info(verbose=False, buf=buf)
15771577

15781578
# big mixed
1579-
biggie = DataFrame({'A' : randn(1000),
1580-
'B' : tm.makeStringIndex(1000)},
1581-
index=range(1000))
1579+
biggie = DataFrame({'A' : randn(200),
1580+
'B' : tm.makeStringIndex(200)},
1581+
index=range(200))
15821582
biggie['A'][:20] = nan
15831583
biggie['B'][:20] = nan
15841584

@@ -1675,9 +1675,9 @@ def test_to_string(self):
16751675
import re
16761676

16771677
# big mixed
1678-
biggie = DataFrame({'A' : randn(1000),
1679-
'B' : tm.makeStringIndex(1000)},
1680-
index=range(1000))
1678+
biggie = DataFrame({'A' : randn(200),
1679+
'B' : tm.makeStringIndex(200)},
1680+
index=range(200))
16811681

16821682
biggie['A'][:20] = nan
16831683
biggie['B'][:20] = nan
@@ -1717,7 +1717,7 @@ def test_to_string(self):
17171717
biggie.to_string(columns=['B', 'A'], col_space=12,
17181718
float_format=str)
17191719

1720-
frame = DataFrame(index=np.arange(1000))
1720+
frame = DataFrame(index=np.arange(200))
17211721
frame.to_string()
17221722

17231723
def test_to_string_no_header(self):
@@ -1747,10 +1747,10 @@ def test_to_string_float_formatting(self):
17471747

17481748
df_s = df.to_string()
17491749

1750-
expected = ' x \n0 0.000000\n1 0.250000\n' \
1751-
'2 3456.000\n3 1.20e+46\n4 1.64e+06\n' \
1752-
'5 1.70e+08\n6 1.253456\n7 3.141593\n' \
1753-
'8 -1.00e+06'
1750+
expected = (' x \n0 0.000000\n1 0.250000\n'
1751+
'2 3456.000\n3 1.20e+46\n4 1.64e+06\n'
1752+
'5 1.70e+08\n6 1.253456\n7 3.141593\n'
1753+
'8 -1.00e+06')
17541754
assert(df_s == expected)
17551755

17561756
df = DataFrame({'x' : [3234, 0.253]})
@@ -1766,11 +1766,24 @@ def test_to_string_float_formatting(self):
17661766
expected = ' x \n0 1.e+09\n1 0.2512'
17671767
assert(df_s == expected)
17681768

1769+
def test_to_string_format_na(self):
1770+
df = DataFrame({'A' : [np.nan, -1, -2.1234, 3, 4],
1771+
'B' : [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
1772+
result = df.to_string()
1773+
1774+
expected = (' A B \n'
1775+
'0 NaN NaN \n'
1776+
'1 -1.000 foo \n'
1777+
'2 -2.123 foooo \n'
1778+
'3 3.000 fooooo\n'
1779+
'4 4.000 bar ')
1780+
self.assertEqual(result, expected)
1781+
17691782
def test_to_html(self):
17701783
# big mixed
1771-
biggie = DataFrame({'A' : randn(1000),
1772-
'B' : tm.makeStringIndex(1000)},
1773-
index=range(1000))
1784+
biggie = DataFrame({'A' : randn(200),
1785+
'B' : tm.makeStringIndex(200)},
1786+
index=range(200))
17741787

17751788
biggie['A'][:20] = nan
17761789
biggie['B'][:20] = nan
@@ -1791,7 +1804,7 @@ def test_to_html(self):
17911804
biggie.to_html(columns=['B', 'A'], col_space=12,
17921805
float_format=str)
17931806

1794-
frame = DataFrame(index=np.arange(1000))
1807+
frame = DataFrame(index=np.arange(200))
17951808
frame.to_html()
17961809

17971810
def test_insert(self):

pandas/tools/merge.py

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -607,9 +607,17 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
607607
verify_integrity : boolean, default False
608608
Check whether the new concatenated axis contains duplicates. This can
609609
be very expensive relative to the actual data concatenation
610-
keys : sequence-like or list of sequences
611-
levels :
612-
names :
610+
keys : sequence, default None
611+
If multiple levels passed, should contain tuples
612+
levels : list of sequences, default None
613+
Specific levels (unique values) to use for constructing a
614+
MultiIndex. Otherwise they will be inferred from the keys
615+
names : list, default None
616+
Names for the levels in the resulting hierarchical index
617+
618+
Notes
619+
-----
620+
The keys, levels, and names arguments are all optional
613621
614622
Returns
615623
-------
@@ -885,22 +893,24 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
885893
else:
886894
label_list.append(concat_index.values)
887895

888-
names.extend(_get_consensus_names(indexes))
896+
# also copies
897+
names = names + _get_consensus_names(indexes)
889898

890899
return MultiIndex.from_arrays(label_list, names=names)
891900

892901
new_index = indexes[0]
893902
n = len(new_index)
894903

895-
names.append(indexes[0].name)
904+
# also copies
905+
names = names + [indexes[0].name]
896906

897907
if levels is None:
898908
if single_level:
899909
new_levels = [_ensure_index(keys)]
900910
else:
901-
new_levels = [_ensure_index(k) for k in keys]
911+
new_levels = [Factor(zp).level for zp in zipped]
902912
else:
903-
new_levels = list(levels)
913+
new_levels = [_ensure_index(x) for x in levels]
904914

905915
# do something a bit more speedy
906916
new_levels.append(new_index)

pandas/tools/tests/test_merge.py

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -778,7 +778,24 @@ def test_concat_with_group_keys(self):
778778
tm.assert_frame_equal(result, expected)
779779

780780
def test_concat_keys_and_levels(self):
781-
pass
781+
df = DataFrame(np.random.randn(1, 3))
782+
df2 = DataFrame(np.random.randn(1, 4))
783+
784+
levels = [['foo', 'baz'], ['one', 'two']]
785+
names = ['first', 'second']
786+
result = concat([df, df2, df, df2],
787+
keys=[('foo', 'one'), ('foo', 'two'),
788+
('baz', 'one'), ('baz', 'two')],
789+
levels=levels,
790+
names=names)
791+
expected = concat([df, df2, df, df2])
792+
exp_index = MultiIndex(levels=levels + [[0]],
793+
labels=[[0, 0, 1, 1], [0, 1, 0, 1],
794+
[0, 0, 0, 0]],
795+
names=names + [None])
796+
expected.index = exp_index
797+
798+
assert_frame_equal(result, expected)
782799

783800
def test_crossed_dtypes_weird_corner(self):
784801
columns = ['A', 'B', 'C', 'D']

0 commit comments

Comments
 (0)