From 1361fa4613fa566c350b83d51571090f31f076a4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 28 Jun 2019 09:42:44 -0500 Subject: [PATCH 1/8] Shorter truncated Series/DataFrame repr: introduce min_rows --- pandas/core/config_init.py | 9 +++++++++ pandas/core/frame.py | 11 +++++++---- pandas/core/series.py | 8 ++++++-- pandas/io/formats/format.py | 25 +++++++++++++++++-------- 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 4409267147b65..c849c69267839 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -77,6 +77,13 @@ def use_numexpr_cb(key): correct auto-detection. """ +pc_min_rows_doc = """ +: int + The numbers of rows to show in a truncated view (when `max_rows` is + exceeded). Ignored when `max_rows` is set to None or 0. When set to + None, follows the value of `max_rows`. +""" + pc_max_cols_doc = """ : int If max_cols is exceeded, switch to truncate view. Depending on @@ -306,6 +313,8 @@ def is_terminal(): validator=is_instance_factory((int, type(None)))) cf.register_option('max_rows', 60, pc_max_rows_doc, validator=is_instance_factory([type(None), int])) + cf.register_option('min_rows', 10, pc_min_rows_doc, + validator=is_instance_factory([type(None), int])) cf.register_option('max_categories', 8, pc_max_categories_doc, validator=is_int) cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fd2e1e3e41ced..a7732a60bdb17 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -620,14 +620,16 @@ def __repr__(self): return buf.getvalue() max_rows = get_option("display.max_rows") + min_rows = get_option("display.min_rows") max_cols = get_option("display.max_columns") show_dimensions = get_option("display.show_dimensions") if get_option("display.expand_frame_repr"): width, _ = console.get_console_size() else: width = None - self.to_string(buf=buf, 
max_rows=max_rows, max_cols=max_cols, - line_width=width, show_dimensions=show_dimensions) + self.to_string(buf=buf, max_rows=max_rows, min_rows=min_rows, + max_cols=max_cols, line_width=width, + show_dimensions=show_dimensions) return buf.getvalue() @@ -665,8 +667,8 @@ def _repr_html_(self): def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, - max_rows=None, max_cols=None, show_dimensions=False, - decimal='.', line_width=None): + min_rows=None, max_rows=None, max_cols=None, + show_dimensions=False, decimal='.', line_width=None): """ Render a DataFrame to a console-friendly tabular output. %(shared_params)s @@ -695,6 +697,7 @@ def to_string(self, buf=None, columns=None, col_space=None, header=True, sparsify=sparsify, justify=justify, index_names=index_names, header=header, index=index, + min_rows=min_rows, max_rows=max_rows, max_cols=max_cols, show_dimensions=show_dimensions, diff --git a/pandas/core/series.py b/pandas/core/series.py index 730a96f5435a1..15d6110a48768 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1409,17 +1409,20 @@ def __repr__(self): width, height = get_terminal_size() max_rows = (height if get_option("display.max_rows") == 0 else get_option("display.max_rows")) + min_rows = (height if get_option("display.max_rows") == 0 else + get_option("display.min_rows")) show_dimensions = get_option("display.show_dimensions") self.to_string(buf=buf, name=self.name, dtype=self.dtype, - max_rows=max_rows, length=show_dimensions) + min_rows=min_rows, max_rows=max_rows, + length=show_dimensions) result = buf.getvalue() return result def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, index=True, length=False, dtype=False, name=False, - max_rows=None): + min_rows=None, max_rows=None): """ Render a string representation of the Series. 
@@ -1456,6 +1459,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, header=header, index=index, dtype=dtype, na_rep=na_rep, float_format=float_format, + min_rows=min_rows, max_rows=max_rows) result = formatter.to_string() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 152e9a2e9ab3d..2c383ed000989 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -159,7 +159,7 @@ class SeriesFormatter: def __init__(self, series, buf=None, length=True, header=True, index=True, na_rep='NaN', name=False, float_format=None, dtype=True, - max_rows=None): + min_rows=None, max_rows=None): self.series = series self.buf = buf if buf is not None else StringIO() self.name = name @@ -167,6 +167,7 @@ def __init__(self, series, buf=None, length=True, header=True, index=True, self.header = header self.length = length self.index = index + self.min_rows = min_rows self.max_rows = max_rows if float_format is None: @@ -179,15 +180,19 @@ def __init__(self, series, buf=None, length=True, header=True, index=True, def _chk_truncate(self): from pandas.core.reshape.concat import concat + min_rows = self.min_rows max_rows = self.max_rows + if max_rows and min_rows is None: + min_rows = max_rows + min_rows = min(min_rows, max_rows) truncate_v = max_rows and (len(self.series) > max_rows) series = self.series if truncate_v: - if max_rows == 1: - row_num = max_rows - series = series.iloc[:max_rows] + if min_rows == 1: + row_num = min_rows + series = series.iloc[:min_rows] else: - row_num = max_rows // 2 + row_num = min_rows // 2 series = concat((series.iloc[:row_num], series.iloc[-row_num:])) self.tr_row_num = row_num @@ -390,9 +395,9 @@ class DataFrameFormatter(TableFormatter): def __init__(self, frame, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, justify=None, float_format=None, sparsify=None, - index_names=True, line_width=None, max_rows=None, - max_cols=None, 
show_dimensions=False, decimal='.', - table_id=None, render_links=False, **kwds): + index_names=True, line_width=None, min_rows=None, + max_rows=None, max_cols=None, show_dimensions=False, + decimal='.', table_id=None, render_links=False, **kwds): self.frame = frame if buf is not None: self.buf = _expand_user(_stringify_path(buf)) @@ -413,6 +418,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.header = header self.index = index self.line_width = line_width + self.min_rows = min_rows self.max_rows = max_rows self.max_cols = max_cols self.max_rows_displayed = min(max_rows or len(self.frame), @@ -471,6 +477,9 @@ def _chk_truncate(self): max_rows = h if not hasattr(self, 'max_rows_adj'): + if max_rows: + if (len(self.frame) > max_rows) and self.min_rows: + max_rows = min(self.min_rows, max_rows) self.max_rows_adj = max_rows if not hasattr(self, 'max_cols_adj'): self.max_cols_adj = max_cols From b8f483e082f6524fbd372aeb63c653765108a1e6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 2 Jul 2019 23:30:03 -0400 Subject: [PATCH 2/8] add tests --- pandas/io/formats/format.py | 3 +- pandas/tests/io/formats/test_format.py | 56 ++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index fb66dc3bc0352..b595ebbb771bd 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -184,7 +184,8 @@ def _chk_truncate(self): max_rows = self.max_rows if max_rows and min_rows is None: min_rows = max_rows - min_rows = min(min_rows, max_rows) + if min_rows and max_rows: + min_rows = min(min_rows, max_rows) truncate_v = max_rows and (len(self.series) > max_rows) series = self.series if truncate_v: diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 0eeb0e6eb2f2d..7098a382cad45 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -377,6 +377,34 @@ 
def mkframe(n): printing.pprint_thing(df._repr_fits_horizontal_()) assert has_expanded_repr(df) + def test_repr_min_rows(self): + df = pd.DataFrame({'a': range(20)}) + + # default setting no truncation even if above min_rows + assert '..' not in repr(df) + + df = pd.DataFrame({'a': range(61)}) + + # default of max_rows 60 triggers truncation if above + assert '..' in repr(df) + + with option_context('display.max_rows', 10, 'display.min_rows', 4): + # truncated after first two rows + assert '..' in repr(df) + assert '2 ' not in repr(df) + + with option_context('display.max_rows', 12, 'display.min_rows', None): + # when set to None, follow value of max_rows + assert '5 5' in repr(df) + + with option_context('display.max_rows', 10, 'display.min_rows', 12): + # when set value higher as max_rows, use the minimum + assert '5 5' not in repr(df) + + with option_context('display.max_rows', None, 'display.min_rows', 12): + # max_rows of None -> never truncate + assert '..' not in repr(df) + def test_str_max_colwidth(self): # GH 7856 df = pd.DataFrame([{'a': 'foo', @@ -2284,6 +2312,34 @@ def test_show_dimensions(self): "display.show_dimensions", False): assert 'Length' not in repr(s) + def test_repr_min_rows(self): + s = pd.Series(range(20)) + + # default setting no truncation even if above min_rows + assert '..' not in repr(s) + + s = pd.Series(range(61)) + + # default of max_rows 60 triggers truncation if above + assert '..' in repr(s) + + with option_context('display.max_rows', 10, 'display.min_rows', 4): + # truncated after first two rows + assert '..' 
in repr(s) + assert '2 ' not in repr(s) + + with option_context('display.max_rows', 12, 'display.min_rows', None): + # when set to None, follow value of max_rows + assert '5 5' in repr(s) + + with option_context('display.max_rows', 10, 'display.min_rows', 12): + # when set value higher as max_rows, use the minimum + assert '5 5' not in repr(s) + + with option_context('display.max_rows', None, 'display.min_rows', 12): + # max_rows of None -> never truncate + assert '..' not in repr(s) + def test_to_string_name(self): s = Series(range(100), dtype='int64') s.name = 'myser' From 98e7d437015d5a50bfd0a9d2e1add67201fc0679 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 2 Jul 2019 23:40:15 -0400 Subject: [PATCH 3/8] add some comments --- pandas/io/formats/format.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b595ebbb771bd..6f59240abca4c 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -182,10 +182,14 @@ def _chk_truncate(self): from pandas.core.reshape.concat import concat min_rows = self.min_rows max_rows = self.max_rows - if max_rows and min_rows is None: + # if min_rows is None, follow value of max_rows + if max_rows and not min_rows: min_rows = max_rows + # if both are set, min_rows is minimum of both if min_rows and max_rows: min_rows = min(min_rows, max_rows) + # truncation determined by max_rows, actual truncated number of rows + # used below by min_rows truncate_v = max_rows and (len(self.series) > max_rows) series = self.series if truncate_v: @@ -480,6 +484,7 @@ def _chk_truncate(self): if not hasattr(self, 'max_rows_adj'): if max_rows: if (len(self.frame) > max_rows) and self.min_rows: + # if truncated, set max_rows showed to min_rows max_rows = min(self.min_rows, max_rows) self.max_rows_adj = max_rows if not hasattr(self, 'max_cols_adj'): From e4b2144862d8ddd717dd361b368074c3ce2fcf97 Mon Sep 17 00:00:00 2001 From: Joris Van den 
Bossche Date: Wed, 3 Jul 2019 08:52:42 -0400 Subject: [PATCH 4/8] simplify --- pandas/io/formats/format.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6f59240abca4c..c2e1f88d5843a 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -182,22 +182,20 @@ def _chk_truncate(self): from pandas.core.reshape.concat import concat min_rows = self.min_rows max_rows = self.max_rows - # if min_rows is None, follow value of max_rows - if max_rows and not min_rows: - min_rows = max_rows - # if both are set, min_rows is minimum of both - if min_rows and max_rows: - min_rows = min(min_rows, max_rows) # truncation determined by max_rows, actual truncated number of rows # used below by min_rows truncate_v = max_rows and (len(self.series) > max_rows) series = self.series if truncate_v: - if min_rows == 1: - row_num = min_rows - series = series.iloc[:min_rows] + if min_rows: + # if min_rows is set (not None or 0), set max_rows to minimum + # of both + max_rows = min(min_rows, max_rows) + if max_rows == 1: + row_num = max_rows + series = series.iloc[:max_rows] else: - row_num = min_rows // 2 + row_num = max_rows // 2 series = concat((series.iloc[:row_num], series.iloc[-row_num:])) self.tr_row_num = row_num From 8060faf8bce86900185cf91c2c5db683d1656ab5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 3 Jul 2019 09:29:55 -0400 Subject: [PATCH 5/8] add whatsnew + docs --- doc/source/user_guide/options.rst | 22 +++++++++++++++++++++- doc/source/whatsnew/v0.25.0.rst | 24 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index 1f296c0d6c088..f32a8adfd4d33 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -157,6 +157,22 @@ lines are replaced by an ellipsis. 
df pd.reset_option('max_rows') +Once the ``display.max_rows`` is exceeded, the ``display.min_rows`` option +determines how many rows are shown in the truncated repr. + +.. ipython:: python + + pd.set_option('max_rows', 8) + pd.set_option('min_rows', 4) + # below max_rows -> all rows shown + df = pd.DataFrame(np.random.randn(7, 2)) + df + # above max_rows -> only min_rows (4) rows shown + df = pd.DataFrame(np.random.randn(9, 2)) + df + pd.reset_option('max_rows') + pd.reset_option('min_rows') + ``display.expand_frame_repr`` allows for the representation of dataframes to stretch across pages, wrapped over the full column vs row-wise. @@ -352,8 +368,12 @@ display.max_rows 60 This sets the maximum numbe out various output. For example, this value determines whether the repr() for a dataframe prints out - fully or just a summary repr. + fully or just a truncated or summary repr. 'None' value means unlimited. +display.min_rows 10 The number of rows to show in a truncated + repr (when `max_rows` is exceeded). Ignored + when `max_rows` is set to None or 0. When set + to None, follows the value of `max_rows`. display.max_seq_items 100 when pretty-printing a long sequence, no more then `max_seq_items` will be printed. If items are omitted, diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e9d23cfd8efc1..01aa9f6468277 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -134,6 +134,30 @@ than :attr:`options.display.max_seq_items` (default: 100 items). Horizontally, the output will truncate, if it's wider than :attr:`options.display.width` (default: 80 characters). +.. 
_whatsnew_0250.enhancements.shorter_truncated_repr: + +Shorter truncated repr for Series and DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Currently, the default display options of pandas ensure that when a Series +or DataFrame has more than 60 rows, its repr gets truncated to this maximum +of 60 rows (the ``display.max_rows`` option). However, this still gives +a repr that takes up a large part of the vertical screen estate. Therefore, +a new option ``display.min_rows`` is introduced with a default of 10 which +determines the number of rows showed in the truncated repr: + +- For small Series or DataFrames, up to ``max_rows` number of rows is shown + (default: 60). +- For larger Series of DataFrame with a length above `max_rows``, only + ``min_rows`` number of rows is shown (default: 10, i.e. the first and last + 5 rows). + +This dual option allows to still see the full content of relatively small +objects (e.g. ``df.head(20)`` shows all 20 rows), while giving a brief repr +for large objects. + +To restore the previous behaviour of a single threshold, set +``pd.options.display.min_rows = None``. .. _whatsnew_0250.enhancements.other: From 41c8543cf97b5530c9a0018ed6a6d01d5dc16015 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 3 Jul 2019 16:03:03 -0400 Subject: [PATCH 6/8] doc fix --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 01aa9f6468277..3c04dc30e5b5f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -148,7 +148,7 @@ determines the number of rows showed in the truncated repr: - For small Series or DataFrames, up to ``max_rows` number of rows is shown (default: 60). -- For larger Series of DataFrame with a length above `max_rows``, only +- For larger Series of DataFrame with a length above ``max_rows``, only ``min_rows`` number of rows is shown (default: 10, i.e. 
the first and last 5 rows). From 577f0784369cc721332d6a147a7714f05b66682a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 3 Jul 2019 16:08:42 -0400 Subject: [PATCH 7/8] add to Formatter / to_string docstring --- pandas/core/frame.py | 2 +- pandas/core/series.py | 5 ++++- pandas/io/formats/format.py | 13 ++++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b358b0b913beb..9294f1489a0e5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -635,7 +635,7 @@ def _repr_html_(self): def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, - min_rows=None, max_rows=None, max_cols=None, + max_rows=None, min_rows=None, max_cols=None, show_dimensions=False, decimal='.', line_width=None): """ Render a DataFrame to a console-friendly tabular output. diff --git a/pandas/core/series.py b/pandas/core/series.py index 808941427464c..9a21fc86147f0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1499,7 +1499,7 @@ def __repr__(self): def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, index=True, length=False, dtype=False, name=False, - min_rows=None, max_rows=None): + max_rows=None, min_rows=None): """ Render a string representation of the Series. @@ -1525,6 +1525,9 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, max_rows : int, optional Maximum number of rows to show before truncating. If None, show all. + min_rows : int, optional + The number of rows to display in a truncated repr (when number + of rows is above `max_rows`). Returns ------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index c2e1f88d5843a..98c31fbeb78e6 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -79,6 +79,9 @@ * unset. 
max_rows : int, optional Maximum number of rows to display in the console. + min_rows : int, optional + The number of rows to display in the console in a truncated repr + (when number of rows is above `max_rows`). max_cols : int, optional Maximum number of columns to display in the console. show_dimensions : bool, default False @@ -159,7 +162,7 @@ class SeriesFormatter: def __init__(self, series, buf=None, length=True, header=True, index=True, na_rep='NaN', name=False, float_format=None, dtype=True, - min_rows=None, max_rows=None): + max_rows=None, min_rows=None): self.series = series self.buf = buf if buf is not None else StringIO() self.name = name @@ -167,8 +170,8 @@ def __init__(self, series, buf=None, length=True, header=True, index=True, self.header = header self.length = length self.index = index - self.min_rows = min_rows self.max_rows = max_rows + self.min_rows = min_rows if float_format is None: float_format = get_option("display.float_format") @@ -398,8 +401,8 @@ class DataFrameFormatter(TableFormatter): def __init__(self, frame, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, justify=None, float_format=None, sparsify=None, - index_names=True, line_width=None, min_rows=None, - max_rows=None, max_cols=None, show_dimensions=False, + index_names=True, line_width=None, max_rows=None, + min_rows=None, max_cols=None, show_dimensions=False, decimal='.', table_id=None, render_links=False, **kwds): self.frame = frame if buf is not None: @@ -421,8 +424,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.header = header self.index = index self.line_width = line_width - self.min_rows = min_rows self.max_rows = max_rows + self.min_rows = min_rows self.max_cols = max_cols self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame)) From 577c5cf6daaa47cec526fab78d7a61fadb4c13ee Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 Jul 2019 15:44:25 -0500 Subject: [PATCH 8/8] 
fixup --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 3c04dc30e5b5f..a83cabf82b424 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -146,7 +146,7 @@ a repr that takes up a large part of the vertical screen estate. Therefore, a new option ``display.min_rows`` is introduced with a default of 10 which determines the number of rows showed in the truncated repr: -- For small Series or DataFrames, up to ``max_rows` number of rows is shown +- For small Series or DataFrames, up to ``max_rows`` number of rows is shown (default: 60). - For larger Series of DataFrame with a length above ``max_rows``, only ``min_rows`` number of rows is shown (default: 10, i.e. the first and last