From e76a6d79fbdad1288782582b01afdc9627b26f9d Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Wed, 7 Mar 2018 11:56:05 +0100 Subject: [PATCH 1/8] Fixing two bugs in the to_latex conversion. Did some refactoring as well --- pandas/core/groupby.py | 1 + pandas/io/formats/format.py | 173 +++++++++++++---------- pandas/tests/io/formats/test_to_latex.py | 35 +++++ 3 files changed, 137 insertions(+), 72 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 00643614e8803..f9375e98ac782 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3305,6 +3305,7 @@ def apply(self, func, *args, **kwargs): klass='Series', versionadded='')) def aggregate(self, func_or_funcs, *args, **kwargs): + _level = kwargs.pop('_level', None) if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 621641747f376..56c491e67ad8b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -48,6 +48,7 @@ import csv from functools import partial +import pdb common_docstring = """ Parameters @@ -870,6 +871,16 @@ class LatexFormatter(TableFormatter): HTMLFormatter """ + ESCAPE_MAPPING = {'_': '\\_', + '%': '\\%', + '$': '\\$', + '#': '\\#', + '{': '\\{', + '}': '\\}', + '~': '\\textasciitilde', + '^': '\\textasciicircum', + '&': '\\&'} + def __init__(self, formatter, column_format=None, longtable=False, multicolumn=False, multicolumn_format=None, multirow=False): self.fmt = formatter @@ -881,12 +892,10 @@ def __init__(self, formatter, column_format=None, longtable=False, self.multicolumn_format = multicolumn_format self.multirow = multirow - def write_result(self, buf): + def _build_str_cols(self): """ - Render a DataFrame to a LaTeX tabular/longtable environment output. + Builds the string representation of the columns """ - - # string representation of the columns if len(self.frame.columns) == 0 or len(self.frame.index) == 0: info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') .format(name=type(self.frame).__name__, @@ -896,44 +905,18 @@ def write_result(self, buf): else: strcols = self.fmt._to_str_columns() + # reestablish the MultiIndex that has been joined by _to_str_column + if self.fmt.index and isinstance(self.frame.index, MultiIndex): + strcols = self._rebuild_multi_index(strcols) + return strcols + + def _build_col_format(self): def get_col_type(dtype): if issubclass(dtype.type, np.number): return 'r' else: return 'l' - # reestablish the MultiIndex that has been joined by _to_str_column - if self.fmt.index and isinstance(self.frame.index, MultiIndex): - clevels = self.frame.columns.nlevels - strcols.pop(0) - name = any(self.frame.index.names) - cname = any(self.frame.columns.names) - lastcol = self.frame.index.nlevels - 1 - previous_lev3 = None - for i, lev in enumerate(self.frame.index.levels): - lev2 = lev.format() - blank = ' ' * len(lev2[0]) - # display column names in last index-column - if cname and i == lastcol: - lev3 = [x if x else '{}' for x in self.frame.columns.names] - else: - lev3 = [blank] * clevels - if name: - lev3.append(lev.name) - current_idx_val = None - for level_idx in self.frame.index.labels[i]: - if ((previous_lev3 is None or - previous_lev3[len(lev3)].isspace()) and - lev2[level_idx] == current_idx_val): - # same index as above row and left index was the same - lev3.append(blank) - else: - # different value than above or left index different - lev3.append(lev2[level_idx]) - current_idx_val = lev2[level_idx] - strcols.insert(i, lev3) - previous_lev3 = lev3 - column_format = self.column_format if column_format is None: dtypes = self.frame.dtypes._values @@ -945,19 +928,22 @@ def get_col_type(dtype): compat.string_types): # pragma: no cover raise AssertionError('column_format must be str or unicode, ' 'not {typ}'.format(typ=type(column_format))) + return column_format - if not self.longtable: - buf.write('\\begin{{tabular}}{{{fmt}}}\n' - .format(fmt=column_format)) - buf.write('\\toprule\n') - else: - buf.write('\\begin{{longtable}}{{{fmt}}}\n' - .format(fmt=column_format)) - buf.write('\\toprule\n') + def write_result(self, buf): + """ + Render a DataFrame to a LaTeX tabular/longtable environment output. + """ - ilevels = self.frame.index.nlevels - clevels = self.frame.columns.nlevels - nlevels = clevels + strcols = self._build_str_cols() + column_format = self._build_col_format() + table_type = 'longtable' if self.longtable else 'tabular' + + buf.write('\\begin{{{typ}}}{{{fmt}}}\n' + .format(fmt=column_format, typ=table_type)) + buf.write('\\toprule\n') + + nlevels = self.frame.columns.nlevels if any(self.frame.index.names): nlevels += 1 strrows = list(zip(*strcols)) @@ -975,28 +961,8 @@ def get_col_type(dtype): buf.write('\\endfoot\n\n') buf.write('\\bottomrule\n') buf.write('\\endlastfoot\n') - if self.fmt.kwds.get('escape', True): - # escape backslashes first - crow = [(x.replace('\\', '\\textbackslash').replace('_', '\\_') - .replace('%', '\\%').replace('$', '\\$') - .replace('#', '\\#').replace('{', '\\{') - .replace('}', '\\}').replace('~', '\\textasciitilde') - .replace('^', '\\textasciicircum').replace('&', '\\&') - if (x and x != '{}') else '{}') for x in row] - else: - crow = [x if x else '{}' for x in row] - if self.bold_rows and self.fmt.index: - # bold row labels - crow = ['\\textbf{{{x}}}'.format(x=x) - if j < ilevels and x.strip() not in ['', '{}'] else x - for j, x in enumerate(crow)] - if i < clevels and self.fmt.header and self.multicolumn: - # sum up columns to multicolumns - crow = self._format_multicolumn(crow, ilevels) - if (i >= nlevels and self.fmt.index and self.multirow and - ilevels > 1): - # sum up rows to multirows - crow = self._format_multirow(crow, ilevels, i, strrows) + + crow = self._build_row(i, row, strrows) buf.write(' & '.join(crow)) buf.write(' \\\\\n') if self.multirow and i < len(strrows) - 1: @@ -1004,9 +970,41 @@ def get_col_type(dtype): if not self.longtable: buf.write('\\bottomrule\n') - buf.write('\\end{tabular}\n') - else: - buf.write('\\end{longtable}\n') + buf.write('\\end{{{typ}}}\n'.format(typ=table_type)) + + def _build_row(self, i, row, strrows): + crow = self._escape_row(row) + ilevels = self.frame.index.nlevels + clevels = nlevels = self.frame.columns.nlevels + if any(self.frame.index.names): + nlevels += 1 + if self.bold_rows and self.fmt.index: + # bold row labels + crow = ['\\textbf{{{x}}}'.format(x=x) + if j < ilevels and x.strip() not in ['', '{}'] else x + for j, x in enumerate(crow)] + if i < clevels and self.fmt.header and self.multicolumn: + # sum up columns to multicolumns + crow = self._format_multicolumn(crow, ilevels) + if (i >= nlevels and self.fmt.index and self.multirow and + ilevels > 1): + # sum up rows to multirows + crow = self._format_multirow(crow, ilevels, i, strrows) + return crow + + def _escape_row(self, row): + print(row) + def null_replace(x): + if not x or x == '{}': + return '{}' + return x + + def escape_item(x): + x = x.replace('\\', '\\textbackslash') + for k, v in LatexFormatter.ESCAPE_MAPPING.items(): + x = x.replace(k, v) + return x + return [escape_item(null_replace(x)) if self.fmt.kwds.get('escape', True) and x and x != '{}' else null_replace(x) for x in row] def _format_multicolumn(self, row, ilevels): r""" @@ -1083,6 +1081,37 @@ def _print_cline(self, buf, i, icol): # remove entries that have been written to buffer self.clinebuf = [x for x in self.clinebuf if x[0] != i] + def _rebuild_multi_index(self, strcols): + strcols.pop(0) + previous_lev3 = None + for i, lev in enumerate(self.frame.index.levels): + lev2 = lev.format() + blank = ' ' * len(lev2[0]) + # display column names in last index-column + if any(self.frame.columns.names) and i == (self.frame.index.nlevels - 1): + lev3 = [x if x else '{}' for x in self.frame.columns.names] + else: + lev3 = [blank] * self.frame.columns.nlevels + if any(map(lambda x: False if x is None else True, self.frame.index.names)): + if lev.name: + lev3.append(u'{name}'.format(name=lev.name)) + else: + lev3.append(lev.name) + current_idx_val = None + for level_idx in self.frame.index.labels[i]: + if ((previous_lev3 is None or + previous_lev3[len(lev3)].isspace()) and + lev2[level_idx] == current_idx_val): + # same index as above row and left index was the same + lev3.append(blank) + else: + # different value than above or left index different + lev3.append(lev2[level_idx]) + current_idx_val = lev2[level_idx] + strcols.insert(i, lev3) + previous_lev3 = lev3 + return strcols + class HTMLFormatter(TableFormatter): diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 5ebf196be094e..5f1ee008e6203 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -621,3 +621,38 @@ def test_to_latex_multiindex_names(self, name0, name1, axes): \end{tabular} """ % tuple(list(col_names) + [idx_names_row]) assert observed == expected + + def test_to_latex_multiindex_non_string(self): + # GH 19981 + df = pd.DataFrame([[1, 2, 3]]*2).set_index([0, 1]) + observed = df.to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & 2 \\ +{} & 1 & \\ +\midrule +1 & 2 & 3 \\ + & & 3 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_missing_rows(self): + # GH 18669 + mi = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=['', None]) + df = pd.DataFrame(-1, index=mi, columns=range(4)) + observed = df.to_latex() + expected = r"""\begin{tabular}{llrrrr} +\toprule + & & 0 & 1 & 2 & 3 \\ +\midrule +{} & {} & & & & \\ +1 & 3 & -1 & -1 & -1 & -1 \\ + & 4 & -1 & -1 & -1 & -1 \\ +2 & 3 & -1 & -1 & -1 & -1 \\ + & 4 & -1 & -1 & -1 & -1 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected \ No newline at end of file From c20ec4c30d05c4560193e4c2c8ed235dcc8a9adb Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Wed, 7 Mar 2018 11:59:52 +0100 Subject: [PATCH 2/8] Removing pdb --- pandas/core/groupby.py | 1 - pandas/io/formats/format.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 73f31af4e37e0..4a09d636ee320 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3416,7 +3416,6 @@ def apply(self, func, *args, **kwargs): klass='Series', versionadded='')) def aggregate(self, func_or_funcs, *args, **kwargs): - _level = kwargs.pop('_level', None) if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 134128f5aba3b..efe5f4b2bcbaf 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -48,7 +48,6 @@ import csv from functools import partial -import pdb common_docstring = """ Parameters From eeedf08d7011739ca1c2c2e9e628076be1594a12 Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Wed, 7 Mar 2018 12:04:13 +0100 Subject: [PATCH 3/8] Fixing flake8 issues --- pandas/io/formats/format.py | 17 ++++++++++------- pandas/tests/io/formats/test_to_latex.py | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index efe5f4b2bcbaf..f578b89d737be 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -986,13 +986,12 @@ def _build_row(self, i, row, strrows): # sum up columns to multicolumns crow = self._format_multicolumn(crow, ilevels) if (i >= nlevels and self.fmt.index and self.multirow and - ilevels > 1): + ilevels > 1): # sum up rows to multirows crow = self._format_multirow(crow, ilevels, i, strrows) return crow def _escape_row(self, row): - print(row) def null_replace(x): if not x or x == '{}': return '{}' @@ -1003,7 +1002,9 @@ def escape_item(x): for k, v in LatexFormatter.ESCAPE_MAPPING.items(): x = x.replace(k, v) return x - return [escape_item(null_replace(x)) if self.fmt.kwds.get('escape', True) and x and x != '{}' else null_replace(x) for x in row] + return [escape_item(null_replace(x)) + if self.fmt.kwds.get('escape', True) + and x and x != '{}' else null_replace(x) for x in row] def _format_multicolumn(self, row, ilevels): r""" @@ -1087,11 +1088,13 @@ def _rebuild_multi_index(self, strcols): lev2 = lev.format() blank = ' ' * len(lev2[0]) # display column names in last index-column - if any(self.frame.columns.names) and i == (self.frame.index.nlevels - 1): + if any(self.frame.columns.names) \ + and i == (self.frame.index.nlevels - 1): lev3 = [x if x else '{}' for x in self.frame.columns.names] else: lev3 = [blank] * self.frame.columns.nlevels - if any(map(lambda x: False if x is None else True, self.frame.index.names)): + if any(map(lambda x: False if x is None else True, + self.frame.index.names)): if lev.name: lev3.append(u'{name}'.format(name=lev.name)) else: @@ -1099,8 +1102,8 @@ def _rebuild_multi_index(self, strcols): current_idx_val = None for level_idx in self.frame.index.labels[i]: if ((previous_lev3 is None or - previous_lev3[len(lev3)].isspace()) and - lev2[level_idx] == current_idx_val): + previous_lev3[len(lev3)].isspace()) + and lev2[level_idx] == current_idx_val): # same index as above row and left index was the same lev3.append(blank) else: diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 5f1ee008e6203..0ed519ba84805 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -624,7 +624,7 @@ def test_to_latex_multiindex_names(self, name0, name1, axes): def test_to_latex_multiindex_non_string(self): # GH 19981 - df = pd.DataFrame([[1, 2, 3]]*2).set_index([0, 1]) + df = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1]) observed = df.to_latex() expected = r"""\begin{tabular}{llr} \toprule @@ -655,4 +655,4 @@ def test_to_latex_missing_rows(self): \bottomrule \end{tabular} """ - assert observed == expected \ No newline at end of file + assert observed == expected From b771ab38f7d2e3d712de2b235039492eeb60b9bc Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Wed, 14 Mar 2018 18:37:34 +0100 Subject: [PATCH 4/8] Adding comments and fixing tests --- pandas/io/formats/format.py | 2 +- pandas/io/formats/latex.py | 33 ++++++++++++++++++++++-- pandas/tests/io/formats/test_to_latex.py | 2 +- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index c173a120617bc..1731dbb3ac68d 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1598,4 +1598,4 @@ def buffer_put_lines(buf, lines): """ if any(isinstance(x, compat.text_type) for x in lines): lines = [compat.text_type(x) for x in lines] - buf.write('\n'.join(lines)) \ No newline at end of file + buf.write('\n'.join(lines)) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 25bb7c71f6a4f..9ed40dc5b3ae9 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -90,6 +90,10 @@ def get_col_type(dtype): def write_result(self, buf): """ Render a DataFrame to a LaTeX tabular/longtable environment output. + + Parameters + ---------- + buf : The buffer to write output to """ strcols = self._build_str_cols() @@ -130,6 +134,15 @@ def write_result(self, buf): buf.write('\\end{{{typ}}}\n'.format(typ=table_type)) def _build_row(self, i, row, strrows): + """ + Build and style a row in preparation for LaTeX output + + Parameters + ---------- + i : the current row counter + row : the row itself + strrows : all rows + """ crow = self._escape_row(row) ilevels = self.frame.index.nlevels clevels = nlevels = self.frame.columns.nlevels @@ -150,16 +163,25 @@ def _build_row(self, i, row, strrows): return crow def _escape_row(self, row): + """ + Escape elements based LaTeX-specific escape bindings as defined + in ESCAPE_MAPPING. + + Parameters + ---------- + row : the row with elements to be escaped + """ def null_replace(x): - if not x or x == '{}': + if (not x and x != 0) or x == '{}': return '{}' - return x + return '{x}'.format(x=x) def escape_item(x): x = x.replace('\\', '\\textbackslash') for k, v in LatexFormatter.ESCAPE_MAPPING.items(): x = x.replace(k, v) return x + return [escape_item(null_replace(x)) if self.fmt.kwds.get('escape', True) and x and x != '{}' else null_replace(x) for x in row] @@ -238,6 +260,13 @@ def _print_cline(self, buf, i, icol): self.clinebuf = [x for x in self.clinebuf if x[0] != i] def _rebuild_multi_index(self, strcols): + """ + Reestablish the MultiIndex that has been joined by _to_str_column + + Parameters + ---------- + strcols : all columns in string format + """ strcols.pop(0) previous_lev3 = None for i, lev in enumerate(self.frame.index.levels): diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 0ed519ba84805..3ae24cfe46e6e 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -629,7 +629,7 @@ def test_to_latex_multiindex_non_string(self): expected = r"""\begin{tabular}{llr} \toprule & & 2 \\ -{} & 1 & \\ +0 & 1 & \\ \midrule 1 & 2 & 3 \\ & & 3 \\ From 832027196f7b48c683fd2100cc0377a70bad002f Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Wed, 14 Mar 2018 21:24:55 +0100 Subject: [PATCH 5/8] fixing unicode errors --- pandas/io/formats/latex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 9ed40dc5b3ae9..1763170a925d4 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -174,7 +174,7 @@ def _escape_row(self, row): def null_replace(x): if (not x and x != 0) or x == '{}': return '{}' - return '{x}'.format(x=x) + return u'{x}'.format(x=x) def escape_item(x): x = x.replace('\\', '\\textbackslash') From d392d73a5e26cefc06dda72c3fff849cd5f55e37 Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Wed, 14 Mar 2018 23:07:12 +0100 Subject: [PATCH 6/8] Refactoring get_level_lengths so that formats.format and formats.style are using the same function --- pandas/io/formats/excel.py | 14 +++---- pandas/io/formats/format.py | 56 ++++++++++++++----------- pandas/io/formats/html.py | 11 ++--- pandas/io/formats/style.py | 58 ++++---------------------- pandas/tests/io/formats/test_format.py | 17 ++++++++ pandas/tests/io/formats/test_style.py | 19 +-------- 6 files changed, 67 insertions(+), 108 deletions(-) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 76ffd41f93090..6e6cc7185850a 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -134,9 +134,9 @@ def build_alignment(self, props): def build_border(self, props): return {side: { 'style': self._border_style(props.get('border-{side}-style' - .format(side=side)), + .format(side=side)), props.get('border-{side}-width' - .format(side=side))), + .format(side=side))), 'color': self.color_to_excel( props.get('border-{side}-color'.format(side=side))), } for side in ['top', 'right', 'bottom', 'left']} @@ -408,9 +408,7 @@ def _format_header_mi(self): return columns = self.columns - level_strs = columns.format(sparsify=self.merge_cells, adjoin=False, - names=False) - level_lengths = get_level_lengths(level_strs) + level_lengths = get_level_lengths(columns, self.merge_cells) coloffset = 0 lnum = 0 @@ -436,6 +434,8 @@ def _format_header_mi(self): header_style) else: # Format in legacy format with dots to indicate levels. + level_strs = columns.format(sparsify=None, adjoin=False, + names=False) for i, values in enumerate(zip(*level_strs)): v = ".".join(map(pprint_thing, values)) yield ExcelCell(lnum, coloffset + i + 1, v, header_style) @@ -560,9 +560,7 @@ def _format_hierarchical_rows(self): if self.merge_cells: # Format hierarchical rows as merged cells. - level_strs = self.df.index.format(sparsify=True, adjoin=False, - names=False) - level_lengths = get_level_lengths(level_strs) + level_lengths = get_level_lengths(self.df.index, True) for spans, levels, labels in zip(level_lengths, self.df.index.levels, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 1731dbb3ac68d..e0e41c9dfd08f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -6,6 +6,7 @@ from __future__ import print_function # pylint: disable=W0141 +from collections import defaultdict from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.common import ( @@ -603,8 +604,8 @@ def to_string(self): if len(frame.columns) == 0 or len(frame.index) == 0: info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') .format(name=type(self.frame).__name__, - col=pprint_thing(frame.columns), - idx=pprint_thing(frame.index))) + col=pprint_thing(frame.columns), + idx=pprint_thing(frame.index))) text = info_line else: @@ -1545,8 +1546,9 @@ def _binify(cols, line_width): return bins -def get_level_lengths(levels, sentinel=''): - """For each index in each level the function returns lengths of indexes. +def get_level_lengths(index, hidden_elements=None, sentinel=None): + """ + Given an index, find the level length for each element. Parameters ---------- @@ -1554,35 +1556,39 @@ def get_level_lengths(levels, sentinel=''): List of values on for level. sentinel : string, optional Value which states that no new index starts on there. + hidden_elements : list, optional + A list of index positions which should not be visible Returns ---------- - Returns list of maps. For each level returns map of indexes (key is index - in row and value is length of index). + Returns a list of dicts which represent the level lengths -- the key + is the index in a row and the value is length of the index). """ - if len(levels) == 0: - return [] - - control = [True for x in levels[0]] - - result = [] - for level in levels: - last_index = 0 + if sentinel is None: + sentinel = com.sentinel_factory() - lengths = {} - for i, key in enumerate(level): - if control[i] and key == sentinel: - pass - else: - control[i] = False - lengths[last_index] = i - last_index - last_index = i + if hidden_elements is None: + hidden_elements = [] - lengths[last_index] = len(level) - last_index + levels = index.format(sparsify=sentinel, adjoin=False, names=False) - result.append(lengths) + lengths = [] + if index.nlevels == 1: + levels = [levels] - return result + last_label = 0 + for i, level in enumerate(levels): + level_spans = defaultdict(int) + for j, key in enumerate(level): + if not get_option('display.multi_sparse'): + level_spans[j] = 1 + else: + if key != sentinel: + last_label = j + level_spans[last_label] += 1 if j not in hidden_elements else 0 + lengths.append({span[0]: span[1] + for span in level_spans.items() if span[1] > 0}) + return lengths def buffer_put_lines(buf, lines): diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index a43c55a220292..0582730c7d789 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -234,7 +234,7 @@ def _column_header(): sentinel = None levels = self.columns.format(sparsify=sentinel, adjoin=False, names=False) - level_lengths = get_level_lengths(levels, sentinel) + level_lengths = get_level_lengths(self.columns, sentinel=sentinel) inner_lvl = len(level_lengths) - 1 for lnum, (records, values) in enumerate(zip(level_lengths, levels)): @@ -397,12 +397,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): idx_values = lzip(*idx_values) if self.fmt.sparsify: - # GH3547 - sentinel = com.sentinel_factory() - levels = frame.index.format(sparsify=sentinel, adjoin=False, - names=False) - - level_lengths = get_level_lengths(levels, sentinel) + level_lengths = get_level_lengths(frame.index) inner_lvl = len(level_lengths) - 1 if truncate_v: # Insert ... row and adjust idx_values and @@ -471,7 +466,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): row.insert(row_levels - sparse_offset + self.fmt.tr_col_num, '...') self.write_tr(row, indent, self.indent_delta, tags=tags, - nindex_levels=len(levels) - sparse_offset) + nindex_levels=len(level_lengths) - sparse_offset) else: for i in range(len(frame)): idx_values = list(zip(*frame.index.format( diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f876ceb8a26bf..d8ba4c48c12be 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -8,6 +8,7 @@ from uuid import uuid1 import copy from collections import defaultdict, MutableMapping +from pandas.io.formats.format import get_level_lengths try: from jinja2 import ( @@ -198,8 +199,8 @@ def format_attr(pair): return "{key}={value}".format(**pair) # for sparsifying a MultiIndex - idx_lengths = _get_level_lengths(self.index) - col_lengths = _get_level_lengths(self.columns, hidden_columns) + idx_lengths = get_level_lengths(self.index) + col_lengths = get_level_lengths(self.columns, hidden_columns) cell_context = dict() @@ -249,7 +250,8 @@ def format_attr(pair): "class": " ".join(cs), "is_visible": _is_visible(c, r, col_lengths), } - colspan = col_lengths.get((r, c), 0) + colspan = col_lengths[r][c] if r < len( + col_lengths) and c in col_lengths[r] else 0 if colspan > 1: es["attributes"] = [ format_attr({"key": "colspan", "value": colspan}) @@ -292,7 +294,8 @@ def format_attr(pair): "id": "_".join(rid[1:]), "class": " ".join(rid) } - rowspan = idx_lengths.get((c, r), 0) + rowspan = idx_lengths[c][r] if c < len( + idx_lengths) and r in idx_lengths[c] else 0 if rowspan > 1: es["attributes"] = [ format_attr({"key": "rowspan", "value": rowspan}) @@ -1209,51 +1212,8 @@ def _is_visible(idx_row, idx_col, lengths): """ Index -> {(idx_row, idx_col): bool}) """ - return (idx_col, idx_row) in lengths - - -def _get_level_lengths(index, hidden_elements=None): - """ - Given an index, find the level length for each element. - Optional argument is a list of index positions which - should not be visible. - - Result is a dictionary of (level, inital_position): span - """ - sentinel = com.sentinel_factory() - levels = index.format(sparsify=sentinel, adjoin=False, names=False) - - if hidden_elements is None: - hidden_elements = [] - - lengths = {} - if index.nlevels == 1: - for i, value in enumerate(levels): - if(i not in hidden_elements): - lengths[(0, i)] = 1 - return lengths - - for i, lvl in enumerate(levels): - for j, row in enumerate(lvl): - if not get_option('display.multi_sparse'): - lengths[(i, j)] = 1 - elif (row != sentinel) and (j not in hidden_elements): - last_label = j - lengths[(i, last_label)] = 1 - elif (row != sentinel): - # even if its hidden, keep track of it in case - # length >1 and later elements are visible - last_label = j - lengths[(i, last_label)] = 0 - elif(j not in hidden_elements): - lengths[(i, last_label)] += 1 - - non_zero_lengths = {} - for element, length in lengths.items(): - if(length >= 1): - non_zero_lengths[element] = length - - return non_zero_lengths + return idx_col < len(lengths) and idx_row in lengths[idx_col] + # return (idx_col, idx_row) in lengths def _maybe_wrap_formatter(formatter): diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 6c3b75cdfa6df..5b526f53caf73 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2618,3 +2618,20 @@ def test_format_percentiles(): pytest.raises(ValueError, fmt.format_percentiles, [-0.001, 0.1, 0.5]) pytest.raises(ValueError, fmt.format_percentiles, [2, 0.1, 0.5]) pytest.raises(ValueError, fmt.format_percentiles, [0.1, 0.5, 'a']) + + +def test_get_level_lengths(): + index = pd.MultiIndex.from_product([['a', 'b'], [0, 1, 2]]) + expected = [{0: 3, 3: 3}, {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1}] + result = fmt.get_level_lengths(index) + assert result == expected + + +def test_get_level_lengths_un_sorted(): + index = pd.MultiIndex.from_arrays([ + [1, 1, 2, 1], + ['a', 'b', 'b', 'd'] + ]) + expected = [{0: 2, 2: 1, 3: 1}, {0: 1, 1: 1, 2: 1, 3: 1}] + result = fmt.get_level_lengths(index) + assert result == expected diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index c1ab9cd184340..8b78014b1249b 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -10,7 +10,7 @@ import pandas.util._test_decorators as td jinja2 = pytest.importorskip('jinja2') -from pandas.io.formats.style import Styler, _get_level_lengths # noqa +from pandas.io.formats.style import Styler class TestStyler(object): @@ -776,23 +776,6 @@ def f(x): with pytest.raises(ValueError): df.style._apply(f, axis=None) - def test_get_level_lengths(self): - index = pd.MultiIndex.from_product([['a', 'b'], [0, 1, 2]]) - expected = {(0, 0): 3, (0, 3): 3, (1, 0): 1, (1, 1): 1, (1, 2): 1, - (1, 3): 1, (1, 4): 1, (1, 5): 1} - result = _get_level_lengths(index) - tm.assert_dict_equal(result, expected) - - def test_get_level_lengths_un_sorted(self): - index = pd.MultiIndex.from_arrays([ - [1, 1, 2, 1], - ['a', 'b', 'b', 'd'] - ]) - expected = {(0, 0): 2, (0, 2): 1, (0, 3): 1, - (1, 0): 1, (1, 1): 1, (1, 2): 1, (1, 3): 1} - result = _get_level_lengths(index) - tm.assert_dict_equal(result, expected) - def test_mi_sparse(self): df = pd.DataFrame({'A': [1, 2]}, index=pd.MultiIndex.from_arrays([['a', 'a'], From e5546996bf4b8739bb55802f354112d6701e8069 Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Thu, 15 Mar 2018 00:25:26 +0100 Subject: [PATCH 7/8] fixing sentinel as named arg --- pandas/io/formats/excel.py | 4 ++-- pandas/io/formats/format.py | 2 +- pandas/io/formats/style.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 6e6cc7185850a..c8629cf790975 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -408,7 +408,7 @@ def _format_header_mi(self): return columns = self.columns - level_lengths = get_level_lengths(columns, self.merge_cells) + level_lengths = get_level_lengths(columns, sentinel=self.merge_cells) coloffset = 0 lnum = 0 @@ -560,7 +560,7 @@ def _format_hierarchical_rows(self): if self.merge_cells: # Format hierarchical rows as merged cells. - level_lengths = get_level_lengths(self.df.index, True) + level_lengths = get_level_lengths(self.df.index, sentinel=True) for spans, levels, labels in zip(level_lengths, self.df.index.levels, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index e0e41c9dfd08f..7d9793b2b0573 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1585,7 +1585,7 @@ def get_level_lengths(index, hidden_elements=None, sentinel=None): else: if key != sentinel: last_label = j - level_spans[last_label] += 1 if j not in hidden_elements else 0 + level_spans[last_label] += (1 if j not in hidden_elements else 0) lengths.append({span[0]: span[1] for span in level_spans.items() if span[1] > 0}) return lengths diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d8ba4c48c12be..a6e1ff2d57729 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -200,7 +200,7 @@ def format_attr(pair): # for sparsifying a MultiIndex idx_lengths = get_level_lengths(self.index) - col_lengths = get_level_lengths(self.columns, hidden_columns) + col_lengths = get_level_lengths(self.columns, hidden_elements=hidden_columns) cell_context = dict() From 32df2853cc29dcfbbe4d6fcfc1f69ca73c4f167e Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Thu, 15 Mar 2018 15:20:21 +0100 Subject: [PATCH 8/8] setting sentinel and fixing pep8 errors --- pandas/io/formats/excel.py | 8 ++++---- pandas/io/formats/format.py | 3 ++- pandas/io/formats/style.py | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index c8629cf790975..a389f364795d5 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -408,7 +408,7 @@ def _format_header_mi(self): return columns = self.columns - level_lengths = get_level_lengths(columns, sentinel=self.merge_cells) + level_lengths = get_level_lengths(columns, sentinel='') coloffset = 0 lnum = 0 @@ -434,8 +434,8 @@ def _format_header_mi(self): header_style) else: # Format in legacy format with dots to indicate levels. - level_strs = columns.format(sparsify=None, adjoin=False, - names=False) + level_strs = columns.format(sparsify=self.merge_cells, + adjoin=False, names=False) for i, values in enumerate(zip(*level_strs)): v = ".".join(map(pprint_thing, values)) yield ExcelCell(lnum, coloffset + i + 1, v, header_style) @@ -560,7 +560,7 @@ def _format_hierarchical_rows(self): if self.merge_cells: # Format hierarchical rows as merged cells. - level_lengths = get_level_lengths(self.df.index, sentinel=True) + level_lengths = get_level_lengths(self.df.index, sentinel='') for spans, levels, labels in zip(level_lengths, self.df.index.levels, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 7d9793b2b0573..8aea3383b4642 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1585,7 +1585,8 @@ def get_level_lengths(index, hidden_elements=None, sentinel=None): else: if key != sentinel: last_label = j - level_spans[last_label] += (1 if j not in hidden_elements else 0) + level_spans[last_label] += (1 if j not in hidden_elements + else 0) lengths.append({span[0]: span[1] for span in level_spans.items() if span[1] > 0}) return lengths diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index a6e1ff2d57729..a7bd4b4cf31b6 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -200,7 +200,8 @@ def format_attr(pair): # for sparsifying a MultiIndex idx_lengths = get_level_lengths(self.index) - col_lengths = get_level_lengths(self.columns, hidden_elements=hidden_columns) + col_lengths = get_level_lengths(self.columns, + hidden_elements=hidden_columns) cell_context = dict()