From b696a9221adb9db583406cbc2c524fd615d746c0 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 10 Jun 2017 10:01:05 -0500 Subject: [PATCH] DOC: improve some Excel docstring --- doc/source/io.rst | 10 ++++++++++ pandas/io/excel.py | 11 +++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index bd81b478b5326..7ea476514e88d 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -712,6 +712,16 @@ index column inference and discard the last column, pass ``index_col=False``: pd.read_csv(StringIO(data)) pd.read_csv(StringIO(data), index_col=False) +If a subset of data is being parsed using the ``usecols`` option, the +``index_col`` specification is based on that subset, not the original data. + +.. ipython:: python + + data = 'a,b,c\n4,apple,bat,\n8,orange,cow,' + print(data) + pd.read_csv(StringIO(data), usecols=['b', 'c']) + pd.read_csv(StringIO(data), usecols=['b', 'c'], index_col=0) + .. _io.parse_dates: Date Handling diff --git a/pandas/io/excel.py b/pandas/io/excel.py index a4d2fabf76a41..e3c9ae3f164cb 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -85,7 +85,9 @@ index_col : int, list of ints, default None Column (0-indexed) to use as the row labels of the DataFrame. Pass None if there is no such column. If a list is passed, - those columns will be combined into a ``MultiIndex`` + those columns will be combined into a ``MultiIndex``. If a + subset of data is selected with ``parse_cols``, ``index_col`` + is based on the subset. names : array-like, default None List of column names to use. If file contains no header row, then you should explicitly pass header=None @@ -96,7 +98,7 @@ content. dtype : Type name or dict of column -> type, default None Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} - Use `str` or `object` to preserve and not interpret dtype. + Use `object` to preserve data as stored in Excel and not interpret dtype. 
If converters are specified, they will be applied INSTEAD of dtype conversion. @@ -116,8 +118,9 @@ * If None then parse all columns, * If int then indicates last column to be parsed * If list of ints then indicates list of column numbers to be parsed - * If string then indicates comma separated list of column names and - column ranges (e.g. "A:E" or "A,C,E:F") + * If string then indicates comma-separated list of Excel column letters and + column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of + both endpoints. squeeze : boolean, default False If the parsed data only contains one column then return a Series na_values : scalar, str, list-like, or dict, default None