Skip to content

Commit a985129

Browse files
committed
GH14671 - ERR: Raise ValueError if a usecols column doesn't exist, even when usecols and the header have the same length
- Updated tests - Updated whatsnew 0.19.2 note - Added new attribute file_header on TextReader (read by CParserWrapper via self._reader) that holds a copy of the original header read from the file, so usecols can be compared against it
1 parent c045e1d commit a985129

File tree

4 files changed

+20
-7
lines changed

4 files changed

+20
-7
lines changed

doc/source/whatsnew/v0.19.2.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Bug Fixes
3232

3333

3434

35+
- Bug in ``pd.read_csv`` where a column listed in ``usecols`` but missing from the file was not caught when ``usecols`` and the header have the same length (:issue:`14671`)
3536

3637

3738

pandas/io/parsers.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,12 +1425,14 @@ def __init__(self, src, **kwds):
14251425
self.orig_names = self.names[:]
14261426

14271427
if self.usecols:
1428-
if len(self.names) > len(self.usecols):
1429-
self.names = [n for i, n in enumerate(self.names)
1430-
if (i in self.usecols or n in self.usecols)]
1431-
1432-
if len(self.names) < len(self.usecols):
1433-
raise ValueError("Usecols do not match names.")
1428+
if self._reader.file_header is not None:
1429+
h = self._reader.file_header[0]
1430+
usecol_len = len(set(self.usecols) - set(h))
1431+
usecoli_len = len(set(self.usecols) - set(range(0, len(h))))
1432+
if usecol_len > 0 and usecoli_len > 0:
1433+
raise ValueError("Usecols do not match names.")
1434+
1435+
self.names = self._filter_usecols(self.names)
14341436

14351437
self._set_noconvert_columns()
14361438

pandas/io/tests/parser/usecols.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ def test_usecols(self):
5454
expected.columns = ['foo', 'bar']
5555
tm.assert_frame_equal(result, expected)
5656

57+
# same length but usecols column doesn't exist - see gh-14671
58+
self.assertRaises(ValueError, self.read_csv, StringIO(data),
59+
usecols=['a', 'b', 'z'])
60+
5761
data = """\
5862
1,2,3
5963
4,5,6

pandas/parser.pyx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ cdef class TextReader:
290290
object na_values
291291
object memory_map
292292
object as_recarray
293-
object header, orig_header, names, header_start, header_end
293+
object header, orig_header, names, header_start, header_end, file_header
294294
object index_col
295295
object low_memory
296296
object skiprows
@@ -775,6 +775,12 @@ cdef class TextReader:
775775
data_line = hr + 1
776776
header.append(this_header)
777777

778+
self.file_header = header[:]
779+
780+
#if self.usecols is not None:
781+
# if len(set(self.usecols) - set(header[0])) > 0 and len(set(self.usecols) - set(range(0,field_count))) > 0:
782+
# raise ValueError("Usecols do not match names.")
783+
778784
if self.names is not None:
779785
header = [ self.names ]
780786

0 commit comments

Comments
 (0)