From 76a07381003336453cd5eb884f5acfa998578ef1 Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Mon, 30 May 2016 17:49:59 +0100 Subject: [PATCH] Fix maybe_convert_numeric for unhashable objects --- doc/source/whatsnew/v0.18.2.txt | 1 + pandas/src/inference.pyx | 2 +- pandas/tests/test_infer_and_convert.py | 6 ++++++ pandas/tools/tests/test_util.py | 12 ++++++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 2b67aca1dcf74..a552b67288b57 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -317,6 +317,7 @@ Bug Fixes - Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`) +- Bug in ``pd.to_numeric`` when ``errors='coerce'`` and input contains non-hashable objects (:issue:`13324`) - Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index e2c59a34bdf21..d4e149eb09b65 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -569,7 +569,7 @@ def maybe_convert_numeric(object[:] values, set na_values, for i in range(n): val = values[i] - if val in na_values: + if val.__hash__ is not None and val in na_values: floats[i] = complexes[i] = nan seen_float = True elif util.is_float_object(val): diff --git a/pandas/tests/test_infer_and_convert.py b/pandas/tests/test_infer_and_convert.py index 06e2a82e07dee..075e31034b261 100644 --- a/pandas/tests/test_infer_and_convert.py +++ b/pandas/tests/test_infer_and_convert.py @@ -102,6 +102,12 @@ def test_scientific_no_exponent(self): result = lib.maybe_convert_numeric(arr, set(), False, True) self.assertTrue(np.all(np.isnan(result))) + def test_convert_non_hashable(self): + # Test for Bug #13324 + arr = np.array([[10.0, 2], 1.0, 'apple']) + result = lib.maybe_convert_numeric(arr, set(), False, True) + tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) + class TestTypeInference(tm.TestCase): _multiprocess_can_split_ = True diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 4e704554f982f..c592b33bdab9a 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -279,6 +279,18 @@ def test_period(self): # res = pd.to_numeric(pd.Series(idx, name='xxx')) # tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) + def test_non_hashable(self): + # Test for Bug #13324 + s = pd.Series([[10.0, 2], 1.0, 'apple']) + res = pd.to_numeric(s, errors='coerce') + tm.assert_series_equal(res, pd.Series([np.nan, 1.0, np.nan])) + + res = pd.to_numeric(s, errors='ignore') + tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) + + with self.assertRaisesRegexp(TypeError, "Invalid object type"): + pd.to_numeric(s) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],