BUG: fix unsafe dtype changes in putmasking on series

jreback · jreback · commit b379772359a0 · 2013-04-09T10:14:46.000-04:00
allow boolean indexing of series w/o changing the dtype
     with a list of the rhs if we can preserver the dtype
     of the input, if we can't upcast, but can only do this
     in cases where we won't change the itemsize
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -760,12 +760,27 @@ def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
 
         def changeit():
 
-            # our type is wrong here, need to upcast
+            # try to directly set by expanding our array to full
+            # length of the boolean
+            om = other[mask]
+            om_at = om.astype(result.dtype)
+            if (om == om_at).all():
+                new_other = result.values.copy()
+                new_other[mask] = om_at
+                result[:] = new_other
+                return result, False
+
+            # we are forced to change the dtype of the result as the input isn't compatible
             r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
             np.putmask(r, mask, other)
 
             # we need to actually change the dtype here
             if change is not None:
+
+                # if we are trying to do something unsafe
+                # like put a bigger dtype in a smaller one, use the smaller one
+                if change.dtype.itemsize < r.dtype.itemsize:
+                    raise Exception("cannot change dtype of input to smaller size")
                 change.dtype = r.dtype
                 change[:] = r
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -739,7 +739,13 @@ def where(self, cond, other=nan, inplace=False):
         if isinstance(other, Series):
             other = other.reindex(ser.index)
         elif isinstance(other, (tuple,list)):
-            other = np.array(other)
+
+            # try to set the same dtype as ourselves
+            new_other = np.array(other,dtype=self.dtype)
+            if not (new_other == np.array(other)).all():
+                other = np.array(other)
+            else:
+                other = new_other
 
         if len(other) != len(ser):
             icond = ~cond
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -1127,26 +1127,44 @@ def test_where(self):
         self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), [0,2,3])
         self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), [])
 
-
-        s = Series(np.arange(10), dtype=np.int32)
-        mask = s < 5
-        s[mask] = range(5)
-        expected = Series(np.arange(10), dtype=np.int32)
-        assert_series_equal(s, expected)
-        self.assertEquals(s.dtype, expected.dtype)
+        # unsafe dtype changes
+        for dtype in [ np.int8, np.int16, np.int32, np.int64, np.float16, np.float32, np.float64 ]:
+            s = Series(np.arange(10), dtype=dtype)
+            mask = s < 5
+            s[mask] = range(2,7)
+            expected = Series(range(2,7) + range(5,10), dtype=dtype)
+            assert_series_equal(s, expected)
+            self.assertEquals(s.dtype, expected.dtype)
+
+        # these are allowed operations, but are upcasted
+        for dtype in [ np.int64, np.float64 ]:
+            s = Series(np.arange(10), dtype=dtype)
+            mask = s < 5
+            values = [2.5,3.5,4.5,5.5,6.5]
+            s[mask] = values 
+            expected = Series(values + range(5,10), dtype='float64')
+            assert_series_equal(s, expected)
+            self.assertEquals(s.dtype, expected.dtype)
+
+        # can't do these as we are forced to change the itemsize of the input to something we cannot
+        for dtype in [ np.int8, np.int16, np.int32, np.float16, np.float32 ]:
+            s = Series(np.arange(10), dtype=dtype)
+            mask = s < 5
+            values = [2.5,3.5,4.5,5.5,6.5]
+            self.assertRaises(Exception, s.__setitem__, tuple(mask), values)
 
         # GH3235
         s = Series(np.arange(10))
         mask = s < 5
-        s[mask] = range(5)
-        expected = Series(np.arange(10))
+        s[mask] = range(2,7)
+        expected = Series(range(2,7) + range(5,10))
         assert_series_equal(s, expected)
         self.assertEquals(s.dtype, expected.dtype)
 
         s = Series(np.arange(10))
         mask = s > 5
         s[mask] = [0]*4
-        expected = Series([0,1,2,3,4,5] + [0]*4,dtype='float64')
+        expected = Series([0,1,2,3,4,5] + [0]*4)
         assert_series_equal(s,expected)
 
         s = Series(np.arange(10))
@@ -3174,7 +3192,7 @@ def test_cast_on_putmask(self):
         # need to upcast
         s = Series([1,2],index=[1,2],dtype='int64')
         s[[True, False]] = Series([0],index=[1],dtype='int64')
-        expected = Series([0,2],index=[1,2],dtype='float64')
+        expected = Series([0,2],index=[1,2],dtype='int64')
 
         assert_series_equal(s, expected)