diff --git a/RELEASE.rst b/RELEASE.rst index fa19e8c9eb475..c672fb65ee96f 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -122,6 +122,8 @@ pandas 0.11.0 - Handle "ragged" CSV files missing trailing delimiters in rows with missing fields when also providing explicit list of column names (so the parser knows how many columns to expect in the result) (GH2981_) + - On a mixed DataFrame, allow setting with indexers with ndarray/DataFrame + on rhs (GH3216_) **API Changes** @@ -249,9 +251,11 @@ pandas 0.11.0 - Add comparison operators to Period object (GH2781_) - Fix bug when concatenating two Series into a DataFrame when they have the same name (GH2797_) - - fix automatic color cycling when plotting consecutive timeseries + - Fix automatic color cycling when plotting consecutive timeseries without color arguments (GH2816_) - fixed bug in the pickling of PeriodIndex (GH2891_) + - Upcast/split blocks when needed in a mixed DataFrame when setitem + with an indexer (GH3216_) .. _GH622: https://github.com/pydata/pandas/issues/622 .. _GH797: https://github.com/pydata/pandas/issues/797 @@ -340,6 +344,7 @@ pandas 0.11.0 .. _GH2751: https://github.com/pydata/pandas/issues/2751 .. _GH2747: https://github.com/pydata/pandas/issues/2747 .. _GH2816: https://github.com/pydata/pandas/issues/2816 +.. 
_GH3216: https://github.com/pydata/pandas/issues/3216 pandas 0.10.1 ============= diff --git a/pandas/core/common.py b/pandas/core/common.py index 28f3a19ab5298..7a78539c10a98 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -694,6 +694,11 @@ def _maybe_promote(dtype, fill_value=np.nan): if issubclass(fill_value.dtype.type, (np.datetime64,np.timedelta64)): fill_value = tslib.iNaT else: + + # we need to change to object type as our + # fill_value is of object type + if fill_value.dtype == np.object_: + dtype = np.dtype(np.object_) fill_value = np.nan # returns tuple of (dtype, fill_value) @@ -763,7 +768,7 @@ def changeit(): if change is not None: change.dtype = r.dtype change[:] = r - + return r, True # we want to decide whether putmask will work @@ -792,6 +797,34 @@ def changeit(): return result, False +def _maybe_upcast_indexer(result, indexer, other, dtype=None): + """ a safe version of setitem that (potentially upcasts the result + return the result and a changed flag + """ + + def changeit(): + # our type is wrong here, need to upcast + r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True) + try: + r[indexer] = other + except: + + # if we hit this then we still have an incompatible type + r[indexer] = fill_value + + return r, True + + new_dtype, fill_value = _maybe_promote(result.dtype,other) + if new_dtype != result.dtype: + return changeit() + + try: + result[indexer] = other + except: + return changeit() + + return result, False + def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): """ provide explicty type promotion and coercion diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 790fe87af364a..5230bf20e60b4 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -119,24 +119,54 @@ def _setitem_with_indexer(self, indexer, value): plane_indexer = indexer[:het_axis] + indexer[het_axis + 1:] item_labels = self.obj._get_axis(het_axis) - if isinstance(value, (np.ndarray, 
DataFrame)) and value.ndim > 1: - raise ValueError('Setting mixed-type DataFrames with ' - 'array/DataFrame pieces not yet supported') + def setter(item, v): + data = self.obj[item] + values = data.values + if np.prod(values.shape): + result, changed = com._maybe_upcast_indexer(values,plane_indexer,v,dtype=getattr(data,'dtype',None)) + if changed: + self.obj[item] = result - try: - for item in item_labels[het_idx]: - data = self.obj[item] - values = data.values - if np.prod(values.shape): - value = com._possibly_cast_to_datetime( - value, getattr(data, 'dtype', None)) - values[plane_indexer] = value - except ValueError: - for item, v in zip(item_labels[het_idx], value): - data = self.obj[item] - values = data.values - if np.prod(values.shape): - values[plane_indexer] = v + labels = item_labels[het_idx] + + if _is_list_like(value): + + # we have an equal len Frame + if isinstance(value, DataFrame) and value.ndim > 1: + + for item in labels: + + # align to + if item in value: + v = value[item] + v = v.reindex(self.obj[item].reindex(v.index).dropna().index) + setter(item, v.values) + else: + setter(item, np.nan) + + # we have an equal len ndarray + elif isinstance(value, np.ndarray) and value.ndim > 1: + if len(labels) != len(value): + raise ValueError('Must have equal len keys and value when' + ' setting with an ndarray') + + for i, item in enumerate(labels): + setter(item, value[:,i]) + + # we have an equal len list/ndarray + elif len(labels) == 1 and len(self.obj[labels[0]]) == len(value): + setter(labels[0], value) + + # per label values + else: + + for item, v in zip(labels, value): + setter(item, v) + else: + + # scalar + for item in labels: + setter(item, value) else: if isinstance(indexer, tuple): diff --git a/pandas/core/series.py b/pandas/core/series.py index acfc875fa45a1..4656054344ddb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2065,7 +2065,7 @@ def update(self, other): """ other = other.reindex_like(self) mask = notnull(other) - 
np.putmask(self.values, mask, other.values) + com._maybe_upcast_putmask(self.values,mask,other,change=self.values) #---------------------------------------------------------------------- # Reindexing, sorting diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ced4b23b7e4fa..586b6030ec2da 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1275,9 +1275,10 @@ def test_setitem_single_column_mixed_datetime(self): df.ix['d', :] = nan self.assert_(com.isnull(df.ix['c', :]).all() == False) + # as of GH 3216 this will now work! # try to set with a list like item - self.assertRaises( - Exception, df.ix.__setitem__, ('d', 'timestamp'), [nan]) + #self.assertRaises( + # Exception, df.ix.__setitem__, ('d', 'timestamp'), [nan]) def test_setitem_frame(self): piece = self.frame.ix[:2, ['A', 'B']] @@ -1285,10 +1286,50 @@ def test_setitem_frame(self): assert_almost_equal(self.frame.ix[-2:, ['A', 'B']].values, piece.values) + # GH 3216 + + # already aligned + f = self.mixed_frame.copy() + piece = DataFrame([[ 1, 2], [3, 4]], index=f.index[0:2],columns=['A', 'B']) + key = (slice(None,2), ['A', 'B']) + f.ix[key] = piece + assert_almost_equal(f.ix[0:2, ['A', 'B']].values, + piece.values) + + # rows unaligned + f = self.mixed_frame.copy() + piece = DataFrame([[ 1, 2 ], [3, 4], [5, 6], [7, 8]], index=list(f.index[0:2]) + ['foo','bar'],columns=['A', 'B']) + key = (slice(None,2), ['A', 'B']) + f.ix[key] = piece + assert_almost_equal(f.ix[0:2:, ['A', 'B']].values, + piece.values[0:2]) + + # key is unaligned with values + f = self.mixed_frame.copy() + piece = f.ix[:2, ['A']] + key = (slice(-2, None), ['A', 'B']) + f.ix[key] = piece + piece['B'] = np.nan + assert_almost_equal(f.ix[-2:, ['A', 'B']].values, + piece.values) + + # ndarray + f = self.mixed_frame.copy() piece = self.mixed_frame.ix[:2, ['A', 'B']] - f = self.mixed_frame.ix.__setitem__ key = (slice(-2, None), ['A', 'B']) - self.assertRaises(ValueError, f, key, piece) + f.ix[key] = 
piece.values + assert_almost_equal(f.ix[-2:, ['A', 'B']].values, + piece.values) + + + # needs upcasting + df = DataFrame([[1,2,'foo'],[3,4,'bar']],columns=['A','B','C']) + df2 = df.copy() + df2.ix[:,['A','B']] = df.ix[:,['A','B']]+0.5 + expected = df.reindex(columns=['A','B']) + expected += 0.5 + expected['C'] = df['C'] + assert_frame_equal(df2, expected) def test_setitem_frame_align(self): piece = self.frame.ix[:2, ['A', 'B']] diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 75aa208e0c6b2..297c744b96f28 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -724,6 +724,18 @@ def test_xs_multiindex(self): expected = df.iloc[:,0:2].loc[:,'a'] assert_frame_equal(result,expected) + def test_setitem_dtype_upcast(self): + + # GH3216 + df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) + df['c'] = np.nan + self.assert_(df['c'].dtype == np.float64) + + df.ix[0,'c'] = 'foo' + expected = DataFrame([{"a": 1, "c" : 'foo'}, {"a": 3, "b": 2, "c" : np.nan}]) + assert_frame_equal(df,expected) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index bd9e61ba5d89b..830c8c07c24da 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2314,6 +2314,13 @@ def test_update(self): expected = Series([1.5, 3.5, 3., 5., np.nan]) assert_series_equal(s, expected) + # GH 3217 + df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) + df['c'] = np.nan + + # this will fail as long as series is a sub-class of ndarray + ##### df['c'].update(Series(['foo'],index=[0])) ##### + def test_corr(self): _skip_if_no_scipy()