Cleanups after merge

MichaelTiemannOSC · MichaelTiemannOSC · commit f0c7e64c7729 · 2023-08-14T21:20:31.000-04:00
First draft of test cases that all pass (100%) after (re)merging with the changes that were extracted from this work as part of PR hgrecco#196. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com>
diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py
@@ -33,7 +33,7 @@
 # from pint.facets.plain.unit import PlainUnit as _Unit
 
 if HAS_UNCERTAINTIES:
-    from uncertainties import UFloat, ufloat
+    from uncertainties import ufloat, UFloat
     from uncertainties import unumpy as unp
 
     _ufloat_nan = ufloat(np.nan, 0)
@@ -330,12 +330,6 @@ def __setitem__(self, key, value):
             # doing nothing here seems to be ok
             return
 
-        master_scalar = None
-        try:
-            master_scalar = next(i for i in self._data if pd.notna(i))
-        except StopIteration:
-            pass
-
         if isinstance(value, _Quantity):
             value = value.to(self.units).magnitude
         elif is_list_like(value) and len(value) > 0:
@@ -347,6 +341,36 @@ def __setitem__(self, key, value):
         key = check_array_indexer(self, key)
         # Filter out invalid values for our array type(s)
         try:
+            if HAS_UNCERTAINTIES and is_object_dtype(self._data):
+                from pandas.api.types import is_scalar, is_numeric_dtype
+
+                def value_to_ufloat(value):
+                    if pd.isna(value) or isinstance(value, UFloat):
+                        return value
+                    if is_numeric_dtype(type(value)):
+                        return ufloat(value, 0)
+                    raise ValueError
+
+                try:
+                    any_ufloats = next(
+                        True for i in self._data if isinstance(i, UFloat)
+                    )
+                    if any_ufloats:
+                        if is_scalar(key):
+                            if is_list_like(value):
+                                # cannot do many:1 setitem
+                                raise ValueError
+                            # 1:1 setitem
+                            value = value_to_ufloat(value)
+                        elif is_list_like(value):
+                            # many:many setitem
+                            value = [value_to_ufloat(v) for v in value]
+                        else:
+                            # broadcast 1:many
+                            value = value_to_ufloat(value)
+                except StopIteration:
+                    # If array is full of nothingness, we can put anything inside it
+                    pass
             self._data[key] = value
         except IndexError as e:
             msg = "Mask is wrong length. {}".format(e)
@@ -593,9 +617,7 @@ def _values_for_factorize(self):
         if arr.dtype.kind == "O":
             if HAS_UNCERTAINTIES and arr.size > 0:
                 # Canonicalize uncertain NaNs and pd.NA to np.nan
-                arr = arr.map(
-                    lambda x: self.dtype.na_value if x is pd.NA or unp.isnan(x) else x
-                )
+                arr = arr.map(lambda x: np.nan if x is pd.NA or unp.isnan(x) else x)
             return np.array(arr, copy=False), self.dtype.na_value
         return arr._values_for_factorize()
 
@@ -627,7 +649,7 @@ def value_counts(self, dropna=True):
             nafilt = data.map(lambda x: x is pd.NA or unp.isnan(x))
         else:
             nafilt = pd.isna(data)
-        na_value = self.dtype.na_value
+        na_value_for_index = pd.NA
         data = data[~nafilt]
         if HAS_UNCERTAINTIES and data.dtype.kind == "O":
             # This is a work-around for unhashable UFloats
@@ -643,7 +665,7 @@ def value_counts(self, dropna=True):
         array = [data_list.count(item) for item in index]
 
         if not dropna:
-            index.append(na_value)
+            index.append(na_value_for_index)
             array.append(nafilt.sum())
 
         return Series(np.asarray(array), index=index)
diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py
@@ -10,10 +10,22 @@
 
 try:
     import uncertainties.unumpy as unp
-    from uncertainties import ufloat, UFloat  # noqa: F401
+    from uncertainties import ufloat, UFloat
+    from uncertainties.core import AffineScalarFunc  # noqa: F401
+
+    def AffineScalarFunc__hash__(self):
+        if not self._linear_part.expanded():
+            self._linear_part.expand()
+        combo = tuple(iter(self._linear_part.linear_combo.items()))
+        if len(combo) > 1 or combo[0][1] != 1.0:
+            return hash(combo)
+        # The unique value that comes from a unique variable (which it also hashes to)
+        return id(combo[0][0])
+
+    AffineScalarFunc.__hash__ = AffineScalarFunc__hash__
 
-    HAS_UNCERTAINTIES = True
     _ufloat_nan = ufloat(np.nan, 0)
+    HAS_UNCERTAINTIES = True
 except ImportError:
     unp = np
     HAS_UNCERTAINTIES = False
@@ -170,8 +182,8 @@ def dtype():
 
 
 _base_numeric_dtypes = [float, int]
-_all_numeric_dtypes = (
-    _base_numeric_dtypes + [] if HAS_UNCERTAINTIES else [np.complex128]
+_all_numeric_dtypes = _base_numeric_dtypes + (
+    [] if HAS_UNCERTAINTIES else [np.complex128]
 )
 
 
@@ -650,23 +662,9 @@ def _get_exception(self, data, op_name):
             if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]:
                 return op_name, TypeError
         if op_name in ["__pow__", "__rpow__"]:
-            return DimensionalityError
-        complex128_dtype = pd.core.dtypes.dtypes.NumpyEADtype("complex128")
-        if (
-            (isinstance(obj, pd.Series) and obj.dtype == complex128_dtype)
-            or (
-                isinstance(obj, pd.DataFrame)
-                and any([dtype == complex128_dtype for dtype in obj.dtypes])
-            )
-            or (isinstance(other, pd.Series) and other.dtype == complex128_dtype)
-            or (
-                isinstance(other, pd.DataFrame)
-                and any([dtype == complex128_dtype for dtype in other.dtypes])
-            )
-        ):
-            if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]:
-                return TypeError
-        return super()._get_expected_exception(op_name, obj, other)
+            return op_name, DimensionalityError
+
+        return op_name, None
 
     # With Pint 0.21, series and scalar need to have compatible units for
     # the arithmetic to work
@@ -717,7 +715,9 @@ def test_divmod(self, data, USE_UNCERTAINTIES):
         self._check_divmod_op(1 * ureg.Mm, ops.rdivmod, ser)
 
     @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True)
-    def test_divmod_series_array(self, data, data_for_twos):
+    def test_divmod_series_array(self, data, data_for_twos, USE_UNCERTAINTIES):
+        if USE_UNCERTAINTIES:
+            pytest.skip(reason="uncertainties does not implement divmod")
         ser = pd.Series(data)
         self._check_divmod_op(ser, divmod, data)
 
@@ -727,12 +727,6 @@ def test_divmod_series_array(self, data, data_for_twos):
         other = pd.Series(other)
         self._check_divmod_op(other, ops.rdivmod, ser)
 
-    @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True)
-    def test_divmod_series_array(self, data, data_for_twos, USE_UNCERTAINTIES):
-        if USE_UNCERTAINTIES:
-            pytest.skip(reason="uncertainties does not implement divmod")
-        super().test_divmod_series_array(data, data_for_twos)
-
 
 class TestComparisonOps(base.BaseComparisonOpsTests):
     def _compare_other(self, s, data, op_name, other):
@@ -871,16 +865,6 @@ def test_reduce_series(
             warnings.simplefilter("ignore", RuntimeWarning)
             self.check_reduce(s, op_name, skipna)
 
-    @pytest.mark.parametrize("skipna", [True, False])
-    def test_reduce_series_xx(self, data, all_numeric_reductions, skipna):
-        op_name = all_numeric_reductions
-        s = pd.Series(data)
-
-        # min/max with empty produce numpy warnings
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", RuntimeWarning)
-            self.check_reduce(s, op_name, skipna)
-
 
 class TestBooleanReduce(base.BaseBooleanReduceTests):
     def check_reduce(self, s, op_name, skipna):
@@ -922,7 +906,18 @@ class TestSetitem(base.BaseSetitemTests):
     @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True)
     def test_setitem_scalar_key_sequence_raise(self, data):
         # This can be removed when https://github.com/pandas-dev/pandas/pull/54441 is accepted
-        base.BaseSetitemTests.test_setitem_scalar_key_sequence_raise(self, data)
+        arr = data[:5].copy()
+        with pytest.raises((ValueError, TypeError)):
+            arr[0] = arr[[0, 1]]
+
+    def test_setitem_invalid(self, data, invalid_scalar):
+        # This can be removed when https://github.com/pandas-dev/pandas/pull/54441 is accepted
+        msg = ""  # messages vary by subclass, so we do not test it
+        with pytest.raises((ValueError, TypeError), match=msg):
+            data[0] = invalid_scalar
+
+        with pytest.raises((ValueError, TypeError), match=msg):
+            data[:] = invalid_scalar
 
     @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True)
     def test_setitem_2d_values(self, data):