
Commit 7e24cc1

Merge remote-tracking branch 'upstream/master' into typing
2 parents 6f8a1e8 + e0c63b4

File tree: 13 files changed, +131 -72 lines

doc/source/whatsnew/v0.25.2.rst

Lines changed: 1 addition & 0 deletions
@@ -78,6 +78,7 @@ Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug incorrectly raising an ``IndexError`` when passing a list of quantiles to :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` (:issue:`28113`).
+- Bug in :meth:`pandas.core.groupby.GroupBy.shift`, :meth:`pandas.core.groupby.GroupBy.bfill` and :meth:`pandas.core.groupby.GroupBy.ffill` where timezone information would be dropped (:issue:`19995`, :issue:`27992`)
 -
 -
 -
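A minimal sketch of the quantile entry above (frame, column, and key names are made up for illustration): passing a list of quantiles to DataFrameGroupBy.quantile previously raised an IndexError (:issue:`28113`) and now returns one row per group/quantile pair.

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1.0, 2.0, 3.0, 4.0]})

# Previously raised IndexError; with the fix this returns a MultiIndexed
# Series with one entry per (group, quantile) pair.
print(df.groupby("key")["val"].quantile([0.25, 0.75]))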

pandas/_libs/tslib.pyx

Lines changed: 5 additions & 6 deletions
@@ -344,14 +344,13 @@ def array_with_unit_to_datetime(ndarray values, object unit,
     # try a quick conversion to i8
     # if we have nulls that are not type-compat
     # then need to iterate
-    try:
+    if values.dtype.kind == "i":
+        # Note: this condition makes the casting="same_kind" redundant
         iresult = values.astype('i8', casting='same_kind', copy=False)
         mask = iresult == NPY_NAT
         iresult[mask] = 0
         fvalues = iresult.astype('f8') * m
         need_to_iterate = False
-    except:
-        pass
 
     # check the bounds
     if not need_to_iterate:
@@ -406,7 +405,7 @@ def array_with_unit_to_datetime(ndarray values, object unit,
                     elif is_ignore:
                         raise AssertionError
                     iresult[i] = NPY_NAT
-                except:
+                except OverflowError:
                     if is_raise:
                         raise OutOfBoundsDatetime(
                             "cannot convert input {val} with the unit "
@@ -447,7 +446,7 @@ def array_with_unit_to_datetime(ndarray values, object unit,
            else:
                try:
                    oresult[i] = Timestamp(cast_from_unit(val, unit))
-                except:
+                except OverflowError:
                    oresult[i] = val
 
        elif isinstance(val, str):
@@ -574,7 +573,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
                    # datetimes/strings, then we must coerce)
                    try:
                        iresult[i] = cast_from_unit(val, 'ns')
-                    except:
+                    except OverflowError:
                        iresult[i] = NPY_NAT
 
            elif isinstance(val, str):
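The bare `except:` clauses here only ever needed to catch overflow during unit conversion; the narrowed handlers make that explicit. A small sketch of the user-visible behaviour (the values are chosen only for illustration):

import pandas as pd

# Epoch seconds convert as before...
print(pd.to_datetime([1, 1000000000], unit="s"))

# ...while a value whose nanosecond equivalent overflows still surfaces as
# OutOfBoundsDatetime instead of being silently swallowed.
try:
    pd.to_datetime([10 ** 11], unit="s")
except pd.errors.OutOfBoundsDatetime as err:
    print("raised:", err)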

pandas/_libs/tslibs/frequencies.pyx

Lines changed: 14 additions & 10 deletions
@@ -138,6 +138,10 @@ cpdef get_freq_code(freqstr):
     -------
     return : tuple of base frequency code and stride (mult)
 
+    Raises
+    ------
+    TypeError : if passed a tuple with incorrect types
+
     Examples
     --------
     >>> get_freq_code('3D')
@@ -156,16 +160,16 @@ cpdef get_freq_code(freqstr):
         if is_integer_object(freqstr[0]) and is_integer_object(freqstr[1]):
             # e.g., freqstr = (2000, 1)
             return freqstr
+        elif is_integer_object(freqstr[0]):
+            # Note: passing freqstr[1] below will raise TypeError if that
+            #  is not a str
+            code = _period_str_to_code(freqstr[1])
+            stride = freqstr[0]
+            return code, stride
         else:
             # e.g., freqstr = ('T', 5)
-            try:
-                code = _period_str_to_code(freqstr[0])
-                stride = freqstr[1]
-            except:
-                if is_integer_object(freqstr[1]):
-                    raise
-                code = _period_str_to_code(freqstr[1])
-                stride = freqstr[0]
+            code = _period_str_to_code(freqstr[0])
+            stride = freqstr[1]
             return code, stride
 
     if is_integer_object(freqstr):
@@ -177,7 +181,7 @@ cpdef get_freq_code(freqstr):
         return code, stride
 
 
-cpdef _base_and_stride(freqstr):
+cpdef _base_and_stride(str freqstr):
     """
     Return base freq and stride info from string representation
 
@@ -207,7 +211,7 @@ cpdef _base_and_stride(freqstr):
     return base, stride
 
 
-cpdef _period_str_to_code(freqstr):
+cpdef _period_str_to_code(str freqstr):
     freqstr = _lite_rule_alias.get(freqstr, freqstr)
 
     if freqstr not in _dont_uppercase:
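get_freq_code is a private helper, so the calls below are illustrative only and the numeric base codes depend on the pandas version; they show the tuple forms the restructured branches accept:

from pandas._libs.tslibs.frequencies import get_freq_code

print(get_freq_code("3D"))       # e.g. (6000, 3): base code for 'D', stride 3
print(get_freq_code(("D", 3)))   # (str, int) tuple handled by the else branch
print(get_freq_code((3, "D")))   # (int, str) tuple handled by the new elif branch
print(get_freq_code((2000, 1)))  # two integers are returned unchanged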

pandas/_libs/tslibs/parsing.pyx

Lines changed: 10 additions & 17 deletions
@@ -587,15 +587,11 @@ def try_parse_dates(object[:] values, parser=None,
     else:
         parse_date = parser
 
-    try:
-        for i in range(n):
-            if values[i] == '':
-                result[i] = np.nan
-            else:
-                result[i] = parse_date(values[i])
-    except Exception:
-        # raise if passed parser and it failed
-        raise
+    for i in range(n):
+        if values[i] == '':
+            result[i] = np.nan
+        else:
+            result[i] = parse_date(values[i])
 
     return result.base  # .base to access underlying ndarray
 
@@ -814,7 +810,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse,
     if dt_str_parse is None or dt_str_split is None:
         return None
 
-    if not isinstance(dt_str, (str, unicode)):
+    if not isinstance(dt_str, str):
         return None
 
     day_attribute_and_format = (('day',), '%d', 2)
@@ -840,19 +836,16 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse,
 
     try:
         parsed_datetime = dt_str_parse(dt_str, dayfirst=dayfirst)
-    except:
+    except (ValueError, OverflowError):
         # In case the datetime can't be parsed, its format cannot be guessed
         return None
 
     if parsed_datetime is None:
         return None
 
-    try:
-        tokens = dt_str_split(dt_str)
-    except:
-        # In case the datetime string can't be split, its format cannot
-        # be guessed
-        return None
+    # the default dt_str_split from dateutil will never raise here; we assume
+    #  that any user-provided function will not either.
+    tokens = dt_str_split(dt_str)
 
     format_guess = [None] * len(tokens)
     found_attrs = set()
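The pattern is the same as in tslib.pyx: catch only what dateutil's parser is known to raise. A standalone sketch of that idea (the helper name is made up and is not part of pandas):

from dateutil.parser import parse as du_parse

def can_guess_format(dt_str, dayfirst=False):
    # Mirror the narrowed handling: only ValueError/OverflowError, which
    # dateutil's parser raises for unparseable input, are treated as
    # "format cannot be guessed"; anything else propagates.
    try:
        parsed = du_parse(dt_str, dayfirst=dayfirst)
    except (ValueError, OverflowError):
        return False
    return parsed is not None

print(can_guess_format("2019-09-25"))  # True
print(can_guess_format("not a date"))  # False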

pandas/core/groupby/generic.py

Lines changed: 6 additions & 2 deletions
@@ -324,7 +324,11 @@ def _aggregate_item_by_item(self, func, *args, **kwargs):
                 if cast:
                     result[item] = self._try_cast(result[item], data)
 
-            except ValueError:
+            except ValueError as err:
+                if "Must produce aggregated value" in str(err):
+                    # raised in _aggregate_named, handle at higher level
+                    #  see test_apply_with_mutated_index
+                    raise
                 cannot_agg.append(item)
                 continue
             except TypeError as e:
@@ -1009,7 +1013,7 @@ def _aggregate_named(self, func, *args, **kwargs):
             group.name = name
             output = func(group, *args, **kwargs)
             if isinstance(output, (Series, Index, np.ndarray)):
-                raise Exception("Must produce aggregated value")
+                raise ValueError("Must produce aggregated value")
             result[name] = self._try_cast(output, group)
 
         return result
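Roughly, the user-visible effect: an aggregation function that does not reduce a group to a scalar now fails with a ValueError rather than a bare Exception, so it can be caught (or re-raised) selectively as in the hunk above. Sketch with made-up data; the exact message can differ depending on which aggregation path runs first.

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})

try:
    # Returning an array per group is not a valid aggregation.
    df.groupby("key")["val"].agg(lambda s: s.values)
except ValueError as err:
    print("ValueError:", err)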

pandas/core/groupby/groupby.py

Lines changed: 7 additions & 5 deletions
@@ -2263,26 +2263,28 @@ def _get_cythonized_result(
         base_func = getattr(libgroupby, how)
 
         for name, obj in self._iterate_slices():
+            values = obj._data._values
+
             if aggregate:
                 result_sz = ngroups
             else:
-                result_sz = len(obj.values)
+                result_sz = len(values)
 
             if not cython_dtype:
-                cython_dtype = obj.values.dtype
+                cython_dtype = values.dtype
 
             result = np.zeros(result_sz, dtype=cython_dtype)
             func = partial(base_func, result, labels)
             inferences = None
 
             if needs_values:
-                vals = obj.values
+                vals = values
                 if pre_processing:
                     vals, inferences = pre_processing(vals)
                 func = partial(func, vals)
 
             if needs_mask:
-                mask = isna(obj.values).view(np.uint8)
+                mask = isna(values).view(np.uint8)
                 func = partial(func, mask)
 
             if needs_ngroups:
@@ -2291,7 +2293,7 @@ def _get_cythonized_result(
             func(**kwargs)  # Call func to modify indexer values in place
 
             if result_is_index:
-                result = algorithms.take_nd(obj.values, result)
+                result = algorithms.take_nd(values, result)
 
             if post_processing:
                 result = post_processing(result, inferences)
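Reading the values once through obj._data._values keeps extension types such as tz-aware datetimes intact, which is the fix recorded in the whatsnew entry (:issue:`19995`, :issue:`27992`). A minimal sketch of the user-visible effect, with illustrative data:

import pandas as pd

when = pd.to_datetime(["2019-01-01", "2019-01-02", "2019-01-03"]).tz_localize("UTC")
df = pd.DataFrame({"key": ["a", "a", "b"], "when": when})

shifted = df.groupby("key")["when"].shift()
print(shifted.dtype)  # datetime64[ns, UTC] -- the timezone is no longer dropped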

pandas/core/indexes/accessors.py

Lines changed: 10 additions & 13 deletions
@@ -326,18 +326,15 @@ def __new__(cls, data):
         if orig is not None:
             data = Series(orig.values.categories, name=orig.name, copy=False)
 
-        try:
-            if is_datetime64_dtype(data.dtype):
-                return DatetimeProperties(data, orig)
-            elif is_datetime64tz_dtype(data.dtype):
-                return DatetimeProperties(data, orig)
-            elif is_timedelta64_dtype(data.dtype):
-                return TimedeltaProperties(data, orig)
-            elif is_period_arraylike(data):
-                return PeriodProperties(data, orig)
-            elif is_datetime_arraylike(data):
-                return DatetimeProperties(data, orig)
-        except Exception:
-            pass  # we raise an attribute error anyway
+        if is_datetime64_dtype(data.dtype):
+            return DatetimeProperties(data, orig)
+        elif is_datetime64tz_dtype(data.dtype):
+            return DatetimeProperties(data, orig)
+        elif is_timedelta64_dtype(data.dtype):
+            return TimedeltaProperties(data, orig)
+        elif is_period_arraylike(data):
+            return PeriodProperties(data, orig)
+        elif is_datetime_arraylike(data):
+            return DatetimeProperties(data, orig)
 
         raise AttributeError("Can only use .dt accessor with datetimelike values")
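Dropping the try/except does not change the public behaviour: datetimelike Series still get the accessor and everything else still gets an AttributeError, but unrelated errors inside the dtype checks are no longer hidden. Illustrative check:

import pandas as pd

s = pd.Series(pd.date_range("2019-01-01", periods=3, tz="UTC"))
print(s.dt.day)  # dispatches to DatetimeProperties

try:
    pd.Series([1, 2, 3]).dt
except AttributeError as err:
    print(err)  # Can only use .dt accessor with datetimelike values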

pandas/core/indexes/frozen.py

Lines changed: 0 additions & 1 deletion
@@ -71,7 +71,6 @@ def difference(self, other):
     __add__ = __iadd__ = union
 
     def __getitem__(self, n):
-        # Python 3 compat
         if isinstance(n, slice):
             return self.__class__(super().__getitem__(n))
         return super().__getitem__(n)

pandas/io/common.py

Lines changed: 8 additions & 10 deletions
@@ -90,7 +90,8 @@ def __next__(self):
 
 
 def _is_url(url) -> bool:
-    """Check to see if a URL has a valid protocol.
+    """
+    Check to see if a URL has a valid protocol.
 
     Parameters
     ----------
@@ -101,10 +102,9 @@ def _is_url(url) -> bool:
     isurl : bool
         If `url` has a valid protocol return True otherwise False.
     """
-    try:
-        return parse_url(url).scheme in _VALID_URLS
-    except Exception:
+    if not isinstance(url, str):
         return False
+    return parse_url(url).scheme in _VALID_URLS
 
 
 def _expand_user(
@@ -171,18 +171,16 @@ def _stringify_path(
 
 
 def is_s3_url(url) -> bool:
     """Check for an s3, s3n, or s3a url"""
-    try:
-        return parse_url(url).scheme in ["s3", "s3n", "s3a"]
-    except Exception:
+    if not isinstance(url, str):
         return False
+    return parse_url(url).scheme in ["s3", "s3n", "s3a"]
 
 
 def is_gcs_url(url) -> bool:
     """Check for a gcs url"""
-    try:
-        return parse_url(url).scheme in ["gcs", "gs"]
-    except Exception:
+    if not isinstance(url, str):
         return False
+    return parse_url(url).scheme in ["gcs", "gs"]
 
 
 def urlopen(*args, **kwargs):
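The same guard pattern, sketched as a standalone function since _is_url and friends are private helpers (the function name and scheme set below are illustrative, not the ones pandas uses):

from urllib.parse import urlparse

_VALID_SCHEMES = {"http", "https", "ftp", "s3", "gs", "file"}  # illustrative

def looks_like_url(obj) -> bool:
    # Reject non-strings explicitly instead of relying on a broad
    # `except Exception` around the URL parser.
    if not isinstance(obj, str):
        return False
    return urlparse(obj).scheme in _VALID_SCHEMES

print(looks_like_url("https://example.com/data.csv"))  # True
print(looks_like_url(12345))                           # False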

pandas/io/formats/format.py

Lines changed: 2 additions & 5 deletions
@@ -5,6 +5,7 @@
 
 import codecs
 from contextlib import contextmanager
+from datetime import tzinfo
 import decimal
 from functools import partial
 from io import StringIO
@@ -27,8 +28,6 @@
 )
 from unicodedata import east_asian_width
 
-from dateutil.tz.tz import tzutc
-from dateutil.zoneinfo import tzfile
 import numpy as np
 
 from pandas._config.config import get_option, set_option
@@ -1557,9 +1556,7 @@ def _is_dates_only(
 
 
 def _format_datetime64(
-    x: Union[NaTType, Timestamp],
-    tz: Optional[Union[tzfile, tzutc]] = None,
-    nat_rep: str = "NaT",
+    x: Union[NaTType, Timestamp], tz: Optional[tzinfo] = None, nat_rep: str = "NaT"
 ) -> str:
     if x is None or (is_scalar(x) and isna(x)):
         return nat_rep

pandas/io/parsers.py

Lines changed: 0 additions & 1 deletion
@@ -1064,7 +1064,6 @@ def _clean_options(self, options, engine):
            )
 
            if result.get(arg, depr_default) != depr_default:
-                # raise Exception(result.get(arg, depr_default), depr_default)
                depr_warning += msg + "\n\n"
            else:
                result[arg] = parser_default

pandas/io/pickle.py

Lines changed: 2 additions & 2 deletions
@@ -153,10 +153,10 @@ def read_pickle(path, compression="infer"):
             # We want to silence any warnings about, e.g. moved modules.
             warnings.simplefilter("ignore", Warning)
             return pickle.load(f)
-    except Exception:  # noqa: E722
+    except Exception:
         try:
             return pc.load(f, encoding=None)
-        except Exception:  # noqa: E722
+        except Exception:
             return pc.load(f, encoding="latin1")
     finally:
         f.close()
