
Commit 7e24cc1

Merge remote-tracking branch 'upstream/master' into typing
2 parents 6f8a1e8 + e0c63b4

File tree: 13 files changed, +131 -72 lines

doc/source/whatsnew/v0.25.2.rst

Lines changed: 1 addition & 0 deletions
@@ -78,6 +78,7 @@ Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug incorrectly raising an ``IndexError`` when passing a list of quantiles to :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` (:issue:`28113`).
+- Bug in :meth:`pandas.core.groupby.GroupBy.shift`, :meth:`pandas.core.groupby.GroupBy.bfill` and :meth:`pandas.core.groupby.GroupBy.ffill` where timezone information would be dropped (:issue:`19995`, :issue:`27992`)
 -
 -
 -
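A minimal sketch of the quantile entry above (frame, column, and key names are made up for illustration): passing a list of quantiles to DataFrameGroupBy.quantile previously raised an IndexError (:issue:`28113`) and now returns one row per group/quantile pair.

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1.0, 2.0, 3.0, 4.0]})

# Previously raised IndexError; with the fix this returns a MultiIndexed
# Series with one entry per (group, quantile) pair.
print(df.groupby("key")["val"].quantile([0.25, 0.75]))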

pandas/_libs/tslib.pyx

Lines changed: 5 additions & 6 deletions
@@ -344,14 +344,13 @@ def array_with_unit_to_datetime(ndarray values, object unit,
     # try a quick conversion to i8
     # if we have nulls that are not type-compat
     # then need to iterate
-    try:
+    if values.dtype.kind == "i":
+        # Note: this condition makes the casting="same_kind" redundant
         iresult = values.astype('i8', casting='same_kind', copy=False)
         mask = iresult == NPY_NAT
         iresult[mask] = 0
         fvalues = iresult.astype('f8') * m
         need_to_iterate = False
-    except:
-        pass
 
     # check the bounds
     if not need_to_iterate:
@@ -406,7 +405,7 @@ def array_with_unit_to_datetime(ndarray values, object unit,
                     elif is_ignore:
                         raise AssertionError
                     iresult[i] = NPY_NAT
-                except:
+                except OverflowError:
                     if is_raise:
                         raise OutOfBoundsDatetime(
                             "cannot convert input {val} with the unit "
@@ -447,7 +446,7 @@ def array_with_unit_to_datetime(ndarray values, object unit,
            else:
                try:
                    oresult[i] = Timestamp(cast_from_unit(val, unit))
-                except:
+                except OverflowError:
                    oresult[i] = val
 
        elif isinstance(val, str):
@@ -574,7 +573,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
                    # datetimes/strings, then we must coerce)
                    try:
                        iresult[i] = cast_from_unit(val, 'ns')
-                    except:
+                    except OverflowError:
                        iresult[i] = NPY_NAT
 
            elif isinstance(val, str):
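The bare `except:` clauses here only ever needed to catch overflow during unit conversion; the narrowed handlers make that explicit. A small sketch of the user-visible behaviour (the values are chosen only for illustration):

import pandas as pd

# Epoch seconds convert as before...
print(pd.to_datetime([1, 1000000000], unit="s"))

# ...while a value whose nanosecond equivalent overflows still surfaces as
# OutOfBoundsDatetime instead of being silently swallowed.
try:
    pd.to_datetime([10 ** 11], unit="s")
except pd.errors.OutOfBoundsDatetime as err:
    print("raised:", err)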

pandas/_libs/tslibs/frequencies.pyx

Lines changed: 14 additions & 10 deletions
@@ -138,6 +138,10 @@ cpdef get_freq_code(freqstr):
     -------
     return : tuple of base frequency code and stride (mult)
 
+    Raises
+    ------
+    TypeError : if passed a tuple with incorrect types
+
     Examples
     --------
     >>> get_freq_code('3D')
@@ -156,16 +160,16 @@ cpdef get_freq_code(freqstr):
         if is_integer_object(freqstr[0]) and is_integer_object(freqstr[1]):
             # e.g., freqstr = (2000, 1)
             return freqstr
+        elif is_integer_object(freqstr[0]):
+            # Note: passing freqstr[1] below will raise TypeError if that
+            #  is not a str
+            code = _period_str_to_code(freqstr[1])
+            stride = freqstr[0]
+            return code, stride
         else:
             # e.g., freqstr = ('T', 5)
-            try:
-                code = _period_str_to_code(freqstr[0])
-                stride = freqstr[1]
-            except:
-                if is_integer_object(freqstr[1]):
-                    raise
-                code = _period_str_to_code(freqstr[1])
-                stride = freqstr[0]
+            code = _period_str_to_code(freqstr[0])
+            stride = freqstr[1]
             return code, stride
 
     if is_integer_object(freqstr):
@@ -177,7 +181,7 @@ cpdef get_freq_code(freqstr):
         return code, stride
 
 
-cpdef _base_and_stride(freqstr):
+cpdef _base_and_stride(str freqstr):
     """
     Return base freq and stride info from string representation
 
@@ -207,7 +211,7 @@ cpdef _base_and_stride(freqstr):
     return base, stride
 
 
-cpdef _period_str_to_code(freqstr):
+cpdef _period_str_to_code(str freqstr):
     freqstr = _lite_rule_alias.get(freqstr, freqstr)
 
     if freqstr not in _dont_uppercase:
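get_freq_code is a private helper, so the calls below are illustrative only and the numeric base codes depend on the pandas version; they show the tuple forms the restructured branches accept:

from pandas._libs.tslibs.frequencies import get_freq_code

print(get_freq_code("3D"))       # e.g. (6000, 3): base code for 'D', stride 3
print(get_freq_code(("D", 3)))   # (str, int) tuple handled by the else branch
print(get_freq_code((3, "D")))   # (int, str) tuple handled by the new elif branch
print(get_freq_code((2000, 1)))  # two integers are returned unchanged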

pandas/_libs/tslibs/parsing.pyx

Lines changed: 10 additions & 17 deletions
@@ -587,15 +587,11 @@ def try_parse_dates(object[:] values, parser=None,
     else:
         parse_date = parser
 
-    try:
-        for i in range(n):
-            if values[i] == '':
-                result[i] = np.nan
-            else:
-                result[i] = parse_date(values[i])
-    except Exception:
-        # raise if passed parser and it failed
-        raise
+    for i in range(n):
+        if values[i] == '':
+            result[i] = np.nan
+        else:
+            result[i] = parse_date(values[i])
 
     return result.base  # .base to access underlying ndarray
 
@@ -814,7 +810,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse,
     if dt_str_parse is None or dt_str_split is None:
         return None
 
-    if not isinstance(dt_str, (str, unicode)):
+    if not isinstance(dt_str, str):
         return None
 
     day_attribute_and_format = (('day',), '%d', 2)
@@ -840,19 +836,16 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse,
 
     try:
         parsed_datetime = dt_str_parse(dt_str, dayfirst=dayfirst)
-    except:
+    except (ValueError, OverflowError):
         # In case the datetime can't be parsed, its format cannot be guessed
         return None
 
     if parsed_datetime is None:
         return None
 
-    try:
-        tokens = dt_str_split(dt_str)
-    except:
-        # In case the datetime string can't be split, its format cannot
-        # be guessed
-        return None
+    # the default dt_str_split from dateutil will never raise here; we assume
+    #  that any user-provided function will not either.
+    tokens = dt_str_split(dt_str)
 
     format_guess = [None] * len(tokens)
     found_attrs = set()
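The pattern is the same as in tslib.pyx: catch only what dateutil's parser is known to raise. A standalone sketch of that idea (the helper name is made up and is not part of pandas):

from dateutil.parser import parse as du_parse

def can_guess_format(dt_str, dayfirst=False):
    # Mirror the narrowed handling: only ValueError/OverflowError, which
    # dateutil's parser raises for unparseable input, are treated as
    # "format cannot be guessed"; anything else propagates.
    try:
        parsed = du_parse(dt_str, dayfirst=dayfirst)
    except (ValueError, OverflowError):
        return False
    return parsed is not None

print(can_guess_format("2019-09-25"))  # True
print(can_guess_format("not a date"))  # False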

pandas/core/groupby/generic.py

Lines changed: 6 additions & 2 deletions
@@ -324,7 +324,11 @@ def _aggregate_item_by_item(self, func, *args, **kwargs):
                 if cast:
                     result[item] = self._try_cast(result[item], data)
 
-            except ValueError:
+            except ValueError as err:
+                if "Must produce aggregated value" in str(err):
+                    # raised in _aggregate_named, handle at higher level
+                    #  see test_apply_with_mutated_index
+                    raise
                 cannot_agg.append(item)
                 continue
             except TypeError as e:
@@ -1009,7 +1013,7 @@ def _aggregate_named(self, func, *args, **kwargs):
             group.name = name
             output = func(group, *args, **kwargs)
             if isinstance(output, (Series, Index, np.ndarray)):
-                raise Exception("Must produce aggregated value")
+                raise ValueError("Must produce aggregated value")
             result[name] = self._try_cast(output, group)
 
         return result
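Roughly, the user-visible effect: an aggregation function that does not reduce a group to a scalar now fails with a ValueError rather than a bare Exception, so it can be caught (or re-raised) selectively as in the hunk above. Sketch with made-up data; the exact message can differ depending on which aggregation path runs first.

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})

try:
    # Returning an array per group is not a valid aggregation.
    df.groupby("key")["val"].agg(lambda s: s.values)
except ValueError as err:
    print("ValueError:", err)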

pandas/core/groupby/groupby.py

Lines changed: 7 additions & 5 deletions
@@ -2263,26 +2263,28 @@ def _get_cythonized_result(
         base_func = getattr(libgroupby, how)
 
         for name, obj in self._iterate_slices():
+            values = obj._data._values
+
             if aggregate:
                 result_sz = ngroups
             else:
-                result_sz = len(obj.values)
+                result_sz = len(values)
 
             if not cython_dtype:
-                cython_dtype = obj.values.dtype
+                cython_dtype = values.dtype
 
             result = np.zeros(result_sz, dtype=cython_dtype)
             func = partial(base_func, result, labels)
             inferences = None
 
             if needs_values:
-                vals = obj.values
+                vals = values
                 if pre_processing:
                     vals, inferences = pre_processing(vals)
                 func = partial(func, vals)
 
             if needs_mask:
-                mask = isna(obj.values).view(np.uint8)
+                mask = isna(values).view(np.uint8)
                 func = partial(func, mask)
 
             if needs_ngroups:
@@ -2291,7 +2293,7 @@ def _get_cythonized_result(
             func(**kwargs)  # Call func to modify indexer values in place
 
             if result_is_index:
-                result = algorithms.take_nd(obj.values, result)
+                result = algorithms.take_nd(values, result)
 
             if post_processing:
                 result = post_processing(result, inferences)
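Reading the values once through obj._data._values keeps extension types such as tz-aware datetimes intact, which is the fix recorded in the whatsnew entry (:issue:`19995`, :issue:`27992`). A minimal sketch of the user-visible effect, with illustrative data:

import pandas as pd

when = pd.to_datetime(["2019-01-01", "2019-01-02", "2019-01-03"]).tz_localize("UTC")
df = pd.DataFrame({"key": ["a", "a", "b"], "when": when})

shifted = df.groupby("key")["when"].shift()
print(shifted.dtype)  # datetime64[ns, UTC] -- the timezone is no longer dropped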

pandas/core/indexes/accessors.py

Lines changed: 10 additions & 13 deletions
@@ -326,18 +326,15 @@ def __new__(cls, data):
         if orig is not None:
             data = Series(orig.values.categories, name=orig.name, copy=False)
 
-        try:
-            if is_datetime64_dtype(data.dtype):
-                return DatetimeProperties(data, orig)
-            elif is_datetime64tz_dtype(data.dtype):
-                return DatetimeProperties(data, orig)
-            elif is_timedelta64_dtype(data.dtype):
-                return TimedeltaProperties(data, orig)
-            elif is_period_arraylike(data):
-                return PeriodProperties(data, orig)
-            elif is_datetime_arraylike(data):
-                return DatetimeProperties(data, orig)
-        except Exception:
-            pass  # we raise an attribute error anyway
+        if is_datetime64_dtype(data.dtype):
+            return DatetimeProperties(data, orig)
+        elif is_datetime64tz_dtype(data.dtype):
+            return DatetimeProperties(data, orig)
+        elif is_timedelta64_dtype(data.dtype):
+            return TimedeltaProperties(data, orig)
+        elif is_period_arraylike(data):
+            return PeriodProperties(data, orig)
+        elif is_datetime_arraylike(data):
+            return DatetimeProperties(data, orig)
 
         raise AttributeError("Can only use .dt accessor with datetimelike values")
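Dropping the try/except does not change the public behaviour: datetimelike Series still get the accessor and everything else still gets an AttributeError, but unrelated errors inside the dtype checks are no longer hidden. Illustrative check:

import pandas as pd

s = pd.Series(pd.date_range("2019-01-01", periods=3, tz="UTC"))
print(s.dt.day)  # dispatches to DatetimeProperties

try:
    pd.Series([1, 2, 3]).dt
except AttributeError as err:
    print(err)  # Can only use .dt accessor with datetimelike values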

pandas/core/indexes/frozen.py

Lines changed: 0 additions & 1 deletion
@@ -71,7 +71,6 @@ def difference(self, other):
     __add__ = __iadd__ = union
 
     def __getitem__(self, n):
-        # Python 3 compat
         if isinstance(n, slice):
             return self.__class__(super().__getitem__(n))
         return super().__getitem__(n)

pandas/io/common.py

Lines changed: 8 additions & 10 deletions
@@ -90,7 +90,8 @@ def __next__(self):
 
 
 def _is_url(url) -> bool:
-    """Check to see if a URL has a valid protocol.
+    """
+    Check to see if a URL has a valid protocol.
 
     Parameters
     ----------
@@ -101,10 +102,9 @@ def _is_url(url) -> bool:
     isurl : bool
         If `url` has a valid protocol return True otherwise False.
     """
-    try:
-        return parse_url(url).scheme in _VALID_URLS
-    except Exception:
+    if not isinstance(url, str):
         return False
+    return parse_url(url).scheme in _VALID_URLS
 
 
 def _expand_user(
@@ -171,18 +171,16 @@ def _stringify_path(
 
 
 def is_s3_url(url) -> bool:
     """Check for an s3, s3n, or s3a url"""
-    try:
-        return parse_url(url).scheme in ["s3", "s3n", "s3a"]
-    except Exception:
+    if not isinstance(url, str):
         return False
+    return parse_url(url).scheme in ["s3", "s3n", "s3a"]
 
 
 def is_gcs_url(url) -> bool:
     """Check for a gcs url"""
-    try:
-        return parse_url(url).scheme in ["gcs", "gs"]
-    except Exception:
+    if not isinstance(url, str):
         return False
+    return parse_url(url).scheme in ["gcs", "gs"]
 
 
 def urlopen(*args, **kwargs):
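The same guard pattern, sketched as a standalone function since _is_url and friends are private helpers (the function name and scheme set below are illustrative, not the ones pandas uses):

from urllib.parse import urlparse

_VALID_SCHEMES = {"http", "https", "ftp", "s3", "gs", "file"}  # illustrative

def looks_like_url(obj) -> bool:
    # Reject non-strings explicitly instead of relying on a broad
    # `except Exception` around the URL parser.
    if not isinstance(obj, str):
        return False
    return urlparse(obj).scheme in _VALID_SCHEMES

print(looks_like_url("https://example.com/data.csv"))  # True
print(looks_like_url(12345))                           # False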

pandas/io/formats/format.py

Lines changed: 2 additions & 5 deletions
@@ -5,6 +5,7 @@
 
 import codecs
 from contextlib import contextmanager
+from datetime import tzinfo
 import decimal
 from functools import partial
 from io import StringIO
@@ -27,8 +28,6 @@
 )
 from unicodedata import east_asian_width
 
-from dateutil.tz.tz import tzutc
-from dateutil.zoneinfo import tzfile
 import numpy as np
 
 from pandas._config.config import get_option, set_option
@@ -1557,9 +1556,7 @@ def _is_dates_only(
 
 
 def _format_datetime64(
-    x: Union[NaTType, Timestamp],
-    tz: Optional[Union[tzfile, tzutc]] = None,
-    nat_rep: str = "NaT",
+    x: Union[NaTType, Timestamp], tz: Optional[tzinfo] = None, nat_rep: str = "NaT"
 ) -> str:
     if x is None or (is_scalar(x) and isna(x)):
         return nat_rep

pandas/io/parsers.py

Lines changed: 0 additions & 1 deletion
@@ -1064,7 +1064,6 @@ def _clean_options(self, options, engine):
            )
 
            if result.get(arg, depr_default) != depr_default:
-                # raise Exception(result.get(arg, depr_default), depr_default)
                depr_warning += msg + "\n\n"
            else:
                result[arg] = parser_default

pandas/io/pickle.py

Lines changed: 2 additions & 2 deletions
@@ -153,10 +153,10 @@ def read_pickle(path, compression="infer"):
             # We want to silence any warnings about, e.g. moved modules.
             warnings.simplefilter("ignore", Warning)
             return pickle.load(f)
-    except Exception:  # noqa: E722
+    except Exception:
         try:
             return pc.load(f, encoding=None)
-        except Exception:  # noqa: E722
+        except Exception:
             return pc.load(f, encoding="latin1")
     finally:
         f.close()
