From fc65e3a40ce34ae6405f48012e55d2f9d08574e7 Mon Sep 17 00:00:00 2001 From: benedikt-mangold <48798074+benedikt-mangold@users.noreply.github.com> Date: Tue, 18 Apr 2023 16:37:14 +0200 Subject: [PATCH] undo changes and adding None as an optional argument type for validate argument of join and merge method Change byteorder argument typing for to_stata method to literal, added definition in pandas/_typing.py Change if_exists argument typing for to_gbq method to literal, added definition in pandas/_typing.py Change orient argument typing for from_dict method to literal, added definition in pandas/_typing.py Change how argument typing for to_timestamp method to literal, added definition in pandas/_typing.py Change validate argument typing for merge and join methods to literal, added definition in pandas/_typing.py Change na_action argument typing for applymap method to literal, added definition in pandas/_typing.py Change join and errors arguments typing for update method to literal, added definition in pandas/_typing.py Change keep argument typing for nlargest and nsmallest to literal, added definition in pandas/_typing.py Specify the kind and na_position more precisely in sort_values, reusing type definitions in pandas/_typing.py --- pandas/_typing.py | 40 ++++++++++++++++++++++++++++++++++++++++ pandas/core/frame.py | 39 ++++++++++++++++++++++++++------------- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index de02a549856ab..0dfe9d2b7d0b5 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -132,6 +132,8 @@ ] Timezone = Union[str, tzinfo] +ToTimestampHow = Literal["s", "e", "start", "end"] + # NDFrameT is stricter and ensures that the same subclass of NDFrame always is # used. E.g. 
`def func(a: NDFrameT) -> NDFrameT: ...` means that if a # Series is passed into a function, a Series is always returned and if a DataFrame is @@ -303,6 +305,9 @@ def closed(self) -> bool: str, int, Sequence[Union[str, int]], Mapping[Hashable, Union[str, int]] ] +# Arguments for nsmallest and nlargest +NsmallestNlargestKeep = Literal["first", "last", "all"] + # Arguments for fillna() FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"] @@ -372,9 +377,29 @@ def closed(self) -> bool: # merge MergeHow = Literal["left", "right", "inner", "outer", "cross"] +MergeValidate = Literal[ + "one_to_one", + "1:1", + "one_to_many", + "1:m", + "many_to_one", + "m:1", + "many_to_many", + "m:m", +] # join JoinHow = Literal["left", "right", "inner", "outer"] +JoinValidate = Literal[ + "one_to_one", + "1:1", + "one_to_many", + "1:m", + "many_to_one", + "m:1", + "many_to_many", + "m:m", +] MatplotlibColor = Union[str, Sequence[float]] TimeGrouperOrigin = Union[ @@ -390,3 +415,18 @@ def closed(self) -> bool: ] AlignJoin = Literal["outer", "inner", "left", "right"] DtypeBackend = Literal["pyarrow", "numpy_nullable"] + +# update +UpdateJoin = Literal["left"] + +# applymap +NaAction = Literal["None", "ignore"] + +# from_dict +FromDictOrient = Literal["columns", "index", "tight"] + +# to_gbq +ToGbqIfexist = Literal["fail", "replace", "append"] + +# to_stata +ToStataByteorder = Literal[">", "<", "little", "big"] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5341b87c39676..085e2c6eb2182 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -219,12 +219,17 @@ FloatFormatType, FormattersType, Frequency, + FromDictOrient, IgnoreRaise, IndexKeyFunc, IndexLabel, + JoinValidate, Level, MergeHow, + MergeValidate, + NaAction, NaPosition, + NsmallestNlargestKeep, PythonFuncType, QuantileInterpolation, ReadBuffer, @@ -234,6 +239,10 @@ SortKind, StorageOptions, Suffixes, + ToGbqIfexist, + ToStataByteorder, + ToTimestampHow, + UpdateJoin, ValueKeyFunc, WriteBuffer, 
npt, @@ -1637,7 +1646,7 @@ def __rmatmul__(self, other) -> DataFrame: def from_dict( cls, data: dict, - orient: str = "columns", + orient: FromDictOrient = "columns", dtype: Dtype | None = None, columns: Axes | None = None, ) -> DataFrame: @@ -1981,7 +1990,7 @@ def to_gbq( project_id: str | None = None, chunksize: int | None = None, reauth: bool = False, - if_exists: str = "fail", + if_exists: ToGbqIfexist = "fail", auth_local_webserver: bool = True, table_schema: list[dict[str, str]] | None = None, location: str | None = None, @@ -2535,7 +2544,7 @@ def to_stata( *, convert_dates: dict[Hashable, str] | None = None, write_index: bool = True, - byteorder: str | None = None, + byteorder: ToStataByteorder | None = None, time_stamp: datetime.datetime | None = None, data_label: str | None = None, variable_labels: dict[Hashable, str] | None = None, @@ -6521,8 +6530,8 @@ def sort_values( axis: Axis = ..., ascending=..., inplace: Literal[False] = ..., - kind: str = ..., - na_position: str = ..., + kind: SortKind = ..., + na_position: NaPosition = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., ) -> DataFrame: @@ -7077,7 +7086,9 @@ def value_counts( return counts - def nlargest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame: + def nlargest( + self, n: int, columns: IndexLabel, keep: NsmallestNlargestKeep = "first" + ) -> DataFrame: """ Return the first `n` rows ordered by `columns` in descending order. @@ -7184,7 +7195,9 @@ def nlargest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFram """ return selectn.SelectNFrame(self, n=n, keep=keep, columns=columns).nlargest() - def nsmallest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame: + def nsmallest( + self, n: int, columns: IndexLabel, keep: NsmallestNlargestKeep = "first" + ) -> DataFrame: """ Return the first `n` rows ordered by `columns` in ascending order. 
@@ -8348,10 +8361,10 @@ def combiner(x, y): def update( self, other, - join: str = "left", + join: UpdateJoin = "left", overwrite: bool = True, filter_func=None, - errors: str = "ignore", + errors: IgnoreRaise = "ignore", ) -> None: """ Modify in place using non-NA values from another DataFrame. @@ -9857,7 +9870,7 @@ def infer(x): return self.apply(infer).__finalize__(self, "map") def applymap( - self, func: PythonFuncType, na_action: str | None = None, **kwargs + self, func: PythonFuncType, na_action: NaAction = None, **kwargs ) -> DataFrame: """ Apply a function to a Dataframe elementwise. @@ -9969,7 +9982,7 @@ def join( lsuffix: str = "", rsuffix: str = "", sort: bool = False, - validate: str | None = None, + validate: JoinValidate | None = None, ) -> DataFrame: """ Join columns of another DataFrame. @@ -10211,7 +10224,7 @@ def merge( suffixes: Suffixes = ("_x", "_y"), copy: bool | None = None, indicator: str | bool = False, - validate: str | None = None, + validate: MergeValidate | None = None, ) -> DataFrame: from pandas.core.reshape.merge import merge @@ -11506,7 +11519,7 @@ def quantile( def to_timestamp( self, freq: Frequency | None = None, - how: str = "start", + how: ToTimestampHow = "start", axis: Axis = 0, copy: bool | None = None, ) -> DataFrame: