From fc65e3a40ce34ae6405f48012e55d2f9d08574e7 Mon Sep 17 00:00:00 2001
From: benedikt-mangold <48798074+benedikt-mangold@users.noreply.github.com>
Date: Tue, 18 Apr 2023 16:37:14 +0200
Subject: [PATCH] undo changes and adding None as an optional argument type for
 validate argument of join and merge method

Change byteorder argument typing for to_stata method to literal, added definition in pandas/_typing.py

Change if_exists argument typing for to_gbq method to literal, added definition in pandas/_typing.py

Change orient argument typing for from_dict method to literal, added definition in pandas/_typing.py

Change how argument typing for to_timestamp method to literal, added definition in pandas/_typing.py

Change validate argument typing for merge and join methods to literal, added definition in pandas/_typing.py

Change na_action arguments typing for applymap method to literal, added definition in pandas/_typing.py

Change join and errors arguments typing for update method to literal, added definition in pandas/_typing.py

Change keep argument typing for nlargest and nsmallest to literal, added definition in pandas/_typing.py

Specify the kind and na_position more precisely in sort_values, reusing type definitions in pandas/_typing.py
---
 pandas/_typing.py    | 40 ++++++++++++++++++++++++++++++++++++++++
 pandas/core/frame.py | 39 ++++++++++++++++++++++++++-------------
 2 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index de02a549856ab..0dfe9d2b7d0b5 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -132,6 +132,8 @@
 ]
 Timezone = Union[str, tzinfo]
 
+ToTimestampHow = Literal["s", "e", "start", "end"]
+
 # NDFrameT is stricter and ensures that the same subclass of NDFrame always is
 # used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a
 # Series is passed into a function, a Series is always returned and if a DataFrame is
@@ -303,6 +305,9 @@ def closed(self) -> bool:
     str, int, Sequence[Union[str, int]], Mapping[Hashable, Union[str, int]]
 ]
 
+# Arguments for nsmallest() and nlargest()
+NsmallestNlargestKeep = Literal["first", "last", "all"]
+
 # Arguments for fillna()
 FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
 
@@ -372,9 +377,29 @@ def closed(self) -> bool:
 
 # merge
 MergeHow = Literal["left", "right", "inner", "outer", "cross"]
+MergeValidate = Literal[
+    "one_to_one",
+    "1:1",
+    "one_to_many",
+    "1:m",
+    "many_to_one",
+    "m:1",
+    "many_to_many",
+    "m:m",
+]
 
 # join
 JoinHow = Literal["left", "right", "inner", "outer"]
+JoinValidate = Literal[
+    "one_to_one",
+    "1:1",
+    "one_to_many",
+    "1:m",
+    "many_to_one",
+    "m:1",
+    "many_to_many",
+    "m:m",
+]
 
 MatplotlibColor = Union[str, Sequence[float]]
 TimeGrouperOrigin = Union[
@@ -390,3 +415,18 @@ def closed(self) -> bool:
 ]
 AlignJoin = Literal["outer", "inner", "left", "right"]
 DtypeBackend = Literal["pyarrow", "numpy_nullable"]
+
+# update
+UpdateJoin = Literal["left"]
+
+# applymap: non-None values of na_action (None itself is not a string literal)
+NaAction = Literal["ignore"]
+
+# from_dict
+FromDictOrient = Literal["columns", "index", "tight"]
+
+# to_gbq
+ToGbqIfexist = Literal["fail", "replace", "append"]
+
+# to_stata
+ToStataByteorder = Literal[">", "<", "little", "big"]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5341b87c39676..085e2c6eb2182 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -219,12 +219,17 @@
         FloatFormatType,
         FormattersType,
         Frequency,
+        FromDictOrient,
         IgnoreRaise,
         IndexKeyFunc,
         IndexLabel,
+        JoinValidate,
         Level,
         MergeHow,
+        MergeValidate,
+        NaAction,
         NaPosition,
+        NsmallestNlargestKeep,
         PythonFuncType,
         QuantileInterpolation,
         ReadBuffer,
@@ -234,6 +239,10 @@
         SortKind,
         StorageOptions,
         Suffixes,
+        ToGbqIfexist,
+        ToStataByteorder,
+        ToTimestampHow,
+        UpdateJoin,
         ValueKeyFunc,
         WriteBuffer,
         npt,
@@ -1637,7 +1646,7 @@ def __rmatmul__(self, other) -> DataFrame:
     def from_dict(
         cls,
         data: dict,
-        orient: str = "columns",
+        orient: FromDictOrient = "columns",
         dtype: Dtype | None = None,
         columns: Axes | None = None,
     ) -> DataFrame:
@@ -1981,7 +1990,7 @@ def to_gbq(
         project_id: str | None = None,
         chunksize: int | None = None,
         reauth: bool = False,
-        if_exists: str = "fail",
+        if_exists: ToGbqIfexist = "fail",
         auth_local_webserver: bool = True,
         table_schema: list[dict[str, str]] | None = None,
         location: str | None = None,
@@ -2535,7 +2544,7 @@ def to_stata(
         *,
         convert_dates: dict[Hashable, str] | None = None,
         write_index: bool = True,
-        byteorder: str | None = None,
+        byteorder: ToStataByteorder | None = None,
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
@@ -6521,8 +6530,8 @@ def sort_values(
         axis: Axis = ...,
         ascending=...,
         inplace: Literal[False] = ...,
-        kind: str = ...,
-        na_position: str = ...,
+        kind: SortKind = ...,
+        na_position: NaPosition = ...,
         ignore_index: bool = ...,
         key: ValueKeyFunc = ...,
     ) -> DataFrame:
@@ -7077,7 +7086,9 @@ def value_counts(
 
         return counts
 
-    def nlargest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame:
+    def nlargest(
+        self, n: int, columns: IndexLabel, keep: NsmallestNlargestKeep = "first"
+    ) -> DataFrame:
         """
         Return the first `n` rows ordered by `columns` in descending order.
 
@@ -7184,7 +7195,9 @@ def nlargest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFram
         """
         return selectn.SelectNFrame(self, n=n, keep=keep, columns=columns).nlargest()
 
-    def nsmallest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame:
+    def nsmallest(
+        self, n: int, columns: IndexLabel, keep: NsmallestNlargestKeep = "first"
+    ) -> DataFrame:
         """
         Return the first `n` rows ordered by `columns` in ascending order.
 
@@ -8348,10 +8361,10 @@ def combiner(x, y):
     def update(
         self,
         other,
-        join: str = "left",
+        join: UpdateJoin = "left",
         overwrite: bool = True,
         filter_func=None,
-        errors: str = "ignore",
+        errors: IgnoreRaise = "ignore",
     ) -> None:
         """
         Modify in place using non-NA values from another DataFrame.
@@ -9857,7 +9870,7 @@ def infer(x):
         return self.apply(infer).__finalize__(self, "map")
 
     def applymap(
-        self, func: PythonFuncType, na_action: str | None = None, **kwargs
+        self, func: PythonFuncType, na_action: NaAction | None = None, **kwargs
     ) -> DataFrame:
         """
         Apply a function to a Dataframe elementwise.
@@ -9969,7 +9982,7 @@ def join(
         lsuffix: str = "",
         rsuffix: str = "",
         sort: bool = False,
-        validate: str | None = None,
+        validate: JoinValidate | None = None,
     ) -> DataFrame:
         """
         Join columns of another DataFrame.
@@ -10211,7 +10224,7 @@ def merge(
         suffixes: Suffixes = ("_x", "_y"),
         copy: bool | None = None,
         indicator: str | bool = False,
-        validate: str | None = None,
+        validate: MergeValidate | None = None,
     ) -> DataFrame:
         from pandas.core.reshape.merge import merge
 
@@ -11506,7 +11519,7 @@ def quantile(
     def to_timestamp(
         self,
         freq: Frequency | None = None,
-        how: str = "start",
+        how: ToTimestampHow = "start",
         axis: Axis = 0,
         copy: bool | None = None,
     ) -> DataFrame: