From 4dbb97758e6833a872982bb6cb95960aa08f5f52 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 9 Dec 2022 17:15:09 +0100
Subject: [PATCH 1/7] DEV: remove downstream test packages from environment.yml

---
 environment.yml                         | 18 ++++++------------
 requirements-dev.txt                    | 14 ++++----------
 scripts/generate_pip_deps_from_conda.py |  2 +-
 3 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/environment.yml b/environment.yml
index 70884f4ca98a3..640e86045abdf 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,7 +3,7 @@ name: pandas-dev
 channels:
   - conda-forge
 dependencies:
-  - python=3.8
+  - python=3.10
   - pip
 
   # build dependencies
@@ -17,6 +17,7 @@ dependencies:
   - psutil
   - pytest-asyncio>=0.17
   - boto3
+  - coverage
 
   # required dependencies
   - python-dateutil
@@ -27,12 +28,14 @@ dependencies:
   - beautifulsoup4
   - blosc
   - brotlipy
+  - botocore
   - bottleneck
   - fastparquet
   - fsspec
   - html5lib
   - hypothesis
   - gcsfs
+  - ipython
   - jinja2
   - lxml
   - matplotlib
@@ -41,6 +44,7 @@ dependencies:
   - openpyxl
   - odfpy
   - pandas-gbq
+  - py
   - psycopg2
   - pyarrow<10
   - pymysql
@@ -60,17 +64,7 @@ dependencies:
 
   # downstream packages
   - aiobotocore<2.0.0  # GH#44311 pinned to fix docbuild
-  - botocore
-  - cftime
-  - dask
-  - ipython
-  - seaborn
-  - scikit-learn
-  - statsmodels
-  - coverage
-  - pandas-datareader
-  - pyyaml
-  - py
+  - dask-core
 
   # local testing dependencies
   - moto
diff --git a/requirements-dev.txt b/requirements-dev.txt
index caa3dd49add3b..8f4039ba9f665 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -10,18 +10,21 @@ pytest-xdist>=1.31
 psutil
 pytest-asyncio>=0.17
 boto3
+coverage
 python-dateutil
 numpy
 pytz
 beautifulsoup4
 blosc
 brotlipy
+botocore
 bottleneck
 fastparquet
 fsspec
 html5lib
 hypothesis
 gcsfs
+ipython
 jinja2
 lxml
 matplotlib
@@ -30,6 +33,7 @@ numexpr>=2.8.0
 openpyxl
 odfpy
 pandas-gbq
+py
 psycopg2-binary
 pyarrow<10
 pymysql
@@ -47,17 +51,7 @@ xlrd
 xlsxwriter
 zstandard
 aiobotocore<2.0.0
-botocore
-cftime
 dask
-ipython
-seaborn
-scikit-learn
-statsmodels
-coverage
-pandas-datareader
-pyyaml
-py
 moto
 flask
 asv
diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py
index f25ac9a24b98b..8c2b0111949d2 100755
--- a/scripts/generate_pip_deps_from_conda.py
+++ b/scripts/generate_pip_deps_from_conda.py
@@ -24,8 +24,8 @@
 REMAP_VERSION = {"tzdata": "2022.1"}
 RENAME = {
     "pytables": "tables",
-    "geopandas-base": "geopandas",
     "psycopg2": "psycopg2-binary",
+    "dask-core": "dask",
 }
 
 

From 630d17ea7066b171f709969bde4823dcce852ddd Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 9 Dec 2022 20:22:01 +0100
Subject: [PATCH 2/7] undo python change, add seaborn-base

---
 environment.yml                         | 3 ++-
 requirements-dev.txt                    | 1 +
 scripts/generate_pip_deps_from_conda.py | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 640e86045abdf..c3a07080db55a 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,7 +3,7 @@ name: pandas-dev
 channels:
   - conda-forge
 dependencies:
-  - python=3.10
+  - python=3.8
   - pip
 
   # build dependencies
@@ -65,6 +65,7 @@ dependencies:
   # downstream packages
   - aiobotocore<2.0.0  # GH#44311 pinned to fix docbuild
   - dask-core
+  - seaborn-base
 
   # local testing dependencies
   - moto
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 8f4039ba9f665..a6baa4cd9f004 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -52,6 +52,7 @@ xlsxwriter
 zstandard
 aiobotocore<2.0.0
 dask
+sseaborn
 moto
 flask
 asv
diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py
index 8c2b0111949d2..fba53a7c171dd 100755
--- a/scripts/generate_pip_deps_from_conda.py
+++ b/scripts/generate_pip_deps_from_conda.py
@@ -26,6 +26,7 @@
     "pytables": "tables",
     "psycopg2": "psycopg2-binary",
     "dask-core": "dask",
+    "seaborn-base": "sseaborn",
 }
 
 

From fcac8febfc02bead2e89a17a0ac5c58c5c7de95f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 9 Dec 2022 21:19:41 +0100
Subject: [PATCH 3/7] fix typo: sseaborn -> seaborn in requirements-dev.txt and RENAME map

---
 requirements-dev.txt                    | 2 +-
 scripts/generate_pip_deps_from_conda.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index a6baa4cd9f004..ff83e2b985874 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -52,7 +52,7 @@ xlsxwriter
 zstandard
 aiobotocore<2.0.0
 dask
-sseaborn
+seaborn
 moto
 flask
 asv
diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py
index fba53a7c171dd..8190104428724 100755
--- a/scripts/generate_pip_deps_from_conda.py
+++ b/scripts/generate_pip_deps_from_conda.py
@@ -26,7 +26,7 @@
     "pytables": "tables",
     "psycopg2": "psycopg2-binary",
     "dask-core": "dask",
-    "seaborn-base": "sseaborn",
+    "seaborn-base": "seaborn",
 }
 
 

From b1c70c719f4d5c3c0f3f2fbec95d3ecff4b38721 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sat, 10 Dec 2022 20:02:50 +0000
Subject: [PATCH 4/7] use plain code block for statsmodels whatsnew note

---
 doc/source/whatsnew/v0.16.2.rst | 54 ++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.2.rst b/doc/source/whatsnew/v0.16.2.rst
index c6c134a383e11..ba20c3ba9ac43 100644
--- a/doc/source/whatsnew/v0.16.2.rst
+++ b/doc/source/whatsnew/v0.16.2.rst
@@ -61,21 +61,45 @@ In the example above, the functions ``f``, ``g``, and ``h`` each expected the Da
 When the function you wish to apply takes its data anywhere other than the first argument, pass a tuple
 of ``(function, keyword)`` indicating where the DataFrame should flow. For example:
 
-.. ipython:: python
-   :okwarning:
-
-   import statsmodels.formula.api as sm
-
-   bb = pd.read_csv("data/baseball.csv", index_col="id")
-
-   # sm.ols takes (formula, data)
-   (
-       bb.query("h > 0")
-       .assign(ln_h=lambda df: np.log(df.h))
-       .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
-       .fit()
-       .summary()
-   )
+.. code-block:: ipython
+
+    In [1]: import statsmodels.formula.api as sm
+
+    In [2]: bb = pd.read_csv('data/baseball.csv', index_col='id')
+
+    # sm.poisson takes (formula, data)
+    In [3]: (bb.query('h > 0')
+    ...:    .assign(ln_h = lambda df: np.log(df.h))
+    ...:    .pipe((sm.poisson, 'data'), 'hr ~ ln_h + year + g + C(lg)')
+    ...:    .fit()
+    ...:    .summary()
+    ...: )
+    ...:
+    Optimization terminated successfully.
+            Current function value: 2.116284
+            Iterations 24
+    Out[3]:
+    <class 'statsmodels.iolib.summary.Summary'>
+    """
+                            Poisson Regression Results
+    ==============================================================================
+    Dep. Variable:                     hr   No. Observations:                   68
+    Model:                        Poisson   Df Residuals:                       63
+    Method:                           MLE   Df Model:                            4
+    Date:                Sat, 13 Jun 2015   Pseudo R-squ.:                  0.6878
+    Time:                        15:07:13   Log-Likelihood:                -143.91
+    converged:                       True   LL-Null:                       -460.91
+                                            LLR p-value:                6.774e-136
+    ===============================================================================
+                    coef    std err          z      P>|z|      [95.0% Conf. Int.]
+    -------------------------------------------------------------------------------
+    Intercept   -1267.3636    457.867     -2.768      0.006     -2164.767  -369.960
+    C(lg)[T.NL]    -0.2057      0.101     -2.044      0.041        -0.403    -0.008
+    ln_h            0.9280      0.191      4.866      0.000         0.554     1.302
+    year            0.6301      0.228      2.762      0.006         0.183     1.077
+    g               0.0099      0.004      2.754      0.006         0.003     0.017
+    ===============================================================================
+    """
 
 The pipe method is inspired by unix pipes, which stream text through
 processes. More recently dplyr_ and magrittr_ have introduced the

From a16e793bc9a789b9737f67687c991705d61d3fdd Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 11 Dec 2022 11:22:23 +0000
Subject: [PATCH 5/7] use code-block in user guide

---
 doc/source/user_guide/basics.rst | 60 +++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
index 2204c8b04e438..9755e008a2b20 100644
--- a/doc/source/user_guide/basics.rst
+++ b/doc/source/user_guide/basics.rst
@@ -827,20 +827,54 @@ In this case, provide ``pipe`` with a tuple of ``(callable, data_keyword)``.
 
 For example, we can fit a regression using statsmodels. Their API expects a formula first and a ``DataFrame`` as the second argument, ``data``. We pass in the function, keyword pair ``(sm.ols, 'data')`` to ``pipe``:
 
-.. ipython:: python
-   :okwarning:
-
-   import statsmodels.formula.api as sm
-
-   bb = pd.read_csv("data/baseball.csv", index_col="id")
+.. code-block:: ipython
 
-   (
-       bb.query("h > 0")
-       .assign(ln_h=lambda df: np.log(df.h))
-       .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
-       .fit()
-       .summary()
-   )
+   In [147]: import statsmodels.formula.api as sm
+
+   In [148]: bb = pd.read_csv("data/baseball.csv", index_col="id")
+
+   In [149]: (
+      .....:     bb.query("h > 0")
+      .....:     .assign(ln_h=lambda df: np.log(df.h))
+      .....:     .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
+      .....:     .fit()
+      .....:     .summary()
+      .....: )
+      .....:
+   Out[149]:
+   <class 'statsmodels.iolib.summary.Summary'>
+   """
+                              OLS Regression Results
+   ==============================================================================
+   Dep. Variable:                     hr   R-squared:                       0.685
+   Model:                            OLS   Adj. R-squared:                  0.665
+   Method:                 Least Squares   F-statistic:                     34.28
+   Date:                Tue, 22 Nov 2022   Prob (F-statistic):           3.48e-15
+   Time:                        05:34:17   Log-Likelihood:                -205.92
+   No. Observations:                  68   AIC:                             421.8
+   Df Residuals:                      63   BIC:                             432.9
+   Df Model:                           4
+   Covariance Type:            nonrobust
+   ===============================================================================
+                     coef    std err          t      P>|t|      [0.025      0.975]
+   -------------------------------------------------------------------------------
+   Intercept   -8484.7720   4664.146     -1.819      0.074   -1.78e+04     835.780
+   C(lg)[T.NL]    -2.2736      1.325     -1.716      0.091      -4.922       0.375
+   ln_h           -1.3542      0.875     -1.547      0.127      -3.103       0.395
+   year            4.2277      2.324      1.819      0.074      -0.417       8.872
+   g               0.1841      0.029      6.258      0.000       0.125       0.243
+   ==============================================================================
+   Omnibus:                       10.875   Durbin-Watson:                   1.999
+   Prob(Omnibus):                  0.004   Jarque-Bera (JB):               17.298
+   Skew:                           0.537   Prob(JB):                     0.000175
+   Kurtosis:                       5.225   Cond. No.                     1.49e+07
+   ==============================================================================
+
+   Notes:
+   [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
+   [2] The condition number is large, 1.49e+07. This might indicate that there are
+   strong multicollinearity or other numerical problems.
+   """
 
 The pipe method is inspired by unix pipes and more recently dplyr_ and magrittr_, which
 have introduced the popular ``(%>%)`` (read pipe) operator for R_.

From 4194f5bfbbd53a4bb067c4a4d7a90f908b7dc435 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 11 Dec 2022 13:06:53 +0000
Subject: [PATCH 6/7] fixup 0.16.2 whatsnew: restore the sm.ols example and its output

---
 doc/source/whatsnew/v0.16.2.rst | 62 +++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.2.rst b/doc/source/whatsnew/v0.16.2.rst
index ba20c3ba9ac43..ef73c4b092fc1 100644
--- a/doc/source/whatsnew/v0.16.2.rst
+++ b/doc/source/whatsnew/v0.16.2.rst
@@ -65,40 +65,50 @@ of ``(function, keyword)`` indicating where the DataFrame should flow. For examp
 
     In [1]: import statsmodels.formula.api as sm
 
-    In [2]: bb = pd.read_csv('data/baseball.csv', index_col='id')
-
-    # sm.poisson takes (formula, data)
-    In [3]: (bb.query('h > 0')
-    ...:    .assign(ln_h = lambda df: np.log(df.h))
-    ...:    .pipe((sm.poisson, 'data'), 'hr ~ ln_h + year + g + C(lg)')
-    ...:    .fit()
-    ...:    .summary()
+    In [2]: bb = pd.read_csv("data/baseball.csv", index_col="id")
+
+    # sm.ols takes (formula, data)
+    In [3]: (
+    ...:     bb.query("h > 0")
+    ...:     .assign(ln_h=lambda df: np.log(df.h))
+    ...:     .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
+    ...:     .fit()
+    ...:     .summary()
     ...: )
     ...:
-    Optimization terminated successfully.
-            Current function value: 2.116284
-            Iterations 24
     Out[3]:
     <class 'statsmodels.iolib.summary.Summary'>
     """
-                            Poisson Regression Results
+                                OLS Regression Results
     ==============================================================================
-    Dep. Variable:                     hr   No. Observations:                   68
-    Model:                        Poisson   Df Residuals:                       63
-    Method:                           MLE   Df Model:                            4
-    Date:                Sat, 13 Jun 2015   Pseudo R-squ.:                  0.6878
-    Time:                        15:07:13   Log-Likelihood:                -143.91
-    converged:                       True   LL-Null:                       -460.91
-                                            LLR p-value:                6.774e-136
+    Dep. Variable:                     hr   R-squared:                       0.685
+    Model:                            OLS   Adj. R-squared:                  0.665
+    Method:                 Least Squares   F-statistic:                     34.28
+    Date:                Tue, 22 Nov 2022   Prob (F-statistic):           3.48e-15
+    Time:                        05:35:23   Log-Likelihood:                -205.92
+    No. Observations:                  68   AIC:                             421.8
+    Df Residuals:                      63   BIC:                             432.9
+    Df Model:                           4
+    Covariance Type:            nonrobust
     ===============================================================================
-                    coef    std err          z      P>|z|      [95.0% Conf. Int.]
+                    coef    std err          t      P>|t|      [0.025      0.975]
     -------------------------------------------------------------------------------
-    Intercept   -1267.3636    457.867     -2.768      0.006     -2164.767  -369.960
-    C(lg)[T.NL]    -0.2057      0.101     -2.044      0.041        -0.403    -0.008
-    ln_h            0.9280      0.191      4.866      0.000         0.554     1.302
-    year            0.6301      0.228      2.762      0.006         0.183     1.077
-    g               0.0099      0.004      2.754      0.006         0.003     0.017
-    ===============================================================================
+    Intercept   -8484.7720   4664.146     -1.819      0.074   -1.78e+04     835.780
+    C(lg)[T.NL]    -2.2736      1.325     -1.716      0.091      -4.922       0.375
+    ln_h           -1.3542      0.875     -1.547      0.127      -3.103       0.395
+    year            4.2277      2.324      1.819      0.074      -0.417       8.872
+    g               0.1841      0.029      6.258      0.000       0.125       0.243
+    ==============================================================================
+    Omnibus:                       10.875   Durbin-Watson:                   1.999
+    Prob(Omnibus):                  0.004   Jarque-Bera (JB):               17.298
+    Skew:                           0.537   Prob(JB):                     0.000175
+    Kurtosis:                       5.225   Cond. No.                     1.49e+07
+    ==============================================================================
+
+    Notes:
+    [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
+    [2] The condition number is large, 1.49e+07. This might indicate that there are
+    strong multicollinearity or other numerical problems.
     """
 
 The pipe method is inspired by unix pipes, which stream text through

From 8a71912730dff05b2d684c530908209db39d8dac Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 11 Dec 2022 20:59:55 +0100
Subject: [PATCH 7/7] also remove pandas-gbq

---
 ci/deps/actions-38-downstream_compat.yaml | 2 +-
 environment.yml                           | 1 -
 requirements-dev.txt                      | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml
index 15ce02204ee99..fcd453b9d9fee 100644
--- a/ci/deps/actions-38-downstream_compat.yaml
+++ b/ci/deps/actions-38-downstream_compat.yaml
@@ -39,7 +39,6 @@ dependencies:
   - numexpr
   - openpyxl
   - odfpy
-  - pandas-gbq
   - psycopg2
   - pyarrow<10
   - pymysql
@@ -68,5 +67,6 @@ dependencies:
   - statsmodels
   - coverage
   - pandas-datareader
+  - pandas-gbq
   - pyyaml
   - py
diff --git a/environment.yml b/environment.yml
index c3a07080db55a..5a26f8fd1520c 100644
--- a/environment.yml
+++ b/environment.yml
@@ -43,7 +43,6 @@ dependencies:
   - numexpr>=2.8.0  # pin for "Run checks on imported code" job
   - openpyxl
   - odfpy
-  - pandas-gbq
   - py
   - psycopg2
   - pyarrow<10
diff --git a/requirements-dev.txt b/requirements-dev.txt
index ff83e2b985874..f6378ddd2e18d 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -32,7 +32,6 @@ numba>=0.53.1
 numexpr>=2.8.0
 openpyxl
 odfpy
-pandas-gbq
 py
 psycopg2-binary
 pyarrow<10