From 48c6ce6a29eb2265c481cb416e6a33e4998e0970 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Nov 2021 12:04:41 +0100 Subject: [PATCH 1/2] ASV: reduce overall run time for GroupByMethods benchmarks --- asv_bench/benchmarks/groupby.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 86322661a4e8a..62bdadc17e72e 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -443,7 +443,7 @@ class GroupByMethods: "var", ], ["direct", "transformation"], - [1, 2, 5, 10], + [1, 5], ] def setup(self, dtype, method, application, ncols): @@ -455,6 +455,7 @@ def setup(self, dtype, method, application, ncols): raise NotImplementedError if application == "transformation" and method in [ + "describe", "head", "tail", "unique", @@ -464,7 +465,12 @@ def setup(self, dtype, method, application, ncols): # DataFrameGroupBy doesn't have these methods raise NotImplementedError - ngroups = 1000 + if method == "describe" and ncols == 5: + ngroups = 20 + elif method in ["describe", "mad", "skew"]: + ngroups = 100 + else: + ngroups = 1000 size = ngroups * 2 rng = np.arange(ngroups).reshape(-1, 1) rng = np.broadcast_to(rng, (len(rng), ncols)) @@ -491,9 +497,6 @@ def setup(self, dtype, method, application, ncols): cols = cols[0] if application == "transformation": - if method == "describe": - raise NotImplementedError - self.as_group_method = lambda: df.groupby("key")[cols].transform(method) self.as_field_method = lambda: df.groupby(cols)["key"].transform(method) else: From 41c810d01fd6185884fe3184475f9a868d2df3c5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 25 Nov 2021 20:22:11 +0100 Subject: [PATCH 2/2] ngroups=20 for all describe --- asv_bench/benchmarks/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 62bdadc17e72e..ff58e382a9ba2 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -465,9 +465,9 @@ def setup(self, dtype, method, application, ncols): # DataFrameGroupBy doesn't have these methods raise NotImplementedError - if method == "describe" and ncols == 5: + if method == "describe": ngroups = 20 - elif method in ["describe", "mad", "skew"]: + elif method in ["mad", "skew"]: ngroups = 100 else: ngroups = 1000