
Commit 1875336

add slt
1 parent 16bdac4 commit 1875336

5 files changed: +184 -4 lines changed

datafusion/physical-expr/src/aggregate.rs

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ use crate::expressions::Column;
 
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use arrow_schema::SortOptions;
-use datafusion_common::{internal_err, not_impl_err, ScalarValue, Result};
+use datafusion_common::{internal_err, not_impl_err, Result, ScalarValue};
 use datafusion_expr::{AggregateExprSetMonotonicity, AggregateUDF, ReversedUDAF};
 use datafusion_expr_common::accumulator::Accumulator;
 use datafusion_expr_common::groups_accumulator::GroupsAccumulator;

datafusion/physical-expr/src/window/aggregate.rs

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ use crate::{reverse_order_bys, EquivalenceProperties, PhysicalExpr};
 use arrow::array::Array;
 use arrow::record_batch::RecordBatch;
 use arrow::{array::ArrayRef, datatypes::Field};
-use datafusion_common::{DataFusionError, ScalarValue, Result};
+use datafusion_common::{DataFusionError, Result, ScalarValue};
 use datafusion_expr::{Accumulator, WindowFrame};
 use datafusion_physical_expr_common::sort_expr::LexOrdering;
 

datafusion/physical-plan/src/aggregates/mod.rs

Lines changed: 2 additions & 2 deletions
@@ -512,7 +512,7 @@ impl AggregateExec {
             &group_expr_mapping,
             &mode,
             &input_order_mode,
-            aggr_expr.clone(),
+            aggr_expr.as_slice(),
         );
 
         Ok(AggregateExec {
@@ -649,7 +649,7 @@ impl AggregateExec {
         group_expr_mapping: &ProjectionMapping,
         mode: &AggregateMode,
        input_order_mode: &InputOrderMode,
-        aggr_exprs: Vec<Arc<AggregateFunctionExpr>>,
+        aggr_exprs: &[Arc<AggregateFunctionExpr>],
     ) -> PlanProperties {
         // Construct equivalence properties:
         let mut eq_properties = input
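
The change above switches compute_properties from taking an owned Vec<Arc<AggregateFunctionExpr>> to borrowing &[Arc<AggregateFunctionExpr>], so the call site passes aggr_expr.as_slice() instead of aggr_expr.clone(). Below is a minimal standalone sketch of the same borrow-a-slice pattern; the struct and function bodies are illustrative stand-ins, not DataFusion's actual types or logic.

use std::sync::Arc;

// Illustrative stand-in only; not the DataFusion type.
struct AggregateFunctionExpr {
    name: String,
}

// Borrowing a slice lets the caller keep ownership of its Vec and avoids
// cloning the whole Vec (and bumping every Arc refcount) just to read it.
fn compute_properties(aggr_exprs: &[Arc<AggregateFunctionExpr>]) -> usize {
    aggr_exprs.iter().filter(|e| e.name.starts_with("sum")).count()
}

fn main() {
    let aggr_expr = vec![
        Arc::new(AggregateFunctionExpr { name: "sum(c9)".to_string() }),
        Arc::new(AggregateFunctionExpr { name: "min(c5)".to_string() }),
    ];
    // Previously: compute_properties(aggr_expr.clone()) allocated a new Vec.
    let n = compute_properties(aggr_expr.as_slice());
    println!("{n} sum exprs; original Vec still owned: {}", aggr_expr.len());
}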

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 94 additions & 0 deletions
@@ -6203,3 +6203,97 @@ physical_plan
 14)--------------PlaceholderRowExec
 15)------------ProjectionExec: expr=[1 as id, 2 as foo]
 16)--------------PlaceholderRowExec
+
+
+# Set-Monotonic Aggregate functions can output results in order
+statement ok
+CREATE EXTERNAL TABLE aggregate_test_100_ordered (
+  c1 VARCHAR NOT NULL,
+  c2 TINYINT NOT NULL,
+  c3 SMALLINT NOT NULL,
+  c4 SMALLINT,
+  c5 INT,
+  c6 BIGINT NOT NULL,
+  c7 SMALLINT NOT NULL,
+  c8 INT NOT NULL,
+  c9 INT UNSIGNED NOT NULL,
+  c10 BIGINT UNSIGNED NOT NULL,
+  c11 FLOAT NOT NULL,
+  c12 DOUBLE NOT NULL,
+  c13 VARCHAR NOT NULL
+)
+STORED AS CSV
+LOCATION '../../testing/data/csv/aggregate_test_100.csv'
+WITH ORDER (c1)
+OPTIONS ('format.has_header' 'true');
+
+statement ok
+set datafusion.optimizer.prefer_existing_sort = true;
+
+query TT
+EXPLAIN SELECT c1, SUM(c9) as sum_c9 FROM aggregate_test_100_ordered GROUP BY c1 ORDER BY c1, sum_c9;
+----
+logical_plan
+01)Sort: aggregate_test_100_ordered.c1 ASC NULLS LAST, sum_c9 ASC NULLS LAST
+02)--Projection: aggregate_test_100_ordered.c1, sum(aggregate_test_100_ordered.c9) AS sum_c9
+03)----Aggregate: groupBy=[[aggregate_test_100_ordered.c1]], aggr=[[sum(CAST(aggregate_test_100_ordered.c9 AS UInt64))]]
+04)------TableScan: aggregate_test_100_ordered projection=[c1, c9]
+physical_plan
+01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, sum_c9@1 ASC NULLS LAST]
+02)--ProjectionExec: expr=[c1@0 as c1, sum(aggregate_test_100_ordered.c9)@1 as sum_c9]
+03)----AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(aggregate_test_100_ordered.c9)], ordering_mode=Sorted
+04)------CoalesceBatchesExec: target_batch_size=8192
+05)--------RepartitionExec: partitioning=Hash([c1@0], 4), input_partitions=4, preserve_order=true, sort_exprs=c1@0 ASC NULLS LAST, sum(aggregate_test_100_ordered.c9)@1 ASC NULLS LAST
+06)----------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[sum(aggregate_test_100_ordered.c9)], ordering_mode=Sorted
+07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
+
+query TT
+EXPLAIN SELECT SUM(c9) as sum_c9 FROM aggregate_test_100_ordered ORDER BY sum_c9;
+----
+logical_plan
+01)Sort: sum_c9 ASC NULLS LAST
+02)--Projection: sum(aggregate_test_100_ordered.c9) AS sum_c9
+03)----Aggregate: groupBy=[[]], aggr=[[sum(CAST(aggregate_test_100_ordered.c9 AS UInt64))]]
+04)------TableScan: aggregate_test_100_ordered projection=[c9]
+physical_plan
+01)ProjectionExec: expr=[sum(aggregate_test_100_ordered.c9)@0 as sum_c9]
+02)--AggregateExec: mode=Final, gby=[], aggr=[sum(aggregate_test_100_ordered.c9)]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(aggregate_test_100_ordered.c9)]
+05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
+
+query TT
+EXPLAIN SELECT c1, MIN(c5) as min_c5 FROM aggregate_test_100_ordered GROUP BY c1 ORDER BY c1, min_c5 DESC NULLS LAST;
+----
+logical_plan
+01)Sort: aggregate_test_100_ordered.c1 ASC NULLS LAST, min_c5 DESC NULLS LAST
+02)--Projection: aggregate_test_100_ordered.c1, min(aggregate_test_100_ordered.c5) AS min_c5
+03)----Aggregate: groupBy=[[aggregate_test_100_ordered.c1]], aggr=[[min(aggregate_test_100_ordered.c5)]]
+04)------TableScan: aggregate_test_100_ordered projection=[c1, c5]
+physical_plan
+01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, min_c5@1 DESC NULLS LAST]
+02)--ProjectionExec: expr=[c1@0 as c1, min(aggregate_test_100_ordered.c5)@1 as min_c5]
+03)----AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[min(aggregate_test_100_ordered.c5)], ordering_mode=Sorted
+04)------CoalesceBatchesExec: target_batch_size=8192
+05)--------RepartitionExec: partitioning=Hash([c1@0], 4), input_partitions=4, preserve_order=true, sort_exprs=c1@0 ASC NULLS LAST, min(aggregate_test_100_ordered.c5)@1 DESC NULLS LAST
+06)----------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[min(aggregate_test_100_ordered.c5)], ordering_mode=Sorted
+07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c5], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
+
+query TT
+EXPLAIN SELECT MAX(c5) as max_c5 FROM aggregate_test_100_ordered ORDER BY max_c5;
+----
+logical_plan
+01)Sort: max_c5 ASC NULLS LAST
+02)--Projection: max(aggregate_test_100_ordered.c5) AS max_c5
+03)----Aggregate: groupBy=[[]], aggr=[[max(aggregate_test_100_ordered.c5)]]
+04)------TableScan: aggregate_test_100_ordered projection=[c5]
+physical_plan
+01)ProjectionExec: expr=[max(aggregate_test_100_ordered.c5)@0 as max_c5]
+02)--AggregateExec: mode=Final, gby=[], aggr=[max(aggregate_test_100_ordered.c5)]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[max(aggregate_test_100_ordered.c5)]
+05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c5], has_header=true
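
These tests exercise what the comment calls set-monotonic aggregates: as rows are added to the input set, MAX and SUM over unsigned values can only grow, and MIN can only shrink. That is the intuition behind why the plans above can satisfy ORDER BY on sum_c9, min_c5, and max_c5 without inserting an extra SortExec. A small standalone Rust sketch of the property, using an ad-hoc sample column rather than the aggregate_test_100 data:

// Standalone illustration of set monotonicity (not DataFusion code):
// growing the input set never decreases MAX or an unsigned SUM, and
// never increases MIN.
fn main() {
    let c9: [u64; 6] = [42, 7, 19, 7, 100, 3];

    let mut prev_max = u64::MIN;
    let mut prev_min = u64::MAX;
    let mut prev_sum = 0u64;

    for end in 1..=c9.len() {
        let prefix = &c9[..end];
        let max = *prefix.iter().max().unwrap();
        let min = *prefix.iter().min().unwrap();
        let sum: u64 = prefix.iter().sum();

        // Set-monotonically increasing aggregates.
        assert!(max >= prev_max);
        assert!(sum >= prev_sum);
        // Set-monotonically decreasing aggregate.
        assert!(min <= prev_min);

        prev_max = max;
        prev_min = min;
        prev_sum = sum;
    }
    println!("MAX/SUM never decreased and MIN never increased as the set grew");
}

Presumably this directionality is also why the MIN test asks for min_c5 DESC NULLS LAST while the SUM and MAX tests order ascending.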

datafusion/sqllogictest/test_files/window.slt

Lines changed: 86 additions & 0 deletions
@@ -5452,3 +5452,89 @@ order by c1, c2, rank1, rank2;
 
 statement ok
 drop table t1;
+
+
+# Set-Monotonic Window Aggregate functions can output results in order
+statement ok
+CREATE EXTERNAL TABLE aggregate_test_100_ordered (
+  c1 VARCHAR NOT NULL,
+  c2 TINYINT NOT NULL,
+  c3 SMALLINT NOT NULL,
+  c4 SMALLINT,
+  c5 INT,
+  c6 BIGINT NOT NULL,
+  c7 SMALLINT NOT NULL,
+  c8 INT NOT NULL,
+  c9 INT UNSIGNED NOT NULL,
+  c10 BIGINT UNSIGNED NOT NULL,
+  c11 FLOAT NOT NULL,
+  c12 DOUBLE NOT NULL,
+  c13 VARCHAR NOT NULL
+)
+STORED AS CSV
+LOCATION '../../testing/data/csv/aggregate_test_100.csv'
+WITH ORDER (c1)
+OPTIONS ('format.has_header' 'true');
+
+statement ok
+set datafusion.optimizer.prefer_existing_sort = true;
+
+query TT
+EXPLAIN SELECT c1, SUM(c9) OVER(PARTITION BY c1) as sum_c9 FROM aggregate_test_100_ordered ORDER BY c1, sum_c9;
+----
+logical_plan
+01)Sort: aggregate_test_100_ordered.c1 ASC NULLS LAST, sum_c9 ASC NULLS LAST
+02)--Projection: aggregate_test_100_ordered.c1, sum(aggregate_test_100_ordered.c9) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS sum_c9
+03)----WindowAggr: windowExpr=[[sum(CAST(aggregate_test_100_ordered.c9 AS UInt64)) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+04)------TableScan: aggregate_test_100_ordered projection=[c1, c9]
+physical_plan
+01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, sum_c9@1 ASC NULLS LAST]
+02)--ProjectionExec: expr=[c1@0 as c1, sum(aggregate_test_100_ordered.c9) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as sum_c9]
+03)----WindowAggExec: wdw=[sum(aggregate_test_100_ordered.c9) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100_ordered.c9) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+04)------CoalesceBatchesExec: target_batch_size=1
+05)--------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2, preserve_order=true, sort_exprs=c1@0 ASC NULLS LAST
+06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
+
+query TT
+EXPLAIN SELECT SUM(c9) OVER() as sum_c9 FROM aggregate_test_100_ordered ORDER BY sum_c9;
+----
+logical_plan
+01)Sort: sum_c9 ASC NULLS LAST
+02)--Projection: sum(aggregate_test_100_ordered.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS sum_c9
+03)----WindowAggr: windowExpr=[[sum(CAST(aggregate_test_100_ordered.c9 AS UInt64)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+04)------TableScan: aggregate_test_100_ordered projection=[c9]
+physical_plan
+01)ProjectionExec: expr=[sum(aggregate_test_100_ordered.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as sum_c9]
+02)--WindowAggExec: wdw=[sum(aggregate_test_100_ordered.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100_ordered.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
+
+query TT
+EXPLAIN SELECT c1, MIN(c5) OVER(PARTITION BY c1) as min_c5 FROM aggregate_test_100_ordered ORDER BY c1, min_c5 DESC NULLS LAST;
+----
+logical_plan
+01)Sort: aggregate_test_100_ordered.c1 ASC NULLS LAST, min_c5 DESC NULLS LAST
+02)--Projection: aggregate_test_100_ordered.c1, min(aggregate_test_100_ordered.c5) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS min_c5
+03)----WindowAggr: windowExpr=[[min(aggregate_test_100_ordered.c5) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+04)------TableScan: aggregate_test_100_ordered projection=[c1, c5]
+physical_plan
+01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, min_c5@1 DESC NULLS LAST]
+02)--ProjectionExec: expr=[c1@0 as c1, min(aggregate_test_100_ordered.c5) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as min_c5]
+03)----WindowAggExec: wdw=[min(aggregate_test_100_ordered.c5) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "min(aggregate_test_100_ordered.c5) PARTITION BY [aggregate_test_100_ordered.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+04)------CoalesceBatchesExec: target_batch_size=1
+05)--------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2, preserve_order=true, sort_exprs=c1@0 ASC NULLS LAST
+06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c5], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
+
+query TT
+EXPLAIN SELECT MAX(c5) OVER() as max_c5 FROM aggregate_test_100_ordered ORDER BY max_c5;
+----
+logical_plan
+01)Sort: max_c5 ASC NULLS LAST
+02)--Projection: max(aggregate_test_100_ordered.c5) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max_c5
+03)----WindowAggr: windowExpr=[[max(aggregate_test_100_ordered.c5) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+04)------TableScan: aggregate_test_100_ordered projection=[c5]
+physical_plan
+01)ProjectionExec: expr=[max(aggregate_test_100_ordered.c5) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as max_c5]
+02)--WindowAggExec: wdw=[max(aggregate_test_100_ordered.c5) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "max(aggregate_test_100_ordered.c5) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c5], has_header=true
