diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c1d9ac838..713420042 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -89,9 +89,9 @@ jobs: - name: FFI unit tests run: | - cd examples/ffi-table-provider + cd examples/ffi-library uv run --no-project maturin develop --uv - uv run --no-project pytest python/tests/_test_table_provider.py + uv run --no-project pytest python/tests/_*.py - name: Cache the generated dataset id: cache-tpch-dataset diff --git a/Cargo.lock b/Cargo.lock index f1b1ed50a..3708db4f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" +checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" +checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" +checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" dependencies = [ "ahash", "arrow-buffer", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" +checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" +checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" +checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" +checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" +checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" +checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" dependencies = [ "arrow-array", "arrow-buffer", @@ -326,9 +326,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" +checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -339,9 +339,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" +checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" dependencies = [ "arrow-array", "arrow-buffer", @@ -352,18 +352,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" +checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" dependencies = [ "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" +checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" dependencies = [ "ahash", "arrow-array", @@ -375,9 +375,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" +checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" dependencies = [ "arrow-array", "arrow-buffer", @@ -535,15 +535,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.5" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" +checksum = "1230237285e3e10cde447185e8975408ae24deaa67205ce684805c25bc0c7937" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "memmap2", ] [[package]] @@ -606,19 +607,18 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +checksum = "75b89e7c29231c673a61a46e722602bcd138298f6b9e81e71119693534585f5c" dependencies = [ "bzip2-sys", - "libc", ] [[package]] name = "bzip2-sys" -version = "0.1.11+1.0.8" +version = "0.1.12+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" dependencies = [ "cc", "libc", @@ -867,29 +867,30 @@ dependencies = [ [[package]] name = "datafusion" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "apache-avro", "arrow", - "arrow-array", "arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", - "bzip2 0.5.0", + "bzip2 0.5.1", "chrono", "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", + "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -898,7 +899,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "glob", "itertools 0.14.0", "log", "num-traits", @@ -910,7 +910,6 @@ dependencies = [ "sqlparser", "tempfile", "tokio", - "tokio-util", "url", "uuid", "xz2", @@ -920,8 +919,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "async-trait", @@ -935,22 +933,38 @@ dependencies = [ "itertools 0.14.0", "log", "parking_lot", - "sqlparser", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures", + "log", + "object_store", + "tokio", ] [[package]] name = "datafusion-common" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "apache-avro", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ipc", - "arrow-schema", "base64 0.22.1", "half", "hashbrown 0.14.5", @@ -969,24 +983,52 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-datasource" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.5.1", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "rand", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + [[package]] name = "datafusion-doc" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" [[package]] name = "datafusion-execution" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "dashmap", @@ -1004,8 +1046,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "chrono", @@ -1025,11 +1066,11 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "datafusion-common", + "indexmap", "itertools 0.14.0", "paste", ] @@ -1037,13 +1078,10 @@ dependencies = [ [[package]] name = "datafusion-ffi" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "abi_stable", "arrow", - "arrow-array", - "arrow-schema", "async-ffi", "async-trait", "datafusion", @@ -1058,8 +1096,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "arrow-buffer", @@ -1073,7 +1110,6 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", - "hashbrown 0.14.5", "hex", "itertools 0.14.0", "log", @@ -1088,13 +1124,10 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1111,8 +1144,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", @@ -1124,14 +1156,10 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1148,8 +1176,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "async-trait", @@ -1164,8 +1191,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1181,8 +1207,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1191,8 +1216,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "datafusion-expr", "quote", @@ -1202,8 +1226,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "chrono", @@ -1221,14 +1244,10 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1240,18 +1259,16 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "petgraph 0.7.1", + "petgraph", ] [[package]] name = "datafusion-physical-expr-common" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", - "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -1261,11 +1278,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1273,23 +1288,18 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "futures", "itertools 0.14.0", "log", "recursive", - "url", ] [[package]] name = "datafusion-physical-plan" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", "async-trait", @@ -1315,8 +1325,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "chrono", @@ -1331,8 +1340,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "datafusion-common", @@ -1365,12 +1373,9 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", "bigdecimal", "datafusion-common", "datafusion-expr", @@ -1384,10 +1389,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "45.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1634405abd8bd3c64c352f2da2f2aec6d80a815930257e0db0ce4ff5daf00944" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ - "arrow-buffer", "async-recursion", "async-trait", "chrono", @@ -1456,12 +1459,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "fixedbitset" version = "0.5.7" @@ -2243,6 +2240,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.9.1" @@ -2455,9 +2461,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" +checksum = "761c44d824fe83106e0600d2510c07bf4159a4985bf0569b513ea4288dc1b4fb" dependencies = [ "ahash", "arrow-array", @@ -2548,23 +2554,13 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "petgraph" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset 0.4.2", - "indexmap", -] - [[package]] name = "petgraph" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "fixedbitset 0.5.7", + "fixedbitset", "indexmap", ] @@ -2660,9 +2656,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", "prost-derive", @@ -2670,16 +2666,16 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b" +checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "once_cell", - "petgraph 0.6.5", + "petgraph", "prettyplease", "prost", "prost-types", @@ -2690,12 +2686,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.98", @@ -2703,9 +2699,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ "prost", ] @@ -3426,11 +3422,12 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.53.0" +version = "0.54.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -3491,9 +3488,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.52.3" +version = "0.53.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db15789cecbfdf6b1fcf2db807e767c92273bdc407ac057c2194b070c597756" +checksum = "6fac3d70185423235f37b889764e184b81a5af4bb7c95833396ee9bd92577e1b" dependencies = [ "heck", "pbjson", @@ -3952,7 +3949,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0" dependencies = [ "getrandom 0.3.1", + "js-sys", "serde", + "wasm-bindgen", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d18e0e8f0..a9e167a61 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,7 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} -arrow = { version = "54", features = ["pyarrow"] } +arrow = { version = "54.2.0", features = ["pyarrow"] } datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] } datafusion-substrait = { version = "45.0.0", optional = true } datafusion-proto = { version = "45.0.0" } @@ -61,3 +61,10 @@ crate-type = ["cdylib", "rlib"] [profile.release] lto = true codegen-units = 1 + +# TODO remove once we update datafusion versions to 46 +[patch.crates-io] +datafusion = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7", optional = true } +datafusion-proto = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7" } +datafusion-ffi = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7" } diff --git a/examples/ffi-table-provider/.cargo/config.toml b/examples/ffi-library/.cargo/config.toml similarity index 100% rename from examples/ffi-table-provider/.cargo/config.toml rename to examples/ffi-library/.cargo/config.toml diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/ffi-library/Cargo.lock similarity index 90% rename from examples/ffi-table-provider/Cargo.lock rename to examples/ffi-library/Cargo.lock index 32af85180..cf9dbf411 100644 --- a/examples/ffi-table-provider/Cargo.lock +++ b/examples/ffi-library/Cargo.lock @@ -73,7 +73,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "const-random", - "getrandom", + "getrandom 0.2.15", "once_cell", "version_check", "zerocopy", @@ -144,9 +144,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" +checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" dependencies = [ "arrow-arith", "arrow-array", @@ -165,9 +165,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" +checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -179,9 +179,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" +checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" dependencies = [ "ahash", "arrow-buffer", @@ -196,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" +checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" dependencies = [ "bytes", "half", @@ -207,9 +207,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" +checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -228,9 +228,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" +checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" dependencies = [ "arrow-array", "arrow-cast", @@ -244,9 +244,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" +checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" dependencies = [ "arrow-buffer", "arrow-schema", @@ -256,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" +checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" dependencies = [ "arrow-array", "arrow-buffer", @@ -270,9 +270,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" +checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" dependencies = [ "arrow-array", "arrow-buffer", @@ -290,9 +290,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" +checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -303,9 +303,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" +checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" dependencies = [ "arrow-array", "arrow-buffer", @@ -316,18 +316,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" +checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" +checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" dependencies = [ "ahash", "arrow-array", @@ -339,9 +339,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" +checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" dependencies = [ "arrow-array", "arrow-buffer", @@ -475,15 +475,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.4" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +checksum = "1230237285e3e10cde447185e8975408ae24deaa67205ce684805c25bc0c7937" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "memmap2", ] [[package]] @@ -530,9 +531,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.8.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" [[package]] name = "bzip2" @@ -546,19 +547,18 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +checksum = "75b89e7c29231c673a61a46e722602bcd138298f6b9e81e71119693534585f5c" dependencies = [ "bzip2-sys", - "libc", ] [[package]] name = "bzip2-sys" -version = "0.1.11+1.0.8" +version = "0.1.12+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" dependencies = [ "cc", "libc", @@ -641,7 +641,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom", + "getrandom 0.2.15", "once_cell", "tiny-keccak", ] @@ -766,27 +766,29 @@ dependencies = [ [[package]] name = "datafusion" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", - "arrow-array", "arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", - "bzip2 0.5.0", + "bzip2 0.5.1", "chrono", "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", + "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -795,7 +797,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "glob", "itertools 0.14.0", "log", "object_store", @@ -806,7 +807,6 @@ dependencies = [ "sqlparser", "tempfile", "tokio", - "tokio-util", "url", "uuid", "xz2", @@ -816,7 +816,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "async-trait", @@ -830,20 +830,37 @@ dependencies = [ "itertools 0.14.0", "log", "parking_lot", - "sqlparser", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures", + "log", + "object_store", + "tokio", ] [[package]] name = "datafusion-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ipc", - "arrow-schema", "base64", "half", "hashbrown 0.14.5", @@ -862,21 +879,52 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.5.1", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "flate2", + "futures", + "glob", + "itertools 0.14.0", "log", + "object_store", + "rand", "tokio", + "tokio-util", + "url", + "xz2", + "zstd", ] [[package]] name = "datafusion-doc" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" [[package]] name = "datafusion-execution" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "dashmap", @@ -894,7 +942,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "chrono", @@ -914,10 +962,11 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "datafusion-common", + "indexmap", "itertools 0.14.0", "paste", ] @@ -925,12 +974,10 @@ dependencies = [ [[package]] name = "datafusion-ffi" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "abi_stable", "arrow", - "arrow-array", - "arrow-schema", "async-ffi", "async-trait", "datafusion", @@ -942,10 +989,23 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-ffi-library" +version = "0.1.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion", + "datafusion-ffi", + "pyo3", + "pyo3-build-config", +] + [[package]] name = "datafusion-functions" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "arrow-buffer", @@ -959,7 +1019,6 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", - "hashbrown 0.14.5", "hex", "itertools 0.14.0", "log", @@ -974,12 +1033,10 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -996,7 +1053,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", @@ -1008,13 +1065,10 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1031,7 +1085,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "async-trait", @@ -1046,7 +1100,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1062,7 +1116,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1071,7 +1125,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "datafusion-expr", "quote", @@ -1081,7 +1135,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "chrono", @@ -1099,13 +1153,10 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1123,11 +1174,10 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", - "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -1137,10 +1187,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1148,22 +1197,18 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "futures", "itertools 0.14.0", "log", "recursive", - "url", ] [[package]] name = "datafusion-physical-plan" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", "async-trait", @@ -1189,7 +1234,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "chrono", @@ -1204,7 +1249,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", "datafusion-common", @@ -1214,11 +1259,9 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", "bigdecimal", "datafusion-common", "datafusion-expr", @@ -1279,19 +1322,6 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" -[[package]] -name = "ffi-table-provider" -version = "0.1.0" -dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", - "datafusion", - "datafusion-ffi", - "pyo3", - "pyo3-build-config", -] - [[package]] name = "fixedbitset" version = "0.5.7" @@ -1443,7 +1473,19 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets", ] [[package]] @@ -1885,6 +1927,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.9.1" @@ -2047,9 +2098,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" +checksum = "761c44d824fe83106e0600d2510c07bf4159a4985bf0569b513ea4288dc1b4fb" dependencies = [ "ahash", "arrow-array", @@ -2324,7 +2375,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", ] [[package]] @@ -2566,11 +2617,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.53.0" +version = "0.54.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -2719,9 +2771,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.41.1" +version = "1.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" dependencies = [ "backtrace", "bytes", @@ -2731,9 +2783,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", @@ -2870,11 +2922,13 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.11.0" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "93d59ca99a559661b96bf898d8fce28ed87935fd2bea9f05983c1464dd6c71b1" dependencies = [ - "getrandom", + "getrandom 0.3.1", + "js-sys", + "wasm-bindgen", ] [[package]] @@ -2899,6 +2953,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.95" @@ -3086,6 +3149,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags 2.6.0", +] + [[package]] name = "write16" version = "1.0.0" diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-library/Cargo.toml similarity index 67% rename from examples/ffi-table-provider/Cargo.toml rename to examples/ffi-library/Cargo.toml index 0e558fdd0..edeb54f7b 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/ffi-library/Cargo.toml @@ -16,7 +16,7 @@ # under the License. [package] -name = "ffi-table-provider" +name = "datafusion-ffi-library" version = "0.1.0" edition = "2021" @@ -24,13 +24,18 @@ edition = "2021" datafusion = { version = "45.0.0" } datafusion-ffi = { version = "45.0.0" } pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } -arrow = { version = "54" } -arrow-array = { version = "54" } -arrow-schema = { version = "54" } +arrow = { version = "54.2.0" } +arrow-array = { version = "54.2.0" } +arrow-schema = { version = "54.2.0" } [build-dependencies] pyo3-build-config = "0.23" [lib] -name = "ffi_table_provider" +name = "datafusion_ffi_library" crate-type = ["cdylib", "rlib"] + +# TODO remove once we update datafusion versions to 46 +[patch.crates-io] +datafusion = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7", features = ["avro", "unicode_expressions"] } +datafusion-ffi = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7" } diff --git a/examples/ffi-table-provider/build.rs b/examples/ffi-library/build.rs similarity index 100% rename from examples/ffi-table-provider/build.rs rename to examples/ffi-library/build.rs diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/ffi-library/pyproject.toml similarity index 97% rename from examples/ffi-table-provider/pyproject.toml rename to examples/ffi-library/pyproject.toml index 116efae9c..ca31d709f 100644 --- a/examples/ffi-table-provider/pyproject.toml +++ b/examples/ffi-library/pyproject.toml @@ -20,7 +20,7 @@ requires = ["maturin>=1.6,<2.0"] build-backend = "maturin" [project] -name = "ffi_table_provider" +name = "datafusion_ffi_library" requires-python = ">=3.8" classifiers = [ "Programming Language :: Rust", diff --git a/examples/ffi-library/python/tests/_test_scalar_udf.py b/examples/ffi-library/python/tests/_test_scalar_udf.py new file mode 100644 index 000000000..9ebcc076b --- /dev/null +++ b/examples/ffi-library/python/tests/_test_scalar_udf.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pyarrow as pa +from datafusion import SessionContext, col, ffi_udf +from datafusion_ffi_library import IsEvenFunction + + +def test_table_loading(): + ctx = SessionContext() + df = ctx.from_pydict({"a": [-3, -2, None, 0, 1, 2]}) + + is_even = ffi_udf(IsEvenFunction()) + + result = df.select(is_even(col("a"))).collect() + df.with_column("is_even", is_even(col("a"))).show() + print(result) + + assert len(result) == 1 + assert result[0].num_columns == 1 + + result = [r.column(0) for r in result] + expected = [ + pa.array([False, True, None, None, False, True], type=pa.bool_()), + ] + + assert result == expected diff --git a/examples/ffi-table-provider/python/tests/_test_table_provider.py b/examples/ffi-library/python/tests/_test_table_provider.py similarity index 96% rename from examples/ffi-table-provider/python/tests/_test_table_provider.py rename to examples/ffi-library/python/tests/_test_table_provider.py index 0db3ec561..6687a0c01 100644 --- a/examples/ffi-table-provider/python/tests/_test_table_provider.py +++ b/examples/ffi-library/python/tests/_test_table_provider.py @@ -17,7 +17,7 @@ import pyarrow as pa from datafusion import SessionContext -from ffi_table_provider import MyTableProvider +from datafusion_ffi_library import MyTableProvider def test_table_loading(): diff --git a/examples/ffi-library/src/lib.rs b/examples/ffi-library/src/lib.rs new file mode 100644 index 000000000..904bdf29d --- /dev/null +++ b/examples/ffi-library/src/lib.rs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::prelude::*; +mod scalar_udf; +mod table_provider; + +#[pymodule] +fn datafusion_ffi_library(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + + Ok(()) +} diff --git a/examples/ffi-library/src/scalar_udf.rs b/examples/ffi-library/src/scalar_udf.rs new file mode 100644 index 000000000..6ac75df43 --- /dev/null +++ b/examples/ffi-library/src/scalar_udf.rs @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ffi::CString, sync::Arc}; + +use arrow::array::BooleanArray; +use arrow_array::ArrayRef; +use datafusion::common::cast::as_int64_array; +use datafusion::logical_expr::create_udf; +use datafusion::logical_expr::Volatility; +use datafusion::physical_plan::ColumnarValue; +use datafusion::{arrow::datatypes::DataType, error::Result}; +use datafusion_ffi::udf::FFI_ScalarUDF; +use pyo3::{prelude::*, types::PyCapsule}; + +#[pyclass(name = "IsEvenFunction", module = "datafusion_ffi_library", subclass)] +#[derive(Clone)] +pub struct IsEvenFunction {} + +fn is_even(args: &[ColumnarValue]) -> Result { + assert_eq!(args.len(), 1); + let args = ColumnarValue::values_to_arrays(args)?; + + let values = as_int64_array(&args[0]).expect("cast failed"); + + let array = values + .iter() + .map(|value| value.and_then(|v| if v == 0 { None } else { Some(v % 2 == 0) })) + .collect::(); + + Ok(ColumnarValue::from(Arc::new(array) as ArrayRef)) +} + +#[pymethods] +impl IsEvenFunction { + #[new] + fn new() -> Self { + Self {} + } + + fn __datafusion_scalar_udf__<'py>(&self, py: Python<'py>) -> PyResult> { + let name = CString::new("datafusion_scalar_udf").unwrap(); + + let func = create_udf( + "is_even", + vec![DataType::Int64], + DataType::Boolean, + Volatility::Immutable, + Arc::new(is_even), + ); + + let ffi_func: FFI_ScalarUDF = (Arc::new(func)).try_into()?; + + PyCapsule::new(py, ffi_func, Some(name)) + } +} diff --git a/examples/ffi-table-provider/src/lib.rs b/examples/ffi-library/src/table_provider.rs similarity index 91% rename from examples/ffi-table-provider/src/lib.rs rename to examples/ffi-library/src/table_provider.rs index 88deeece2..2dfc645e9 100644 --- a/examples/ffi-table-provider/src/lib.rs +++ b/examples/ffi-library/src/table_provider.rs @@ -31,9 +31,9 @@ use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyCapsule}; /// In order to provide a test that demonstrates different sized record batches, /// the first batch will have num_rows, the second batch num_rows+1, and so on. -#[pyclass(name = "MyTableProvider", module = "ffi_table_provider", subclass)] +#[pyclass(name = "MyTableProvider", module = "datafusion_ffi_library", subclass)] #[derive(Clone)] -struct MyTableProvider { +pub struct MyTableProvider { num_cols: usize, num_rows: usize, num_batches: usize, @@ -104,12 +104,6 @@ impl MyTableProvider { .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; let provider = FFI_TableProvider::new(Arc::new(provider), false, None); - PyCapsule::new_bound(py, provider, Some(name.clone())) + PyCapsule::new(py, provider, Some(name.clone())) } } - -#[pymodule] -fn ffi_table_provider(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - Ok(()) -} diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 85aefcce7..330ac7a6f 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -118,6 +118,7 @@ def lit(value): udf = ScalarUDF.udf +ffi_udf = ScalarUDF.ffi_udf udaf = AggregateUDF.udaf diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index c97f453d0..99d468883 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -21,7 +21,7 @@ from abc import ABCMeta, abstractmethod from enum import Enum -from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar +from typing import TYPE_CHECKING, Callable, List, Optional, Protocol, TypeVar import pyarrow @@ -76,6 +76,15 @@ def __str__(self): return self.name.lower() +class ScalarUDFExportable(Protocol): + """Type hint for object that has __datafusion_scalar_udf__ PyCapsule. + + https://datafusion.apache.org/python/user-guide/common-operations/udf-and-udfa.html + """ + + def __datafusion_scalar_udf__(self) -> object: ... # noqa: D105 + + class ScalarUDF: """Class for performing scalar user-defined functions (UDF). @@ -86,20 +95,23 @@ class ScalarUDF: def __init__( self, name: Optional[str], - func: Callable[..., _R], - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: _R, - volatility: Volatility | str, + func: Callable[..., _R] | df_internal.ScalarUDF, + input_types: pyarrow.DataType | list[pyarrow.DataType] | None, + return_type: Optional[_R], + volatility: Volatility | str | None, ) -> None: """Instantiate a scalar user-defined function (UDF). See helper method :py:func:`udf` for argument details. """ - if isinstance(input_types, pyarrow.DataType): - input_types = [input_types] - self._udf = df_internal.ScalarUDF( - name, func, input_types, return_type, str(volatility) - ) + if isinstance(func, df_internal.ScalarUDF): + self._udf = func + else: + if isinstance(input_types, pyarrow.DataType): + input_types = [input_types] + self._udf = df_internal.ScalarUDF( + name, func, input_types, return_type, str(volatility) + ) def __call__(self, *args: Expr) -> Expr: """Execute the UDF. @@ -110,6 +122,12 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udf.__call__(*args_raw)) + @staticmethod + def ffi_udf(func: ScalarUDFExportable) -> ScalarUDF: + """Create a User-Defined Function from a provided PyCapsule.""" + udf = df_internal.ScalarUDF.ffi_udf(func) + return ScalarUDF(None, udf, None, None, None) + @staticmethod def udf( func: Callable[..., _R], diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 5bc3fb094..6ae1115e5 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -752,16 +752,23 @@ def test_execution_plan(aggregate_df): assert "AggregateExec:" in indent assert "CoalesceBatchesExec:" in indent assert "RepartitionExec:" in indent - assert "CsvExec:" in indent + assert "DataSourceExec:" in indent ctx = SessionContext() - stream = ctx.execute(plan, 0) - # get the one and only batch - batch = stream.next() - assert batch is not None - # there should be no more batches - with pytest.raises(StopIteration): - stream.next() + rows_returned = 0 + for idx in range(0, plan.partition_count): + stream = ctx.execute(plan, idx) + try: + batch = stream.next() + assert batch is not None + rows_returned += len(batch.to_pyarrow()[0]) + except StopIteration: + # This is one of the partitions with no values + pass + with pytest.raises(StopIteration): + stream.next() + + assert rows_returned == 5 def test_repartition(df): diff --git a/src/expr.rs b/src/expr.rs index e750be6a4..6f1f5a26c 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use datafusion::logical_expr::expr::{AggregateFunctionParams, WindowFunctionParams}; use datafusion::logical_expr::utils::exprlist_to_fields; use datafusion::logical_expr::{ ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition, @@ -393,12 +394,15 @@ impl PyExpr { | Expr::TryCast(TryCast { expr, .. }) | Expr::InSubquery(InSubquery { expr, .. }) => Ok(vec![PyExpr::from(*expr.clone())]), - // Expr variants containing a collection of Expr(s) for operands - Expr::AggregateFunction(AggregateFunction { args, .. }) + Expr::AggregateFunction(AggregateFunction { + params: AggregateFunctionParams { args, .. }, + .. + }) | Expr::ScalarFunction(ScalarFunction { args, .. }) - | Expr::WindowFunction(WindowFunction { args, .. }) => { - Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) - } + | Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { args, .. }, + .. + }) => Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()), // Expr(s) that require more specific processing Expr::Case(Case { @@ -575,7 +579,7 @@ impl PyExpr { Expr::AggregateFunction(agg_fn) => { let window_fn = Expr::WindowFunction(WindowFunction::new( WindowFunctionDefinition::AggregateUDF(agg_fn.func.clone()), - agg_fn.args.clone(), + agg_fn.params.args.clone(), )); add_builder_fns_to_window( diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index 8fc9da5b0..f4283eb4e 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::common::DataFusionError; -use datafusion::logical_expr::expr::{AggregateFunction, Alias}; +use datafusion::logical_expr::expr::{AggregateFunction, AggregateFunctionParams, Alias}; use datafusion::logical_expr::logical_plan::Aggregate; use datafusion::logical_expr::Expr; use pyo3::{prelude::*, IntoPyObjectExt}; @@ -126,9 +126,10 @@ impl PyAggregate { match expr { // TODO: This Alias logic seems to be returning some strange results that we should investigate Expr::Alias(Alias { expr, .. }) => self._aggregation_arguments(expr.as_ref()), - Expr::AggregateFunction(AggregateFunction { func: _, args, .. }) => { - Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()) - } + Expr::AggregateFunction(AggregateFunction { + params: AggregateFunctionParams { args, .. }, + .. + }) => Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()), _ => Err(py_type_err( "Encountered a non Aggregate type in aggregation_arguments", )), diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs index 09471097f..c09f116e3 100644 --- a/src/expr/aggregate_expr.rs +++ b/src/expr/aggregate_expr.rs @@ -40,7 +40,13 @@ impl From for PyAggregateFunction { impl Display for PyAggregateFunction { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let args: Vec = self.aggr.args.iter().map(|expr| expr.to_string()).collect(); + let args: Vec = self + .aggr + .params + .args + .iter() + .map(|expr| expr.to_string()) + .collect(); write!(f, "{}({})", self.aggr.func.name(), args.join(", ")) } } @@ -54,12 +60,13 @@ impl PyAggregateFunction { /// is this a distinct aggregate such as `COUNT(DISTINCT expr)` fn is_distinct(&self) -> bool { - self.aggr.distinct + self.aggr.params.distinct } /// Get the arguments to the aggregate function fn args(&self) -> Vec { self.aggr + .params .args .iter() .map(|expr| PyExpr::from(expr.clone())) diff --git a/src/expr/window.rs b/src/expr/window.rs index 13deaec25..c5467bf94 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::common::{DataFusionError, ScalarValue}; -use datafusion::logical_expr::expr::WindowFunction; +use datafusion::logical_expr::expr::{WindowFunction, WindowFunctionParams}; use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits}; use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; @@ -118,7 +118,10 @@ impl PyWindowExpr { /// Returns order by columns in a window function expression pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { order_by, .. }) => py_sort_expr_list(&order_by), + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { order_by, .. }, + .. + }) => py_sort_expr_list(&order_by), other => Err(not_window_function_err(other)), } } @@ -126,9 +129,10 @@ impl PyWindowExpr { /// Return partition by columns in a window function expression pub fn get_partition_exprs(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { partition_by, .. }) => { - py_expr_list(&partition_by) - } + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { partition_by, .. }, + .. + }) => py_expr_list(&partition_by), other => Err(not_window_function_err(other)), } } @@ -136,7 +140,10 @@ impl PyWindowExpr { /// Return input args for window function pub fn get_args(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { args, .. }) => py_expr_list(&args), + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { args, .. }, + .. + }) => py_expr_list(&args), other => Err(not_window_function_err(other)), } } @@ -152,7 +159,10 @@ impl PyWindowExpr { /// Returns a Pywindow frame for a given window function expression pub fn get_frame(&self, expr: PyExpr) -> Option { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { window_frame, .. }) => Some(window_frame.into()), + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { window_frame, .. }, + .. + }) => Some(window_frame.into()), _ => None, } } diff --git a/src/functions.rs b/src/functions.rs index 6a8abb18d..cede3250a 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -17,6 +17,7 @@ use datafusion::functions_aggregate::all_default_aggregate_functions; use datafusion::functions_window::all_default_window_functions; +use datafusion::logical_expr::expr::WindowFunctionParams; use datafusion::logical_expr::ExprFunctionExt; use datafusion::logical_expr::WindowFrame; use pyo3::{prelude::*, wrap_pyfunction}; @@ -196,10 +197,7 @@ fn alias(expr: PyExpr, name: &str) -> PyResult { #[pyfunction] fn col(name: &str) -> PyResult { Ok(PyExpr { - expr: datafusion::logical_expr::Expr::Column(Column { - relation: None, - name: name.to_string(), - }), + expr: datafusion::logical_expr::Expr::Column(Column::new(None::, name)), }) } @@ -314,19 +312,21 @@ fn window( Ok(PyExpr { expr: datafusion::logical_expr::Expr::WindowFunction(WindowFunction { fun, - args: args.into_iter().map(|x| x.expr).collect::>(), - partition_by: partition_by - .unwrap_or_default() - .into_iter() - .map(|x| x.expr) - .collect::>(), - order_by: order_by - .unwrap_or_default() - .into_iter() - .map(|x| x.into()) - .collect::>(), - window_frame, - null_treatment: None, + params: WindowFunctionParams { + args: args.into_iter().map(|x| x.expr).collect::>(), + partition_by: partition_by + .unwrap_or_default() + .into_iter() + .map(|x| x.expr) + .collect::>(), + order_by: order_by + .unwrap_or_default() + .into_iter() + .map(|x| x.into()) + .collect::>(), + window_frame, + null_treatment: None, + }, }), }) } diff --git a/src/udf.rs b/src/udf.rs index 574c9d7b5..6c6c1afb4 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -17,6 +17,8 @@ use std::sync::Arc; +use datafusion_ffi::udf::{FFI_ScalarUDF, ForeignScalarUDF}; +use pyo3::types::PyCapsule; use pyo3::{prelude::*, types::PyTuple}; use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef}; @@ -28,9 +30,9 @@ use datafusion::logical_expr::function::ScalarFunctionImplementation; use datafusion::logical_expr::ScalarUDF; use datafusion::logical_expr::{create_udf, ColumnarValue}; -use crate::errors::to_datafusion_err; +use crate::errors::{py_datafusion_err, to_datafusion_err}; use crate::expr::PyExpr; -use crate::utils::parse_volatility; +use crate::utils::{parse_volatility, validate_pycapsule}; /// Create a Rust callable function from a python function that expects pyarrow arrays fn pyarrow_function_to_rust( @@ -105,6 +107,26 @@ impl PyScalarUDF { Ok(Self { function }) } + #[staticmethod] + fn ffi_udf(func: Bound) -> PyResult { + if func.hasattr("__datafusion_scalar_udf__")? { + let capsule = func.getattr("__datafusion_scalar_udf__")?.call0()?; + let capsule = capsule.downcast::()?; + validate_pycapsule(capsule, "datafusion_scalar_udf")?; + + let func = unsafe { capsule.reference::() }; + let func: ForeignScalarUDF = func.try_into().map_err(py_datafusion_err)?; + + Ok(Self { + function: ScalarUDF::from(func), + }) + } else { + Err(py_datafusion_err( + "__datafusion_scalar_udf__ does not exist on Scalar UDF object.", + )) + } + } + /// creates a new PyExpr with the call of the udf #[pyo3(signature = (*args))] fn __call__(&self, args: Vec) -> PyResult {