Skip to content

unix: BOLT fixes #463

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions cpython-unix/build-cpython.sh
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,11 @@ if [ -n "${PYTHON_MEETS_MINIMUM_VERSION_3_11}" ]; then
patch -p1 -i ${ROOT}/patch-pwd-remove-conditional.patch
fi

# Adjust BOLT flags to yield better behavior. See inline details in patch.
if [ -n "${PYTHON_MEETS_MINIMUM_VERSION_3_12}" ]; then
patch -p1 -i ${ROOT}/patch-configure-bolt-flags.patch
fi

# The optimization make targets are both phony and non-phony. This leads
# to PGO targets getting reevaluated after a build when you use multiple
# make invocations. e.g. `make install` like we do below. Fix that.
Expand Down Expand Up @@ -287,6 +292,19 @@ if [ -n "${CROSS_COMPILING}" ]; then
patch -p1 -i ${ROOT}/patch-force-cross-compile.patch
fi

# BOLT instrumented binaries segfault in some test_embed tests for unknown reasons.
# On 3.12 (minimum BOLT version), the segfault causes the test harness to
# abort and BOLT optimization uses the partial test results. On 3.13, the segfault
# is a fatal error.
if [ -n "${PYTHON_MEETS_MINIMUM_VERSION_3_10}" ]; then
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these also be gated by -n "${BOLT_CAPABLE}" so we don't disable tests unnecessarily?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't actually run the full suite of tests. So it doesn't matter today.

(It would be nice to actually run the full test suite against the built distribution but that's for another PR I suppose.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't these be reflected in the distributed Python too? Like, a consumer would have these tests disabled?

Agree it doesn't seem critical.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, right, I forgot we distributed the unit tests.

I'll file an issue to clean things up. Agree we could do something better here. And we may be paving over a legit bug somewhere by disabling tests that segfault.

patch -p1 -i ${ROOT}/patch-test-embed-prevent-segfault.patch
fi

# Same as above but for an additional set of tests introduced in 3.14.
if [ -n "${PYTHON_MEETS_MINIMUM_VERSION_3_14}" ]; then
patch -p1 -i ${ROOT}/patch-test-embed-prevent-segfault-3.14.patch
fi

# Most bits look at CFLAGS. But setup.py only looks at CPPFLAGS.
# So we need to set both.
CFLAGS="${EXTRA_TARGET_CFLAGS} -fPIC -I${TOOLS_PATH}/deps/include -I${TOOLS_PATH}/deps/include/ncursesw"
Expand Down Expand Up @@ -387,12 +405,7 @@ fi

if [ -n "${CPYTHON_OPTIMIZED}" ]; then
CONFIGURE_FLAGS="${CONFIGURE_FLAGS} --enable-optimizations"
if [[ -n "${PYTHON_MEETS_MINIMUM_VERSION_3_13}" && -n "${BOLT_CAPABLE}" ]]; then
# Due to a SEGFAULT when running `test_embed` with BOLT instrumented binaries, we can't use
# BOLT on Python 3.13+.
# TODO: Find a fix for this or consider skipping these tests specifically
echo "BOLT is disabled on Python 3.13+"
Comment on lines -391 to -394
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yay! Thank you.

elif [[ -n "${PYTHON_MEETS_MINIMUM_VERSION_3_12}" && -n "${BOLT_CAPABLE}" ]]; then
if [[ -n "${PYTHON_MEETS_MINIMUM_VERSION_3_12}" && -n "${BOLT_CAPABLE}" ]]; then
CONFIGURE_FLAGS="${CONFIGURE_FLAGS} --enable-bolt"
fi
fi
Expand Down
50 changes: 50 additions & 0 deletions cpython-unix/patch-configure-bolt-flags.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
diff --git a/configure.ac b/configure.ac
index bc8c357e996..eef55d4839a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2104,6 +2104,27 @@ AS_VAR_IF([enable_shared], [yes], [
BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)"
])

+AC_ARG_VAR(
+ [BOLT_COMMON_FLAGS],
+ [Common arguments to llvm-bolt when instrumenting and applying]
+)
+
+AC_MSG_CHECKING([BOLT_COMMON_FLAGS])
+if test -z "${BOLT_COMMON_FLAGS}"
+then
+ AS_VAR_SET(
+ [BOLT_COMMON_FLAGS],
+ [m4_normalize("
+ [-update-debug-sections]
+
+ dnl At least LLVM 19.x doesn't support computed gotos in PIC compiled code.
+ dnl Exclude functions containing computed gotos.
+ dnl TODO this may be fixed in LLVM 20.x via https://github.com/llvm/llvm-project/pull/120267.
+ [-skip-funcs=_PyEval_EvalFrameDefault,sre_ucs1_match/1,sre_ucs2_match/1,sre_ucs4_match/1]
+ ")]
+ )
+fi
+
AC_ARG_VAR(
[BOLT_INSTRUMENT_FLAGS],
[Arguments to llvm-bolt when instrumenting binaries]
@@ -2111,7 +2132,7 @@ AC_ARG_VAR(
AC_MSG_CHECKING([BOLT_INSTRUMENT_FLAGS])
if test -z "${BOLT_INSTRUMENT_FLAGS}"
then
- BOLT_INSTRUMENT_FLAGS=
+ BOLT_INSTRUMENT_FLAGS="${BOLT_COMMON_FLAGS}"
fi
AC_MSG_RESULT([$BOLT_INSTRUMENT_FLAGS])

@@ -2125,7 +2146,7 @@ then
AS_VAR_SET(
[BOLT_APPLY_FLAGS],
[m4_normalize("
- -update-debug-sections
+ ${BOLT_COMMON_FLAGS}
-reorder-blocks=ext-tsp
-reorder-functions=hfsort+
-split-functions
36 changes: 36 additions & 0 deletions cpython-unix/patch-test-embed-prevent-segfault-3.14.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py
index 7110fb889f3..61e4d0f6179 100644
--- a/Lib/test/test_embed.py
+++ b/Lib/test/test_embed.py
@@ -940,6 +940,7 @@ def check_all_configs(self, testname, expected_config=None,
self.check_global_config(configs)
return configs

+ @unittest.skip("segfaults on BOLT instrumented binaries")
def test_init_default_config(self):
self.check_all_configs("test_init_initialize_config", api=API_COMPAT)

@@ -1039,6 +1040,7 @@ def test_init_from_config(self):
self.check_all_configs("test_init_from_config", config, preconfig,
api=API_COMPAT)

+ @unittest.skip("segfaults on BOLT instrumented binaries")
def test_init_compat_env(self):
preconfig = {
'allocator': ALLOCATOR_FOR_CONFIG,
@@ -1074,6 +1076,7 @@ def test_init_compat_env(self):
self.check_all_configs("test_init_compat_env", config, preconfig,
api=API_COMPAT)

+ @unittest.skip("segfaults on BOLT instrumented binaries")
def test_init_python_env(self):
preconfig = {
'allocator': ALLOCATOR_FOR_CONFIG,
@@ -1772,6 +1775,7 @@ def test_init_set_config(self):
self.check_all_configs("test_init_set_config", config,
api=API_ISOLATED)

+ @unittest.skip("segfaults on BOLT instrumented binaries")
def test_initconfig_api(self):
preconfig = {
'configure_locale': True,
20 changes: 20 additions & 0 deletions cpython-unix/patch-test-embed-prevent-segfault.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py
index 13713cf37b8..ba23880b15f 100644
--- a/Lib/test/test_embed.py
+++ b/Lib/test/test_embed.py
@@ -1615,6 +1615,7 @@ def test_getpath_abspath_win32(self):
for (_, expected), result in zip(CASES, results):
self.assertEqual(result, expected)

+ @unittest.skip("segfaults on BOLT instrumented binaries")
def test_global_pathconfig(self):
# Test C API functions getting the path configuration:
#
@@ -1866,6 +1867,7 @@ def test_no_memleak(self):
self.assertEqual(blocks, 0, out)


[email protected]("segfaults on BOLT instrumented binaries")
class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase):
# Test PyStdPrinter_Type which is used by _PySys_SetPreliminaryStderr():
# "Set up a preliminary stderr printer until we have enough
Loading