diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a00ad83c2c..65f0b77238 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -191,12 +191,17 @@ jobs: MINGW_ASM_MASM_COMPILER: llvm-ml MINGW_ASM_MASM_FLAGS: -m64 - name: Android (API 21, NDK 23) - os: macos-15-large + os: ubuntu-latest ANDROID_API: 21 ANDROID_NDK: 23.2.8568313 ANDROID_ARCH: x86_64 + - name: Android (API 26, NDK 27) + os: ubuntu-latest + ANDROID_API: 26 + ANDROID_NDK: 27.3.13750724 + ANDROID_ARCH: x86_64 - name: Android (API 31, NDK 27) - os: macos-15-large + os: ubuntu-latest ANDROID_API: 31 ANDROID_NDK: 27.3.13750724 ANDROID_ARCH: x86_64 @@ -242,12 +247,12 @@ jobs: cache: "pip" - name: Check Linux CC/CXX - if: ${{ runner.os == 'Linux' && !matrix.container }} + if: ${{ runner.os == 'Linux' && !env['ANDROID_API'] &&!matrix.container }} run: | [ -n "$CC" ] && [ -n "$CXX" ] || { echo "Ubuntu runner configurations require toolchain selection via CC and CXX" >&2; exit 1; } - name: Installing Linux Dependencies - if: ${{ runner.os == 'Linux' && !env['TEST_X86'] && !matrix.container }} + if: ${{ runner.os == 'Linux' && !env['TEST_X86'] && !env['ANDROID_API'] && !matrix.container }} run: | sudo apt update # Install common dependencies @@ -278,7 +283,7 @@ jobs: sudo make install - name: Installing Linux 32-bit Dependencies - if: ${{ runner.os == 'Linux' && env['TEST_X86'] && !matrix.container }} + if: ${{ runner.os == 'Linux' && env['TEST_X86'] && !env['ANDROID_API'] &&!matrix.container }} run: | sudo dpkg --add-architecture i386 sudo apt update @@ -357,6 +362,22 @@ jobs: with: gradle-home-cache-cleanup: true + - name: Setup .NET for Android + if: ${{ env['ANDROID_API'] }} + uses: actions/setup-dotnet@v5 + with: + dotnet-version: '10.0.x' + + - name: Install .NET Android workload + if: ${{ env['ANDROID_API'] }} + run: dotnet workload restore tests/fixtures/dotnet_signal/test_dotnet.csproj + + - name: Enable KVM group perms + if: ${{ runner.os == 'Linux' && env['ANDROID_API'] }} + run: | + echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules + sudo udevadm control --reload-rules + sudo udevadm trigger --name-match=kvm - name: Add sentry.native.test hostname if: ${{ runner.os == 'Windows' }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ad01ff299..64c592502a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,13 @@ # Changelog -## Unreleased: +## Unreleased **Fixes**: - inproc: only the handling thread cleans up after the crash. ([#1579](https://github.com/getsentry/sentry-native/pull/1579)) - Propagate transport options (`ca_certs`, `proxy`, `user_agent`) and `handler_path` to the native backend crash daemon. Previously, the daemon did not receive SSL certificate or proxy settings from the parent process, causing SSL errors (curl code 60) when uploading crash reports. The daemon also ignored the user-configured handler path, requiring the `sentry-crash` binary to be placed next to the application executable. ([#1573](https://github.com/getsentry/sentry-native/pull/1573)) - Add module header pages to MemoryList and fix exception code in the native backend. ([#1576](https://github.com/getsentry/sentry-native/pull/1576)) +- Fix `CHAIN_AT_START` handler strategy crashing on Android when the chained Mono handler resets the signal handler and re-raises. ([#1572](https://github.com/getsentry/sentry-native/pull/1572)) ## 0.13.2 diff --git a/src/backends/sentry_backend_inproc.c b/src/backends/sentry_backend_inproc.c index 53aa0af54c..4ba0c5a7cd 100644 --- a/src/backends/sentry_backend_inproc.c +++ b/src/backends/sentry_backend_inproc.c @@ -26,6 +26,10 @@ #ifdef SENTRY_PLATFORM_UNIX # include #endif +#ifdef SENTRY_PLATFORM_ANDROID +# include +# include +#endif #include /** @@ -480,6 +484,17 @@ startup_inproc_backend( options ? sentry_options_get_handler_strategy(options) : # endif SENTRY_HANDLER_STRATEGY_DEFAULT; +# ifdef SENTRY_PLATFORM_ANDROID + // CHAIN_AT_START invokes the previous handler and expects to regain + // control. On Android API < 26, the old debuggerd daemon kills the + // crashing process via SIGKILL after the chained handler triggers + // it, so we fall back to DEFAULT which chains at the end instead. + if (g_backend_config.handler_strategy + == SENTRY_HANDLER_STRATEGY_CHAIN_AT_START + && android_get_device_api_level() < 26) { + g_backend_config.handler_strategy = SENTRY_HANDLER_STRATEGY_DEFAULT; + } +# endif if (backend) { backend->data = &g_backend_config; } @@ -1564,6 +1579,29 @@ process_ucontext(const sentry_ucontext_t *uctx) uintptr_t ip = get_instruction_pointer(uctx); uintptr_t sp = get_stack_pointer(uctx); +# ifdef SENTRY_PLATFORM_ANDROID + // Mask the signal so SA_NODEFER doesn't let re-raises from the chained + // handler kill the process before we regain control. + sigset_t mask, old_mask; + sigemptyset(&mask); + sigaddset(&mask, uctx->signum); + // Raw syscall because ART's libsigchain intercepts + // sigprocmask() and silently drops the request when called + // outside its own special handlers. Without the raw syscall + // the mask change would be ignored and SA_NODEFER would let + // the chained handler's raise() re-deliver the signal + // immediately, crashing the process before we can inspect + // the modified IP/SP. + // + // DANGER: this makes libsigchain's internal mask state + // diverge from the kernel's actual mask. If ART ever relies + // on that state for correctness (e.g. GC safepoints), this + // could cause subtle failures. We restore the mask right + // after the chained handler returns, limiting the window. + syscall( + SYS_rt_sigprocmask, SIG_BLOCK, &mask, &old_mask, sizeof(sigset_t)); +# endif + // invoke the previous handler (typically the CLR/Mono // signal-to-managed-exception handler) invoke_signal_handler( @@ -1579,6 +1617,19 @@ process_ucontext(const sentry_ucontext_t *uctx) return; } +# ifdef SENTRY_PLATFORM_ANDROID + // restore our handler after resend_signal() set SIG_DFL + sigaction(uctx->signum, &g_sigaction, NULL); + + // consume pending signal + struct timespec timeout = { 0, 0 }; + syscall(SYS_rt_sigtimedwait, &mask, NULL, &timeout, sizeof(sigset_t)); + + // unmask + syscall( + SYS_rt_sigprocmask, SIG_SETMASK, &old_mask, NULL, sizeof(sigset_t)); +# endif + // return from runtime handler; continue processing the crash on the // signal thread until the worker takes over } diff --git a/tests/fixtures/dotnet_signal/Directory.Build.props b/tests/fixtures/dotnet_signal/Directory.Build.props new file mode 100644 index 0000000000..cac7f5ab06 --- /dev/null +++ b/tests/fixtures/dotnet_signal/Directory.Build.props @@ -0,0 +1,2 @@ + + diff --git a/tests/fixtures/dotnet_signal/Platforms/Android/MainActivity.cs b/tests/fixtures/dotnet_signal/Platforms/Android/MainActivity.cs new file mode 100644 index 0000000000..a6b35ceba3 --- /dev/null +++ b/tests/fixtures/dotnet_signal/Platforms/Android/MainActivity.cs @@ -0,0 +1,31 @@ +using Android.App; +using Android.OS; + +// Required for "adb shell run-as" to access the app's data directory in Release builds +[assembly: Application(Debuggable = true)] + +namespace dotnet_signal; + +[Activity(Name = "dotnet_signal.MainActivity", MainLauncher = true)] +public class MainActivity : Activity +{ + protected override void OnResume() + { + base.OnResume(); + + var arg = Intent?.GetStringExtra("arg"); + if (!string.IsNullOrEmpty(arg)) + { + var databasePath = FilesDir?.AbsolutePath + "/.sentry-native"; + + // Post to the message queue so the activity finishes starting + // before the crash test runs. Without this, "am start -W" may hang. + new Handler(Looper.MainLooper!).Post(() => + { + Program.RunTest(new[] { arg }, databasePath); + FinishAndRemoveTask(); + Java.Lang.JavaSystem.Exit(0); + }); + } + } +} diff --git a/tests/fixtures/dotnet_signal/Program.cs b/tests/fixtures/dotnet_signal/Program.cs index 4e6217ac3e..c21e10ef71 100644 --- a/tests/fixtures/dotnet_signal/Program.cs +++ b/tests/fixtures/dotnet_signal/Program.cs @@ -20,10 +20,13 @@ class Program [DllImport("sentry", EntryPoint = "sentry_options_set_debug")] static extern IntPtr sentry_options_set_debug(IntPtr options, int debug); + [DllImport("sentry", EntryPoint = "sentry_options_set_database_path")] + static extern void sentry_options_set_database_path(IntPtr options, string path); + [DllImport("sentry", EntryPoint = "sentry_init")] static extern int sentry_init(IntPtr options); - static void Main(string[] args) + public static void RunTest(string[] args, string? databasePath = null) { var githubActions = Environment.GetEnvironmentVariable("GITHUB_ACTIONS") ?? string.Empty; if (githubActions == "true") { @@ -38,10 +41,13 @@ static void Main(string[] args) var options = sentry_options_new(); sentry_options_set_handler_strategy(options, 1); sentry_options_set_debug(options, 1); + if (databasePath != null) + { + sentry_options_set_database_path(options, databasePath); + } sentry_init(options); - var doNativeCrash = args is ["native-crash"]; - if (doNativeCrash) + if (args.Contains("native-crash")) { native_crash(); } @@ -51,9 +57,9 @@ static void Main(string[] args) { Console.WriteLine("dereference a NULL object from managed code"); var s = default(string); - var c = s.Length; + var c = s!.Length; } - catch (NullReferenceException exception) + catch (NullReferenceException) { } } @@ -61,7 +67,14 @@ static void Main(string[] args) { Console.WriteLine("dereference a NULL object from managed code (unhandled)"); var s = default(string); - var c = s.Length; + var c = s!.Length; } } + +#if !ANDROID + static void Main(string[] args) + { + RunTest(args); + } +#endif } \ No newline at end of file diff --git a/tests/fixtures/dotnet_signal/test_dotnet.csproj b/tests/fixtures/dotnet_signal/test_dotnet.csproj index 238f157e2e..e266400d00 100644 --- a/tests/fixtures/dotnet_signal/test_dotnet.csproj +++ b/tests/fixtures/dotnet_signal/test_dotnet.csproj @@ -1,8 +1,23 @@ Exe - net10.0 + net10.0 + $(TargetFrameworks);net10.0-android enable enable + + + io.sentry.ndk.dotnet.signal.test + 21 + true + + + + + + + + + diff --git a/tests/test_build_static.py b/tests/test_build_static.py index 36d502c957..6dab8ca505 100644 --- a/tests/test_build_static.py +++ b/tests/test_build_static.py @@ -2,7 +2,7 @@ import sys import os import pytest -from .conditions import has_breakpad, has_crashpad, has_native +from .conditions import has_breakpad, has_crashpad, has_native, is_android def test_static_lib(cmake): @@ -16,7 +16,7 @@ def test_static_lib(cmake): ) # on linux we can use `ldd` to check that we don’t link to `libsentry.so` - if sys.platform == "linux": + if sys.platform == "linux" and not is_android: output = subprocess.check_output("ldd sentry_example", cwd=tmp_path, shell=True) assert b"libsentry.so" not in output diff --git a/tests/test_dotnet_signals.py b/tests/test_dotnet_signals.py index 7c4e2a70dd..7614aa9c41 100644 --- a/tests/test_dotnet_signals.py +++ b/tests/test_dotnet_signals.py @@ -3,10 +3,11 @@ import shutil import subprocess import sys +import time import pytest -from tests.conditions import is_tsan, is_x86, is_asan +from tests.conditions import is_android, is_tsan, is_x86, is_asan project_fixture_path = pathlib.Path("tests/fixtures/dotnet_signal") @@ -49,19 +50,23 @@ def run_dotnet(tmp_path, args): def run_dotnet_managed_exception(tmp_path): - return run_dotnet(tmp_path, ["dotnet", "run", "managed-exception"]) + return run_dotnet( + tmp_path, ["dotnet", "run", "-f:net10.0", "--", "managed-exception"] + ) def run_dotnet_unhandled_managed_exception(tmp_path): - return run_dotnet(tmp_path, ["dotnet", "run", "unhandled-managed-exception"]) + return run_dotnet( + tmp_path, ["dotnet", "run", "-f:net10.0", "--", "unhandled-managed-exception"] + ) def run_dotnet_native_crash(tmp_path): - return run_dotnet(tmp_path, ["dotnet", "run", "native-crash"]) + return run_dotnet(tmp_path, ["dotnet", "run", "-f:net10.0", "--", "native-crash"]) @pytest.mark.skipif( - sys.platform != "linux" or is_x86 or is_asan or is_tsan, + bool(sys.platform != "linux" or is_x86 or is_asan or is_tsan or is_android), reason="dotnet signal handling is currently only supported on 64-bit Linux without sanitizers", ) def test_dotnet_signals_inproc(cmake): @@ -165,7 +170,7 @@ def run_aot_native_crash(tmp_path): @pytest.mark.skipif( - sys.platform != "linux" or is_x86 or is_asan or is_tsan, + bool(sys.platform != "linux" or is_x86 or is_asan or is_tsan or is_android), reason="dotnet AOT signal handling is currently only supported on 64-bit Linux without sanitizers", ) def test_aot_signals_inproc(cmake): @@ -199,6 +204,7 @@ def test_aot_signals_inproc(cmake): [ "dotnet", "publish", + "-f:net10.0", "-p:PublishAot=true", "-p:Configuration=Release", "-o", @@ -255,3 +261,200 @@ def test_aot_signals_inproc(cmake): shutil.rmtree(tmp_path / ".sentry-native", ignore_errors=True) shutil.rmtree(project_fixture_path / "bin", ignore_errors=True) shutil.rmtree(project_fixture_path / "obj", ignore_errors=True) + + +ANDROID_PACKAGE = "io.sentry.ndk.dotnet.signal.test" + + +def wait_for(condition, timeout=10, interval=0.5): + start = time.time() + while time.time() - start < timeout: + if condition(): + return True + time.sleep(interval) + return condition() + + +def adb(*args, **kwargs): + adb_path = "{}/platform-tools/adb".format(os.environ["ANDROID_HOME"]) + return subprocess.run([adb_path, *args], **kwargs) + + +def run_android(args=None, timeout=30): + if args is None: + args = [] + adb("logcat", "-c") + adb("shell", "am", "force-stop", ANDROID_PACKAGE) + adb( + "shell", "run-as {} sh -c 'rm -rf files/.sentry-native'".format(ANDROID_PACKAGE) + ) + intent_args = [] + for arg in args: + intent_args += ["--es", "arg", arg] + try: + adb( + "shell", + "am", + "start", + "-W", + "-n", + "{}/dotnet_signal.MainActivity".format(ANDROID_PACKAGE), + *intent_args, + check=True, + timeout=10, + ) + except subprocess.TimeoutExpired: + pass + pid = adb( + "shell", "pidof", ANDROID_PACKAGE, capture_output=True, text=True + ).stdout.strip() + wait_for( + lambda: adb( + "shell", "pidof", ANDROID_PACKAGE, capture_output=True, text=True + ).returncode + != 0, + timeout=timeout, + ) + logcat_args = ["logcat", "-d"] + if pid: + logcat_args += ["--pid=" + pid] + return adb(*logcat_args, capture_output=True, text=True).stdout + + +def run_android_managed_exception(): + return run_android(["managed-exception"]) + + +def run_android_unhandled_managed_exception(): + return run_android(["unhandled-managed-exception"]) + + +def run_android_native_crash(): + return run_android(["native-crash"]) + + +@pytest.mark.skipif( + not is_android or int(is_android) < 26, + reason="needs Android API 26+ (tombstoned)", +) +def test_android_signals_inproc(cmake): + if shutil.which("dotnet") is None: + pytest.skip("dotnet is not installed") + + arch = os.environ.get("ANDROID_ARCH", "x86_64") + rid_map = { + "x86_64": "android-x64", + "x86": "android-x86", + "arm64-v8a": "android-arm64", + "armeabi-v7a": "android-arm", + } + + try: + tmp_path = cmake( + ["sentry"], + {"SENTRY_BACKEND": "inproc", "SENTRY_TRANSPORT": "none"}, + ) + + # build libcrash.so with NDK clang + ndk_prebuilt = pathlib.Path( + "{}/ndk/{}/toolchains/llvm/prebuilt".format( + os.environ["ANDROID_HOME"], os.environ["ANDROID_NDK"] + ) + ) + triples = { + "x86_64": "x86_64-linux-android", + "x86": "i686-linux-android", + "arm64-v8a": "aarch64-linux-android", + "armeabi-v7a": "armv7a-linux-androideabi", + } + ndk_clang = str( + next(ndk_prebuilt.iterdir()) + / "bin" + / "{}{}-clang".format(triples[arch], os.environ["ANDROID_API"]) + ) + native_lib_dir = project_fixture_path / "native" / arch + native_lib_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(tmp_path / "libsentry.so", native_lib_dir / "libsentry.so") + subprocess.run( + [ + ndk_clang, + "-Wall", + "-Wextra", + "-fPIC", + "-shared", + str(project_fixture_path / "crash.c"), + "-o", + str(native_lib_dir / "libcrash.so"), + ], + check=True, + ) + + # build and install the APK + subprocess.run( + [ + "dotnet", + "build", + "-f:net10.0-android", + "-p:RuntimeIdentifier={}".format(rid_map[arch]), + "-p:Configuration=Release", + ], + cwd=project_fixture_path, + check=True, + ) + apk_dir = ( + project_fixture_path / "bin" / "Release" / "net10.0-android" / rid_map[arch] + ) + apk_path = next(apk_dir.glob("*-Signed.apk")) + adb("install", "-r", str(apk_path), check=True) + + def run_as(cmd, **kwargs): + return adb( + "shell", + 'run-as {} sh -c "{}"'.format(ANDROID_PACKAGE, cmd), + **kwargs, + ) + + db = "files/.sentry-native" + + def file_exists(path): + return run_as("test -f " + path, capture_output=True).returncode == 0 + + def dir_exists(path): + return run_as("test -d " + path, capture_output=True).returncode == 0 + + def has_envelope(): + result = run_as( + "find " + db + " -name '*.envelope'", capture_output=True, text=True + ) + return bool(result.stdout.strip()) + + # managed exception: handled, no crash + logcat = run_android_managed_exception() + assert not ( + "NullReferenceException" in logcat + ), f"Managed exception leaked.\nlogcat:\n{logcat}" + assert wait_for(lambda: dir_exists(db)), "No database-path exists" + assert not file_exists(db + "/last_crash"), "A crash was registered" + assert not has_envelope(), "Unexpected envelope found" + + # unhandled managed exception: Mono calls exit(1), the native SDK + # should not register a crash (sentry-dotnet handles this at the + # managed layer via UnhandledExceptionRaiser) + logcat = run_android_unhandled_managed_exception() + assert ( + "NullReferenceException" in logcat + ), f"Expected NullReferenceException.\nlogcat:\n{logcat}" + assert wait_for(lambda: dir_exists(db)), "No database-path exists" + assert not file_exists(db + "/last_crash"), "A crash was registered" + assert not has_envelope(), "Unexpected envelope found" + + # native crash + run_android_native_crash() + assert wait_for(lambda: file_exists(db + "/last_crash")), "Crash marker missing" + assert wait_for(has_envelope), "Crash envelope is missing" + + finally: + shutil.rmtree(project_fixture_path / "native", ignore_errors=True) + shutil.rmtree(project_fixture_path / "bin", ignore_errors=True) + shutil.rmtree(project_fixture_path / "obj", ignore_errors=True) + adb("uninstall", ANDROID_PACKAGE, check=False)