diff --git a/Makefile b/Makefile
index d25cd4a..904aa82 100644
--- a/Makefile
+++ b/Makefile
@@ -42,9 +42,9 @@ endif
 TARGET_ARCH   := $(shell uname -m)
 
 ifeq ($(TARGET_ARCH),aarch64)
-    SMS        ?= 53 61 70 72 75 80 86 87 90
+    SMS        ?= 75 80 86 87 89 90 120
 else
-    SMS        ?= 52 60 70 75 80 86 90
+    SMS        ?= 75 80 86 89 90 120
 endif
 
 # Generate SASS code for each SM architecture listed in $(SMS)
diff --git a/Makefile.config b/Makefile.config
index a981d0f..aee79d7 100644
--- a/Makefile.config
+++ b/Makefile.config
@@ -1,4 +1,4 @@
-CUDA_PATH ?= /usr/local/cuda-12.1
+CUDA_PATH ?= /usr/local/cuda
 
 SANALYZER_DIR    ?= ../sanalyzer
 
diff --git a/gpu_src/gpu_patch_block_divergence_analysis.cu b/gpu_src/gpu_patch_block_divergence_analysis.cu
new file mode 100644
index 0000000..0293185
--- /dev/null
+++ b/gpu_src/gpu_patch_block_divergence_analysis.cu
@@ -0,0 +1,151 @@
+#include "gpu_patch.h"
+
+#include <sanitizer_patching.h>
+
+#include "gpu_utils.h"
+#include <cstdio>
+
+static __device__ __inline__
+uint32_t GetBufferIndex(MemoryAccessTracker* pTracker) {
+    uint32_t idx = MEMORY_ACCESS_BUFFER_SIZE;
+
+    while (idx >= MEMORY_ACCESS_BUFFER_SIZE) {
+        idx = atomicAdd(&(pTracker->currentEntry), 1);
+
+        if (idx >= MEMORY_ACCESS_BUFFER_SIZE) {
+            // buffer is full, wait for last writing thread to flush
+            while (*(volatile uint32_t*)&(pTracker->currentEntry) >= MEMORY_ACCESS_BUFFER_SIZE);
+        }
+    }
+
+    return idx;
+}
+
+static __device__ __inline__
+void IncrementNumEntries(MemoryAccessTracker* pTracker) {
+    DoorBell* doorbell = pTracker->doorBell;
+    __threadfence();
+    const uint32_t numEntries = atomicAdd((int*)&(pTracker->numEntries), 1);
+
+    if (numEntries == MEMORY_ACCESS_BUFFER_SIZE - 1) {
+        // make sure everything is visible in memory
+        __threadfence_system();
+        doorbell->full = true;
+        while (doorbell->full);
+
+        pTracker->numEntries = 0;
+        __threadfence();
+        pTracker->currentEntry = 0;
+    }
+}
+
+static __device__
+SanitizerPatchResult CommonCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    MemoryType type)
+{
+    auto* pTracker = (MemoryAccessTracker*)userdata;
+
+    uint32_t active_mask = __activemask();
+    uint32_t laneid = get_laneid();
+    uint32_t first_laneid = __ffs(active_mask) - 1;
+
+    MemoryAccess* accesses = nullptr;
+
+    if (laneid == first_laneid) {
+        uint32_t idx = GetBufferIndex(pTracker);
+        accesses = &pTracker->access_buffer[idx];
+        accesses->accessSize = accessSize;
+        accesses->flags = flags;
+        accesses->warpId = get_warpid();
+        accesses->ctaId = get_ctaid_as_uint64();
+        accesses->pc = pc;
+        accesses->active_mask = active_mask;
+        accesses->type = type;
+    }
+
+    __syncwarp(active_mask);
+
+    accesses = (MemoryAccess*) shfl((uint64_t)accesses, first_laneid, active_mask);
+    if (accesses) {
+        accesses->addresses[laneid] = (uint64_t)(uintptr_t)ptr;
+    }
+
+    __syncwarp(active_mask);
+
+    if (laneid == first_laneid) {
+        IncrementNumEntries(pTracker);
+    }
+
+    return SANITIZER_PATCH_SUCCESS;
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemoryGlobalAccessCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    const void *pData)
+{
+    return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryType::Global);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemorySharedAccessCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    const void *pData)
+{
+    return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryType::Shared);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemoryLocalAccessCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    const void *pData)
+{
+    return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryType::Local);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemcpyAsyncCallback(void* userdata, uint64_t pc, void* src, uint32_t dst, uint32_t accessSize)
+{
+    if (src)
+    {
+        CommonCallback(userdata, pc, src, accessSize, SANITIZER_MEMORY_DEVICE_FLAG_READ, MemoryType::Global);
+    }
+
+    return CommonCallback(userdata, pc, (void*)dst, accessSize, SANITIZER_MEMORY_DEVICE_FLAG_WRITE, MemoryType::Shared);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult BlockExitCallback(void* userdata, uint64_t pc)
+{
+    MemoryAccessTracker* tracker = (MemoryAccessTracker*)userdata;
+    DoorBell* doorbell = tracker->doorBell;
+
+    uint32_t active_mask = __activemask();
+    uint32_t laneid = get_laneid();
+    uint32_t first_laneid = __ffs(active_mask) - 1;
+    int32_t pop_count = __popc(active_mask);
+
+    if (laneid == first_laneid) {
+        atomicAdd((int*)&doorbell->num_threads, -pop_count);
+    }
+    __syncwarp(active_mask);
+
+    return SANITIZER_PATCH_SUCCESS;
+}
diff --git a/gpu_src/gpu_patch_heatmap_analysis.cu b/gpu_src/gpu_patch_heatmap_analysis.cu
new file mode 100644
index 0000000..d9d94b4
--- /dev/null
+++ b/gpu_src/gpu_patch_heatmap_analysis.cu
@@ -0,0 +1,172 @@
+#include "gpu_patch.h"
+
+#include <sanitizer_patching.h>
+
+#include "gpu_utils.h"
+#include <cstdio>
+
+static __device__ __inline__
+bool BypassCheckByCtaId(uint32_t block_idx, uint32_t block_idy, uint32_t block_idz) {
+    int4 ctaid = get_ctaid();
+    if(ctaid.x != block_idx || ctaid.y != block_idy || ctaid.z != block_idz) {
+        return true;
+    }
+    return false;
+}
+
+
+static __device__ __inline__
+uint32_t GetBufferIndex(MemoryAccessTracker* pTracker) {
+    uint32_t idx = MEMORY_ACCESS_BUFFER_SIZE;
+
+    while (idx >= MEMORY_ACCESS_BUFFER_SIZE) {
+        idx = atomicAdd(&(pTracker->currentEntry), 1);
+
+        if (idx >= MEMORY_ACCESS_BUFFER_SIZE) {
+            // buffer is full, wait for last writing thread to flush
+            while (*(volatile uint32_t*)&(pTracker->currentEntry) >= MEMORY_ACCESS_BUFFER_SIZE);
+        }
+    }
+
+    return idx;
+}
+
+static __device__ __inline__
+void IncrementNumEntries(MemoryAccessTracker* pTracker) {
+    DoorBell* doorbell = pTracker->doorBell;
+    __threadfence();
+    const uint32_t numEntries = atomicAdd((int*)&(pTracker->numEntries), 1);
+
+    if (numEntries == MEMORY_ACCESS_BUFFER_SIZE - 1) {
+        // make sure everything is visible in memory
+        __threadfence_system();
+        doorbell->full = true;
+        while (doorbell->full);
+
+        pTracker->numEntries = 0;
+        __threadfence();
+        pTracker->currentEntry = 0;
+    }
+}
+
+static __device__
+SanitizerPatchResult CommonCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    MemoryType type)
+{
+    auto* pTracker = (MemoryAccessTracker*)userdata;
+    if(BypassCheckByCtaId(pTracker->target_block[0], pTracker->target_block[1], pTracker->target_block[2])) {
+        return SANITIZER_PATCH_SUCCESS;
+    }
+
+    uint32_t active_mask = __activemask();
+    uint32_t laneid = get_laneid();
+    uint32_t first_laneid = __ffs(active_mask) - 1;
+
+    MemoryAccess* accesses = nullptr;
+
+    if (laneid == first_laneid) {
+        uint32_t idx = GetBufferIndex(pTracker);
+        accesses = &pTracker->access_buffer[idx];
+        accesses->accessSize = accessSize;
+        accesses->flags = flags;
+        accesses->warpId = get_warpid();
+        accesses->type = type;
+        accesses->pc = pc;
+        accesses->active_mask = active_mask;
+    }
+
+    __syncwarp(active_mask);
+
+    accesses = (MemoryAccess*) shfl((uint64_t)accesses, first_laneid, active_mask);
+    if (accesses) {
+        if(type == MemoryType::Local){
+            accesses->addresses[laneid] = (((uint64_t) get_warpid() * GPU_WARP_SIZE + get_laneid()) << 54) |((uint64_t)(uintptr_t)ptr); // use high 10 bits to store thread id for local memory access
+        } else {
+            accesses->addresses[laneid] = (uint64_t)(uintptr_t)ptr;
+        }
+    }
+
+    __syncwarp(active_mask);
+
+    if (laneid == first_laneid) {
+        IncrementNumEntries(pTracker);
+    }
+
+    return SANITIZER_PATCH_SUCCESS;
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemoryGlobalAccessCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    const void *pData)
+{
+    return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryType::Global);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemorySharedAccessCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    const void *pData)
+{
+    return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryType::Shared);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemoryLocalAccessCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    const void *pData)
+{
+    return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryType::Local);
+}
+
+//For the future use of async memcpy
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemcpyAsyncCallback(void* userdata, uint64_t pc, void* src, uint32_t dst, uint32_t accessSize)
+{
+    if (src)
+    {
+        CommonCallback(userdata, pc, src, accessSize, SANITIZER_MEMORY_DEVICE_FLAG_READ, MemoryType::Global);
+    }
+
+    return CommonCallback(userdata, pc, (void*)dst, accessSize, SANITIZER_MEMORY_DEVICE_FLAG_WRITE, MemoryType::Shared);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult BlockExitCallback(void* userdata, uint64_t pc)
+{
+
+    MemoryAccessTracker* tracker = (MemoryAccessTracker*)userdata;
+    DoorBell* doorbell = tracker->doorBell;
+    if(BypassCheckByCtaId(tracker->target_block[0], tracker->target_block[1], tracker->target_block[2])) {
+        return SANITIZER_PATCH_SUCCESS;
+    }
+
+    uint32_t active_mask = __activemask();
+    uint32_t laneid = get_laneid();
+    uint32_t first_laneid = __ffs(active_mask) - 1;
+    int32_t pop_count = __popc(active_mask);
+
+    if (laneid == first_laneid) {
+        atomicAdd((int*)&doorbell->num_threads, -pop_count);
+    }
+    __syncwarp(active_mask);
+
+    return SANITIZER_PATCH_SUCCESS;
+}
diff --git a/gpu_src/gpu_patch_pc_dependency.cu b/gpu_src/gpu_patch_pc_dependency.cu
new file mode 100644
index 0000000..d560513
--- /dev/null
+++ b/gpu_src/gpu_patch_pc_dependency.cu
@@ -0,0 +1,164 @@
+#include "gpu_patch.h"
+
+#include <sanitizer_patching.h>
+
+#include "gpu_utils.h"
+#include <cstdio>
+
+
+static __device__ __inline__
+uint32_t GetBufferIndex(MemoryAccessTracker* pTracker) {
+    uint32_t idx = MEMORY_ACCESS_BUFFER_SIZE;
+
+    while (idx >= MEMORY_ACCESS_BUFFER_SIZE) {
+        idx = atomicAdd(&(pTracker->currentEntry), 1);
+
+        if (idx >= MEMORY_ACCESS_BUFFER_SIZE) {
+            // buffer is full, wait for last writing thread to flush
+            while (*(volatile uint32_t*)&(pTracker->currentEntry) >= MEMORY_ACCESS_BUFFER_SIZE);
+        }
+    }
+
+    return idx;
+}
+
+static __device__ __inline__
+void IncrementNumEntries(MemoryAccessTracker* pTracker) {
+    DoorBell* doorbell = pTracker->doorBell;
+    __threadfence();
+    const uint32_t numEntries = atomicAdd((int*)&(pTracker->numEntries), 1);
+
+    if (numEntries == MEMORY_ACCESS_BUFFER_SIZE - 1) {
+        // make sure everything is visible in memory
+        __threadfence_system();
+        doorbell->full = true;
+        while (doorbell->full);
+
+        pTracker->numEntries = 0;
+        __threadfence();
+        pTracker->currentEntry = 0;
+    }
+}
+
+static __device__
+SanitizerPatchResult CommonCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    MemoryType type)
+{
+    auto* pTracker = (MemoryAccessTracker*)userdata;
+    if (!pTracker->enabled_instrumenting) {
+        return SANITIZER_PATCH_SUCCESS;
+    }
+
+    uint32_t active_mask = __activemask();
+    uint32_t laneid = get_laneid();
+    uint32_t first_laneid = __ffs(active_mask) - 1;
+
+    MemoryAccess* accesses = nullptr;
+
+    uint32_t distinct_sector_count = get_distint_sector_count((uint64_t)(uintptr_t)ptr/32, active_mask); // sector size is 32 bytes so we divide by 32 to get the sector tag
+
+    if (laneid == first_laneid) {
+        uint32_t idx = GetBufferIndex(pTracker);
+        accesses = &pTracker->access_buffer[idx];
+        accesses->accessSize = accessSize;
+        accesses->flags = flags;
+        accesses->distinct_sector_count = distinct_sector_count;
+        accesses->warpId = get_flat_thread_id() / GPU_WARP_SIZE;
+        accesses->ctaId = get_ctaid_as_uint64();
+        accesses->type = type;
+        accesses->pc = pc - pTracker->kernel_pc; // for in-gpu pc offset calculation, so that the pc in corresponding analysis tool can be saved only in uint32_t
+        accesses->active_mask = active_mask;
+    }
+
+    __syncwarp(active_mask);
+
+    accesses = (MemoryAccess*) shfl((uint64_t)accesses, first_laneid, active_mask);
+    if (accesses) {
+        if(type == MemoryType::Local){
+            accesses->addresses[laneid] = ((get_flat_thread_id()) << 54) |((uint64_t)(uintptr_t)ptr); // use high 10 bits to store thread id for local memory access
+        } else {
+            accesses->addresses[laneid] = (uint64_t)(uintptr_t)ptr;
+        }
+    }
+
+    __syncwarp(active_mask);
+
+    if (laneid == first_laneid) {
+        IncrementNumEntries(pTracker);
+    }
+
+    return SANITIZER_PATCH_SUCCESS;
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemoryGlobalAccessCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    const void *pData)
+{
+    return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryType::Global);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemorySharedAccessCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    const void *pData)
+{
+    return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryType::Shared);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemoryLocalAccessCallback(
+    void* userdata,
+    uint64_t pc,
+    void* ptr,
+    uint32_t accessSize,
+    uint32_t flags,
+    const void *pData)
+{
+    return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryType::Local);
+}
+
+//For the future use of async memcpy
+extern "C" __device__ __noinline__
+SanitizerPatchResult MemcpyAsyncCallback(void* userdata, uint64_t pc, void* src, uint32_t dst, uint32_t accessSize)
+{
+    if (src)
+    {
+        CommonCallback(userdata, pc, src, accessSize, SANITIZER_MEMORY_DEVICE_FLAG_READ, MemoryType::Global);
+    }
+
+    return CommonCallback(userdata, pc, (void*)dst, accessSize, SANITIZER_MEMORY_DEVICE_FLAG_WRITE, MemoryType::Shared);
+}
+
+extern "C" __device__ __noinline__
+SanitizerPatchResult BlockExitCallback(void* userdata, uint64_t pc)
+{
+
+    MemoryAccessTracker* tracker = (MemoryAccessTracker*)userdata;
+    DoorBell* doorbell = tracker->doorBell;
+
+    uint32_t active_mask = __activemask();
+    uint32_t laneid = get_laneid();
+    uint32_t first_laneid = __ffs(active_mask) - 1;
+    int32_t pop_count = __popc(active_mask);
+
+    if (laneid == first_laneid) {
+        atomicAdd((int*)&doorbell->num_threads, -pop_count);
+    }
+    __syncwarp(active_mask);
+
+    return SANITIZER_PATCH_SUCCESS;
+}
diff --git a/gpu_src/include/gpu_patch.h b/gpu_src/include/gpu_patch.h
index 09152cb..52419e5 100644
--- a/gpu_src/include/gpu_patch.h
+++ b/gpu_src/include/gpu_patch.h
@@ -22,7 +22,11 @@ struct MemoryAccess
     uint64_t addresses[GPU_WARP_SIZE];
     uint32_t accessSize;
     uint32_t flags;
-    uint64_t warpId;
+    uint64_t ctaId;
+    uint64_t pc;
+    uint32_t warpId;
+    uint32_t distinct_sector_count;
+    uint32_t active_mask;
     MemoryType type;
 
     // copy constructor
@@ -34,8 +38,12 @@ struct MemoryAccess
         }
         accessSize = other.accessSize;
         flags = other.flags;
+        ctaId = other.ctaId;
         warpId = other.warpId;
+        distinct_sector_count = other.distinct_sector_count;
         type = other.type;
+        pc = other.pc;
+        active_mask = other.active_mask;
     }
 
     MemoryAccess() = default;
@@ -86,6 +94,9 @@ struct MemoryAccessTracker
     uint32_t numEntries;
     uint64_t accessCount;
     uint64_t accessSize;
+    uint64_t kernel_pc;
+    bool enabled_instrumenting;
+    int32_t target_block[3]; // target block to sample [x, y, z]
     DoorBell* doorBell;
     MemoryAccess* access_buffer;
     MemoryAccessState* access_state;
diff --git a/gpu_src/include/gpu_utils.h b/gpu_src/include/gpu_utils.h
index ca254ce..e517f14 100644
--- a/gpu_src/include/gpu_utils.h
+++ b/gpu_src/include/gpu_utils.h
@@ -36,6 +36,12 @@ __device__ __forceinline__ int4 get_ctaid(void) {
     return ret;
 }
 
+__device__ __forceinline__ uint64_t get_ctaid_as_uint64(void) {
+    int4 ctaid = get_ctaid();
+    uint64_t ret = (uint64_t)(ctaid.x + ctaid.y * gridDim.x + ctaid.z * gridDim.x * gridDim.y);
+    return ret;
+}
+
 //  Get the number of CTA ids per grid
 __device__ __forceinline__ int4 get_nctaid(void) {
     int4 ret;
@@ -65,6 +71,14 @@ __device__ __forceinline__ uint64_t get_block_num_threads() {
     return blockDim.x * blockDim.y * blockDim.z;
 }
 
+__device__ __forceinline__ uint32_t get_distint_sector_count(uint64_t sector_tag, uint32_t active_mask) {
+    uint32_t match_mask = 0;
+    match_mask = __match_any_sync(active_mask, sector_tag);
+    uint32_t leader = __ffs(match_mask) - 1;
+    uint32_t is_leader = (leader == get_laneid()) ? 1 : 0;
+    uint32_t final_mask = __ballot_sync(active_mask, is_leader);
+    return __popc(final_mask);
+}
 
 template <class T>
 __device__ __forceinline__ T shfl(T v, uint32_t srcline, uint32_t mask = 0xFFFFFFFF) {
diff --git a/src/compute_sanitizer.cpp b/src/compute_sanitizer.cpp
index 42adb0c..45f7aab 100644
--- a/src/compute_sanitizer.cpp
+++ b/src/compute_sanitizer.cpp
@@ -16,6 +16,8 @@
 #include <cstring>
 #include <cassert>
 #include <unordered_map>
+#include <unordered_set>
+#include <sstream>
 
 #define SANITIZER_VERBOSE 1
 
@@ -58,6 +60,55 @@ static std::unordered_map<CUmodule, bool> sanitizer_active_modules;
 // <context, device> for multi-GPU support
 static std::unordered_map<CUcontext, CUdevice> sanitizer_ctx_to_device;
 
+static std::unordered_set<std::string> sanitizer_kernel_white_list;
+
+// read whitelist file, parse kernel keywords line by line
+void LoadKernelWhiteList(const char* whitelist_path) {
+    if (!whitelist_path) return;
+
+    std::ifstream fin(whitelist_path);
+    if (!fin.is_open()) {
+        std::cerr << "[SANITIZER WARN] Failed to open whitelist file: "
+                  << whitelist_path << std::endl;
+        return;
+    }
+
+    auto trim = [](std::string& s) {
+        const char* ws = " \t\r\n";
+        auto start = s.find_first_not_of(ws);
+        if (start == std::string::npos) {
+            s.clear();
+            return;
+        }
+        auto end = s.find_last_not_of(ws);
+        s = s.substr(start, end - start + 1);
+    };
+
+    std::string line;
+    while (std::getline(fin, line)) {
+        trim(line);
+        if (line.empty() || line[0] == '#') continue;  // 跳过空行与注释
+        sanitizer_kernel_white_list.insert(line);
+    }
+
+    PRINT("[SANITIZER INFO] Loaded %zu kernel whitelist entries from %s\n",
+          sanitizer_kernel_white_list.size(), whitelist_path);
+    if (!sanitizer_kernel_white_list.empty()) {
+        PRINT("[SANITIZER INFO] Kernel whitelist: %s\n", sanitizer_kernel_white_list.begin()->c_str());
+    }
+}
+
+bool SanitizerKernelWhiteListCheck(const std::string& functionName){
+    if (sanitizer_kernel_white_list.empty()) {
+        return true;
+    }
+    for (const auto& kernel : sanitizer_kernel_white_list) {
+        if (functionName.find(kernel) != std::string::npos) {
+            return true;
+        }
+    }
+    return false;
+}
 
 void SanitizerTensorMallocCallback(uint64_t ptr, int64_t size, int64_t allocated, int64_t reserved, int device_id) {
     if (!sanitizer_options.sanitizer_callback_enabled) {
@@ -194,6 +245,45 @@ void ModuleLoadedCallback(CUmodule module)
                 SANITIZER_INSTRUCTION_GLOBAL_MEMORY_ACCESS, module, "MemoryGlobalAccessCallback"));
         SANITIZER_SAFECALL(
             sanitizerPatchInstructions(SANITIZER_INSTRUCTION_BLOCK_EXIT, module, "BlockExitCallback"));
+    } else if (sanitizer_options.patch_name == GPU_PATCH_HEATMAP_ANALYSIS) {
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(
+                SANITIZER_INSTRUCTION_GLOBAL_MEMORY_ACCESS, module, "MemoryGlobalAccessCallback"));
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(
+                SANITIZER_INSTRUCTION_SHARED_MEMORY_ACCESS, module, "MemorySharedAccessCallback"));
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(
+                SANITIZER_INSTRUCTION_LOCAL_MEMORY_ACCESS, module, "MemoryLocalAccessCallback"));
+        // SANITIZER_SAFECALL(
+        //     sanitizerPatchInstructions(
+        //         SANITIZER_INSTRUCTION_MEMCPY_ASYNC, module, "MemcpyAsyncCallback"));
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(SANITIZER_INSTRUCTION_BLOCK_EXIT, module, "BlockExitCallback"));
+    } else if (sanitizer_options.patch_name == GPU_PATCH_BLOCK_DIVERGENCE_ANALYSIS) {
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(
+                SANITIZER_INSTRUCTION_GLOBAL_MEMORY_ACCESS, module, "MemoryGlobalAccessCallback"));
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(
+                SANITIZER_INSTRUCTION_SHARED_MEMORY_ACCESS, module, "MemorySharedAccessCallback"));
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(
+                SANITIZER_INSTRUCTION_LOCAL_MEMORY_ACCESS, module, "MemoryLocalAccessCallback"));
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(SANITIZER_INSTRUCTION_BLOCK_EXIT, module, "BlockExitCallback"));
+    } else if (sanitizer_options.patch_name == GPU_PATCH_PC_DEPENDENCY_ANALYSIS) {
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(
+                SANITIZER_INSTRUCTION_GLOBAL_MEMORY_ACCESS, module, "MemoryGlobalAccessCallback"));
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(
+                SANITIZER_INSTRUCTION_SHARED_MEMORY_ACCESS, module, "MemorySharedAccessCallback"));
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(
+                SANITIZER_INSTRUCTION_LOCAL_MEMORY_ACCESS, module, "MemoryLocalAccessCallback"));
+        SANITIZER_SAFECALL(
+            sanitizerPatchInstructions(SANITIZER_INSTRUCTION_BLOCK_EXIT, module, "BlockExitCallback"));
     }
     
     SANITIZER_SAFECALL(sanitizerPatchModule(module));
@@ -313,6 +403,63 @@ void buffer_init(CUcontext context) {
             SANITIZER_SAFECALL(
                 sanitizerAllocHost(context, (void**)&global_doorbell, sizeof(DoorBell)));
         }
+    } else if (sanitizer_options.patch_name == GPU_PATCH_HEATMAP_ANALYSIS) {
+        if (!device_access_buffer) {
+            SANITIZER_SAFECALL(
+                sanitizerAlloc(
+                    context,
+                    (void**)&device_access_buffer,
+                    sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE));
+        }
+        if (!host_access_buffer) {
+            SANITIZER_SAFECALL(
+                sanitizerAllocHost(
+                    context,
+                    (void**)&host_access_buffer,
+                    sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE));
+        }
+        if (!global_doorbell) {
+            SANITIZER_SAFECALL(
+                sanitizerAllocHost(context, (void**)&global_doorbell, sizeof(DoorBell)));
+        }
+    } else if (sanitizer_options.patch_name == GPU_PATCH_BLOCK_DIVERGENCE_ANALYSIS) {
+        if (!device_access_buffer) {
+            SANITIZER_SAFECALL(
+                sanitizerAlloc(
+                    context,
+                    (void**)&device_access_buffer,
+                    sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE));
+        }
+        if (!host_access_buffer) {
+            SANITIZER_SAFECALL(
+                sanitizerAllocHost(
+                    context,
+                    (void**)&host_access_buffer,
+                    sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE));
+        }
+        if (!global_doorbell) {
+            SANITIZER_SAFECALL(
+                sanitizerAllocHost(context, (void**)&global_doorbell, sizeof(DoorBell)));
+        }
+    } else if (sanitizer_options.patch_name == GPU_PATCH_PC_DEPENDENCY_ANALYSIS) {
+        if (!device_access_buffer) {
+            SANITIZER_SAFECALL(
+                sanitizerAlloc(
+                    context,
+                    (void**)&device_access_buffer,
+                    sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE));
+        }
+        if (!host_access_buffer) {
+            SANITIZER_SAFECALL(
+                sanitizerAllocHost(
+                    context,
+                    (void**)&host_access_buffer,
+                    sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE));
+        }
+        if (!global_doorbell) {
+            SANITIZER_SAFECALL(
+                sanitizerAllocHost(context, (void**)&global_doorbell, sizeof(DoorBell)));
+        }
     }
 }
 
@@ -321,6 +468,7 @@ void LaunchBeginCallback(
     CUcontext context,
     CUmodule module,
     CUfunction function,
+    uint64_t pc,
     std::string functionName,
     Sanitizer_StreamHandle hstream,
     dim3 blockDims,
@@ -328,8 +476,11 @@ void LaunchBeginCallback(
 {
     if (sanitizer_options.patch_name != GPU_NO_PATCH) {
         // sampling
+        bool launch_monitoring = true;
         sanitizer_options.grid_launch_id++;
-        if (sanitizer_options.grid_launch_id % sanitizer_options.sample_rate == 0) {
+        if ((sanitizer_options.grid_launch_id % sanitizer_options.sample_rate == 0) &&
+            SanitizerKernelWhiteListCheck(functionName)) {
+            
             PRINT("[SANITIZER INFO] Monitoring kernel %s, launch id %lu\n",
                     functionName.c_str(), sanitizer_options.grid_launch_id);
             auto it = sanitizer_active_modules.find(module);
@@ -340,12 +491,15 @@ void LaunchBeginCallback(
         } else {
             PRINT("[SANITIZER INFO] Skipping kernel %s monitoring, launch id %lu\n",
                     functionName.c_str(), sanitizer_options.grid_launch_id);
+            launch_monitoring = false;
             auto it = sanitizer_active_modules.find(module);
-            if (it->second) {
-                SANITIZER_SAFECALL(sanitizerUnpatchModule(module));
-                it->second = false;
+            if (it != sanitizer_active_modules.end()) {
+                if (it->second) {
+                    // SANITIZER_SAFECALL(sanitizerUnpatchModule(module));
+                    it->second = false;
+                }
+                return;
             }
-            return;
         }
 
         buffer_init(context);
@@ -439,6 +593,73 @@ void LaunchBeginCallback(
             host_tracker_handle->numEntries = 0;
             host_tracker_handle->access_buffer = device_access_buffer;
 
+            uint32_t num_threads =
+                        blockDims.x * blockDims.y * blockDims.z * gridDims.x * gridDims.y * gridDims.z;
+            global_doorbell->num_threads = num_threads;
+            global_doorbell->full = 0;
+            host_tracker_handle->doorBell = global_doorbell;
+        } else if (sanitizer_options.patch_name == GPU_PATCH_HEATMAP_ANALYSIS) {
+            SANITIZER_SAFECALL(
+                sanitizerMemset(
+                    device_access_buffer, 0, sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE, hstream));
+            host_tracker_handle->currentEntry = 0;
+            host_tracker_handle->numEntries = 0;
+            host_tracker_handle->access_buffer = device_access_buffer;
+            char* target_block_str = std::getenv("YOSEMITE_TARGET_BLOCK");
+            if (target_block_str) {
+                // target_block_str is "x,y,z"
+                std::stringstream ss(target_block_str);
+                std::string token;
+                int i = 0;
+                while (std::getline(ss, token, ',')) {
+                    host_tracker_handle->target_block[i] = std::stoi(token);
+                    i++;
+                }
+                if (i != 3) {
+                    PRINT("[SANITIZER ERROR] Invalid target block format: %s\n", target_block_str);
+                    exit(EXIT_FAILURE);
+                }
+                if (host_tracker_handle->target_block[0] < 0 || host_tracker_handle->target_block[1] < 0 || host_tracker_handle->target_block[2] < 0) {
+                    PRINT("[SANITIZER ERROR] Invalid target block: %d, %d, %d\n", host_tracker_handle->target_block[0], host_tracker_handle->target_block[1], host_tracker_handle->target_block[2]);
+                    exit(EXIT_FAILURE);
+                }
+                if (host_tracker_handle->target_block[0] >= gridDims.x || host_tracker_handle->target_block[1] >= gridDims.y || host_tracker_handle->target_block[2] >= gridDims.z) {
+                    PRINT("[SANITIZER ERROR] Invalid target block: %d, %d, %d\n", host_tracker_handle->target_block[0], host_tracker_handle->target_block[1], host_tracker_handle->target_block[2]);
+                    exit(EXIT_FAILURE);
+                }
+                PRINT("[SANITIZER INFO] Target block: %d, %d, %d\n", host_tracker_handle->target_block[0], host_tracker_handle->target_block[1], host_tracker_handle->target_block[2]);
+            } else {
+                host_tracker_handle->target_block[0] = 0;
+                host_tracker_handle->target_block[1] = 0;
+                host_tracker_handle->target_block[2] = 0;
+                PRINT("[SANITIZER INFO] No target block specified, using default (0, 0, 0)\n");
+            }
+            // only sample one block so the doorbell need to be updated to thread amount of one block
+            uint32_t num_threads = blockDims.x * blockDims.y * blockDims.z;
+            global_doorbell->num_threads = num_threads;
+            global_doorbell->full = 0;
+            host_tracker_handle->doorBell = global_doorbell;
+        } else if (sanitizer_options.patch_name == GPU_PATCH_BLOCK_DIVERGENCE_ANALYSIS) {
+            SANITIZER_SAFECALL(
+                sanitizerMemset(
+                    device_access_buffer, 0, sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE, hstream));
+            host_tracker_handle->currentEntry = 0;
+            host_tracker_handle->numEntries = 0;
+            host_tracker_handle->access_buffer = device_access_buffer;
+
+            uint32_t num_threads =
+                        blockDims.x * blockDims.y * blockDims.z * gridDims.x * gridDims.y * gridDims.z;
+            global_doorbell->num_threads = num_threads;
+            global_doorbell->full = 0;
+            host_tracker_handle->doorBell = global_doorbell;
+        } else if (sanitizer_options.patch_name == GPU_PATCH_PC_DEPENDENCY_ANALYSIS) {
+            SANITIZER_SAFECALL(
+                sanitizerMemset(
+                    device_access_buffer, 0, sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE, hstream));
+            host_tracker_handle->currentEntry = 0;
+            host_tracker_handle->numEntries = 0;
+            host_tracker_handle->access_buffer = device_access_buffer;
+
             uint32_t num_threads =
                         blockDims.x * blockDims.y * blockDims.z * gridDims.x * gridDims.y * gridDims.z;
             global_doorbell->num_threads = num_threads;
@@ -446,6 +667,8 @@ void LaunchBeginCallback(
             host_tracker_handle->doorBell = global_doorbell;
         }
 
+        host_tracker_handle->kernel_pc = pc; // for in-gpu offset calculation
+        host_tracker_handle->enabled_instrumenting = launch_monitoring;
         SANITIZER_SAFECALL(
             sanitizerMemcpyHostToDeviceAsync(
                 device_tracker_handle, host_tracker_handle, sizeof(MemoryAccessTracker), hstream));
@@ -466,7 +689,8 @@ void LaunchEndCallback(
     Sanitizer_StreamHandle phstream)
 {
     // sampling
-    if (sanitizer_options.grid_launch_id % sanitizer_options.sample_rate != 0) {
+    if (sanitizer_options.grid_launch_id % sanitizer_options.sample_rate != 0 ||
+        !SanitizerKernelWhiteListCheck(functionName)) {
         return;
     }
 
@@ -616,6 +840,90 @@ void LaunchEndCallback(
                 sanitizerMemcpyDeviceToHost(
                     host_access_buffer, device_access_buffer, sizeof(MemoryAccess) * numEntries, hstream));
 
+            yosemite_gpu_data_analysis(host_access_buffer, numEntries);
+        } else if (sanitizer_options.patch_name == GPU_PATCH_HEATMAP_ANALYSIS) {
+            while (true)
+            {
+                if (global_doorbell->num_threads == 0) {
+                    break;
+                }
+
+                if (global_doorbell->full) {
+                    PRINT("[SANITIZER INFO] Doorbell full with size %u. Analyzing data...\n",
+                                                                            MEMORY_ACCESS_BUFFER_SIZE);
+                    SANITIZER_SAFECALL(
+                        sanitizerMemcpyDeviceToHost(host_access_buffer, device_access_buffer,
+                                            sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE, phstream));
+                    yosemite_gpu_data_analysis(host_access_buffer, MEMORY_ACCESS_BUFFER_SIZE);
+                    global_doorbell->full = 0;
+                }
+            }
+            SANITIZER_SAFECALL(sanitizerStreamSynchronize(hstream));
+            SANITIZER_SAFECALL(
+                sanitizerMemcpyDeviceToHost(
+                    host_tracker_handle, device_tracker_handle, sizeof(MemoryAccessTracker), hstream));
+
+            auto numEntries = host_tracker_handle->numEntries;
+            SANITIZER_SAFECALL(
+                sanitizerMemcpyDeviceToHost(
+                    host_access_buffer, device_access_buffer, sizeof(MemoryAccess) * numEntries, hstream));
+
+            yosemite_gpu_data_analysis(host_access_buffer, numEntries);
+        } else if (sanitizer_options.patch_name == GPU_PATCH_BLOCK_DIVERGENCE_ANALYSIS) {
+            while (true)
+            {
+                if (global_doorbell->num_threads == 0) {
+                    break;
+                }
+
+                if (global_doorbell->full) {
+                    PRINT("[SANITIZER INFO] Doorbell full with size %u. Analyzing data...\n",
+                                                                            MEMORY_ACCESS_BUFFER_SIZE);
+                    SANITIZER_SAFECALL(
+                        sanitizerMemcpyDeviceToHost(host_access_buffer, device_access_buffer,
+                                            sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE, phstream));
+                    yosemite_gpu_data_analysis(host_access_buffer, MEMORY_ACCESS_BUFFER_SIZE);
+                    global_doorbell->full = 0;
+                }
+            }
+            SANITIZER_SAFECALL(sanitizerStreamSynchronize(hstream));
+            SANITIZER_SAFECALL(
+                sanitizerMemcpyDeviceToHost(
+                    host_tracker_handle, device_tracker_handle, sizeof(MemoryAccessTracker), hstream));
+
+            auto numEntries = host_tracker_handle->numEntries;
+            SANITIZER_SAFECALL(
+                sanitizerMemcpyDeviceToHost(
+                    host_access_buffer, device_access_buffer, sizeof(MemoryAccess) * numEntries, hstream));
+
+            yosemite_gpu_data_analysis(host_access_buffer, numEntries);
+        } else if (sanitizer_options.patch_name == GPU_PATCH_PC_DEPENDENCY_ANALYSIS) {
+            while (true)
+            {
+                if (global_doorbell->num_threads == 0) {
+                    break;
+                }
+
+                if (global_doorbell->full) {
+                    PRINT("[SANITIZER INFO] Doorbell full with size %u. Analyzing data...\n",
+                                                                            MEMORY_ACCESS_BUFFER_SIZE);
+                    SANITIZER_SAFECALL(
+                        sanitizerMemcpyDeviceToHost(host_access_buffer, device_access_buffer,
+                                            sizeof(MemoryAccess) * MEMORY_ACCESS_BUFFER_SIZE, phstream));
+                    yosemite_gpu_data_analysis(host_access_buffer, MEMORY_ACCESS_BUFFER_SIZE);
+                    global_doorbell->full = 0;
+                }
+            }
+            SANITIZER_SAFECALL(sanitizerStreamSynchronize(hstream));
+            SANITIZER_SAFECALL(
+                sanitizerMemcpyDeviceToHost(
+                    host_tracker_handle, device_tracker_handle, sizeof(MemoryAccessTracker), hstream));
+
+            auto numEntries = host_tracker_handle->numEntries;
+            SANITIZER_SAFECALL(
+                sanitizerMemcpyDeviceToHost(
+                    host_access_buffer, device_access_buffer, sizeof(MemoryAccess) * numEntries, hstream));
+
             yosemite_gpu_data_analysis(host_access_buffer, numEntries);
         }
     } else {
@@ -679,6 +987,12 @@ void ComputeSanitizerCallback(
                     auto* pContextData = (Sanitizer_ResourceContextData*)cbdata;
                     PRINT("[SANITIZER INFO] Context %p creation finished on device %p\n",
                             &pContextData->context, &pContextData->device);
+
+                    CUstream p_stream;
+                    Sanitizer_StreamHandle p_stream_handle;
+                    sanitizer_priority_stream_get(pContextData->context, &p_stream);
+                    PRINT("[SANITIZER INFO] Priority stream %p created on context %p\n",
+                            p_stream, pContextData->context);
                     break;
                 }
                 case SANITIZER_CBID_RESOURCE_CONTEXT_DESTROY_STARTING:
@@ -728,7 +1042,7 @@ void ComputeSanitizerCallback(
 
                     PRINT("[SANITIZER INFO] Malloc memory %p with size %lu (flag: %u) on device %d\n",
                             (void*)pModuleData->address, pModuleData->size, pModuleData->flags, device_id);
-
+                    PRINT("[SANITIZER INFO] Sector tag: %p, end tag: %p\n", (void*)(pModuleData->address >> 5), (void*)((pModuleData->address + pModuleData->size - 1) >> 5));
                     yosemite_alloc_callback(
                             pModuleData->address, pModuleData->size, pModuleData->flags, device_id);
                     break;
@@ -748,6 +1062,64 @@ void ComputeSanitizerCallback(
                             pModuleData->address, pModuleData->size, pModuleData->flags, device_id);
                     break;
                 }
+                case SANITIZER_CBID_RESOURCE_HOST_MEMORY_ALLOC:
+                {
+                    auto *pModuleData = (Sanitizer_ResourceMemoryData *)cbdata;
+                    if (pModuleData->flags == SANITIZER_MEMORY_FLAG_CG_RUNTIME || pModuleData->size == 0) {
+                        break;
+                    }
+                    PRINT("[SANITIZER INFO] Alloc host memory %p with size %lu (flag: %u)\n",
+                            (void*)pModuleData->address, pModuleData->size, pModuleData->flags);
+                    PRINT("[SANITIZER INFO] Sector tag: %p, end tag: %p\n", (void*)(pModuleData->address >> 5), (void*)((pModuleData->address + pModuleData->size - 1) >> 5));
+                    yosemite_alloc_callback(
+                            pModuleData->address, pModuleData->size, pModuleData->flags, 0);
+                    break;
+                }
+                case SANITIZER_CBID_RESOURCE_HOST_MEMORY_FREE:
+                {
+                    auto *pModuleData = (Sanitizer_ResourceMemoryData *)cbdata;
+                    if (pModuleData->flags == SANITIZER_MEMORY_FLAG_CG_RUNTIME || pModuleData->size == 0) {
+                        break;
+                    }
+                    
+                    PRINT("[SANITIZER INFO] Free host memory %p with size %lu (flag: %u)\n",
+                            (void*)pModuleData->address, pModuleData->size, pModuleData->flags);
+
+                    yosemite_free_callback(
+                            pModuleData->address, pModuleData->size, pModuleData->flags, 0);
+                    break;
+                }
+                case SANITIZER_CBID_RESOURCE_MEMORY_ALLOC_ASYNC:
+                {
+                    auto* pModuleData = (Sanitizer_ResourceMemoryData*)cbdata;
+                    if (pModuleData->flags == SANITIZER_MEMORY_FLAG_CG_RUNTIME || pModuleData->size == 0) {
+                        break;
+                    }
+
+                    CUdevice device_id = sanitizer_ctx_to_device[pModuleData->context];
+
+                    PRINT("[SANITIZER INFO] Alloc async memory %p with size %lu (flag: %u) on device %d\n",
+                            (void*)pModuleData->address, pModuleData->size, pModuleData->flags, device_id);
+                    yosemite_alloc_callback(
+                            pModuleData->address, pModuleData->size, pModuleData->flags, device_id);
+                    break;
+                }
+                case SANITIZER_CBID_RESOURCE_MEMORY_FREE_ASYNC:
+                {
+                    auto* pModuleData = (Sanitizer_ResourceMemoryData*)cbdata;
+                    if (pModuleData->flags == SANITIZER_MEMORY_FLAG_CG_RUNTIME || pModuleData->size == 0) {
+                        break;
+                    }
+
+                    CUdevice device_id = sanitizer_ctx_to_device[pModuleData->context];
+
+                    PRINT("[SANITIZER INFO] Free async memory %p with size %lu (flag: %u) on device %d\n",
+                            (void*)pModuleData->address, pModuleData->size, pModuleData->flags, device_id);
+
+                    yosemite_free_callback(
+                            pModuleData->address, pModuleData->size, pModuleData->flags, device_id);
+                    break;
+                }
                 default:
                     break;
             }
@@ -768,14 +1140,16 @@ void ComputeSanitizerCallback(
                     auto func_name = sanitizer_demangled_name_get(pLaunchData->functionName);
 
                     CUdevice device_id = sanitizer_ctx_to_device[pLaunchData->context];
-
-                    PRINT("[SANITIZER INFO] Launching kernel %s <<<(%u, %u, %u), (%u, %u, %u)>>> on device %d\n",
+                    uint64_t pc;
+                    uint64_t size;
+                    SANITIZER_SAFECALL(sanitizerGetFunctionPcAndSize(pLaunchData->module, pLaunchData->functionName, &pc, &size));
+                    PRINT("[SANITIZER INFO] Launching kernel %s <<<(%u, %u, %u), (%u, %u, %u)>>> on device %d, pc: 0x%lx, size: %lu\n",
                             func_name,
                             pLaunchData->gridDim_x, pLaunchData->gridDim_y, pLaunchData->gridDim_z,
                             pLaunchData->blockDim_x, pLaunchData->blockDim_y, pLaunchData->blockDim_z,
-                            device_id);
+                            device_id, pc, size);
 
-                    LaunchBeginCallback(pLaunchData->context, pLaunchData->module, pLaunchData->function,
+                    LaunchBeginCallback(pLaunchData->context, pLaunchData->module, pLaunchData->function, pc,
                                     func_name, pLaunchData->hStream, blockDims, gridDims);
                     break;
                 }
@@ -913,6 +1287,7 @@ void enable_compute_sanitizer(bool enable) {
 int InitializeInjection()
 {
     sanitizer_debug_wait();
+    LoadKernelWhiteList(std::getenv("YOSEMITE_KERNEL_WHITELIST"));
     Sanitizer_SubscriberHandle handle;
     SANITIZER_SAFECALL(sanitizerSubscribe(&handle, ComputeSanitizerCallback, nullptr));
     SANITIZER_SAFECALL(sanitizerEnableDomain(1, handle, SANITIZER_CB_DOMAIN_RESOURCE));