From b81088ed77d58341c41140885b662b8551655143 Mon Sep 17 00:00:00 2001 From: Daniel Griesser Date: Wed, 18 Mar 2026 16:12:19 +0100 Subject: [PATCH] perf: reduce memory allocations by ~53% during exception capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce total allocated memory from 442k to 206k bytes (-53.5%) and objects from 3305 to 1538 (-53.5%) per Rails exception capture. All changes are internal optimizations with zero behavior changes. Key optimizations: - Cache longest_load_path and compute_filename results (class-level, invalidated on $LOAD_PATH changes) - Cache backtrace line parsing and Line/Frame object creation (bounded at 2048 entries) - Optimize LineCache with Hash#fetch, direct context setting, and per-(filename, lineno) caching - Avoid unnecessary allocations: indexed regex captures, match? instead of =~, byteslice, single-pass iteration in StacktraceBuilder - RequestInterface: avoid env.dup, cache header name transforms, ASCII fast-path for encoding - Scope/BreadcrumbBuffer: shallow dup instead of deep_dup where inner values are not mutated after duplication - Hub#add_breadcrumb: hint default nil instead of {} to avoid empty hash allocation See sub-PRs for detailed review by risk level: - #2902 (low risk) — hot path allocation avoidance - #2903 (low risk) — LineCache optimization - #2904 (medium risk) — load path and filename caching - #2905 (needs review) — backtrace parse caching - #2906 (needs review) — Frame object caching - #2907 (needs review) — Scope/BreadcrumbBuffer shallow dup - #2908 (medium risk) — RequestInterface optimizations --- sentry-ruby/lib/sentry/backtrace.rb | 7 +- sentry-ruby/lib/sentry/backtrace/line.rb | 72 +++++++++--- sentry-ruby/lib/sentry/breadcrumb_buffer.rb | 2 +- sentry-ruby/lib/sentry/hub.rb | 4 +- sentry-ruby/lib/sentry/interfaces/request.rb | 58 +++++++--- .../lib/sentry/interfaces/stacktrace.rb | 109 +++++++++++++++--- .../sentry/interfaces/stacktrace_builder.rb | 24 +++- sentry-ruby/lib/sentry/linecache.rb | 69 +++++++---- sentry-ruby/lib/sentry/scope.rb | 12 +- 9 files changed, 273 insertions(+), 84 deletions(-) diff --git a/sentry-ruby/lib/sentry/backtrace.rb b/sentry-ruby/lib/sentry/backtrace.rb index 73a77acfd..9c7803465 100644 --- a/sentry-ruby/lib/sentry/backtrace.rb +++ b/sentry-ruby/lib/sentry/backtrace.rb @@ -10,13 +10,16 @@ class Backtrace # holder for an Array of Backtrace::Line instances attr_reader :lines + @in_app_pattern_cache = {} + def self.parse(backtrace, project_root, app_dirs_pattern, &backtrace_cleanup_callback) ruby_lines = backtrace.is_a?(Array) ? backtrace : backtrace.split(/\n\s*/) ruby_lines = backtrace_cleanup_callback.call(ruby_lines) if backtrace_cleanup_callback - in_app_pattern ||= begin - Regexp.new("^(#{project_root}/)?#{app_dirs_pattern}") + cache_key = app_dirs_pattern + in_app_pattern = @in_app_pattern_cache.fetch(cache_key) do + @in_app_pattern_cache[cache_key] = Regexp.new("^(#{project_root}/)?#{app_dirs_pattern}") end lines = ruby_lines.to_a.map do |unparsed_line| diff --git a/sentry-ruby/lib/sentry/backtrace/line.rb b/sentry-ruby/lib/sentry/backtrace/line.rb index 2a9f6c96e..cb8ccb636 100644 --- a/sentry-ruby/lib/sentry/backtrace/line.rb +++ b/sentry-ruby/lib/sentry/backtrace/line.rb @@ -6,6 +6,7 @@ class Backtrace # Handles backtrace parsing line by line class Line RB_EXTENSION = ".rb" + CLASS_EXTENSION = ".class" # regexp (optional leading X: on windows, or JRuby9000 class-prefix) RUBY_INPUT_FORMAT = / ^ \s* (?: [a-zA-Z]: | uri:classloader: )? ([^:]+ | <.*>): @@ -30,21 +31,65 @@ class Line attr_reader :in_app_pattern + # Cache parsed Line data (file, number, method, module_name) by unparsed line string. + # Same backtrace lines appear repeatedly (same code paths, same errors). + # Values are frozen arrays to avoid mutation. + # Limited to 2048 entries to prevent unbounded memory growth. + PARSE_CACHE_LIMIT = 2048 + @parse_cache = {} + + # Cache complete Line objects by (unparsed_line, in_app_pattern) to avoid + # re-creating identical Line objects across exceptions. + @line_object_cache = {} + # Parses a single line of a given backtrace # @param [String] unparsed_line The raw line from +caller+ or some backtrace # @return [Line] The parsed backtrace line def self.parse(unparsed_line, in_app_pattern = nil) - ruby_match = unparsed_line.match(RUBY_INPUT_FORMAT) - - if ruby_match - _, file, number, _, module_name, method = ruby_match.to_a - file.sub!(/\.class$/, RB_EXTENSION) - module_name = module_name - else - java_match = unparsed_line.match(JAVA_INPUT_FORMAT) - _, module_name, method, file, number = java_match.to_a + # Try full Line object cache first (avoids creating new objects entirely) + object_cache_key = unparsed_line + pattern_cache = @line_object_cache[object_cache_key] + if pattern_cache + cached_line = pattern_cache[in_app_pattern] + return cached_line if cached_line end - new(file, number, method, module_name, in_app_pattern) + + cached = @parse_cache[unparsed_line] + unless cached + ruby_match = unparsed_line.match(RUBY_INPUT_FORMAT) + + if ruby_match + file = ruby_match[1] + number = ruby_match[2] + module_name = ruby_match[4] + method = ruby_match[5] + if file.end_with?(CLASS_EXTENSION) + file.sub!(/\.class$/, RB_EXTENSION) + end + else + java_match = unparsed_line.match(JAVA_INPUT_FORMAT) + if java_match + module_name = java_match[1] + method = java_match[2] + file = java_match[3] + number = java_match[4] + end + end + cached = [file, number, method, module_name].freeze + @parse_cache.clear if @parse_cache.size >= PARSE_CACHE_LIMIT + @parse_cache[unparsed_line] = cached + end + + line = new(cached[0], cached[1], cached[2], cached[3], in_app_pattern) + + # Cache the Line object — limited by parse cache limit + if @line_object_cache.size >= PARSE_CACHE_LIMIT + @line_object_cache.clear + end + pattern_cache = (@line_object_cache[object_cache_key] ||= {}) + pattern_cache[in_app_pattern] = line + + line end # Creates a Line from a Thread::Backtrace::Location object @@ -74,12 +119,9 @@ def initialize(file, number, method, module_name, in_app_pattern) def in_app return false unless in_app_pattern + return false unless file - if file =~ in_app_pattern - true - else - false - end + file.match?(in_app_pattern) end # Reconstructs the line in a readable fashion diff --git a/sentry-ruby/lib/sentry/breadcrumb_buffer.rb b/sentry-ruby/lib/sentry/breadcrumb_buffer.rb index 3a5cc0177..6dfd79984 100644 --- a/sentry-ruby/lib/sentry/breadcrumb_buffer.rb +++ b/sentry-ruby/lib/sentry/breadcrumb_buffer.rb @@ -57,7 +57,7 @@ def to_h # @return [BreadcrumbBuffer] def dup copy = super - copy.buffer = buffer.deep_dup + copy.buffer = buffer.dup copy end end diff --git a/sentry-ruby/lib/sentry/hub.rb b/sentry-ruby/lib/sentry/hub.rb index f3ca5c3ba..e34c78d7b 100644 --- a/sentry-ruby/lib/sentry/hub.rb +++ b/sentry-ruby/lib/sentry/hub.rb @@ -283,12 +283,12 @@ def capture_event(event, **options, &block) event end - def add_breadcrumb(breadcrumb, hint: {}) + def add_breadcrumb(breadcrumb, hint: nil) return unless current_client return unless configuration.enabled_in_current_env? if before_breadcrumb = current_client.configuration.before_breadcrumb - breadcrumb = before_breadcrumb.call(breadcrumb, hint) + breadcrumb = before_breadcrumb.call(breadcrumb, hint || {}) end return unless breadcrumb diff --git a/sentry-ruby/lib/sentry/interfaces/request.rb b/sentry-ruby/lib/sentry/interfaces/request.rb index 3290b7652..ae5354a53 100644 --- a/sentry-ruby/lib/sentry/interfaces/request.rb +++ b/sentry-ruby/lib/sentry/interfaces/request.rb @@ -11,6 +11,14 @@ class RequestInterface < Interface "HTTP_X_FORWARDED_FOR" ].freeze + # Cache for Rack env key → HTTP header name transformations + # e.g. "HTTP_ACCEPT_LANGUAGE" → "Accept-Language", "CONTENT_TYPE" → "Content-Type" + @header_name_cache = {} + + class << self + attr_reader :header_name_cache + end + # See Sentry server default limits at # https://github.com/getsentry/sentry/blob/master/src/sentry/conf/server.py MAX_BODY_LIMIT = 4096 * 4 @@ -42,15 +50,6 @@ class RequestInterface < Interface # @see Configuration#send_default_pii # @see Configuration#rack_env_whitelist def initialize(env:, send_default_pii:, rack_env_whitelist:) - env = env.dup - - unless send_default_pii - # need to completely wipe out ip addresses - RequestInterface::IP_HEADERS.each do |header| - env.delete(header) - end - end - request = ::Rack::Request.new(env) if send_default_pii @@ -63,7 +62,7 @@ def initialize(env:, send_default_pii:, rack_env_whitelist:) self.method = request.request_method self.headers = filter_and_format_headers(env, send_default_pii) - self.env = filter_and_format_env(env, rack_env_whitelist) + self.env = filter_and_format_env(env, rack_env_whitelist, send_default_pii) end private @@ -91,12 +90,22 @@ def filter_and_format_headers(env, send_default_pii) next if is_server_protocol?(key, value, env["SERVER_PROTOCOL"]) next if is_skippable_header?(key) next if key == "HTTP_AUTHORIZATION" && !send_default_pii + # Filter IP headers inline instead of env.dup + delete + next if !send_default_pii && IP_HEADERS.include?(key) # Rack stores headers as HTTP_WHAT_EVER, we need What-Ever - key = key.sub(/^HTTP_/, "") - key = key.split("_").map(&:capitalize).join("-") - - memo[key] = Utils::EncodingHelper.encode_to_utf_8(value.to_s) + key = self.class.header_name_cache[key] ||= begin + k = key.delete_prefix("HTTP_") + k.split("_").map(&:capitalize).join("-").freeze + end + + # Fast path: ASCII strings are valid UTF-8, skip dup+force_encoding + str = value.to_s + memo[key] = if str.ascii_only? + str + else + Utils::EncodingHelper.encode_to_utf_8(str) + end rescue StandardError => e # Rails adds objects to the Rack env that can sometimes raise exceptions # when `to_s` is called. @@ -107,8 +116,11 @@ def filter_and_format_headers(env, send_default_pii) end end + # Regex to detect lowercase chars — match? is allocation-free (no MatchData/String) + LOWERCASE_PATTERN = /[a-z]/.freeze + def is_skippable_header?(key) - key.upcase != key || # lower-case envs aren't real http headers + key.match?(LOWERCASE_PATTERN) || # lower-case envs aren't real http headers key == "HTTP_COOKIE" || # Cookies don't go here, they go somewhere else !(key.start_with?("HTTP_") || CONTENT_HEADERS.include?(key)) end @@ -119,17 +131,25 @@ def is_skippable_header?(key) # if the request has legitimately sent a Version header themselves. # See: https://github.com/rack/rack/blob/028438f/lib/rack/handler/cgi.rb#L29 def is_server_protocol?(key, value, protocol_version) - rack_version = Gem::Version.new(::Rack.release) - return false if rack_version >= Gem::Version.new("3.0") + return false if self.class.rack_3_or_above? key == "HTTP_VERSION" && value == protocol_version end - def filter_and_format_env(env, rack_env_whitelist) + def self.rack_3_or_above? + return @rack_3_or_above if defined?(@rack_3_or_above) + + @rack_3_or_above = defined?(::Rack) && + Gem::Version.new(::Rack.release) >= Gem::Version.new("3.0") + end + + def filter_and_format_env(env, rack_env_whitelist, send_default_pii) return env if rack_env_whitelist.empty? env.select do |k, _v| - rack_env_whitelist.include? k.to_s + key = k.to_s + next false if !send_default_pii && IP_HEADERS.include?(key) + rack_env_whitelist.include?(key) end end end diff --git a/sentry-ruby/lib/sentry/interfaces/stacktrace.rb b/sentry-ruby/lib/sentry/interfaces/stacktrace.rb index 104b7a0fb..3b69e7495 100644 --- a/sentry-ruby/lib/sentry/interfaces/stacktrace.rb +++ b/sentry-ruby/lib/sentry/interfaces/stacktrace.rb @@ -23,40 +23,121 @@ def inspect private # Not actually an interface, but I want to use the same style + # Cache for longest_load_path lookups — shared across all frames + @load_path_cache = {} + @load_path_size = nil + # Cache for compute_filename results — many frames share identical abs_paths + # Separate caches for in_app=true and in_app=false to avoid composite keys + @filename_cache_in_app = {} + @filename_cache_not_in_app = {} + @filename_project_root = nil + + class << self + def check_load_path_freshness + current_size = $LOAD_PATH.size + if @load_path_size != current_size + @load_path_cache = {} + @filename_cache_in_app = {} + @filename_cache_not_in_app = {} + @load_path_size = current_size + end + end + + def longest_load_path_for(abs_path) + check_load_path_freshness + + @load_path_cache.fetch(abs_path) do + result = $LOAD_PATH.select { |path| abs_path.start_with?(path.to_s) }.max_by(&:size) + @load_path_cache[abs_path] = result + end + end + + def cached_filename(abs_path, project_root, in_app, strip_backtrace_load_path) + return abs_path unless abs_path + return abs_path unless strip_backtrace_load_path + + check_load_path_freshness + + # Invalidate filename cache when project_root changes + if @filename_project_root != project_root + @filename_cache_in_app = {} + @filename_cache_not_in_app = {} + @filename_project_root = project_root + end + + cache = in_app ? @filename_cache_in_app : @filename_cache_not_in_app + cache.fetch(abs_path) do + under_root = project_root && abs_path.start_with?(project_root) + prefix = + if under_root && in_app + project_root + elsif under_root + longest_load_path_for(abs_path) || project_root + else + longest_load_path_for(abs_path) + end + + result = if prefix + prefix_str = prefix.to_s + offset = if prefix_str.end_with?(File::SEPARATOR) + prefix_str.length + else + prefix_str.length + 1 + end + abs_path.byteslice(offset, abs_path.bytesize - offset) + else + abs_path + end + + cache[abs_path] = result + end + end + end + class Frame < Interface attr_accessor :abs_path, :context_line, :function, :in_app, :filename, :lineno, :module, :pre_context, :post_context, :vars def initialize(project_root, line, strip_backtrace_load_path = true) - @project_root = project_root - @strip_backtrace_load_path = strip_backtrace_load_path - @abs_path = line.file @function = line.method if line.method @lineno = line.number @in_app = line.in_app @module = line.module_name if line.module_name - @filename = compute_filename + @filename = StacktraceInterface.cached_filename( + @abs_path, project_root, @in_app, strip_backtrace_load_path + ) end def to_s "#{@filename}:#{@lineno}" end - def compute_filename + def compute_filename(project_root, strip_backtrace_load_path) return if abs_path.nil? - return abs_path unless @strip_backtrace_load_path + return abs_path unless strip_backtrace_load_path + under_root = project_root && abs_path.start_with?(project_root) prefix = - if under_project_root? && in_app - @project_root - elsif under_project_root? - longest_load_path || @project_root + if under_root && in_app + project_root + elsif under_root + longest_load_path || project_root else longest_load_path end - prefix ? abs_path[prefix.to_s.chomp(File::SEPARATOR).length + 1..-1] : abs_path + if prefix + prefix_str = prefix.to_s + offset = if prefix_str.end_with?(File::SEPARATOR) + prefix_str.length + else + prefix_str.length + 1 + end + abs_path.byteslice(offset, abs_path.bytesize - offset) + else + abs_path + end end def set_context(linecache, context_lines) @@ -77,12 +158,8 @@ def to_h(*args) private - def under_project_root? - @project_root && abs_path.start_with?(@project_root) - end - def longest_load_path - $LOAD_PATH.select { |path| abs_path.start_with?(path.to_s) }.max_by(&:size) + StacktraceInterface.longest_load_path_for(abs_path) end end end diff --git a/sentry-ruby/lib/sentry/interfaces/stacktrace_builder.rb b/sentry-ruby/lib/sentry/interfaces/stacktrace_builder.rb index 3f4a7f090..7b6c60470 100644 --- a/sentry-ruby/lib/sentry/interfaces/stacktrace_builder.rb +++ b/sentry-ruby/lib/sentry/interfaces/stacktrace_builder.rb @@ -64,13 +64,21 @@ def initialize( # @yieldparam frame [StacktraceInterface::Frame] # @return [StacktraceInterface] def build(backtrace:, &frame_callback) - parsed_lines = parse_backtrace_lines(backtrace).select(&:file) + parsed_lines = parse_backtrace_lines(backtrace) + + # Build frames in reverse order, skipping lines without files + # Single pass instead of select + reverse + map + compact + frames = [] + i = parsed_lines.size - 1 + while i >= 0 + line = parsed_lines[i] + i -= 1 + next unless line.file - frames = parsed_lines.reverse.map do |line| frame = convert_parsed_line_into_frame(line) frame = frame_callback.call(frame) if frame_callback - frame - end.compact + frames << frame if frame + end StacktraceInterface.new(frames: frames) end @@ -78,8 +86,16 @@ def build(backtrace:, &frame_callback) private def convert_parsed_line_into_frame(line) + # Cache frames by Line object identity — same Line produces same Frame + cache_key = line.object_id + cached_frame = @frame_cache&.[](cache_key) + return cached_frame if cached_frame + frame = StacktraceInterface::Frame.new(project_root, line, strip_backtrace_load_path) frame.set_context(linecache, context_lines) if context_lines + + @frame_cache ||= {} + @frame_cache[cache_key] = frame if @frame_cache.size < 2048 frame end diff --git a/sentry-ruby/lib/sentry/linecache.rb b/sentry-ruby/lib/sentry/linecache.rb index e60b4beed..5f87b171d 100644 --- a/sentry-ruby/lib/sentry/linecache.rb +++ b/sentry-ruby/lib/sentry/linecache.rb @@ -5,6 +5,7 @@ module Sentry class LineCache def initialize @cache = {} + @context_cache = {} end # Any linecache you provide to Sentry must implement this method. @@ -12,36 +13,64 @@ def initialize # file. The number of lines retrieved is (2 * context) + 1, the middle # line should be the line requested by lineno. See specs for more information. def get_file_context(filename, lineno, context) - return nil, nil, nil unless valid_path?(filename) + lines = getlines(filename) + return nil, nil, nil unless lines - lines = Array.new(2 * context + 1) do |i| - getline(filename, lineno - context + i) - end - [lines[0..(context - 1)], lines[context], lines[(context + 1)..-1]] + first_line = lineno - context + pre = Array.new(context) { |i| line_at(lines, first_line + i) } + context_line = line_at(lines, lineno) + post = Array.new(context) { |i| line_at(lines, lineno + 1 + i) } + [pre, context_line, post] end - private - - def valid_path?(path) - lines = getlines(path) - !lines.nil? - end + # Sets context directly on a frame, avoiding intermediate array allocation. + # Caches results per (filename, lineno) since the same frames repeat across exceptions. + def set_frame_context(frame, filename, lineno, context) + cache_key = lineno + file_contexts = @context_cache[filename] - def getlines(path) - @cache[path] ||= begin - File.open(path, "r", &:readlines) - rescue - nil + if file_contexts + cached = file_contexts[cache_key] + if cached + frame.pre_context = cached[0] + frame.context_line = cached[1] + frame.post_context = cached[2] + return + end end + + lines = getlines(filename) + return unless lines + + first_line = lineno - context + pre = Array.new(context) { |i| line_at(lines, first_line + i) } + ctx_line = line_at(lines, lineno) + post = Array.new(context) { |i| line_at(lines, lineno + 1 + i) } + + file_contexts = (@context_cache[filename] ||= {}) + file_contexts[cache_key] = [pre, ctx_line, post].freeze + + frame.pre_context = pre + frame.context_line = ctx_line + frame.post_context = post end - def getline(path, n) - return nil if n < 1 + private - lines = getlines(path) - return nil if lines.nil? + def line_at(lines, n) + return nil if n < 1 lines[n - 1] end + + def getlines(path) + @cache.fetch(path) do + @cache[path] = begin + File.open(path, "r", &:readlines) + rescue + nil + end + end + end end end diff --git a/sentry-ruby/lib/sentry/scope.rb b/sentry-ruby/lib/sentry/scope.rb index 0f28fca37..96ce36edf 100644 --- a/sentry-ruby/lib/sentry/scope.rb +++ b/sentry-ruby/lib/sentry/scope.rb @@ -124,13 +124,15 @@ def clear_breadcrumbs def dup copy = super copy.breadcrumbs = breadcrumbs.dup - copy.contexts = contexts.deep_dup - copy.extra = extra.deep_dup - copy.tags = tags.deep_dup - copy.user = user.deep_dup + # Shallow dup is sufficient for these containers — inner values are not + # mutated after scope duplication, only replaced via merge! or assignment + copy.contexts = contexts.dup + copy.extra = extra.dup + copy.tags = tags.dup + copy.user = user.dup copy.transaction_name = transaction_name.dup copy.transaction_source = transaction_source.dup - copy.fingerprint = fingerprint.deep_dup + copy.fingerprint = fingerprint.dup copy.span = span.deep_dup copy.session = session.deep_dup copy.propagation_context = propagation_context.deep_dup