From 0b8ecf57a2e447feda5372ec3753b7092c8cc8c2 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 21 Mar 2025 12:26:22 -0700 Subject: [PATCH 1/4] Fix incorrect parsing of PURLs beginning with pkg:/ --- lib/package_url.rb | 5 +++-- spec/package_url_spec.rb | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/lib/package_url.rb b/lib/package_url.rb index 511cc86..7e8e419 100644 --- a/lib/package_url.rb +++ b/lib/package_url.rb @@ -152,14 +152,15 @@ def self.parse(string) end # Strip the remainder from leading and trailing '/' + # Use gsub to remove ALL leading slashes instead of just one + string = string.gsub(/^\/+/, '').delete_suffix('/') # - Split this once from left on '/' # - The left side lowercased is the type # - The right side is the remainder - string = string.delete_suffix('/') case string.partition('/') in String => type, separator, remainder unless separator.empty? components[:type] = type - + string = remainder else raise InvalidPackageURL, 'invalid or missing package type' diff --git a/spec/package_url_spec.rb b/spec/package_url_spec.rb index 2b951c9..b0e0705 100644 --- a/spec/package_url_spec.rb +++ b/spec/package_url_spec.rb @@ -187,6 +187,44 @@ it { should have_description 'pkg:rpm/fedora/curl@7.50.3-1.fc25?arch=i386&distro=fedora-25' } end + + context 'with URLs containing extra slashes after scheme' do + it 'should parse pkg:/type/namespace/name correctly' do + purl = PackageURL.parse('pkg:/maven/org.apache.commons/io') + expect(purl).to have_attributes( + type: 'maven', + namespace: 'org.apache.commons', + name: 'io', + version: nil, + qualifiers: nil, + subpath: nil + ) + end + + it 'should parse pkg://type/namespace/name correctly' do + purl = PackageURL.parse('pkg://maven/org.apache.commons/io') + expect(purl).to have_attributes( + type: 'maven', + namespace: 'org.apache.commons', + name: 'io', + version: nil, + qualifiers: nil, + subpath: nil + ) + end + + it 'should parse pkg:///type/namespace/name correctly' do + purl = PackageURL.parse('pkg:///maven/org.apache.commons/io') + expect(purl).to have_attributes( + type: 'maven', + namespace: 'org.apache.commons', + name: 'io', + version: nil, + qualifiers: nil, + subpath: nil + ) + end + end end describe 'pattern matching' do From b2740de77e1a409ea6978c7f4c97fe6844ba93a2 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 21 Mar 2025 12:31:50 -0700 Subject: [PATCH 2/4] Fix escaping of subpaths --- lib/package_url.rb | 12 ++++++++--- spec/package_url_spec.rb | 45 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/lib/package_url.rb b/lib/package_url.rb index 7e8e419..1b69ff7 100644 --- a/lib/package_url.rb +++ b/lib/package_url.rb @@ -95,7 +95,7 @@ def self.parse(string) in String => remainder, separator, String => subpath unless separator.empty? components[:subpath] = subpath.split('/').select do |segment| !segment.empty? && segment != '.' && segment != '..' - end.compact.join('/') + end.map { |segment| URI.decode_www_form_component(segment) }.compact.join('/') string = remainder else @@ -160,7 +160,7 @@ def self.parse(string) case string.partition('/') in String => type, separator, remainder unless separator.empty? components[:type] = type - + string = remainder else raise InvalidPackageURL, 'invalid or missing package type' @@ -344,7 +344,13 @@ def to_s subpath.delete_prefix('/').delete_suffix('/').split('/').each do |segment| next if segment.empty? || segment == '.' || segment == '..' - segments << URI.encode_www_form_component(segment) + # Custom encoding for URL fragment segments: + # 1. Explicitly encode % as %25 to prevent double-encoding issues + # 2. Percent-encode special characters according to URL fragment rules + # 3. This ensures proper round-trip encoding/decoding with the parse method + segments << segment.gsub(/%|[^A-Za-z0-9\-\._~]/) { |m| + m == '%' ? '%25' : "%%%02X" % m.ord + } end unless segments.empty? diff --git a/spec/package_url_spec.rb b/spec/package_url_spec.rb index b0e0705..f87d753 100644 --- a/spec/package_url_spec.rb +++ b/spec/package_url_spec.rb @@ -188,6 +188,51 @@ it { should have_description 'pkg:rpm/fedora/curl@7.50.3-1.fc25?arch=i386&distro=fedora-25' } end + context 'with escaped subpath characters', url: 'pkg:type/name#path/with/%25/percent' do + it { + should have_attributes type: 'type', + namespace: nil, + name: 'name', + version: nil, + qualifiers: nil, + subpath: 'path/with/%/percent' + } + + it 'should properly round-trip the URL' do + expect(subject.to_s).to eq('pkg:type/name#path/with/%25/percent') + end + end + + context 'with multiple escaped subpath characters', url: 'pkg:type/name#path/%20space/%3Fquery/%25percent' do + it { + should have_attributes type: 'type', + namespace: nil, + name: 'name', + version: nil, + qualifiers: nil, + subpath: 'path/ space/?query/%percent' + } + + it 'should properly round-trip the URL' do + expect(subject.to_s).to eq('pkg:type/name#path/%20space/%3Fquery/%25percent') + end + end + + context 'with the specific issue case', url: 'pkg:t/n#%25' do + it { + should have_attributes type: 't', + namespace: nil, + name: 'n', + version: nil, + qualifiers: nil, + subpath: '%' + } + + it 'should properly round-trip the URL' do + expect(subject.to_s).to eq('pkg:t/n#%25') + end + end + context 'with URLs containing extra slashes after scheme' do it 'should parse pkg:/type/namespace/name correctly' do purl = PackageURL.parse('pkg:/maven/org.apache.commons/io') From 1326c833a521c717e552ad8ae1595958868c72de Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 21 Mar 2025 12:34:23 -0700 Subject: [PATCH 3/4] Relax Rubocop configuration --- .rubocop.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.rubocop.yml b/.rubocop.yml index 8738d9f..4d7e004 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -19,6 +19,6 @@ Metrics/MethodLength: Metrics/ParameterLists: Max: 7 Metrics/PerceivedComplexity: - Max: 20 + Max: 30 Style/ConditionalAssignment: Enabled: false From 894a971f5efb904ba483fa7026cdcfdf8cd1c08d Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 21 Mar 2025 12:38:44 -0700 Subject: [PATCH 4/4] Fix linting errors --- lib/package_url.rb | 19 ++++++++++++------- spec/package_url_spec.rb | 4 ++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/package_url.rb b/lib/package_url.rb index 1b69ff7..3b618dd 100644 --- a/lib/package_url.rb +++ b/lib/package_url.rb @@ -93,9 +93,14 @@ def self.parse(string) # - This is the subpath case string.rpartition('#') in String => remainder, separator, String => subpath unless separator.empty? - components[:subpath] = subpath.split('/').select do |segment| - !segment.empty? && segment != '.' && segment != '..' - end.map { |segment| URI.decode_www_form_component(segment) }.compact.join('/') + subpath_components = [] + subpath.split('/').each do |segment| + next if segment.empty? || segment == '.' || segment == '..' + + subpath_components << URI.decode_www_form_component(segment) + end + + components[:subpath] = subpath_components.compact.join('/') string = remainder else @@ -153,7 +158,7 @@ def self.parse(string) # Strip the remainder from leading and trailing '/' # Use gsub to remove ALL leading slashes instead of just one - string = string.gsub(/^\/+/, '').delete_suffix('/') + string = string.gsub(%r{^/+}, '').delete_suffix('/') # - Split this once from left on '/' # - The left side lowercased is the type # - The right side is the remainder @@ -348,9 +353,9 @@ def to_s # 1. Explicitly encode % as %25 to prevent double-encoding issues # 2. Percent-encode special characters according to URL fragment rules # 3. This ensures proper round-trip encoding/decoding with the parse method - segments << segment.gsub(/%|[^A-Za-z0-9\-\._~]/) { |m| - m == '%' ? '%25' : "%%%02X" % m.ord - } + segments << segment.gsub(/%|[^A-Za-z0-9\-\._~]/) do |m| + m == '%' ? '%25' : format('%%%02X', m.ord) + end end unless segments.empty? diff --git a/spec/package_url_spec.rb b/spec/package_url_spec.rb index f87d753..81a64df 100644 --- a/spec/package_url_spec.rb +++ b/spec/package_url_spec.rb @@ -212,7 +212,7 @@ qualifiers: nil, subpath: 'path/ space/?query/%percent' } - + it 'should properly round-trip the URL' do expect(subject.to_s).to eq('pkg:type/name#path/%20space/%3Fquery/%25percent') end @@ -227,7 +227,7 @@ qualifiers: nil, subpath: '%' } - + it 'should properly round-trip the URL' do expect(subject.to_s).to eq('pkg:t/n#%25') end