diff --git a/Cargo.toml b/Cargo.toml index b3d0aa7..ecb5d42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,19 +20,21 @@ name = "stringy" path = "src/main.rs" [dependencies] -clap = { version = "4.5.54", features = [ "derive" ] } -entropy = "0.4.2" -goblin = "0.10.4" -lazy_static = "1.5" -pelite = "0.10.0" -regex = "1.12.2" -serde = { version = "1.0.228", features = [ "derive" ] } -serde_json = "1.0.148" -thiserror = "2.0.17" +clap = { version = "4.5.54", features = [ "derive" ] } +cpp_demangle = "0.5.1" +entropy = "0.4.2" +goblin = "0.10.4" +once_cell = "1.21.3" +pelite = "0.10.0" +regex = "1.12.2" +rustc-demangle = "0.1.27" +serde = { version = "1.0.228", features = [ "derive" ] } +serde_json = "1.0.149" +thiserror = "2.0.17" [dev-dependencies] criterion = "0.8.1" -insta = "1.46.0" +insta = "1.46.1" tempfile = "3.24.0" # The profile that 'dist' will build with diff --git a/docs/src/classification.md b/docs/src/classification.md index 2a5947a..170c216 100644 --- a/docs/src/classification.md +++ b/docs/src/classification.md @@ -76,51 +76,54 @@ Raw String -> Pattern Matching -> Tag Assignment #### GUIDs/UUIDs -- **Pattern**: `\{[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\}` -- **Examples**: `{12345678-1234-1234-1234-123456789abc}` -- **Validation**: Format compliance, version checking +- **Pattern**: `\{?[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\}?` +- **Examples**: `{12345678-1234-1234-1234-123456789abc}`, `12345678-1234-1234-1234-123456789abc` +- **Validation**: Format compliance - **Security relevance**: Medium - component identification #### Email Addresses - **Pattern**: `[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}` - **Examples**: `admin@malware.com`, `support@legitimate.org` -- **Validation**: RFC compliance, domain validation +- **Validation**: Basic format validation - **Security relevance**: Medium - contact information ### Code Artifacts #### Format Strings -- **Pattern**: 
`%[sdxo]|%\d+[sdxo]|\{\d+\}` -- **Examples**: `Error: %s at line %d`, `User {0} logged in` -- **Context**: Proximity to other format strings +- **Pattern**: `%[-+0 #]*(\d+|\*)?(\.(\d+|\*))?(hh?|ll?|[Lzjt])?[diouxXeEfFgGaAcspn%]` +- **Examples**: `Error: %s at line %d`, `Name: %s, Age: %d, Score: %.2f` +- **Context**: Presence of real format specifiers (%% alone is ignored) - **Security relevance**: Low-Medium - debugging information #### Base64 Data -- **Pattern**: `[A-Za-z0-9+/]{20,}={0,2}` +- **Pattern**: Character set validation with padding rules - **Examples**: `SGVsbG8gV29ybGQ=` -- **Validation**: Length divisibility, padding correctness +- **Validation**: Length >= 16, Base64 character set, valid padding, reject length mod 4 of 1 - **Security relevance**: Variable - encoded payloads -### User Agents +#### User Agents -- **Pattern**: `Mozilla/[0-9.]+|Chrome/[0-9.]+|Safari/[0-9.]+` -- **Examples**: `Mozilla/5.0 (Windows NT 10.0; Win64; x64)` +- **Pattern**: Prefix match for common agents (Mozilla, curl, Wget, python-requests, libwww-perl, Java, Apache-HttpClient, okhttp, PostmanRuntime) +- **Examples**: `Mozilla/5.0 (Windows NT 10.0; Win64; x64)`, `curl/7.68.0` - **Security relevance**: Medium - network fingerprinting -### Pattern Matching Engine +## Tag Specificity -The semantic classifier uses cached regex patterns via `lazy_static!` and applies validation checks to reduce false positives. +Tags are treated as either specific or broad. Specific tags indicate high confidence matches (for example URL, domain, IP, file path, GUID, email, format string, and user agent). Base64 is a broad tag and should be treated as ambiguous due to higher false positive risk. + +## Pattern Matching Engine + +The semantic classifier uses cached regex patterns via `once_cell::sync::Lazy` and applies validation checks to reduce false positives. ```rust -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use regex::Regex; -lazy_static! 
{ - static ref URL_REGEX: Regex = Regex::new(r#"https?://[^\s<>"{}|\\^\[\]\`]+"#).unwrap(); -} +static URL_REGEX: Lazy = + Lazy::new(|| Regex::new(r#"https?://[^\s<>"{}|\\^\[\]\`]+"#).unwrap()); impl SemanticClassifier { pub fn classify(&self, string: &FoundString) -> Vec { @@ -152,22 +155,6 @@ impl SemanticClassifier { } ``` -## Implementation Details - -The classifier relies on `lazy_static!` to compile regex patterns once and reuse them across classification calls. Helper methods validate strings before assigning tags. - -### Method Signatures - -Key method signatures: - -```text -pub fn classify(&self, string: &FoundString) -> Vec; -pub fn classify_posix_path(&self, text: &str) -> Option; -pub fn classify_windows_path(&self, text: &str) -> Option; -pub fn classify_unc_path(&self, text: &str) -> Option; -pub fn classify_registry_path(&self, text: &str) -> Option; -``` - ## Using the Classification System ```text @@ -198,85 +185,7 @@ if tags.contains(&Tag::FilePath) { The current implementation returns tags without explicit confidence scores. Confidence is implicit in the validation and matching logic. A future update may introduce explicit confidence values per tag. 
-## Planned Enhancements (implementation pending) +## Planned Enhancements - Context-aware classification -- Symbol classification -- Additional semantic patterns (GUIDs, email addresses, base64, format strings) - documented above, implementation pending - -### Language-Specific Patterns - -Different programming languages have distinct string patterns: - -```rust -pub enum LanguageHint { - Rust, - Go, - DotNet, - Native, -} - -impl SemanticClassifier { - fn classify_with_language_hint(&self, text: &str, hint: LanguageHint) -> Vec { - match hint { - LanguageHint::Rust => self.classify_rust_patterns(text), - LanguageHint::Go => self.classify_go_patterns(text), - LanguageHint::DotNet => self.classify_dotnet_patterns(text), - LanguageHint::Native => self.classify_native_patterns(text), - } - } -} -``` - -### False Positive Reduction - -Several techniques reduce false positives: - -1. **Length thresholds**: Very short matches are filtered out -2. **Context validation**: Surrounding data must make sense -3. **Entropy checking**: High-entropy strings are likely binary data -4. **Whitelist/blacklist**: Known good/bad patterns - -```text -fn is_likely_false_positive(&self, text: &str, tag: &Tag) -> bool { - match tag { - Tag::Domain => { - // Too short or invalid TLD - text.len() < 4 || !self.has_valid_tld(text) - } - Tag::Base64 => { - // Too short or invalid padding - text.len() < 8 || !self.valid_base64_padding(text) - } - _ => false, - } -} -``` - -## Performance Considerations - -### Regex Compilation Caching - -```rust -lazy_static! 
{ - static ref COMPILED_PATTERNS: SemanticClassifier = SemanticClassifier::new(); -} -``` - -### Parallel Classification - -```rust -use rayon::prelude::*; - -fn classify_batch(strings: &[RawString]) -> Vec { - strings.par_iter().map(|s| classify_single(s)).collect() -} -``` - -### Memory Efficiency - -- Reuse regex objects across classifications -- Use string interning for common patterns -- Lazy evaluation for expensive validations - -This comprehensive classification system enables Stringy to automatically identify and categorize the most relevant strings in binary files, significantly improving analysis efficiency. +- Language-specific refinements diff --git a/src/classification/mod.rs b/src/classification/mod.rs index d3dd9ad..320a3a6 100644 --- a/src/classification/mod.rs +++ b/src/classification/mod.rs @@ -12,14 +12,12 @@ //! - **Domain Detection**: Identifies domain names with TLD validation //! - **File Path Detection**: Identifies POSIX, Windows, and UNC paths //! - **Registry Path Detection**: Identifies Windows registry paths -//! -//! ## Future Capabilities -//! -//! - GUIDs/UUIDs -//! - Email addresses -//! - Base64 data -//! - Format strings -//! - User agents +//! - **GUID Detection**: Identifies GUIDs/UUIDs in standard format +//! - **Email Detection**: Identifies email addresses +//! - **Base64 Detection**: Identifies Base64-encoded data (broad tag) +//! - **Format String Detection**: Identifies printf-style format strings +//! - **User Agent Detection**: Identifies HTTP user agent strings +//! - **Symbol Demangling**: Demangles Rust symbols to human-readable form //! //! ## Usage //! @@ -49,5 +47,9 @@ //! assert!(tags.contains(&Tag::FilePath)); //! 
```
+mod patterns;
 pub mod semantic;
+pub mod symbols;
+
 pub use semantic::SemanticClassifier;
+pub use symbols::SymbolDemangler;
diff --git a/src/classification/patterns/data.rs b/src/classification/patterns/data.rs
new file mode 100644
index 0000000..f650d39
--- /dev/null
+++ b/src/classification/patterns/data.rs
@@ -0,0 +1,227 @@
+//! Data format classification patterns
+//!
+//! This module provides GUID, email, Base64, format string, and user agent detection.
+
+use crate::types::Tag;
+use once_cell::sync::Lazy;
+use regex::Regex;
+
+/// Regular expression for matching GUIDs/UUIDs
+///
+/// Pattern matches standard GUID format: {XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}
+/// Also matches without braces and in lowercase.
+pub(crate) static GUID_REGEX: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"(?i)^\{?[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\}?$").unwrap()
+});
+
+/// Regular expression for matching email addresses
+///
+/// Pattern matches basic email format: user@domain.tld
+///
+/// This intentionally simplified pattern is tuned for binary and string
+/// extraction. It will match short forms like "a@b.cc" and common unquoted
+/// local-parts, but it does not support quoted local-parts, some valid edge
+/// cases (for example, certain plus or escape forms and full RFC 5322
+/// syntax), or internationalized domain names. The tradeoff is fewer false
+/// positives at the cost of not being fully RFC-compliant.
+pub(crate) static EMAIL_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap());
+
+/// Regular expression for matching printf-style format strings
+///
+/// Pattern detects format specifiers like %s, %d, %x, %f, etc.
+pub(crate) static FORMAT_STRING_REGEX: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"%[-+0 #]*(\d+|\*)?(\.(\d+|\*))?(hh?|ll?|[Lzjt])?[diouxXeEfFgGaAcspn%]").unwrap()
+});
+
+/// Regular expression for matching common user agent patterns
+///
+/// Pattern matches common browser/bot user agent strings.
+pub(crate) static USER_AGENT_REGEX: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"(?i)^Mozilla/\d|^curl/|^Wget/|^python-requests|^libwww-perl|^Java/|^Apache-HttpClient|^okhttp/|^PostmanRuntime/")
+        .unwrap()
+});
+
+/// Classifies a GUID/UUID
+///
+/// # Arguments
+/// * `text` - The text to check for GUID format
+///
+/// # Returns
+/// Returns `Some(Tag::Guid)` if valid, `None` otherwise.
+pub fn classify_guid(text: &str) -> Option<Tag> {
+    if GUID_REGEX.is_match(text) {
+        Some(Tag::Guid)
+    } else {
+        None
+    }
+}
+
+/// Classifies an email address
+///
+/// # Arguments
+/// * `text` - The text to check for email format
+///
+/// # Returns
+/// Returns `Some(Tag::Email)` if valid, `None` otherwise.
+pub fn classify_email(text: &str) -> Option<Tag> {
+    if EMAIL_REGEX.is_match(text) {
+        Some(Tag::Email)
+    } else {
+        None
+    }
+}
+
+/// Classifies Base64-encoded data
+///
+/// This is a broad/ambiguous tag with potential false positives.
+/// Returns `Some(Tag::Base64)` if the text appears to be Base64 encoded.
+///
+/// Detection criteria:
+/// - Minimum length of 16 characters
+/// - Only valid Base64 characters (A-Z, a-z, 0-9, +, /, =)
+/// - Proper padding (if present)
+/// - Length is a multiple of 4 or has valid padding
+/// - For unpadded strings: must have both uppercase and lowercase letters
+pub fn classify_base64(text: &str) -> Option<Tag> {
+    // Minimum length to reduce false positives
+    if text.len() < 16 {
+        return None;
+    }
+
+    // Check if it's valid Base64 characters only
+    let is_base64_chars = text
+        .chars()
+        .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/' || c == '=');
+
+    if !is_base64_chars {
+        return None;
+    }
+
+    // Count padding characters
+    let padding_count = text.chars().rev().take_while(|&c| c == '=').count();
+
+    // Padding should be at most 2 characters
+    if padding_count > 2 {
+        return None;
+    }
+
+    // Strip padding for length check
+    let content_len = text.len() - padding_count;
+
+    // Valid Base64 content length should avoid mod 4 remainder of 1
+    let remainder = (content_len + padding_count) % 4;
+    if remainder == 1 {
+        return None;
+    }
+
+    // Check for character diversity typical of Base64
+    let has_upper = text.chars().any(|c| c.is_ascii_uppercase());
+    let has_lower = text.chars().any(|c| c.is_ascii_lowercase());
+    let has_digit = text.chars().any(|c| c.is_ascii_digit());
+
+    // For strings with padding, the padding itself is strong evidence
+    // For strings without padding, require both upper and lowercase
+    // to avoid false positives on random alphanumeric strings
+    if padding_count == 0 {
+        // Require both upper and lower case for unpadded strings
+        if !has_upper || !has_lower {
+            return None;
+        }
+    } else {
+        // For padded strings, still require some diversity
+        let has_diversity = has_digit || (has_upper && has_lower);
+        if !has_diversity {
+            return None;
+        }
+    }
+
+    Some(Tag::Base64)
+}
+
+/// Classifies a printf-style format string
+///
+/// # Arguments
+/// * `text` - The text to check for format string patterns
+///
+/// # Returns
+/// Returns `Some(Tag::FormatString)` if valid, `None` otherwise.
+pub fn classify_format_string(text: &str) -> Option<Tag> {
+    // Find all format specifier matches
+    let matches: Vec<_> = FORMAT_STRING_REGEX.find_iter(text).collect();
+
+    if matches.is_empty() {
+        return None;
+    }
+
+    // Check if any match is a real format specifier (not just %%)
+    // %% is just an escaped percent sign, not a real format specifier
+    let has_real_specifier = matches.iter().any(|m| m.as_str() != "%%");
+
+    if !has_real_specifier {
+        return None;
+    }
+
+    // Exclude strings that are just a single format specifier
+    // (those are likely false positives)
+    if text.len() <= 2 {
+        return None;
+    }
+
+    Some(Tag::FormatString)
+}
+
+/// Classifies a user agent string
+///
+/// # Arguments
+/// * `text` - The text to check for user agent patterns
+///
+/// # Returns
+/// Returns `Some(Tag::UserAgent)` if valid, `None` otherwise.
+pub fn classify_user_agent(text: &str) -> Option<Tag> {
+    if USER_AGENT_REGEX.is_match(text) {
+        Some(Tag::UserAgent)
+    } else {
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_guid_valid_and_invalid() {
+        assert!(classify_guid("{12345678-1234-1234-1234-123456789ABC}").is_some());
+        assert!(classify_guid("12345678-1234-1234-1234-123456789ABC").is_some());
+        assert!(classify_guid("not-a-guid").is_none());
+    }
+
+    #[test]
+    fn test_email_valid_and_invalid() {
+        assert!(classify_email("user@example.com").is_some());
+        assert!(classify_email("not an email").is_none());
+    }
+
+    #[test]
+    fn test_base64_valid_and_invalid() {
+        assert!(classify_base64("SGVsbG8gV29ybGQh").is_some());
+        assert!(classify_base64("This is not base64!!").is_none());
+        assert!(classify_base64("YWJj").is_none());
+    }
+
+    #[test]
+    fn test_format_string_valid_and_invalid() {
+        assert!(classify_format_string("Error: %s at line %d").is_some());
+        assert!(classify_format_string("100%% done").is_none());
+    }
+
+    #[test]
+    fn test_user_agent_valid_and_invalid() {
+        assert!(
+            classify_user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
+                .is_some()
+        );
+        assert!(classify_user_agent("Not a user agent").is_none());
+    }
+}
diff --git a/src/classification/patterns/ip.rs b/src/classification/patterns/ip.rs
new file mode 100644
index 0000000..bb64164
--- /dev/null
+++ b/src/classification/patterns/ip.rs
@@ -0,0 +1,195 @@
+//! IP address classification patterns
+//!
+//! This module provides IPv4 and IPv6 address detection functionality.
+
+use crate::types::Tag;
+use once_cell::sync::Lazy;
+use regex::Regex;
+use std::net::{Ipv4Addr, Ipv6Addr};
+use std::str::FromStr;
+
+/// Regular expression for matching IPv4 addresses
+///
+/// Pattern matches IPv4 addresses with proper octet validation (0-255).
+/// Matches the entire string (used after port stripping).
+pub(crate) static IPV4_REGEX: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$").unwrap()
+});
+
+/// Regular expression for matching IPv6 addresses
+///
+/// This is a permissive pre-filter that only allows hex digits, colons,
+/// and dots (for IPv4-mapped suffixes). Canonical validation is still
+/// performed by std::net::Ipv6Addr::from_str.
+pub(crate) static IPV6_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)^[0-9a-f:.]+$").unwrap());
+
+/// Regular expression for detecting and stripping port suffixes
+///
+/// Matches :port where port is in the valid range 0-65535.
+pub(crate) static PORT_SUFFIX_REGEX: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(
+        r":(?:[0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])$",
+    )
+    .unwrap()
+});
+
+/// Regular expression for handling bracketed IPv6 addresses
+///
+/// Matches [IPv6] format used in URLs like [::1]:8080.
+pub(crate) static IPV6_BRACKETS_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^\[([^\]]+)\]$").unwrap());
+
+/// Strips the port suffix from an IP address string if present
+///
+/// # Arguments
+/// * `text` - The text that may contain a port suffix
+///
+/// # Returns
+/// The text with the port suffix removed, or the original text if no port found.
+pub fn strip_port(text: &str) -> &str {
+    if let Some(mat) = PORT_SUFFIX_REGEX.find(text) {
+        &text[..mat.start()]
+    } else {
+        text
+    }
+}
+
+/// Strips brackets from an IPv6 address if present
+///
+/// # Arguments
+/// * `text` - The text that may contain bracketed IPv6
+///
+/// # Returns
+/// The IPv6 address without brackets, or the original text if no brackets found.
+pub fn strip_ipv6_brackets(text: &str) -> &str {
+    if let Some(caps) = IPV6_BRACKETS_REGEX.captures(text)
+        && let Some(inner) = caps.get(1)
+    {
+        return inner.as_str();
+    }
+    text
+}
+
+/// Checks if the given text is a valid IPv4 address
+///
+/// This method first strips any port suffix, then validates the remaining
+/// text as an IPv4 address using both regex and standard library validation.
+///
+/// # Arguments
+/// * `text` - The text to check for IPv4 format
+///
+/// # Returns
+/// Returns `true` if the text is a valid IPv4 address.
+pub fn is_ipv4_address(text: &str) -> bool { + // Strip port suffix if present + let text_without_port = strip_port(text); + + // Two-stage validation: regex pre-filter first + if !IPV4_REGEX.is_match(text_without_port) { + return false; + } + + // Check for leading zeros in octets (e.g., 192.168.01.1 should be rejected) + for octet_str in text_without_port.split('.') { + // If an octet has more than 1 digit and starts with '0', it's invalid + if octet_str.len() > 1 && octet_str.starts_with('0') { + return false; + } + } + + // Validate using std::net::Ipv4Addr for correctness + // This is the authoritative check - regex is just a pre-filter + Ipv4Addr::from_str(text_without_port).is_ok() +} + +/// Checks if the given text is a valid IPv6 address +/// +/// This method handles bracketed IPv6 addresses (e.g., [::1]:8080), +/// strips any port suffix, and validates using both regex and standard library. +/// +/// # Arguments +/// * `text` - The text to check for IPv6 format +/// +/// # Returns +/// Returns `true` if the text is a valid IPv6 address. 
+pub fn is_ipv6_address(text: &str) -> bool {
+    // Handle bracketed IPv6 addresses like [::1] or [::1]:8080
+    let mut ip_text = text;
+
+    // Check for bracketed format
+    if text.starts_with('[') {
+        // Strip port from the full text first (e.g., [::1]:8080 -> [::1])
+        let without_port = strip_port(text);
+        // Now extract the IPv6 from brackets
+        ip_text = strip_ipv6_brackets(without_port);
+    }
+
+    // Permissive pre-filter to reject obvious non-IPv6 strings early
+    if !IPV6_REGEX.is_match(ip_text) {
+        return false;
+    }
+
+    // Basic structure check - must contain colon and only valid hex/colon characters
+    if !ip_text.contains(':') {
+        return false;
+    }
+
+    // Validate using std::net::Ipv6Addr for correctness
+    Ipv6Addr::from_str(ip_text).is_ok()
+}
+
+/// Classifies IP addresses (both IPv4 and IPv6) in the given text
+///
+/// # Arguments
+/// * `text` - The text to classify
+///
+/// # Returns
+/// A vector of tags (IPv4 and/or IPv6) that apply to the text.
+pub fn classify_ip_addresses(text: &str) -> Vec<Tag> {
+    let mut tags = Vec::new();
+
+    if is_ipv4_address(text) {
+        tags.push(Tag::IPv4);
+    }
+
+    if is_ipv6_address(text) {
+        tags.push(Tag::IPv6);
+    }
+
+    tags
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_ipv4_valid_and_invalid() {
+        assert!(is_ipv4_address("192.168.1.1"));
+        assert!(is_ipv4_address("192.168.1.1:8080"));
+        assert!(!is_ipv4_address("256.1.1.1"));
+        assert!(!is_ipv4_address("01.02.03.04"));
+    }
+
+    #[test]
+    fn test_ipv6_valid_and_invalid() {
+        assert!(is_ipv6_address("2001:db8::1"));
+        assert!(is_ipv6_address("[::1]:8080"));
+        assert!(!is_ipv6_address("not an ipv6"));
+    }
+
+    #[test]
+    fn test_classify_ipv4_and_ipv6() {
+        let tags = classify_ip_addresses("192.168.1.1");
+        assert_eq!(tags, vec![Tag::IPv4]);
+
+        let tags = classify_ip_addresses("2001:db8::1");
+        assert_eq!(tags, vec![Tag::IPv6]);
+    }
+
+    #[test]
+    fn test_classify_no_ip() {
+        let tags = classify_ip_addresses("not an ip address");
+        assert!(tags.is_empty());
+    }
+} diff --git a/src/classification/patterns/mod.rs b/src/classification/patterns/mod.rs new file mode 100644 index 0000000..2852fdf --- /dev/null +++ b/src/classification/patterns/mod.rs @@ -0,0 +1,34 @@ +//! Pattern classification modules +//! +//! This module contains submodules for different types of pattern classification: +//! - `ip`: IPv4 and IPv6 address detection +//! - `network`: URL and domain detection +//! - `paths`: File and registry path detection +//! - `data`: GUID, email, Base64, format string, and user agent detection + +pub mod data; +pub mod ip; +pub mod network; +pub mod paths; + +// Re-export classification functions +pub use data::{ + classify_base64, classify_email, classify_format_string, classify_guid, classify_user_agent, +}; +pub use ip::{ + classify_ip_addresses, is_ipv4_address, is_ipv6_address, strip_ipv6_brackets, strip_port, +}; +pub use network::{classify_domain, classify_url, has_valid_tld}; +pub use paths::{ + classify_posix_path, classify_registry_path, classify_unc_path, classify_windows_path, + is_suspicious_posix_path, is_suspicious_registry_path, is_suspicious_windows_path, + is_valid_posix_path, is_valid_registry_path, is_valid_windows_path, +}; + +// Re-export regex patterns needed by SemanticClassifier for cache testing +pub(crate) use ip::{IPV4_REGEX, IPV6_REGEX}; +pub(crate) use network::{DOMAIN_REGEX, URL_REGEX}; +pub(crate) use paths::{ + POSIX_PATH_REGEX, REGISTRY_ABBREV_REGEX, REGISTRY_PATH_REGEX, UNC_PATH_REGEX, + WINDOWS_PATH_REGEX, +}; diff --git a/src/classification/patterns/network.rs b/src/classification/patterns/network.rs new file mode 100644 index 0000000..1ae6cb5 --- /dev/null +++ b/src/classification/patterns/network.rs @@ -0,0 +1,137 @@ +//! Network indicator classification patterns +//! +//! This module provides URL and domain name detection functionality. 
+
+use crate::types::Tag;
+use once_cell::sync::Lazy;
+use regex::Regex;
+use std::collections::HashSet;
+
+/// Regular expression for matching HTTP/HTTPS URLs
+///
+/// Pattern matches URLs starting with http:// or https:// and excludes
+/// problematic characters that could cause false positives.
+pub(crate) static URL_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r#"https?://[^\s<>"{}|\\\^\[\]\`]+"#).unwrap());
+
+/// Regular expression for matching domain names
+///
+/// Pattern matches domain names with proper DNS format compliance (RFC 1035).
+/// It ensures domains start and end with alphanumeric characters, allows hyphens
+/// in the middle, and requires at least a 2-character TLD.
+pub(crate) static DOMAIN_REGEX: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b").unwrap()
+});
+
+/// List of common TLDs for validation
+static COMMON_TLDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
+    HashSet::from([
+        "com", "org", "net", "edu", "gov", "mil", "int", "io", "co", "uk", "de", "fr", "jp", "cn",
+        "ru", "br", "in", "au", "ca", "es", "it", "nl", "pl", "se", "ch", "at", "be", "dk", "fi",
+        "no", "pt", "cz", "hu", "ro", "bg", "hr", "sk", "si", "ee", "lt", "lv", "ie", "gr", "cy",
+        "mt", "lu", "info", "biz", "name", "pro", "aero", "coop", "museum", "travel", "jobs",
+        "mobi", "tel", "asia", "cat", "xxx", "app", "dev", "page", "blog", "shop", "store",
+        "online", "site", "website", "tech", "cloud", "ai", "ml", "tv", "me", "cc", "ws", "bz",
+        "nu", "tk", "ga", "cf", "gq",
+    ])
+});
+
+/// Checks if the domain has a valid TLD
+///
+/// Trailing dots are treated as invalid to avoid accepting empty TLDs.
+///
+/// # Arguments
+/// * `domain` - The domain name to validate
+///
+/// # Returns
+/// Returns `true` if the domain has a known TLD.
+pub fn has_valid_tld(domain: &str) -> bool {
+    if domain.ends_with('.') {
+        return false;
+    }
+    if let Some(dot_pos) = domain.rfind('.') {
+        let tld = &domain[dot_pos + 1..];
+        let tld_lower = tld.to_lowercase();
+        COMMON_TLDS.contains(tld_lower.as_str())
+    } else {
+        false
+    }
+}
+
+/// Detects HTTP/HTTPS URLs in the given text
+///
+/// This method identifies URLs that start with `http://` or `https://`
+/// and contain valid URL characters.
+///
+/// # Arguments
+/// * `text` - The text to search for URLs
+///
+/// # Returns
+/// Returns `Some(Tag::Url)` if a URL is found, `None` otherwise.
+pub fn classify_url(text: &str) -> Option<Tag> {
+    if URL_REGEX.is_match(text) {
+        Some(Tag::Url)
+    } else {
+        None
+    }
+}
+
+/// Detects domain names that are not URLs
+///
+/// This method identifies domain names that match the domain pattern but
+/// are not already identified as URLs. It first checks if the text is NOT
+/// a URL to prevent double-tagging, then validates against the domain
+/// pattern and TLD list.
+///
+/// # Arguments
+/// * `text` - The text to search for domain names
+///
+/// # Returns
+/// Returns `Some(Tag::Domain)` if a valid domain is found (and it's not
+/// a URL), `None` otherwise.
+pub fn classify_domain(text: &str) -> Option<Tag> {
+    // First check if it's NOT a URL to prevent double-tagging
+    if URL_REGEX.is_match(text) {
+        return None;
+    }
+
+    // Check if it matches the domain pattern
+    if DOMAIN_REGEX.is_match(text) {
+        // Validate TLD to reduce false positives
+        if has_valid_tld(text) {
+            return Some(Tag::Domain);
+        }
+    }
+
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_url_valid_and_invalid() {
+        assert!(classify_url("https://example.com").is_some());
+        assert!(classify_url("not a url").is_none());
+    }
+
+    #[test]
+    fn test_domain_valid_and_invalid() {
+        assert!(classify_domain("example.com").is_some());
+        assert!(classify_domain("https://example.com").is_none());
+        assert!(classify_domain("invalid.xyz123").is_none());
+    }
+
+    #[test]
+    fn test_url_not_double_tagged() {
+        assert!(classify_url("https://example.com").is_some());
+        assert!(classify_domain("https://example.com").is_none());
+    }
+
+    #[test]
+    fn test_tld_validation() {
+        assert!(has_valid_tld("example.com"));
+        assert!(!has_valid_tld("nodot"));
+    }
+}
diff --git a/src/classification/patterns/paths.rs b/src/classification/patterns/paths.rs
new file mode 100644
index 0000000..ca9cd4b
--- /dev/null
+++ b/src/classification/patterns/paths.rs
@@ -0,0 +1,397 @@
+//! File and registry path classification patterns
+//!
+//! This module provides POSIX, Windows, UNC, and registry path detection.
+
+use crate::types::Tag;
+use once_cell::sync::Lazy;
+use regex::Regex;
+use std::collections::HashSet;
+
+/// Regular expression for matching POSIX file paths
+pub(crate) static POSIX_PATH_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^/[^\x00\n\r]*").unwrap());
+
+/// Regular expression for matching Windows file paths
+pub(crate) static WINDOWS_PATH_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^[A-Za-z]:\\[^\x00\n\r]*").unwrap());
+
+/// Regular expression for matching UNC network paths
+pub(crate) static UNC_PATH_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^\\\\[a-zA-Z0-9.-]+\\[^\x00\n\r]*").unwrap());
+
+/// Regular expression for matching full Windows registry paths
+pub(crate) static REGISTRY_PATH_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"(?i)^HKEY_[A-Z_]+\\[^\x00\n\r]*").unwrap());
+
+/// Regular expression for matching abbreviated registry paths
+pub(crate) static REGISTRY_ABBREV_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"(?i)^HK(LM|CU|CR|U|CC)\\[^\x00\n\r]*").unwrap());
+
+/// Common suspicious POSIX path prefixes for persistence detection
+static SUSPICIOUS_POSIX_PATHS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
+    HashSet::from([
+        "/etc/cron.d/",
+        "/etc/init.d/",
+        "/usr/local/bin/",
+        "/tmp/",
+        "/var/tmp/",
+        "/etc/rc.d/",
+        "/etc/crontab",
+        "/etc/systemd/system/",
+        "/Library/LaunchDaemons/",
+        "/Library/LaunchAgents/",
+    ])
+});
+
+/// Common suspicious Windows path prefixes for persistence detection
+static SUSPICIOUS_WINDOWS_PATHS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
+    HashSet::from([
+        "C:\\Windows\\System32\\",
+        "C:\\Windows\\Temp\\",
+        "\\AppData\\Roaming\\Microsoft\\Windows\\Start Menu\\Programs\\Startup\\",
+        "C:\\ProgramData\\Microsoft\\Windows\\Start Menu\\Programs\\Startup\\",
+        "C:\\Windows\\SysWOW64\\",
+    ])
+});
+
+/// Known valid POSIX path prefixes
+static KNOWN_POSIX_PREFIXES: Lazy<HashSet<&'static str>> = Lazy::new(|| {
+    HashSet::from([
+        "/usr/", "/etc/", "/var/", "/home/", "/opt/", "/bin/", "/sbin/", "/lib/", "/dev/",
+        "/proc/", "/sys/", "/tmp/",
+    ])
+});
+
+/// Known valid Windows path
prefixes
+static KNOWN_WINDOWS_PREFIXES: Lazy<HashSet<&'static str>> = Lazy::new(|| {
+    HashSet::from([
+        "C:\\Windows\\",
+        "C:\\Program Files\\",
+        "C:\\Program Files (x86)\\",
+        "C:\\Users\\",
+        "C:\\ProgramData\\",
+    ])
+});
+
+/// Valid Windows registry root keys
+static VALID_REGISTRY_ROOTS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
+    HashSet::from([
+        "HKEY_LOCAL_MACHINE",
+        "HKEY_CURRENT_USER",
+        "HKEY_CLASSES_ROOT",
+        "HKEY_USERS",
+        "HKEY_CURRENT_CONFIG",
+    ])
+});
+
+/// Suspicious Windows registry paths for persistence detection
+static SUSPICIOUS_REGISTRY_PATHS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
+    HashSet::from([
+        "\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run",
+        "\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\RunOnce",
+        "\\System\\CurrentControlSet\\Services",
+        "\\SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Winlogon",
+        "\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders",
+    ])
+});
+
+/// Checks if a path contains ASCII case-insensitive substring
+fn contains_ascii_case_insensitive(haystack: &str, needle: &str) -> bool {
+    if needle.is_empty() {
+        return true;
+    }
+
+    let haystack_bytes = haystack.as_bytes();
+    let needle_bytes = needle.as_bytes();
+
+    if needle_bytes.len() > haystack_bytes.len() {
+        return false;
+    }
+
+    for start in 0..=haystack_bytes.len() - needle_bytes.len() {
+        let mut matched = true;
+        for i in 0..needle_bytes.len() {
+            let hay = haystack_bytes[start + i].to_ascii_lowercase();
+            let nee = needle_bytes[i].to_ascii_lowercase();
+            if hay != nee {
+                matched = false;
+                break;
+            }
+        }
+        if matched {
+            return true;
+        }
+    }
+
+    false
+}
+
+fn starts_with_ascii_case_insensitive(text: &str, prefix: &str) -> bool {
+    if prefix.len() > text.len() {
+        return false;
+    }
+
+    text.as_bytes()
+        .iter()
+        .take(prefix.len())
+        .zip(prefix.as_bytes())
+        .all(|(left, right)| left.eq_ignore_ascii_case(right))
+}
+
+const AUTOSTART_POSIX_SUBPATH: &str = "/.config/autostart/";
+
+/// Checks if text contains printf-style format placeholders
+fn
contains_printf_placeholder(text: &str) -> bool { + // Look for common printf patterns that might appear in paths + let patterns = [ + "%s", "%d", "%x", "%u", "%i", "%f", "%c", "%p", "%n", "%ld", "%lu", + ]; + patterns.iter().any(|pattern| text.contains(pattern)) +} + +/// Checks if text contains control characters +fn contains_control_chars(text: &str) -> bool { + text.chars().any(|c| c.is_control() && c != '\t') +} + +/// Validates a POSIX path +pub fn is_valid_posix_path(text: &str) -> bool { + // Must start with / and have at least one more character + if !text.starts_with('/') || text.len() < 2 { + return false; + } + + // Check for null bytes or control characters + if contains_control_chars(text) { + return false; + } + + // Check for known prefixes to boost confidence + for prefix in KNOWN_POSIX_PREFIXES.iter() { + if text.starts_with(prefix) { + return true; + } + } + + // Additional validation for paths that don't start with known prefixes + // Must have at least one directory separator beyond the root + if text.len() > 1 && text[1..].contains('/') { + return true; + } + + // Single directory under root (e.g., "/bin") - needs to be at least 3 chars + text.len() >= 3 +} + +/// Validates a Windows path +pub fn is_valid_windows_path(text: &str) -> bool { + // Must match the basic pattern + if !WINDOWS_PATH_REGEX.is_match(text) { + return false; + } + + // Check for null bytes or control characters + if contains_control_chars(text) { + return false; + } + + // Validate drive letter is A-Z + let first_char = text.chars().next().unwrap_or(' '); + if !first_char.is_ascii_alphabetic() { + return false; + } + + // Check for known prefixes to boost confidence + for prefix in KNOWN_WINDOWS_PREFIXES.iter() { + if starts_with_ascii_case_insensitive(text, prefix) { + return true; + } + } + + // Path should have at least some content after the drive letter + text.len() >= 4 +} + +/// Validates a registry path +pub fn is_valid_registry_path(text: &str) -> bool { + let 
upper_text = text.to_uppercase(); + + // Check for full registry root + if upper_text.starts_with("HKEY_") { + // Extract root key + if let Some(slash_pos) = text.find('\\') { + let root = &upper_text[..slash_pos]; + if VALID_REGISTRY_ROOTS.contains(root) { + return true; + } + } + } + + // Check for abbreviated forms (case-insensitive) + if REGISTRY_ABBREV_REGEX.is_match(text) { + return true; + } + + // Also accept paths that use forward slashes (some tools do this) + if upper_text.starts_with("HKEY_") + && text.contains('/') + && let Some(slash_pos) = text.find('/') + { + let root = &upper_text[..slash_pos]; + if VALID_REGISTRY_ROOTS.contains(root) { + return true; + } + } + + false +} + +/// Classifies a POSIX path +/// +/// # Arguments +/// * `text` - The text to check for POSIX path format +/// +/// # Returns +/// Returns `Some(Tag::FilePath)` if valid, `None` otherwise. +pub fn classify_posix_path(text: &str) -> Option { + if POSIX_PATH_REGEX.is_match(text) && is_valid_posix_path(text) { + Some(Tag::FilePath) + } else { + None + } +} + +/// Classifies a Windows path +/// +/// # Arguments +/// * `text` - The text to check for Windows path format +/// +/// # Returns +/// Returns `Some(Tag::FilePath)` if valid, `None` otherwise. +pub fn classify_windows_path(text: &str) -> Option { + // Skip if it looks like a printf format string + if contains_printf_placeholder(text) { + return None; + } + + if WINDOWS_PATH_REGEX.is_match(text) && is_valid_windows_path(text) { + Some(Tag::FilePath) + } else { + None + } +} + +/// Classifies a UNC network path +/// +/// # Arguments +/// * `text` - The text to check for UNC path format +/// +/// # Returns +/// Returns `Some(Tag::FilePath)` if valid, `None` otherwise. 
+pub fn classify_unc_path(text: &str) -> Option { + if UNC_PATH_REGEX.is_match(text) { + // Basic validation - must have server and share + let parts: Vec<&str> = text.split('\\').collect(); + // parts[0] and parts[1] are empty (before \\), parts[2] is server, parts[3] is share + if parts.len() >= 4 && !parts[2].is_empty() && !parts[3].is_empty() { + return Some(Tag::FilePath); + } + } + None +} + +/// Classifies a Windows registry path +/// +/// # Arguments +/// * `text` - The text to check for registry path format +/// +/// # Returns +/// Returns `Some(Tag::RegistryPath)` if valid, `None` otherwise. +pub fn classify_registry_path(text: &str) -> Option { + // is_valid_registry_path handles both backslash and forward-slash styles + if is_valid_registry_path(text) { + Some(Tag::RegistryPath) + } else { + None + } +} + +/// Checks if a POSIX path is suspicious (persistence-related) +pub fn is_suspicious_posix_path(text: &str) -> bool { + if (text.starts_with("/home/") || text.starts_with("/Users/")) + && text.contains(AUTOSTART_POSIX_SUBPATH) + { + return true; + } + SUSPICIOUS_POSIX_PATHS.iter().any(|p| text.starts_with(p)) +} + +/// Checks if a Windows path is suspicious (persistence-related) +/// +/// Uses prefix matching since suspicious Windows paths are anchored to +/// well-known base directories like C:\Windows\. +pub fn is_suspicious_windows_path(text: &str) -> bool { + SUSPICIOUS_WINDOWS_PATHS + .iter() + .any(|p| starts_with_ascii_case_insensitive(text, p)) +} + +/// Checks if a registry path is suspicious (persistence-related) +/// +/// Uses substring matching since relevant registry keys can appear anywhere +/// within a longer path string. 
+pub fn is_suspicious_registry_path(text: &str) -> bool { + SUSPICIOUS_REGISTRY_PATHS + .iter() + .any(|p| contains_ascii_case_insensitive(text, p)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_posix_path_valid_and_invalid() { + assert!(classify_posix_path("/usr/bin/bash").is_some()); + assert!(classify_posix_path("/").is_none()); + assert!(classify_posix_path("not/a/path").is_none()); + } + + #[test] + fn test_windows_path_valid_and_invalid() { + assert!(classify_windows_path("C:\\Windows\\System32").is_some()); + assert!(classify_windows_path("/unix/path").is_none()); + assert!(classify_windows_path("1:\\Invalid\\Path").is_none()); + } + + #[test] + fn test_unc_path_valid_and_invalid() { + assert!(classify_unc_path("\\\\server\\share\\file.txt").is_some()); + assert!(classify_unc_path("\\\\server").is_none()); + } + + #[test] + fn test_classify_unc_path_missing_share() { + assert!(classify_unc_path("\\\\server\\").is_none()); + } + + #[test] + fn test_registry_path_valid_and_invalid() { + assert!( + classify_registry_path( + "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run" + ) + .is_some() + ); + assert!(classify_registry_path("HKEY_INVALID\\Path").is_none()); + } + + #[test] + fn test_suspicious_paths() { + assert!(is_suspicious_posix_path("/etc/cron.d/malicious")); + assert!(is_suspicious_windows_path("C:\\Windows\\System32\\cmd.exe")); + assert!(is_suspicious_registry_path( + "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run" + )); + } +} diff --git a/src/classification/semantic.rs b/src/classification/semantic.rs index b83bdfd..c6df7a7 100644 --- a/src/classification/semantic.rs +++ b/src/classification/semantic.rs @@ -10,6 +10,11 @@ //! - IPv4 and IPv6 addresses //! - POSIX and Windows file paths (including UNC paths) //! - Windows registry paths +//! - GUIDs/UUIDs +//! - Email addresses +//! - Base64-encoded data +//! - Printf-style format strings +//! - User agent strings //! //! 
# Usage //! @@ -39,184 +44,35 @@ //! assert_eq!(tags.len(), 1); //! assert!(matches!(tags[0], stringy::types::Tag::Url)); //! ``` -//! -//! # Patterns -//! -//! - **URLs**: Matches HTTP and HTTPS URLs using a pattern that excludes -//! problematic characters that could cause false positives. -//! -//! - **Domains**: Matches domain names using RFC 1035 compliant patterns -//! with additional TLD validation against a hardcoded list of common TLDs. +use super::patterns; use crate::types::{FoundString, Tag}; -use lazy_static::lazy_static; +use patterns::{ + DOMAIN_REGEX, IPV4_REGEX, IPV6_REGEX, POSIX_PATH_REGEX, REGISTRY_ABBREV_REGEX, + REGISTRY_PATH_REGEX, UNC_PATH_REGEX, URL_REGEX, WINDOWS_PATH_REGEX, +}; use regex::Regex; -use std::net::{Ipv4Addr, Ipv6Addr}; -use std::str::FromStr; - -lazy_static! { - /// Regular expression for matching HTTP/HTTPS URLs - /// - /// Pattern matches URLs starting with http:// or https:// and excludes - /// problematic characters that could cause false positives. - static ref URL_REGEX: Regex = Regex::new(r#"https?://[^\s<>"{}|\\\^\[\]\`]+"#).unwrap(); - - /// Regular expression for matching domain names - /// - /// Pattern matches domain names with proper DNS format compliance (RFC 1035). - /// It ensures domains start and end with alphanumeric characters, allows hyphens - /// in the middle, and requires at least a 2-character TLD. - static ref DOMAIN_REGEX: Regex = Regex::new(r"\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b").unwrap(); - - /// Regular expression for matching IPv4 addresses - /// - /// Pattern matches IPv4 addresses with proper octet validation (0-255). - /// Matches the entire string (used after port stripping). 
- static ref IPV4_REGEX: Regex = Regex::new(r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$").unwrap(); - - /// Regular expression for matching IPv6 addresses - /// - /// Pattern matches IPv6 addresses including: - /// - Full notation: 2001:0db8:85a3:0000:0000:8a2e:0370:7334 - /// - Compressed notation: 2001:db8::1, ::1, fe80::1 - /// - Mixed notation: ::ffff:192.0.2.1, 64:ff9b::192.0.2.1 - /// This is a permissive pattern that checks for basic IPv6 structure (colons and hex digits). - /// Actual validation is performed by std::net::Ipv6Addr::from_str. - static ref IPV6_REGEX: Regex = Regex::new(r"(?i)^(?:[0-9a-f]{1,4}:){1,7}[0-9a-f]{1,4}$|^(?:[0-9a-f]{1,4}:){1,7}:$|^(?:[0-9a-f]{1,4}:){1,6}:[0-9a-f]{1,4}$|^(?:[0-9a-f]{1,4}:){1,5}(?::[0-9a-f]{1,4}){1,2}$|^(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,3}$|^(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,4}$|^(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,5}$|^[0-9a-f]{1,4}:(?::[0-9a-f]{1,4}){1,6}$|^:(?::[0-9a-f]{1,4}){1,7}$|^::$|^::ffff:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$").unwrap(); - - /// Regular expression for detecting and stripping port suffixes - /// - /// Matches :port where port is in the valid range 0-65535. - /// Pattern: :[0-9]{1,4} matches 0-9999, |[1-5][0-9]{4} matches 10000-59999, - /// |6[0-4][0-9]{3} matches 60000-64999, |65[0-4][0-9]{2} matches 65000-65499, - /// |655[0-2][0-9] matches 65500-65529, |6553[0-5] matches 65530-65535. - static ref PORT_SUFFIX_REGEX: Regex = Regex::new(r":(?:[0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])$").unwrap(); - - /// Regular expression for handling bracketed IPv6 addresses - /// - /// Matches [IPv6] format used in URLs like [::1]:8080. 
- static ref IPV6_BRACKETS_REGEX: Regex = Regex::new(r"^\[(.+)\]").unwrap(); - - /// Regular expression for matching POSIX file paths - /// - /// Pattern matches absolute POSIX paths starting with / followed by any characters - /// except null bytes, newlines, or carriage returns. - static ref POSIX_PATH_REGEX: Regex = Regex::new(r"^/[^\x00\n\r]*").unwrap(); - - /// Regular expression for matching Windows file paths - /// - /// Pattern matches Windows absolute paths starting with drive letter (C:\) - /// followed by any characters except null bytes, newlines, or carriage returns. - static ref WINDOWS_PATH_REGEX: Regex = Regex::new(r"^[A-Za-z]:\\[^\x00\n\r]*").unwrap(); - - /// Regular expression for matching UNC network paths - /// - /// Pattern matches UNC paths starting with \\ followed by server name and share. - static ref UNC_PATH_REGEX: Regex = Regex::new(r"^\\\\[a-zA-Z0-9.-]+\\[^\x00\n\r]*").unwrap(); - /// Regular expression for matching full Windows registry paths - /// - /// Pattern matches registry paths starting with HKEY_ root keys (case-insensitive). - static ref REGISTRY_PATH_REGEX: Regex = Regex::new(r"(?i)^HKEY_[A-Z_]+\\[^\x00\n\r]*").unwrap(); - - /// Regular expression for matching abbreviated registry paths - /// - /// Pattern matches abbreviated registry forms like HKLM, HKCU, etc. (case-insensitive). - static ref REGISTRY_ABBREV_REGEX: Regex = Regex::new(r"(?i)^HK(LM|CU|CR|U|CC)\\[^\x00\n\r]*").unwrap(); -} - -lazy_static! 
{ - /// Common suspicious POSIX path prefixes for persistence detection - static ref SUSPICIOUS_POSIX_PATHS: std::collections::HashSet<&'static str> = { - let mut set = std::collections::HashSet::new(); - set.insert("/etc/cron.d/"); - set.insert("/etc/init.d/"); - set.insert("/usr/local/bin/"); - set.insert("/tmp/"); - set.insert("/var/tmp/"); - set.insert("/etc/rc.d/"); - set.insert("/etc/crontab"); - set.insert("/etc/systemd/system/"); - set.insert("~/.config/autostart/"); - set.insert("/Library/LaunchDaemons/"); - set.insert("/Library/LaunchAgents/"); - set - }; - - /// Common suspicious Windows path prefixes for persistence detection - static ref SUSPICIOUS_WINDOWS_PATHS: std::collections::HashSet<&'static str> = { - let mut set = std::collections::HashSet::new(); - set.insert("C:\\Windows\\System32\\"); - set.insert("C:\\Windows\\Temp\\"); - set.insert("\\AppData\\Roaming\\Microsoft\\Windows\\Start Menu\\Programs\\Startup\\"); - set.insert("C:\\ProgramData\\Microsoft\\Windows\\Start Menu\\Programs\\Startup\\"); - set.insert("C:\\Windows\\SysWOW64\\"); - set - }; - - /// Known valid POSIX path prefixes - static ref KNOWN_POSIX_PREFIXES: std::collections::HashSet<&'static str> = { - let mut set = std::collections::HashSet::new(); - set.insert("/usr/"); - set.insert("/etc/"); - set.insert("/var/"); - set.insert("/home/"); - set.insert("/opt/"); - set.insert("/bin/"); - set.insert("/sbin/"); - set.insert("/lib/"); - set.insert("/dev/"); - set.insert("/proc/"); - set.insert("/sys/"); - set.insert("/tmp/"); - set - }; - - /// Known valid Windows path prefixes - static ref KNOWN_WINDOWS_PREFIXES: std::collections::HashSet<&'static str> = { - let mut set = std::collections::HashSet::new(); - set.insert("C:\\Windows\\"); - set.insert("C:\\Program Files\\"); - set.insert("C:\\Program Files (x86)\\"); - set.insert("C:\\Users\\"); - set.insert("C:\\ProgramData\\"); - set - }; - - /// Valid Windows registry root keys - static ref VALID_REGISTRY_ROOTS: 
std::collections::HashSet<&'static str> = { - let mut set = std::collections::HashSet::new(); - set.insert("HKEY_LOCAL_MACHINE"); - set.insert("HKEY_CURRENT_USER"); - set.insert("HKEY_CLASSES_ROOT"); - set.insert("HKEY_USERS"); - set.insert("HKEY_CURRENT_CONFIG"); - set - }; - - /// Suspicious Windows registry paths for persistence detection - static ref SUSPICIOUS_REGISTRY_PATHS: std::collections::HashSet<&'static str> = { - let mut set = std::collections::HashSet::new(); - set.insert("\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run"); - set.insert("\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\RunOnce"); - set.insert("\\System\\CurrentControlSet\\Services"); - set.insert("\\SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Winlogon"); - set.insert("\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders"); - set - }; -} +// Re-export pattern functions for backward compatibility +pub use patterns::{ + classify_base64, classify_domain, classify_email, classify_format_string, classify_guid, + classify_ip_addresses, classify_posix_path, classify_registry_path, classify_unc_path, + classify_url, classify_user_agent, classify_windows_path, has_valid_tld, is_ipv4_address, + is_ipv6_address, is_suspicious_posix_path, is_suspicious_registry_path, + is_suspicious_windows_path, is_valid_posix_path, is_valid_registry_path, is_valid_windows_path, + strip_ipv6_brackets, strip_port, +}; /// Semantic classifier for identifying network indicators in extracted strings /// -/// The `SemanticClassifier` provides methods to detect URLs and domain names +/// The `SemanticClassifier` provides methods to detect URLs, domain names, +/// IP addresses, file paths, registry paths, GUIDs, emails, and other patterns /// within text content. It uses compiled regular expressions for efficient -/// pattern matching and includes TLD validation to reduce false positives. 
-/// -/// URLs are prioritized over domains to prevent double-tagging - if a string -/// matches both patterns, it will only be tagged as a URL. +/// pattern matching and includes validation to reduce false positives. #[derive(Debug, Default)] pub struct SemanticClassifier; +/// Internal struct for regex cache address verification (used in testing) #[doc(hidden)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct RegexCacheAddresses { @@ -233,11 +89,14 @@ pub struct RegexCacheAddresses { impl SemanticClassifier { /// Create a new instance of the semantic classifier + #[must_use] pub fn new() -> Self { Self } + /// Returns memory addresses of cached regex patterns (for testing) #[doc(hidden)] + #[must_use] pub fn regex_cache_addresses(&self) -> RegexCacheAddresses { RegexCacheAddresses { url: &*URL_REGEX as *const Regex as usize, @@ -255,8 +114,7 @@ impl SemanticClassifier { /// Detects HTTP/HTTPS URLs in the given text /// /// This method identifies URLs that start with `http://` or `https://` - /// and contain valid URL characters. The pattern excludes problematic - /// characters to avoid false positives. + /// and contain valid URL characters. /// /// # Arguments /// @@ -265,31 +123,15 @@ impl SemanticClassifier { /// # Returns /// /// Returns `Some(Tag::Url)` if a URL is found, `None` otherwise. - /// - /// # Examples - /// - /// ``` - /// use stringy::classification::SemanticClassifier; - /// use stringy::types::Tag; - /// - /// let classifier = SemanticClassifier::new(); - /// assert_eq!(classifier.classify_url("https://example.com"), Some(Tag::Url)); - /// assert_eq!(classifier.classify_url("example.com"), None); - /// ``` + #[must_use] pub fn classify_url(&self, text: &str) -> Option { - if URL_REGEX.is_match(text) { - Some(Tag::Url) - } else { - None - } + classify_url(text) } /// Detects domain names that are not URLs /// /// This method identifies domain names that match the domain pattern but - /// are not already identified as URLs. 
It first checks if the text is NOT - /// a URL to prevent double-tagging, then validates against the domain - /// pattern and TLD list. + /// are not already identified as URLs. /// /// # Arguments /// @@ -297,34 +139,10 @@ impl SemanticClassifier { /// /// # Returns /// - /// Returns `Some(Tag::Domain)` if a valid domain is found (and it's not - /// a URL), `None` otherwise. - /// - /// # Examples - /// - /// ``` - /// use stringy::classification::SemanticClassifier; - /// use stringy::types::Tag; - /// - /// let classifier = SemanticClassifier::new(); - /// assert_eq!(classifier.classify_domain("example.com"), Some(Tag::Domain)); - /// assert_eq!(classifier.classify_domain("https://example.com"), None); - /// ``` + /// Returns `Some(Tag::Domain)` if a valid domain is found, `None` otherwise. + #[must_use] pub fn classify_domain(&self, text: &str) -> Option { - // First check if it's NOT a URL to prevent double-tagging - if URL_REGEX.is_match(text) { - return None; - } - - // Check if it matches the domain pattern - if DOMAIN_REGEX.is_match(text) { - // Validate TLD to reduce false positives - if self.has_valid_tld(text) { - return Some(Tag::Domain); - } - } - - None + classify_domain(text) } /// Main entry point for semantic classification @@ -332,7 +150,7 @@ impl SemanticClassifier { /// This method analyzes a `FoundString` and returns a vector of semantic /// tags that apply to the string. URLs are checked first, then domains /// (which automatically excludes URLs to prevent double-tagging), then - /// IP addresses (IPv4 and IPv6). + /// IP addresses (IPv4 and IPv6), file paths, and other patterns. /// /// # Arguments /// @@ -340,672 +158,190 @@ impl SemanticClassifier { /// /// # Returns /// - /// Returns a vector of `Tag` values that apply to the string. The vector - /// may be empty if no patterns match. 
- /// - /// # Examples - /// - /// ``` - /// use stringy::classification::SemanticClassifier; - /// use stringy::types::{FoundString, Encoding, StringSource, Tag}; - /// - /// let classifier = SemanticClassifier::new(); - /// let found_string = FoundString { - /// text: "https://example.com".to_string(), - /// original_text: None, - /// encoding: Encoding::Ascii, - /// offset: 0, - /// rva: None, - /// section: None, - /// length: 19, - /// tags: Vec::new(), - /// score: 0, - /// section_weight: None, - /// semantic_boost: None, - /// noise_penalty: None, - /// source: StringSource::SectionData, - /// confidence: 1.0, - /// }; - /// - /// let tags = classifier.classify(&found_string); - /// assert_eq!(tags.len(), 1); - /// assert!(matches!(tags[0], Tag::Url)); - /// ``` + /// Returns a vector of `Tag` values that apply to the string. + #[must_use] pub fn classify(&self, string: &FoundString) -> Vec { let mut tags = Vec::new(); // Check for URLs first - if let Some(tag) = self.classify_url(&string.text) { + if let Some(tag) = classify_url(&string.text) { tags.push(tag); } // Check for domains (this will automatically exclude URLs) - if let Some(tag) = self.classify_domain(&string.text) { + if let Some(tag) = classify_domain(&string.text) { tags.push(tag); } // Check for IP addresses (IPv4 and IPv6) - let ip_tags = self.classify_ip_addresses(&string.text); + let ip_tags = classify_ip_addresses(&string.text); tags.extend(ip_tags); // Check for file paths (POSIX, Windows, UNC) - only add FilePath tag once - if self.classify_posix_path(&string.text).is_some() - || self.classify_windows_path(&string.text).is_some() - || self.classify_unc_path(&string.text).is_some() + if classify_posix_path(&string.text).is_some() + || classify_windows_path(&string.text).is_some() + || classify_unc_path(&string.text).is_some() { tags.push(Tag::FilePath); } // Check for registry paths - if let Some(tag) = self.classify_registry_path(&string.text) { + if let Some(tag) = 
classify_registry_path(&string.text) { tags.push(tag); } - tags - } + // Check for GUIDs + if let Some(tag) = classify_guid(&string.text) { + tags.push(tag); + } - /// Validates the top-level domain (TLD) against a hardcoded list - /// - /// This method extracts the TLD from a domain string and validates it - /// against a comprehensive list of common TLDs. This helps reduce false - /// positives by ensuring domains have valid TLDs. - /// - /// # Arguments - /// - /// * `domain` - The domain string to validate - /// - /// # Returns - /// - /// Returns `true` if the TLD is valid and at least 2 characters long, - /// `false` otherwise. - /// - /// # Examples - /// - /// ``` - /// use stringy::classification::SemanticClassifier; - /// - /// let classifier = SemanticClassifier::new(); - /// assert!(classifier.has_valid_tld("example.com")); - /// assert!(!classifier.has_valid_tld("example.x")); - /// ``` - fn has_valid_tld(&self, domain: &str) -> bool { - // Extract TLD (last segment after final dot) - let tld = domain.split('.').next_back().unwrap_or(""); - - // TLD must be at least 2 characters - if tld.len() < 2 { - return false; + // Check for email addresses + if let Some(tag) = classify_email(&string.text) { + tags.push(tag); } - // Normalize TLD to lowercase for case-insensitive validation - let tld_lower = tld.to_ascii_lowercase(); - - // Validate against hardcoded list of common TLDs - let valid_tlds = [ - // Generic TLDs - "com", - "net", - "org", - "io", - "co", - // Country code TLDs - "gov", - "edu", - "mil", - "int", - "uk", - "de", - "fr", - "jp", - "cn", - "au", - "ca", - "ru", - "br", - "in", - "nl", - "eu", - // New gTLDs - "info", - "biz", - "dev", - "app", - "cloud", - "tech", - "online", - "site", - "xyz", - "top", - "win", - "bid", - // Additional common TLDs - "me", - "tv", - "cc", - "ws", - "name", - "pro", - "mobi", - "asia", - "tel", - "travel", - "jobs", - "museum", - "aero", - "coop", - "cat", - "xxx", - "post", - "arpa", - "test", - 
"example", - "localhost", - ]; - - valid_tlds.contains(&tld_lower.as_str()) + // Check for format strings + if let Some(tag) = classify_format_string(&string.text) { + tags.push(tag); + } + + // Check for user agent strings + if let Some(tag) = classify_user_agent(&string.text) { + tags.push(tag); + } + + // Check for Base64 (broad tag - checked last as it has more false positives) + if let Some(tag) = classify_base64(&string.text) { + tags.push(tag); + } + + tags + } + + /// Validates a TLD against the known list + #[must_use] + pub fn has_valid_tld(&self, domain: &str) -> bool { + has_valid_tld(domain) } /// Strips port suffix from an IP address string - /// - /// Removes `:port` suffix if present (e.g., `192.168.1.1:8080` → `192.168.1.1`). - /// - /// # Arguments - /// - /// * `text` - The text that may contain a port suffix - /// - /// # Returns - /// - /// Returns a string slice without the port suffix. - fn strip_port<'a>(&self, text: &'a str) -> &'a str { - PORT_SUFFIX_REGEX - .find(text) - .map_or(text, |m| &text[..m.start()]) + #[must_use] + pub fn strip_port<'a>(&self, text: &'a str) -> &'a str { + strip_port(text) } - /// Strips bracketed notation from IPv6 addresses - /// - /// Removes `[` and `]` from bracketed IPv6 addresses (e.g., `[::1]` → `::1`). - /// - /// # Arguments - /// - /// * `text` - The text that may contain bracketed IPv6 notation - /// - /// # Returns - /// - /// Returns a string slice without brackets, or the original text if no brackets found. - fn strip_ipv6_brackets<'a>(&self, text: &'a str) -> &'a str { - IPV6_BRACKETS_REGEX - .captures(text) - .and_then(|caps| caps.get(1)) - .map_or(text, |m| m.as_str()) + /// Strips brackets from IPv6 address + #[must_use] + pub fn strip_ipv6_brackets<'a>(&self, text: &'a str) -> &'a str { + strip_ipv6_brackets(text) } - /// Detects IPv4 addresses in the given text - /// - /// This method uses a two-stage validation approach: - /// 1. Regex pre-filter for performance - /// 2. 
`std::net::Ipv4Addr` validation for correctness - /// - /// It also handles port suffixes (e.g., "192.168.1.1:8080"). - /// - /// # Note on Version Numbers - /// - /// This method accepts ALL valid IPv4 addresses in dotted-quad notation, - /// even if they could also be interpreted as version numbers (e.g., "1.2.3.4"). - /// It is the responsibility of the caller to disambiguate between IP addresses - /// and version numbers based on context when necessary. - /// - /// # Arguments - /// - /// * `text` - The text to search for IPv4 addresses - /// - /// # Returns - /// - /// Returns `true` if a valid IPv4 address is found, `false` otherwise. - /// - /// # Examples - /// - /// ``` - /// use stringy::classification::SemanticClassifier; - /// - /// let classifier = SemanticClassifier::new(); - /// assert!(classifier.is_ipv4_address("192.168.1.1")); - /// assert!(classifier.is_ipv4_address("192.168.1.1:8080")); - /// assert!(classifier.is_ipv4_address("1.2.3.4")); // Valid IP (could also be a version number) - /// assert!(!classifier.is_ipv4_address("256.1.1.1")); // Invalid octet - /// ``` + /// Checks if text is a valid IPv4 address + #[must_use] pub fn is_ipv4_address(&self, text: &str) -> bool { - // Strip port suffix if present - let text_without_port = self.strip_port(text); - - // Two-stage validation: regex pre-filter first - if !IPV4_REGEX.is_match(text_without_port) { - return false; - } - - // Check for leading zeros in octets (e.g., 192.168.01.1 should be rejected) - for octet_str in text_without_port.split('.') { - // If an octet has more than 1 digit and starts with '0', it's invalid - if octet_str.len() > 1 && octet_str.starts_with('0') { - return false; - } - } - - // Validate using std::net::Ipv4Addr for correctness - // This is the authoritative check - regex is just a pre-filter - Ipv4Addr::from_str(text_without_port).is_ok() + is_ipv4_address(text) } - /// Detects IPv6 addresses in the given text - /// - /// This method uses a two-stage validation 
approach: - /// 1. Basic structure check (contains colons, looks like IPv6) - /// 2. `std::net::Ipv6Addr` validation for correctness - /// - /// It handles bracketed notation (e.g., `[::1]`) and port suffixes. - /// - /// # Arguments - /// - /// * `text` - The text to search for IPv6 addresses - /// - /// # Returns - /// - /// Returns `true` if a valid IPv6 address is found, `false` otherwise. - /// - /// # Examples - /// - /// ``` - /// use stringy::classification::SemanticClassifier; - /// - /// let classifier = SemanticClassifier::new(); - /// assert!(classifier.is_ipv6_address("2001:db8::1")); - /// assert!(classifier.is_ipv6_address("::1")); - /// assert!(classifier.is_ipv6_address("[::1]:8080")); - /// assert!(!classifier.is_ipv6_address("gggg::1")); // Invalid hex - /// ``` + /// Checks if text is a valid IPv6 address + #[must_use] pub fn is_ipv6_address(&self, text: &str) -> bool { - // Handle bracketed IPv6 addresses like [::1] or [::1]:8080 - // Strategy: strip port first (if present), then strip brackets - - // If it looks like it has a port (contains ]:), strip port first - let after_port = if text.contains("]:") { - self.strip_port(text) - } else { - text - }; - - // Now strip brackets if present - let processed = self.strip_ipv6_brackets(after_port); - - // Two-stage validation: regex pre-filter first - // Basic structure check: must contain colons (IPv6 addresses always have colons) - if !processed.contains(':') { - return false; - } - - // For mixed notation (contains both colons and dots), skip regex check - // as the regex doesn't handle all mixed notation patterns - let is_mixed_notation = processed.contains('.'); - - if !is_mixed_notation { - // Use regex as pre-filter for non-mixed notation - if !IPV6_REGEX.is_match(processed) { - return false; - } - } + is_ipv6_address(text) + } - // Validate using std::net::Ipv6Addr for canonical validation - // This handles all IPv6 formats: full, compressed, mixed notation - 
Ipv6Addr::from_str(processed).is_ok() + /// Classifies IP addresses in text + #[must_use] + pub fn classify_ip_addresses(&self, text: &str) -> Vec { + classify_ip_addresses(text) } - /// Detects POSIX file paths in the given text - /// - /// Returns `Some(Tag::FilePath)` if a POSIX path is detected and valid. + /// Classifies POSIX paths + #[must_use] pub fn classify_posix_path(&self, text: &str) -> Option { - if !POSIX_PATH_REGEX.is_match(text) { - return None; - } - - if !self.is_valid_posix_path(text) { - return None; - } - - Some(Tag::FilePath) + classify_posix_path(text) } - /// Detects Windows file paths in the given text - /// - /// Returns `Some(Tag::FilePath)` if a Windows path is detected and valid. + /// Classifies Windows paths + #[must_use] pub fn classify_windows_path(&self, text: &str) -> Option { - if !WINDOWS_PATH_REGEX.is_match(text) { - return None; - } - - if !self.is_valid_windows_path(text) { - return None; - } - - Some(Tag::FilePath) + classify_windows_path(text) } - /// Detects UNC network paths in the given text - /// - /// Returns `Some(Tag::FilePath)` if a UNC path is detected and valid. 
- /// Performs robust validation including: - /// - Maximum overall length (4096) and component length (255) - /// - Control character rejection - /// - Forward slash and printf placeholder rejection - /// - Reserved name and dots-only component rejection - /// - Empty segment detection + /// Classifies UNC paths + #[must_use] pub fn classify_unc_path(&self, text: &str) -> Option { - if !UNC_PATH_REGEX.is_match(text) { - return None; - } - - // Maximum overall length check - if text.len() > 4096 { - return None; - } - - // Reject control characters - if self.contains_control_chars(text) { - return None; - } - - // Reject forward slashes anywhere in the path - if text.contains('/') { - return None; - } - - let trimmed = text.trim_start_matches('\\').trim_end_matches('\\'); - let parts: Vec<&str> = trimmed.split('\\').collect(); - - // Must have at least server and share - if parts.len() < 2 { - return None; - } - - let server = parts[0]; - let share = parts[1]; - - if server.is_empty() || share.is_empty() { - return None; - } - - // Validate all segments (no empty segments from double backslashes) - for segment in &parts { - // Reject empty segments (from consecutive backslashes like \\\\server\\\\share) - if segment.is_empty() { - return None; - } - - // Enforce max component length (255 bytes) - if segment.len() > 255 { - return None; - } - - // Reject components consisting solely of dots (but allow dots in domain names) - // Only reject if the segment is exactly "." or ".." - if *segment == "." || *segment == ".." 
{ - return None; - } - } - - // Reject printf-style placeholders in server or share - if self.contains_printf_placeholder(server) || self.contains_printf_placeholder(share) { - return None; - } - - // Reject reserved Windows device names in server or share - let reserved_names = [ - "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", - "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9", - ]; - let server_upper = server.to_ascii_uppercase(); - let share_upper = share.to_ascii_uppercase(); - for reserved in &reserved_names { - if server_upper == *reserved || share_upper == *reserved { - return None; - } - } - - Some(Tag::FilePath) + classify_unc_path(text) } - /// Detects Windows registry paths in the given text - /// - /// Returns `Some(Tag::RegistryPath)` if a registry path is detected and valid. + /// Classifies registry paths + #[must_use] pub fn classify_registry_path(&self, text: &str) -> Option { - if !REGISTRY_PATH_REGEX.is_match(text) && !REGISTRY_ABBREV_REGEX.is_match(text) { - return None; - } - - if !self.is_valid_registry_path(text) { - return None; - } - - Some(Tag::RegistryPath) + classify_registry_path(text) } - /// Checks if the POSIX path matches known suspicious locations + /// Checks if POSIX path is suspicious + #[must_use] pub fn is_suspicious_posix_path(&self, text: &str) -> bool { - SUSPICIOUS_POSIX_PATHS - .iter() - .any(|prefix| text.starts_with(prefix)) + is_suspicious_posix_path(text) } - /// Checks if the Windows path matches known suspicious locations (case-insensitive) + /// Checks if Windows path is suspicious + #[must_use] pub fn is_suspicious_windows_path(&self, text: &str) -> bool { - let lowered_text = text.to_ascii_lowercase(); - SUSPICIOUS_WINDOWS_PATHS.iter().any(|prefix| { - let lowered_prefix = prefix.to_ascii_lowercase(); - if prefix.starts_with('\\') { - lowered_text.contains(&lowered_prefix) - } else { - lowered_text.starts_with(&lowered_prefix) - } - }) + 
is_suspicious_windows_path(text) } - /// Checks if the registry path matches known persistence locations + /// Checks if registry path is suspicious + #[must_use] pub fn is_suspicious_registry_path(&self, text: &str) -> bool { - SUSPICIOUS_REGISTRY_PATHS - .iter() - .any(|path| self.contains_ascii_case_insensitive(text, path)) - } - - /// Case-insensitive ASCII substring search without allocations - fn contains_ascii_case_insensitive(&self, haystack: &str, needle: &str) -> bool { - if needle.is_empty() { - return true; - } - - let haystack_bytes = haystack.as_bytes(); - let needle_bytes = needle.as_bytes(); - - if needle_bytes.len() > haystack_bytes.len() { - return false; - } - - haystack_bytes - .windows(needle_bytes.len()) - .any(|window| window.eq_ignore_ascii_case(needle_bytes)) + is_suspicious_registry_path(text) } - /// Detects printf-style placeholders to reduce false positives - fn contains_printf_placeholder(&self, text: &str) -> bool { - let mut chars = text.chars().peekable(); - - while let Some(ch) = chars.next() { - if ch == '%' - && let Some(next) = chars.peek() - && matches!(next, 's' | 'd' | 'x' | 'o' | 'u' | 'f') - { - return true; - } - } - - false - } - - /// Checks if text contains ASCII control characters (C0 controls: 0x00-0x1F and DEL: 0x7F) - fn contains_control_chars(&self, text: &str) -> bool { - text.bytes().any(|b| b <= 0x1F || b == 0x7F) - } - - /// Validates POSIX path structure + /// Validates POSIX path + #[must_use] pub fn is_valid_posix_path(&self, text: &str) -> bool { - if text.len() > 4096 { - return false; - } - - if text.contains('\0') || text.contains('\n') || text.contains('\r') { - return false; - } - - if text.contains("//") { - return false; - } - - if text.contains('\\') { - return false; - } - - if self.contains_printf_placeholder(text) { - return false; - } - - let has_known_prefix = KNOWN_POSIX_PREFIXES - .iter() - .any(|prefix| text.starts_with(prefix)); - let is_suspicious = self.is_suspicious_posix_path(text); - - 
if !has_known_prefix && !is_suspicious && text.len() > 2048 { - return false; - } - - true + is_valid_posix_path(text) } - /// Validates Windows path structure + /// Validates Windows path + #[must_use] pub fn is_valid_windows_path(&self, text: &str) -> bool { - // Reject control characters early to prevent regex/prefix matching from being fooled - if self.contains_control_chars(text) { - return false; - } - - if text.len() > 4096 { - return false; - } - - if text.contains('/') { - return false; - } - - if text.contains("\\\\") { - return false; - } - - if self.contains_printf_placeholder(text) { - return false; - } - - let has_known_prefix = KNOWN_WINDOWS_PREFIXES - .iter() - .any(|prefix| text.starts_with(prefix)); - let is_suspicious = self.is_suspicious_windows_path(text); - - if !has_known_prefix && !is_suspicious && text.len() > 2048 { - return false; - } - - true + is_valid_windows_path(text) } - /// Validates Windows registry path structure + /// Validates registry path + #[must_use] pub fn is_valid_registry_path(&self, text: &str) -> bool { - // Reject control characters early to prevent regex/prefix matching from being fooled - if self.contains_control_chars(text) { - return false; - } - - // Maximum length check (4096 bytes) - if text.len() > 4096 { - return false; - } - - if text.contains('/') { - return false; - } - - if text.contains("\\\\") { - return false; - } - - let root = text.split('\\').next().unwrap_or(""); - let root_upper = root.to_ascii_uppercase(); - - if root_upper.starts_with("HKEY_") { - return VALID_REGISTRY_ROOTS - .iter() - .any(|valid| *valid == root_upper); - } - - if root_upper.starts_with("HK") { - return matches!( - root_upper.as_str(), - "HKLM" | "HKCU" | "HKCR" | "HKU" | "HKCC" - ); - } + is_valid_registry_path(text) + } - false + /// Classifies GUIDs + #[must_use] + pub fn classify_guid(&self, text: &str) -> Option { + classify_guid(text) } - /// Classifies IP addresses (IPv4 and IPv6) in the given text - /// - /// This 
method checks for both IPv4 and IPv6 addresses and returns - /// appropriate tags. A string may match both patterns (unlikely but possible). - /// - /// # Arguments - /// - /// * `text` - The text to search for IP addresses - /// - /// # Returns - /// - /// Returns a vector of `Tag` values (`Tag::IPv4` and/or `Tag::IPv6`). - /// The vector may be empty if no IP addresses are found. - /// - /// # Examples - /// - /// ``` - /// use stringy::classification::SemanticClassifier; - /// use stringy::types::Tag; - /// - /// let classifier = SemanticClassifier::new(); - /// let tags = classifier.classify_ip_addresses("192.168.1.1"); - /// assert_eq!(tags, vec![Tag::IPv4]); - /// - /// let tags = classifier.classify_ip_addresses("::1"); - /// assert_eq!(tags, vec![Tag::IPv6]); - /// - /// let tags = classifier.classify_ip_addresses("not an ip"); - /// assert!(tags.is_empty()); - /// ``` - pub fn classify_ip_addresses(&self, text: &str) -> Vec { - let mut tags = Vec::new(); + /// Classifies email addresses + #[must_use] + pub fn classify_email(&self, text: &str) -> Option { + classify_email(text) + } - // Check for IPv4 - if self.is_ipv4_address(text) { - tags.push(Tag::IPv4); - } + /// Classifies Base64-encoded data + #[must_use] + pub fn classify_base64(&self, text: &str) -> Option { + classify_base64(text) + } - // Check for IPv6 - if self.is_ipv6_address(text) { - tags.push(Tag::IPv6); - } + /// Classifies format strings + #[must_use] + pub fn classify_format_string(&self, text: &str) -> Option { + classify_format_string(text) + } - tags + /// Classifies user agent strings + #[must_use] + pub fn classify_user_agent(&self, text: &str) -> Option { + classify_user_agent(text) } } @@ -1014,7 +350,6 @@ mod tests { use super::*; use crate::types::{Encoding, StringSource}; - /// Helper function to create a test FoundString fn create_test_string(text: &str) -> FoundString { FoundString { text: text.to_string(), @@ -1035,606 +370,125 @@ mod tests { } #[test] - fn 
test_url_detection() { - let classifier = SemanticClassifier::new(); - - // Valid URLs - assert_eq!( - classifier.classify_url("https://example.com"), - Some(Tag::Url) - ); - assert_eq!( - classifier.classify_url("http://api.malware.com/v1/data"), - Some(Tag::Url) - ); - assert_eq!( - classifier.classify_url("https://192.168.1.1:8080/path"), - Some(Tag::Url) - ); - - // Invalid cases (not URLs) - assert_eq!(classifier.classify_url("example.com"), None); - assert_eq!(classifier.classify_url("not a url"), None); - } - - #[test] - fn test_domain_detection() { + fn test_classify_mixed_strings() { let classifier = SemanticClassifier::new(); - // Valid domains - assert_eq!(classifier.classify_domain("example.com"), Some(Tag::Domain)); - assert_eq!( - classifier.classify_domain("api.service.io"), - Some(Tag::Domain) - ); - assert_eq!( - classifier.classify_domain("malware-c2.net"), - Some(Tag::Domain) - ); - - // Valid domains with mixed-case TLDs - assert_eq!(classifier.classify_domain("example.COM"), Some(Tag::Domain)); - assert_eq!( - classifier.classify_domain("api.service.IO"), - Some(Tag::Domain) - ); - assert_eq!( - classifier.classify_domain("malware-c2.NET"), - Some(Tag::Domain) - ); - assert_eq!(classifier.classify_domain("Example.OrG"), Some(Tag::Domain)); - - // URLs should not match as domains - assert_eq!(classifier.classify_domain("https://example.com"), None); - - // Invalid domains - assert_eq!(classifier.classify_domain("invalid"), None); - assert_eq!(classifier.classify_domain("too.short.x"), None); - } - - #[test] - fn test_url_classification() { - let classifier = SemanticClassifier::new(); - let found_string = create_test_string("https://example.com/api"); + // URL + let url_string = create_test_string("https://example.com/api"); + let tags = classifier.classify(&url_string); + assert!(tags.contains(&Tag::Url)); - let tags = classifier.classify(&found_string); - assert_eq!(tags.len(), 1); - assert!(matches!(tags[0], Tag::Url)); - } + // Domain + let 
domain_string = create_test_string("api.example.com"); + let tags = classifier.classify(&domain_string); + assert!(tags.contains(&Tag::Domain)); - #[test] - fn test_domain_classification() { - let classifier = SemanticClassifier::new(); - let found_string = create_test_string("example.com"); + // IPv4 + let ipv4_string = create_test_string("192.168.1.1"); + let tags = classifier.classify(&ipv4_string); + assert!(tags.contains(&Tag::IPv4)); - let tags = classifier.classify(&found_string); - assert_eq!(tags.len(), 1); - assert!(matches!(tags[0], Tag::Domain)); + // Windows path + let path_string = create_test_string("C:\\Windows\\System32\\cmd.exe"); + let tags = classifier.classify(&path_string); + assert!(tags.contains(&Tag::FilePath)); } #[test] - fn test_url_not_double_tagged() { + fn test_classify_posix_path_in_found_string() { let classifier = SemanticClassifier::new(); - let found_string = create_test_string("https://example.com"); + let found_string = create_test_string("/usr/local/bin/app"); let tags = classifier.classify(&found_string); - assert_eq!(tags.len(), 1); - assert!(matches!(tags[0], Tag::Url)); - // Ensure it's NOT also tagged as Domain - assert!(!tags.iter().any(|t| matches!(t, Tag::Domain))); - } - - #[test] - fn test_tld_validation() { - let classifier = SemanticClassifier::new(); - - // Valid TLDs - assert!(classifier.has_valid_tld("example.com")); - assert!(classifier.has_valid_tld("test.net")); - assert!(classifier.has_valid_tld("site.org")); - assert!(classifier.has_valid_tld("api.io")); - - // Valid TLDs with mixed case (should be normalized) - assert!(classifier.has_valid_tld("example.COM")); - assert!(classifier.has_valid_tld("test.NET")); - assert!(classifier.has_valid_tld("site.ORG")); - assert!(classifier.has_valid_tld("api.IO")); - assert!(classifier.has_valid_tld("Example.CoM")); - - // Invalid TLDs - assert!(!classifier.has_valid_tld("example.x")); - assert!(!classifier.has_valid_tld("test.invalid")); - 
assert!(!classifier.has_valid_tld("site.toolong123")); + assert!(tags.contains(&Tag::FilePath)); } #[test] - fn test_edge_cases() { + fn test_classify_windows_path_in_found_string() { let classifier = SemanticClassifier::new(); + let found_string = create_test_string("C:\\Program Files\\Application\\app.exe"); - // Empty string - let empty = create_test_string(""); - let tags = classifier.classify(&empty); - assert_eq!(tags.len(), 0); - - // Very long domain (within RFC 1035 limits) - let long_domain = "a".repeat(60) + ".com"; - let found_string = create_test_string(&long_domain); let tags = classifier.classify(&found_string); - assert_eq!(tags.len(), 1); - assert!(matches!(tags[0], Tag::Domain)); - - // String with no valid domain pattern - let no_domain = create_test_string("just some text without domains"); - let tags = classifier.classify(&no_domain); - assert_eq!(tags.len(), 0); - - // Malformed URL - let malformed = create_test_string("http://"); - let tags = classifier.classify(&malformed); - assert_eq!(tags.len(), 0); - } - - #[test] - fn test_ipv4_valid_addresses() { - let classifier = SemanticClassifier::new(); - - // Valid IPv4 addresses - assert!(classifier.is_ipv4_address("192.168.1.1")); - assert!(classifier.is_ipv4_address("10.0.0.1")); - assert!(classifier.is_ipv4_address("8.8.8.8")); - assert!(classifier.is_ipv4_address("1.1.1.1")); - assert!(classifier.is_ipv4_address("127.0.0.1")); - assert!(classifier.is_ipv4_address("0.0.0.0")); - assert!(classifier.is_ipv4_address("255.255.255.255")); - } - - #[test] - fn test_ipv4_invalid_addresses() { - let classifier = SemanticClassifier::new(); - - // Invalid IPv4 addresses - assert!(!classifier.is_ipv4_address("256.1.1.1")); // Octet > 255 - assert!(!classifier.is_ipv4_address("192.168.1")); // Missing octet - assert!(!classifier.is_ipv4_address("192.168.1.1.1")); // Too many octets - assert!(!classifier.is_ipv4_address("999.999.999.999")); // All octets > 255 - 
assert!(!classifier.is_ipv4_address("192.168.01.1")); // Leading zero (invalid format) - } - - #[test] - fn test_ipv4_with_port() { - let classifier = SemanticClassifier::new(); - - // IPv4 addresses with ports should be detected - assert!(classifier.is_ipv4_address("192.168.1.1:8080")); - assert!(classifier.is_ipv4_address("10.0.0.1:443")); - assert!(classifier.is_ipv4_address("127.0.0.1:3000")); - } - - #[test] - fn test_ipv4_version_numbers() { - let classifier = SemanticClassifier::new(); - - // Valid IPv4 addresses that could also be version numbers are accepted - // It's the caller's responsibility to disambiguate based on context - assert!(classifier.is_ipv4_address("1.2.3.4")); - assert!(classifier.is_ipv4_address("2.0.1.0")); - assert!(classifier.is_ipv4_address("10.5.2.1")); - assert!(classifier.is_ipv4_address("10.5.2.20")); - } - - #[test] - fn test_ipv4_edge_cases() { - let classifier = SemanticClassifier::new(); - - // Boundary values - assert!(classifier.is_ipv4_address("0.0.0.0")); - assert!(classifier.is_ipv4_address("255.255.255.255")); - assert!(classifier.is_ipv4_address("192.0.0.1")); - assert!(classifier.is_ipv4_address("0.255.0.255")); - - // Private network addresses - assert!(classifier.is_ipv4_address("192.168.0.1")); - assert!(classifier.is_ipv4_address("10.0.0.1")); - assert!(classifier.is_ipv4_address("172.16.0.1")); - } - - #[test] - fn test_ipv6_full_notation() { - let classifier = SemanticClassifier::new(); - - // Full IPv6 notation - assert!(classifier.is_ipv6_address("2001:0db8:85a3:0000:0000:8a2e:0370:7334")); - assert!(classifier.is_ipv6_address("2001:0db8:85a3:0000:0000:8a2e:0370:7334")); - } - - #[test] - fn test_ipv6_compressed() { - let classifier = SemanticClassifier::new(); - - // Compressed IPv6 notation - assert!(classifier.is_ipv6_address("2001:db8::1")); - assert!(classifier.is_ipv6_address("::1")); - assert!(classifier.is_ipv6_address("fe80::1")); - assert!(classifier.is_ipv6_address("::")); - } - - #[test] - fn 
test_ipv6_mixed_notation() { - let classifier = SemanticClassifier::new(); - - // Mixed IPv4/IPv6 notation - assert!(classifier.is_ipv6_address("::ffff:192.0.2.1")); - assert!(classifier.is_ipv6_address("64:ff9b::192.0.2.1")); - } - - #[test] - fn test_ipv6_invalid() { - let classifier = SemanticClassifier::new(); - - // Invalid IPv6 addresses - assert!(!classifier.is_ipv6_address("gggg::1")); // Invalid hex - assert!(!classifier.is_ipv6_address("2001:db8::1::2")); // Double :: - assert!(!classifier.is_ipv6_address("2001:db8:1")); // Too short - } - - #[test] - fn test_ipv6_with_brackets() { - let classifier = SemanticClassifier::new(); - - // IPv6 addresses with brackets - assert!(classifier.is_ipv6_address("[2001:db8::1]")); - assert!(classifier.is_ipv6_address("[::1]")); - } - - #[test] - fn test_ipv6_with_port() { - let classifier = SemanticClassifier::new(); - - // IPv6 addresses with brackets and ports - assert!(classifier.is_ipv6_address("[2001:db8::1]:8080")); - assert!(classifier.is_ipv6_address("[::1]:8080")); + assert!(tags.contains(&Tag::FilePath)); } #[test] - fn test_classify_ipv4() { + fn test_classify_registry_path_in_found_string() { let classifier = SemanticClassifier::new(); - let found_string = create_test_string("192.168.1.1"); + let found_string = + create_test_string("HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion"); let tags = classifier.classify(&found_string); - assert_eq!(tags.len(), 1); - assert!(matches!(tags[0], Tag::IPv4)); + assert!(tags.contains(&Tag::RegistryPath)); } #[test] - fn test_classify_ipv6() { + fn test_no_false_positives_on_random_data() { let classifier = SemanticClassifier::new(); - let found_string = create_test_string("::1"); + let found_string = create_test_string("x9qz1p0t8v7w6r5y4u3i2o1p"); let tags = classifier.classify(&found_string); - assert_eq!(tags.len(), 1); - assert!(matches!(tags[0], Tag::IPv6)); - } - - #[test] - fn test_classify_no_ip() { - let classifier = SemanticClassifier::new(); - 
let found_string = create_test_string("not an ip address"); - - let tags = classifier.classify_ip_addresses(&found_string.text); assert!(tags.is_empty()); } #[test] - fn test_classify_ipv4_with_port() { + fn test_guid_in_found_string() { let classifier = SemanticClassifier::new(); - let found_string = create_test_string("192.168.1.1:8080"); + let found_string = create_test_string("{12345678-1234-1234-1234-123456789ABC}"); let tags = classifier.classify(&found_string); - assert_eq!(tags.len(), 1); - assert!(matches!(tags[0], Tag::IPv4)); + assert!(tags.contains(&Tag::Guid)); } #[test] - fn test_classify_ipv6_with_brackets_and_port() { + fn test_email_in_found_string() { let classifier = SemanticClassifier::new(); - let found_string = create_test_string("[::1]:8080"); + let found_string = create_test_string("user@example.com"); let tags = classifier.classify(&found_string); - assert_eq!(tags.len(), 1); - assert!(matches!(tags[0], Tag::IPv6)); - } - - #[test] - fn test_posix_absolute_path() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_posix_path("/usr/bin/bash"), - Some(Tag::FilePath) - ); - assert_eq!( - classifier.classify_posix_path("/etc/passwd"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_posix_home_directory() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_posix_path("/home/user/.bashrc"), - Some(Tag::FilePath) - ); - assert_eq!( - classifier.classify_posix_path("/home/user/.config/app"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_posix_with_spaces() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_posix_path("/Users/John Doe/Documents/file.txt"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_posix_system_directories() { - let classifier = SemanticClassifier::new(); - - assert_eq!(classifier.classify_posix_path("/usr/"), Some(Tag::FilePath)); - assert_eq!(classifier.classify_posix_path("/etc/"), Some(Tag::FilePath)); - 
assert_eq!(classifier.classify_posix_path("/var/"), Some(Tag::FilePath)); - } - - #[test] - fn test_posix_suspicious_paths() { - let classifier = SemanticClassifier::new(); - - assert!(classifier.is_suspicious_posix_path("/tmp/malware")); - assert!(classifier.is_suspicious_posix_path("/etc/cron.d/backdoor")); - } - - #[test] - fn test_posix_too_short() { - let classifier = SemanticClassifier::new(); - - assert_eq!(classifier.classify_posix_path("/a"), Some(Tag::FilePath)); - } - - #[test] - fn test_posix_invalid() { - let classifier = SemanticClassifier::new(); - - assert_eq!(classifier.classify_posix_path("usr/bin/bash"), None); - } - - #[test] - fn test_posix_with_null_bytes() { - let classifier = SemanticClassifier::new(); - - assert_eq!(classifier.classify_posix_path("/tmp/evil\0bin"), None); - } - - #[test] - fn test_windows_absolute_path() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_windows_path("C:\\Windows\\System32\\cmd.exe"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_windows_program_files() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_windows_path("C:\\Program Files (x86)\\App"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_windows_with_spaces() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_windows_path("D:\\My Documents\\file.txt"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_windows_different_drives() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_windows_path("D:\\"), - Some(Tag::FilePath) - ); - assert_eq!( - classifier.classify_windows_path("E:\\Data\\"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_windows_suspicious_paths() { - let classifier = SemanticClassifier::new(); - - assert!(classifier.is_suspicious_windows_path("C:\\Windows\\Temp\\evil.exe")); - } - - #[test] - fn test_windows_case_insensitive() { - let classifier = SemanticClassifier::new(); - 
- assert_eq!( - classifier.classify_windows_path("c:\\windows\\"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_windows_invalid() { - let classifier = SemanticClassifier::new(); - - assert_eq!(classifier.classify_windows_path("C:/forward/slash"), None); - } - - #[test] - fn test_windows_invalid_drive() { - let classifier = SemanticClassifier::new(); - - assert_eq!(classifier.classify_windows_path("1:\\path"), None); - } - - #[test] - fn test_unc_path() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_unc_path("\\\\server\\share\\file.txt"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_unc_with_domain() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_unc_path("\\\\server.domain.com\\share\\"), - Some(Tag::FilePath) - ); - } - - #[test] - fn test_unc_invalid() { - let classifier = SemanticClassifier::new(); - - assert_eq!(classifier.classify_unc_path("\\\\\\\\"), None); - assert_eq!(classifier.classify_unc_path("\\\\server"), None); - } - - #[test] - fn test_registry_run_key() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_registry_path( - "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run" - ), - Some(Tag::RegistryPath) - ); - } - - #[test] - fn test_registry_current_user() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_registry_path("HKEY_CURRENT_USER\\Software\\App\\Settings"), - Some(Tag::RegistryPath) - ); - } - - #[test] - fn test_registry_abbreviated_hklm() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_registry_path("HKLM\\System\\CurrentControlSet"), - Some(Tag::RegistryPath) - ); - } - - #[test] - fn test_registry_abbreviated_hkcu() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_registry_path("HKCU\\Software\\Microsoft"), - Some(Tag::RegistryPath) - ); - } - - #[test] - fn 
test_registry_persistence_run() { - let classifier = SemanticClassifier::new(); - - assert!(classifier.is_suspicious_registry_path( - "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run" - )); - } - - #[test] - fn test_registry_invalid_root() { - let classifier = SemanticClassifier::new(); - - assert_eq!( - classifier.classify_registry_path("HKEY_INVALID\\Path"), - None - ); + assert!(tags.contains(&Tag::Email)); } #[test] - fn test_registry_forward_slash() { + fn test_base64_in_found_string() { let classifier = SemanticClassifier::new(); - - assert_eq!(classifier.classify_registry_path("HKLM/Software"), None); - } - - #[test] - fn test_classify_mixed_strings() { - let classifier = SemanticClassifier::new(); - let found_string = create_test_string("https://example.com"); + let found_string = create_test_string("SGVsbG8gV29ybGQh"); let tags = classifier.classify(&found_string); - assert!(tags.contains(&Tag::Url)); + assert!(tags.contains(&Tag::Base64)); } #[test] - fn test_classify_posix_path_in_found_string() { + fn test_format_string_in_found_string() { let classifier = SemanticClassifier::new(); - let found_string = create_test_string("/usr/bin/bash"); + let found_string = create_test_string("Error: %s at line %d"); let tags = classifier.classify(&found_string); - assert!(tags.contains(&Tag::FilePath)); + assert!(tags.contains(&Tag::FormatString)); } #[test] - fn test_classify_windows_path_in_found_string() { + fn test_user_agent_in_found_string() { let classifier = SemanticClassifier::new(); - let found_string = create_test_string("C:\\Windows\\System32\\cmd.exe"); + let found_string = + create_test_string("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"); let tags = classifier.classify(&found_string); - assert!(tags.contains(&Tag::FilePath)); + assert!(tags.contains(&Tag::UserAgent)); } #[test] - fn test_classify_registry_path_in_found_string() { + fn test_multiple_tags_format_and_base64_not_both() { let classifier = 
SemanticClassifier::new(); - let found_string = create_test_string( - "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run", - ); - - let tags = classifier.classify(&found_string); - assert!(tags.contains(&Tag::RegistryPath)); - } - #[test] - fn test_no_false_positives_on_random_data() { - let classifier = SemanticClassifier::new(); - let found_string = create_test_string("x9qz1p0t8v7w6r5y4u3i2o1p"); + // Format string should get FormatString tag + let format = create_test_string("Hello %s, your score is %d"); + let tags = classifier.classify(&format); + assert!(tags.contains(&Tag::FormatString)); - let tags = classifier.classify(&found_string); - assert!(tags.is_empty()); + // Pure Base64 should get Base64 tag + let base64 = create_test_string("VGhpcyBpcyBhIHRlc3Q="); + let tags = classifier.classify(&base64); + assert!(tags.contains(&Tag::Base64)); } } diff --git a/src/classification/symbols.rs b/src/classification/symbols.rs new file mode 100644 index 0000000..27b7cd2 --- /dev/null +++ b/src/classification/symbols.rs @@ -0,0 +1,508 @@ +//! Symbol demangling for Rust and C++ symbols +//! +//! This module provides functionality to detect and demangle mangled symbols +//! from compiled Rust and C++ binaries. When a mangled symbol is detected, the +//! original mangled form is preserved in `FoundString.original_text` while the +//! demangled human-readable form replaces `FoundString.text`. +//! +//! # Supported Symbol Formats +//! +//! - **Rust legacy mangling**: Symbols starting with `_ZN` (uses Itanium ABI-like encoding) +//! - **Rust v0 mangling**: Symbols starting with `_R` (new Rust-specific encoding) +//! - **C++ Itanium ABI**: Symbols starting with `_Z` (used by GCC, Clang, and others) +//! +//! # Usage +//! +//! ```rust +//! use stringy::classification::SymbolDemangler; +//! use stringy::types::{FoundString, Encoding, StringSource, Tag}; +//! +//! let demangler = SymbolDemangler::new(); +//! let mut found_string = FoundString { +//! 
text: "_ZN4core3fmt5Write9write_str17h1234567890abcdefE".to_string(), +//! original_text: None, +//! encoding: Encoding::Ascii, +//! offset: 0, +//! rva: None, +//! section: None, +//! length: 47, +//! tags: Vec::new(), +//! score: 0, +//! section_weight: None, +//! semantic_boost: None, +//! noise_penalty: None, +//! source: StringSource::ImportName, +//! confidence: 1.0, +//! }; +//! +//! demangler.demangle(&mut found_string); +//! // found_string.text now contains the demangled symbol +//! // found_string.original_text contains the original mangled form +//! // found_string.tags contains Tag::DemangledSymbol +//! ``` + +use crate::types::{FoundString, Tag}; +use cpp_demangle::Symbol as CppSymbol; + +/// Symbol demangler for Rust and C++ symbols +/// +/// Uses the `rustc-demangle` crate for Rust symbols and the `cpp_demangle` +/// crate for C++ symbols. Converts mangled symbols into human-readable form +/// while preserving the original mangled text. +#[derive(Debug, Default, Clone)] +pub struct SymbolDemangler; + +impl SymbolDemangler { + /// Create a new instance of the symbol demangler + #[must_use] + pub fn new() -> Self { + Self + } + + /// Check if a symbol appears to be a mangled Rust or C++ symbol + /// + /// Returns `true` if the symbol starts with known mangling prefixes: + /// - `_ZN` - Rust legacy mangling or C++ nested names (Itanium ABI) + /// - `_R` - Rust v0 mangling scheme + /// - `_Z` - C++ Itanium ABI mangling (used by GCC, Clang) + /// + /// # Arguments + /// + /// * `symbol` - The symbol string to check + /// + /// # Returns + /// + /// Returns `true` if the symbol appears to be mangled, `false` otherwise. 
+ /// + /// # Examples + /// + /// ``` + /// use stringy::classification::SymbolDemangler; + /// + /// let demangler = SymbolDemangler::new(); + /// // Rust symbols + /// assert!(demangler.is_mangled("_ZN4core3fmt5Write9write_str17h1234567890abcdefE")); + /// assert!(demangler.is_mangled("_RNvNtCs123_4core3fmt5write")); + /// // C++ symbols + /// assert!(demangler.is_mangled("_ZN3foo3barEv")); + /// assert!(demangler.is_mangled("_Z3foov")); + /// assert!(!demangler.is_mangled("printf")); + /// ``` + #[must_use] + pub fn is_mangled(&self, symbol: &str) -> bool { + // Rust v0 mangling scheme (Rust-specific, check first) + if symbol.starts_with("_R") { + return true; + } + + // Itanium ABI mangling (used by both Rust legacy and C++) + // This includes _ZN (nested names), _ZL (local), _ZTV (vtable), etc. + if symbol.starts_with("_Z") { + return true; + } + + false + } + + /// Attempt to demangle a symbol in a `FoundString` + /// + /// If the string appears to be a mangled Rust or C++ symbol and can be + /// successfully demangled: + /// - The original mangled form is stored in `original_text` + /// - The demangled form replaces `text` + /// - `Tag::DemangledSymbol` is added to the tags + /// + /// The demangler tries Rust demangling first (for `_R` and `_ZN` prefixes), + /// then falls back to C++ demangling for `_Z` prefixes. + /// + /// If demangling fails or the symbol is not mangled, the `FoundString` is + /// left unchanged. 
+ /// + /// # Arguments + /// + /// * `string` - The `FoundString` to process (modified in-place) + /// + /// # Examples + /// + /// ``` + /// use stringy::classification::SymbolDemangler; + /// use stringy::types::{FoundString, Encoding, StringSource, Tag}; + /// + /// let demangler = SymbolDemangler::new(); + /// let mut found_string = FoundString { + /// text: "_ZN4core3fmt5Write9write_str17h1234567890abcdefE".to_string(), + /// original_text: None, + /// encoding: Encoding::Ascii, + /// offset: 0, + /// rva: None, + /// section: None, + /// length: 47, + /// tags: Vec::new(), + /// score: 0, + /// section_weight: None, + /// semantic_boost: None, + /// noise_penalty: None, + /// source: StringSource::ImportName, + /// confidence: 1.0, + /// }; + /// + /// demangler.demangle(&mut found_string); + /// assert!(found_string.tags.contains(&Tag::DemangledSymbol)); + /// assert!(found_string.original_text.is_some()); + /// ``` + pub fn demangle(&self, string: &mut FoundString) { + // Only attempt demangling if it looks like a mangled symbol + if !self.is_mangled(&string.text) { + return; + } + + // Try to demangle + let demangled_str = match self.try_demangle_internal(&string.text) { + Some(s) => s, + None => return, + }; + + // Store original mangled form and replace with demangled + string.original_text = Some(string.text.clone()); + string.text = demangled_str; + + // Add the DemangledSymbol tag if not already present + if !string.tags.contains(&Tag::DemangledSymbol) { + string.tags.push(Tag::DemangledSymbol); + } + } + + /// Internal demangling logic that tries Rust then C++ + fn try_demangle_internal(&self, symbol: &str) -> Option { + // For Rust v0 symbols (_R prefix), only try Rust demangling + if symbol.starts_with("_R") { + return self.try_rust_demangle(symbol); + } + + // For _Z prefixed symbols, try Rust first (for legacy Rust symbols), + // then fall back to C++ if Rust demangling doesn't work + if symbol.starts_with("_Z") { + // Try Rust first (handles 
_ZN Rust legacy symbols) + if let Some(demangled) = self.try_rust_demangle(symbol) { + return Some(demangled); + } + + // Fall back to C++ demangling + return self.try_cpp_demangle(symbol); + } + + None + } + + /// Try to demangle as a Rust symbol + fn try_rust_demangle(&self, symbol: &str) -> Option { + let demangled = rustc_demangle::demangle(symbol); + let demangled_str = demangled.to_string(); + + // Check if demangling actually produced a different result + if demangled_str != symbol { + Some(demangled_str) + } else { + None + } + } + + /// Try to demangle as a C++ symbol + fn try_cpp_demangle(&self, symbol: &str) -> Option { + // Parse the symbol using cpp_demangle + let parsed = CppSymbol::new(symbol).ok()?; + let demangled_str = parsed.demangle().ok()?; + + // Check if demangling actually produced a different result + if demangled_str != symbol { + Some(demangled_str) + } else { + None + } + } + + /// Try to demangle a symbol string directly + /// + /// This is a convenience method for demangling without a `FoundString`. + /// Supports both Rust and C++ mangled symbols. + /// + /// # Arguments + /// + /// * `symbol` - The mangled symbol string + /// + /// # Returns + /// + /// Returns `Some(demangled)` if demangling succeeded and produced a different + /// result, `None` otherwise. 
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use stringy::classification::SymbolDemangler;
+    ///
+    /// let demangler = SymbolDemangler::new();
+    ///
+    /// // Rust symbol
+    /// let result = demangler.try_demangle("_ZN4core3fmt5Write9write_str17h1234567890abcdefE");
+    /// assert!(result.is_some());
+    ///
+    /// // C++ symbol
+    /// let result = demangler.try_demangle("_ZN3foo3barEv");
+    /// assert!(result.is_some());
+    ///
+    /// // Not mangled
+    /// let result = demangler.try_demangle("printf");
+    /// assert!(result.is_none());
+    /// ```
+    #[must_use]
+    pub fn try_demangle(&self, symbol: &str) -> Option<String> {
+        if !self.is_mangled(symbol) {
+            return None;
+        }
+
+        self.try_demangle_internal(symbol)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::{Encoding, StringSource};
+
+    fn create_test_string(text: &str) -> FoundString {
+        FoundString {
+            text: text.to_string(),
+            original_text: None,
+            encoding: Encoding::Ascii,
+            offset: 0,
+            rva: None,
+            section: None,
+            length: text.len() as u32,
+            tags: Vec::new(),
+            score: 0,
+            section_weight: None,
+            semantic_boost: None,
+            noise_penalty: None,
+            source: StringSource::ImportName,
+            confidence: 1.0,
+        }
+    }
+
+    #[test]
+    fn test_is_mangled_rust_legacy() {
+        let demangler = SymbolDemangler::new();
+
+        // Legacy Rust mangling (_ZN prefix)
+        assert!(demangler.is_mangled("_ZN4core3fmt5Write9write_str17h1234567890abcdefE"));
+        assert!(demangler.is_mangled("_ZN3std2io5stdio6_print17h1234567890abcdefE"));
+    }
+
+    #[test]
+    fn test_is_mangled_rust_v0() {
+        let demangler = SymbolDemangler::new();
+
+        // Rust v0 mangling (_R prefix)
+        assert!(demangler.is_mangled("_RNvNtCs123_4core3fmt5write"));
+        assert!(demangler.is_mangled("_RNvCs123_5hello4main"));
+    }
+
+    #[test]
+    fn test_is_mangled_not_mangled() {
+        let demangler = SymbolDemangler::new();
+
+        // Regular symbols should not be detected as mangled
+        assert!(!demangler.is_mangled("printf"));
+        assert!(!demangler.is_mangled("malloc"));
+        
assert!(!demangler.is_mangled("main")); + assert!(!demangler.is_mangled("CreateFileW")); + assert!(!demangler.is_mangled("")); + } + + #[test] + fn test_demangle_rust_symbol() { + let demangler = SymbolDemangler::new(); + let mut found_string = + create_test_string("_ZN4core3fmt5Write9write_str17h1234567890abcdefE"); + + demangler.demangle(&mut found_string); + + // Should have been demangled + assert!(found_string.original_text.is_some()); + assert_eq!( + found_string.original_text.as_ref().unwrap(), + "_ZN4core3fmt5Write9write_str17h1234567890abcdefE" + ); + assert!(found_string.tags.contains(&Tag::DemangledSymbol)); + // Demangled text should be different from original + assert_ne!( + found_string.text, + "_ZN4core3fmt5Write9write_str17h1234567890abcdefE" + ); + } + + #[test] + fn test_demangle_non_mangled() { + let demangler = SymbolDemangler::new(); + let mut found_string = create_test_string("printf"); + + demangler.demangle(&mut found_string); + + // Should not have been modified + assert_eq!(found_string.text, "printf"); + assert!(found_string.original_text.is_none()); + assert!(!found_string.tags.contains(&Tag::DemangledSymbol)); + } + + #[test] + fn test_try_demangle_success() { + let demangler = SymbolDemangler::new(); + let result = demangler.try_demangle("_ZN4core3fmt5Write9write_str17h1234567890abcdefE"); + + assert!(result.is_some()); + let demangled = result.unwrap(); + assert!(!demangled.is_empty()); + assert_ne!( + demangled, + "_ZN4core3fmt5Write9write_str17h1234567890abcdefE" + ); + } + + #[test] + fn test_try_demangle_failure() { + let demangler = SymbolDemangler::new(); + + assert!(demangler.try_demangle("printf").is_none()); + assert!(demangler.try_demangle("").is_none()); + assert!(demangler.try_demangle("main").is_none()); + } + + #[test] + fn test_demangle_preserves_existing_tags() { + let demangler = SymbolDemangler::new(); + let mut found_string = + create_test_string("_ZN4core3fmt5Write9write_str17h1234567890abcdefE"); + 
found_string.tags.push(Tag::Import); + + demangler.demangle(&mut found_string); + + // Should have both the original tag and the new demangled tag + assert!(found_string.tags.contains(&Tag::Import)); + assert!(found_string.tags.contains(&Tag::DemangledSymbol)); + } + + #[test] + fn test_demangle_idempotent() { + let demangler = SymbolDemangler::new(); + let mut found_string = + create_test_string("_ZN4core3fmt5Write9write_str17h1234567890abcdefE"); + + demangler.demangle(&mut found_string); + let first_text = found_string.text.clone(); + let first_original = found_string.original_text.clone(); + + // Calling demangle again should not change anything + demangler.demangle(&mut found_string); + + assert_eq!(found_string.text, first_text); + assert_eq!(found_string.original_text, first_original); + // Should only have one DemangledSymbol tag + assert_eq!( + found_string + .tags + .iter() + .filter(|t| matches!(t, Tag::DemangledSymbol)) + .count(), + 1 + ); + } + + // C++ demangling tests + + #[test] + fn test_is_mangled_cpp_symbols() { + let demangler = SymbolDemangler::new(); + + // C++ Itanium ABI mangled symbols + assert!(demangler.is_mangled("_ZN3foo3barEv")); // foo::bar() + assert!(demangler.is_mangled("_Z3foov")); // foo() + assert!(demangler.is_mangled("_ZN9__gnu_cxx13new_allocatorIcE10deallocateEPcm")); + assert!(demangler.is_mangled("_ZNSt6vectorIiSaIiEE9push_backERKi")); + assert!(demangler.is_mangled("_ZTV5MyClass")); // vtable for MyClass + assert!(demangler.is_mangled("_ZTI5MyClass")); // typeinfo for MyClass + } + + #[test] + fn test_demangle_cpp_symbol() { + let demangler = SymbolDemangler::new(); + let mut found_string = create_test_string("_ZN3foo3barEv"); + + demangler.demangle(&mut found_string); + + // Should have been demangled + assert!(found_string.original_text.is_some()); + assert_eq!( + found_string.original_text.as_ref().unwrap(), + "_ZN3foo3barEv" + ); + assert!(found_string.tags.contains(&Tag::DemangledSymbol)); + // Demangled text should 
contain "foo" and "bar"
+        assert!(found_string.text.contains("foo"));
+        assert!(found_string.text.contains("bar"));
+    }
+
+    #[test]
+    fn test_try_demangle_cpp_success() {
+        let demangler = SymbolDemangler::new();
+
+        // Simple C++ function
+        let result = demangler.try_demangle("_Z3foov");
+        assert!(result.is_some());
+        let demangled = result.unwrap();
+        assert!(demangled.contains("foo"));
+
+        // Namespaced C++ function
+        let result = demangler.try_demangle("_ZN3foo3barEv");
+        assert!(result.is_some());
+        let demangled = result.unwrap();
+        assert!(demangled.contains("foo"));
+        assert!(demangled.contains("bar"));
+    }
+
+    #[test]
+    fn test_demangle_cpp_with_parameters() {
+        let demangler = SymbolDemangler::new();
+
+        // C++ function with int parameter: void foo(int)
+        let result = demangler.try_demangle("_Z3fooi");
+        assert!(result.is_some());
+        let demangled = result.unwrap();
+        assert!(demangled.contains("foo"));
+        assert!(demangled.contains("int"));
+    }
+
+    #[test]
+    fn test_demangle_cpp_template() {
+        let demangler = SymbolDemangler::new();
+
+        // C++ template: std::vector<int>
+        let result = demangler.try_demangle("_ZNSt6vectorIiSaIiEEC1Ev");
+        assert!(result.is_some());
+        let demangled = result.unwrap();
+        assert!(demangled.contains("vector"));
+    }
+
+    #[test]
+    fn test_cpp_symbol_in_found_string() {
+        let demangler = SymbolDemangler::new();
+        let mut found_string = create_test_string("_Z3fooi");
+        found_string.tags.push(Tag::Export);
+
+        demangler.demangle(&mut found_string);
+
+        // Should have been demangled and preserved existing tags
+        assert!(found_string.original_text.is_some());
+        assert!(found_string.tags.contains(&Tag::Export));
+        assert!(found_string.tags.contains(&Tag::DemangledSymbol));
+        assert!(found_string.text.contains("foo"));
+    }
+}
diff --git a/src/extraction/dedup.rs b/src/extraction/dedup.rs
index d7ad2b7..b25bae0 100644
--- a/src/extraction/dedup.rs
+++ b/src/extraction/dedup.rs
@@ -39,6 +39,9 @@ pub struct StringOccurrence {
     pub rva: Option<u64>,
    /// Section name where string was found
     pub section: Option<String>,
+    /// Original text before demangling (if applicable)
+    #[serde(skip_serializing_if = "Option::is_none", default)]
+    pub original_text: Option<String>,
     /// Extraction source type
     pub source: StringSource,
     /// Tags from this specific occurrence
@@ -80,7 +83,7 @@ pub struct StringOccurrence {
 pub fn deduplicate(
     strings: Vec<FoundString>,
     dedup_threshold: Option<usize>,
-    preserve_all_occurrences: bool,
+    _preserve_all_occurrences: bool,
 ) -> Vec<CanonicalString> {
     if strings.is_empty() {
         return Vec::new();
     }
@@ -109,22 +112,10 @@ pub fn deduplicate(
         // All strings in group have same encoding, use first one
         let encoding = found_strings[0].encoding;
-        let occurrences: Vec<StringOccurrence> = if preserve_all_occurrences {
-            // Store full occurrence metadata
-            found_strings
-                .into_iter()
-                .map(found_string_to_occurrence)
-                .collect()
-        } else {
-            // Store only the first occurrence as representative, but we still need
-            // the count for scoring, so we'll keep all but mark them as "count only"
-            // For now, we'll still store all occurrences but this could be optimized
-            // to store just a count field in the future
-            found_strings
-                .into_iter()
-                .map(found_string_to_occurrence)
-                .collect()
-        };
+        let occurrences: Vec<StringOccurrence> = found_strings
+            .into_iter()
+            .map(found_string_to_occurrence)
+            .collect();
 
         let merged_tags = merge_tags(&occurrences);
@@ -254,6 +245,7 @@ pub fn found_string_to_occurrence(fs: FoundString) -> StringOccurrence {
         offset: fs.offset,
         rva: fs.rva,
         section: fs.section,
+        original_text: fs.original_text,
         source: fs.source,
         original_tags: fs.tags,
         original_score: fs.score,
@@ -282,7 +274,7 @@ impl CanonicalString {
         FoundString {
             text: self.text.clone(),
-            original_text: None,
+            original_text: first_occurrence.original_text.clone(),
             encoding: self.encoding,
             offset: first_occurrence.offset,
             rva: first_occurrence.rva,
diff --git a/src/extraction/mod.rs b/src/extraction/mod.rs
index 19b5038..6b3a85f 100644
--- a/src/extraction/mod.rs
+++ b/src/extraction/mod.rs
@@ 
-123,6 +123,7 @@ //! let load_command_strings = extract_load_command_strings(&macho_data); //! ``` +use crate::classification::{SemanticClassifier, SymbolDemangler}; use crate::types::{ ContainerInfo, Encoding, FoundString, Result, SectionInfo, SectionType, StringSource, }; @@ -147,6 +148,20 @@ pub use utf16::{ extract_utf16_strings, }; +fn apply_semantic_enrichment(strings: &mut [FoundString]) { + let classifier = SemanticClassifier::new(); + let demangler = SymbolDemangler::new(); + for string in strings { + demangler.demangle(string); + let tags = classifier.classify(string); + for tag in tags { + if !string.tags.contains(&tag) { + string.tags.push(tag); + } + } + } +} + /// Configuration for string extraction /// /// Controls various aspects of the extraction process including minimum/maximum @@ -521,6 +536,9 @@ impl StringExtractor for BasicExtractor { } } + // Apply demangling and semantic classification before deduplication + apply_semantic_enrichment(&mut all_strings); + // Apply deduplication if enabled if config.enable_deduplication { let canonical_strings = deduplicate( @@ -625,6 +643,9 @@ impl StringExtractor for BasicExtractor { } } + // Apply demangling and semantic classification before deduplication + apply_semantic_enrichment(&mut all_strings); + // Apply deduplication if enabled, otherwise convert each string to a canonical form if config.enable_deduplication { Ok(deduplicate( diff --git a/src/types.rs b/src/types.rs index 5347f33..b8cdfb0 100644 --- a/src/types.rs +++ b/src/types.rs @@ -32,6 +32,8 @@ pub enum Tag { FormatString, #[serde(rename = "user-agent-ish")] UserAgent, + #[serde(rename = "demangled")] + DemangledSymbol, Import, Export, Version, diff --git a/tests/snapshots/classification_integration__classification_snapshots.snap b/tests/snapshots/classification_integration__classification_snapshots.snap index 21b9b32..f110d38 100644 --- a/tests/snapshots/classification_integration__classification_snapshots.snap +++ 
b/tests/snapshots/classification_integration__classification_snapshots.snap @@ -1,6 +1,5 @@ --- source: tests/classification_integration.rs -assertion_line: 150 expression: snapshot --- [