From 5a077682700b995d1f1a25aef60afcfce71a8be5 Mon Sep 17 00:00:00 2001 From: Maarten Deprez Date: Fri, 20 Mar 2026 15:49:21 +0100 Subject: [PATCH 1/2] Add support for multi-threaded xpath evaluation. --- src/readonly.rs | 8 +++++++- src/readonly/context.rs | 25 +++++++++++++++++++++++++ src/readonly/document.rs | 16 ++++++++++++++++ src/readonly/tree.rs | 10 +++++++++- src/readonly/xpath.rs | 16 ++++++++++++++++ src/tree/document.rs | 7 ++++++- 6 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 src/readonly/context.rs create mode 100644 src/readonly/document.rs create mode 100644 src/readonly/xpath.rs diff --git a/src/readonly.rs b/src/readonly.rs index e3d3893553..0516a5cbb9 100644 --- a/src/readonly.rs +++ b/src/readonly.rs @@ -1,3 +1,9 @@ +mod context; +mod document; mod tree; +mod xpath; -pub use self::tree::RoNode; +pub use context::RoContext; +pub use document::RoDocument; +pub use tree::RoNode; +pub use xpath::RoObject; diff --git a/src/readonly/context.rs b/src/readonly/context.rs new file mode 100644 index 0000000000..8b571d43d0 --- /dev/null +++ b/src/readonly/context.rs @@ -0,0 +1,25 @@ +use crate::{ + readonly::{RoDocument, RoNode, RoObject}, + xpath::Context, +}; + +/// A read-only libxml2 Context +#[derive(Clone)] +pub struct RoContext(Context); + +// SAFETY: we promise to only provide methods that need read-only access. 
+unsafe impl Sync for RoContext {} +unsafe impl Send for RoContext {} + +impl RoContext { + /// create a read-only xpath context for a document + pub fn new(owner: &RoDocument) -> Result { + let context = Context::new(&owner.0)?; + Ok(Self(context)) + } + + /// evaluate an xpath on a context RoNode + pub fn node_evaluate_readonly(&self, xpath: &str, node: RoNode) -> Result { + self.0.node_evaluate_readonly(xpath, node).map(RoObject) + } +} diff --git a/src/readonly/document.rs b/src/readonly/document.rs new file mode 100644 index 0000000000..d261d62c62 --- /dev/null +++ b/src/readonly/document.rs @@ -0,0 +1,16 @@ +use crate::{readonly::RoNode, tree::Document}; + +/// A read-only libxml2 Document +#[derive(Clone)] +pub struct RoDocument(pub(crate) Document); + +// SAFETY: we promise to only provide methods that need read-only access. +unsafe impl Sync for RoDocument {} +unsafe impl Send for RoDocument {} + +impl RoDocument { + /// Get the root element of the document (read-only) + pub fn get_root_readonly(&self) -> Option { + self.0.get_root_readonly() + } +} diff --git a/src/readonly/tree.rs b/src/readonly/tree.rs index 06bcc8fd0c..a390981f64 100644 --- a/src/readonly/tree.rs +++ b/src/readonly/tree.rs @@ -6,9 +6,10 @@ use std::str; use crate::bindings::*; use crate::c_helpers::*; +use crate::readonly::{RoContext, RoDocument}; +use crate::tree::Document; use crate::tree::namespace::Namespace; use crate::tree::nodetype::NodeType; -use crate::tree::Document; use crate::xpath::Context; /// Lightweight struct for read-only parallel processing @@ -518,6 +519,13 @@ impl RoNode { Ok(evaluated.get_readonly_nodes_as_vec()) } + /// find read-only nodes via xpath, at the specified node and a given document + pub fn findnodes_readonly(self, xpath: &str, owner: &RoDocument) -> Result, ()> { + let context = RoContext::new(owner)?; + let evaluated = context.node_evaluate_readonly(xpath, self)?; + Ok(evaluated.get_readonly_nodes_as_vec()) + } + /// Read-only nodes are always 
linked pub fn is_unlinked(self) -> bool { false diff --git a/src/readonly/xpath.rs b/src/readonly/xpath.rs new file mode 100644 index 0000000000..aae95a96e3 --- /dev/null +++ b/src/readonly/xpath.rs @@ -0,0 +1,16 @@ +use crate::{readonly::RoNode, xpath::Object}; + +/// Read-only version of the xpath object. +#[derive(Debug)] +pub struct RoObject(pub(crate) Object); + +// SAFETY: we promise to only provide methods that need read-only access. +unsafe impl Sync for RoObject {} +unsafe impl Send for RoObject {} + +impl RoObject { + /// returns the result set as a vector of `RoNode` objects + pub fn get_readonly_nodes_as_vec(&self) -> Vec { + self.0.get_readonly_nodes_as_vec() + } +} diff --git a/src/tree/document.rs b/src/tree/document.rs index ca33df530a..b584fd85d6 100644 --- a/src/tree/document.rs +++ b/src/tree/document.rs @@ -10,7 +10,7 @@ use std::rc::{Rc, Weak}; use std::str; use crate::bindings::*; -use crate::readonly::RoNode; +use crate::readonly::{RoDocument, RoNode}; use crate::tree::node::Node; pub(crate) type DocumentRef = Rc>; @@ -100,6 +100,11 @@ impl Document { } } + /// Create a readonly version of this document. + pub fn into_readonly(self) -> RoDocument { + RoDocument(self) + } + /// Obtain the underlying libxml2 `xmlDocPtr` for this Document pub fn doc_ptr(&self) -> xmlDocPtr { self.0.borrow().doc_ptr From a0479dfa7ccf7f0119297b2b03eae73c0c82af00 Mon Sep 17 00:00:00 2001 From: Maarten Deprez Date: Sun, 22 Mar 2026 12:37:58 +0100 Subject: [PATCH 2/2] Add `xpath_readonly_tests` based on `xpath_tests` and add the necessary readonly methods. 
--- src/readonly/context.rs | 42 +++++++ src/readonly/document.rs | 4 +- src/readonly/tree.rs | 8 +- src/readonly/xpath.rs | 21 +++- tests/xpath_readonly_tests.rs | 230 ++++++++++++++++++++++++++++++++++ 5 files changed, 301 insertions(+), 4 deletions(-) create mode 100644 tests/xpath_readonly_tests.rs diff --git a/src/readonly/context.rs b/src/readonly/context.rs index 8b571d43d0..7318cb9b50 100644 --- a/src/readonly/context.rs +++ b/src/readonly/context.rs @@ -18,8 +18,50 @@ impl RoContext { Ok(Self(context)) } + /// evaluate an xpath + pub fn evaluate(&self, xpath: &str) -> Result { + self.0.evaluate(xpath).map(RoObject) + } + + /// evaluate an xpath on a context Node + pub fn node_evaluate(&self, xpath: &str, node: &RoNode) -> Result { + self.0.node_evaluate_readonly(xpath, *node).map(RoObject) + } + /// evaluate an xpath on a context RoNode pub fn node_evaluate_readonly(&self, xpath: &str, node: RoNode) -> Result { self.0.node_evaluate_readonly(xpath, node).map(RoObject) } + + /// find nodes via xpath, at a specified node or the document root + pub fn findnodes(&self, xpath: &str, node_opt: Option<&RoNode>) -> Result, ()> { + // Note: we cannot implement this as `self.0.findnodes(...)` because that + // method takes `&mut self`. + let evaluated = if let Some(node) = node_opt { + self.node_evaluate(xpath, node)? + } else { + self.evaluate(xpath)? + }; + Ok(evaluated.get_nodes_as_vec()) + } + + /// find literal values via xpath, at a specified node or the document root + pub fn findvalues(&self, xpath: &str, node_opt: Option<&RoNode>) -> Result, ()> { + let evaluated = if let Some(node) = node_opt { + self.node_evaluate(xpath, node)? + } else { + self.evaluate(xpath)? + }; + Ok(evaluated.get_nodes_as_str()) + } + + /// find a literal value via xpath, at a specified node or the document root + pub fn findvalue(&self, xpath: &str, node_opt: Option<&RoNode>) -> Result { + let evaluated = if let Some(node) = node_opt { + self.node_evaluate(xpath, node)?
+ } else { + self.evaluate(xpath)? + }; + Ok(evaluated.to_string()) + } } diff --git a/src/readonly/document.rs b/src/readonly/document.rs index d261d62c62..71228358c3 100644 --- a/src/readonly/document.rs +++ b/src/readonly/document.rs @@ -9,8 +9,8 @@ unsafe impl Sync for RoDocument {} unsafe impl Send for RoDocument {} impl RoDocument { - /// Get the root element of the document (read-only) - pub fn get_root_readonly(&self) -> Option { + /// Get the root element of the document + pub fn get_root_element(&self) -> Option { self.0.get_root_readonly() } } diff --git a/src/readonly/tree.rs b/src/readonly/tree.rs index a390981f64..0b083981d2 100644 --- a/src/readonly/tree.rs +++ b/src/readonly/tree.rs @@ -523,7 +523,13 @@ impl RoNode { pub fn findnodes_readonly(self, xpath: &str, owner: &RoDocument) -> Result, ()> { let context = RoContext::new(owner)?; let evaluated = context.node_evaluate_readonly(xpath, self)?; - Ok(evaluated.get_readonly_nodes_as_vec()) + Ok(evaluated.get_nodes_as_vec()) + } + + /// find String values via xpath, at a specified node and a given document + pub fn findvalues_readonly(&self, xpath: &str, owner: &RoDocument) -> Result, ()> { + let context = RoContext::new(owner)?; + context.findvalues(xpath, Some(self)) } /// Read-only nodes are always linked diff --git a/src/readonly/xpath.rs b/src/readonly/xpath.rs index aae95a96e3..9cb7253a17 100644 --- a/src/readonly/xpath.rs +++ b/src/readonly/xpath.rs @@ -1,3 +1,5 @@ +use std::fmt; + use crate::{readonly::RoNode, xpath::Object}; /// Read-only version of the xpath object. 
@@ -10,7 +12,24 @@ unsafe impl Send for RoObject {} impl RoObject { /// returns the result set as a vector of `RoNode` objects - pub fn get_readonly_nodes_as_vec(&self) -> Vec { + pub fn get_nodes_as_vec(&self) -> Vec { self.0.get_readonly_nodes_as_vec() } + + /// returns the result set as a vector of Strings + pub fn get_nodes_as_str(&self) -> Vec { + self.0.get_nodes_as_str() + } + + /// get the number of nodes in the result set + pub fn get_number_of_nodes(&self) -> usize { + self.0.get_number_of_nodes() + } +} + +impl fmt::Display for RoObject { + /// use if the XPath used was meant to return a string, such as string(//foo/@attr) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } } diff --git a/tests/xpath_readonly_tests.rs b/tests/xpath_readonly_tests.rs new file mode 100644 index 0000000000..f55daa6b18 --- /dev/null +++ b/tests/xpath_readonly_tests.rs @@ -0,0 +1,230 @@ +//! xpath module tests +//! + +use libxml::parser::Parser; +use libxml::readonly::RoContext; + +#[test] +/// Test the evaluation of an xpath expression yields the correct number of nodes +fn xpath_result_number_correct() { + let parser = Parser::default(); + let doc_result = parser.parse_file("tests/resources/file01.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + let context = RoContext::new(&doc).unwrap(); + + let result1 = context.evaluate("//child").unwrap(); + assert_eq!(result1.get_number_of_nodes(), 2); + assert_eq!(result1.get_nodes_as_vec().len(), 2); + + let result2 = context.evaluate("//nonexistent").unwrap(); + assert_eq!(result2.get_number_of_nodes(), 0); + assert_eq!(result2.get_nodes_as_vec().len(), 0); +} + +// #[test] +// /// Test xpath with namespaces +// fn xpath_with_namespaces() { +// let parser = Parser::default(); +// let doc_result = parser.parse_file("tests/resources/simple_namespaces.xml"); +// assert!(doc_result.is_ok()); + +// let doc = doc_result.unwrap().into_readonly(); +// let context = 
RoContext::new(&doc).unwrap(); +// assert!( +// context +// .register_namespace("h", "http://example.com/ns/hello") +// .is_ok() +// ); +// assert!( +// context +// .register_namespace("f", "http://example.com/ns/farewell") +// .is_ok() +// ); +// assert!( +// context +// .register_namespace("r", "http://example.com/ns/root") +// .is_ok() +// ); +// let result_h_td = context.evaluate("//h:td").unwrap(); +// assert_eq!(result_h_td.get_number_of_nodes(), 3); +// assert_eq!(result_h_td.get_nodes_as_vec().len(), 3); + +// let result_h_table = context.evaluate("//h:table").unwrap(); +// assert_eq!(result_h_table.get_number_of_nodes(), 2); +// assert_eq!(result_h_table.get_nodes_as_vec().len(), 2); + +// let result_f_footer = context.evaluate("//f:footer").unwrap(); +// assert_eq!(result_f_footer.get_number_of_nodes(), 2); +// assert_eq!(result_f_footer.get_nodes_as_vec().len(), 2); + +// let result_r = context.evaluate("//r:*").unwrap(); +// assert_eq!(result_r.get_number_of_nodes(), 1); +// assert_eq!(result_r.get_nodes_as_vec().len(), 1); + +// let result_h = context.evaluate("//h:*").unwrap(); +// assert_eq!(result_h.get_number_of_nodes(), 7); +// assert_eq!(result_h.get_nodes_as_vec().len(), 7); + +// let result_f = context.evaluate("//f:*").unwrap(); +// assert_eq!(result_f.get_number_of_nodes(), 4); +// assert_eq!(result_f.get_nodes_as_vec().len(), 4); + +// let result_all = context.evaluate("//*").unwrap(); +// assert_eq!(result_all.get_number_of_nodes(), 12); +// assert_eq!(result_all.get_nodes_as_vec().len(), 12); + +// let result_h_table = context.evaluate("//table").unwrap(); +// assert_eq!(result_h_table.get_number_of_nodes(), 0); +// assert_eq!(result_h_table.get_nodes_as_vec().len(), 0); + +// assert!(doc.as_node().recursively_remove_namespaces().is_ok()); +// let result_h_table = context.evaluate("//table").unwrap(); +// assert_eq!(result_h_table.get_number_of_nodes(), 2); +// assert_eq!(result_h_table.get_nodes_as_vec().len(), 2); +// } + +#[test] +/// 
Test that an xpath expression finds the correct node and +/// that the class names are interpreted correctly. +fn class_names() { + let parser = Parser::default_html(); + let doc_result = parser.parse_file("tests/resources/file02.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + let context = RoContext::new(&doc).unwrap(); + + let p_result = context.evaluate("/html/body/p"); + assert!(p_result.is_ok()); + let p = p_result.unwrap(); + assert_eq!(p.get_number_of_nodes(), 1); + + let node = &p.get_nodes_as_vec()[0]; + let names = node.get_class_names(); + assert_eq!(names.len(), 2); + assert!(names.contains("paragraph")); + assert!(names.contains("important")); + assert!(!names.contains("nonsense")); +} + +#[test] +/// Test that an xpath string() function is processed correctly +fn xpath_string_function() { + let parser = Parser::default_html(); + let doc_result = parser.parse_file("tests/resources/file01.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + let context = RoContext::new(&doc).unwrap(); + + let p_result = context.evaluate("string(//root//child[1]/@attribute)"); + assert!(p_result.is_ok()); + let p = p_result.unwrap(); + // Not a node really + assert_eq!(p.get_number_of_nodes(), 0); + let content = p.to_string(); + assert_eq!(content, "value"); +} + +#[test] +/// Test that the dual findnodes interfaces are operational +fn findnodes_interfaces() { + let parser = Parser::default_html(); + let doc_result = parser.parse_file("tests/resources/file02.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + + // Xpath interface + let context = RoContext::new(&doc).unwrap(); + let body = context.evaluate("/html/body").unwrap().get_nodes_as_vec(); + let p_result = context.findnodes("p", body.first()); + assert!(p_result.is_ok()); + let p = p_result.unwrap(); + assert_eq!(p.len(), 1); + + // Node interface + let body_node = body.first().unwrap(); + let p2_result
= body_node.findnodes_readonly("p", &doc); + assert!(p2_result.is_ok()); + let p2 = p2_result.unwrap(); + assert_eq!(p2.len(), 1); +} + +#[test] +/// Clone is safe on Context objects +fn safe_context_clone() { + let parser = Parser::default_html(); + let doc_result = parser.parse_file("tests/resources/file02.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + + // Xpath interface + let context = RoContext::new(&doc).unwrap(); + let body = context.evaluate("/html/body").unwrap().get_nodes_as_vec(); + assert_eq!(body.len(), 1); + let context2 = context.clone(); + let body2 = context2.evaluate("/html/body").unwrap().get_nodes_as_vec(); + assert_eq!(body2.len(), 1); +} + +// #[test] +// fn cleanup_safely_unlinked_xpath_nodes() { +// let p = Parser::default(); +// let doc_result = p.parse_string(r##" +// +// +// +// +// +// +// +// +// "##); +// assert!(doc_result.is_ok(), "successfully parsed SVG snippet"); +// let doc = doc_result.unwrap(); +// let mut xpath = libxml::xpath::Context::new(&doc).unwrap(); +// xpath +// .register_namespace("svg", "http://www.w3.org/2000/svg") +// .unwrap(); +// for mut k in xpath.findnodes("//svg:c", None).unwrap() { +// k.unlink_node(); +// } +// drop(xpath); +// drop(doc); +// assert!(true, "Drops went OK."); +// } + +#[test] +fn xpath_find_string_values() { + let parser = Parser::default(); + let doc_result = parser.parse_file("tests/resources/ids.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + let xpath = RoContext::new(&doc).unwrap(); + match doc.get_root_element() { + Some(root) => { + let tests = root.get_child_elements(); + let empty_test = &tests[0]; + let ids_test = &tests[1]; + let empty_values = xpath.findvalues(".//@xml:id", Some(empty_test)); + assert_eq!(empty_values, Ok(Vec::new())); + let ids_values = xpath.findvalues(".//@xml:id", Some(ids_test)); + let expected_ids = Ok(vec![ + String::from("start"), + String::from("mid"), + 
String::from("end"), + ]); + assert_eq!(ids_values, expected_ids); + let node_ids_values = ids_test.findvalues_readonly(".//@xml:id", &doc); + assert_eq!(node_ids_values, expected_ids); + } + _ => { + panic!("Document fails to obtain root!"); + } + } +}