diff --git a/src/readonly.rs b/src/readonly.rs index e3d389355..0516a5cbb 100644 --- a/src/readonly.rs +++ b/src/readonly.rs @@ -1,3 +1,9 @@ +mod context; +mod document; mod tree; +mod xpath; -pub use self::tree::RoNode; +pub use context::RoContext; +pub use document::RoDocument; +pub use tree::RoNode; +pub use xpath::RoObject; diff --git a/src/readonly/context.rs b/src/readonly/context.rs new file mode 100644 index 000000000..7318cb9b5 --- /dev/null +++ b/src/readonly/context.rs @@ -0,0 +1,67 @@ +use crate::{ + readonly::{RoDocument, RoNode, RoObject}, + xpath::Context, +}; + +/// A read-only libxml2 Context +#[derive(Clone)] +pub struct RoContext(Context); + +// SAFETY: we promise to only provide methods that need read-only access. NOTE(review): the derived `Clone` is reachable through a shared reference; if cloning the wrapped `Context` touches non-atomic state (e.g. an `Rc` count), these `Sync`/`Send` impls would be unsound - TODO confirm. +unsafe impl Sync for RoContext {} +unsafe impl Send for RoContext {} + +impl RoContext { + /// create a read-only xpath context for a document; fails with `Err(())` when `Context::new` fails + pub fn new(owner: &RoDocument) -> Result<Self, ()> { + let context = Context::new(&owner.0)?; + Ok(Self(context)) + } + + /// evaluate an xpath without an explicit context node (delegates to `Context::evaluate`) + pub fn evaluate(&self, xpath: &str) -> Result<RoObject, ()> { + self.0.evaluate(xpath).map(RoObject) + } + + /// evaluate an xpath on a context RoNode given by reference (copied out and forwarded to `Context::node_evaluate_readonly`) + pub fn node_evaluate(&self, xpath: &str, node: &RoNode) -> Result<RoObject, ()> { + self.0.node_evaluate_readonly(xpath, *node).map(RoObject) + } + + /// evaluate an xpath on a context RoNode given by value + pub fn node_evaluate_readonly(&self, xpath: &str, node: RoNode) -> Result<RoObject, ()> { + self.0.node_evaluate_readonly(xpath, node).map(RoObject) + } + + /// find nodes via xpath, at a specified node or the document root + pub fn findnodes(&self, xpath: &str, node_opt: Option<&RoNode>) -> Result<Vec<RoNode>, ()> { + // Note: we cannot implement this as `self.0.findnodes(...)` because that + // method takes `&mut self`. + let evaluated = if let Some(node) = node_opt { + self.node_evaluate(xpath, node)? + } else { + self.evaluate(xpath)? 
+ }; + Ok(evaluated.get_nodes_as_vec()) + } + + /// find literal values via xpath, at a specified node or the document root + pub fn findvalues(&self, xpath: &str, node_opt: Option<&RoNode>) -> Result<Vec<String>, ()> { + let evaluated = if let Some(node) = node_opt { + self.node_evaluate(xpath, node)? + } else { + self.evaluate(xpath)? + }; + Ok(evaluated.get_nodes_as_str()) + } + + /// find a literal value via xpath (the `to_string()` rendering of the result), at a specified node or the document root + pub fn findvalue(&self, xpath: &str, node_opt: Option<&RoNode>) -> Result<String, ()> { + let evaluated = if let Some(node) = node_opt { + self.node_evaluate(xpath, node)? + } else { + self.evaluate(xpath)? + }; + Ok(evaluated.to_string()) + } +} diff --git a/src/readonly/document.rs b/src/readonly/document.rs new file mode 100644 index 000000000..71228358c --- /dev/null +++ b/src/readonly/document.rs @@ -0,0 +1,16 @@ +use crate::{readonly::RoNode, tree::Document}; + +/// A read-only libxml2 Document +#[derive(Clone)] +pub struct RoDocument(pub(crate) Document); + +// SAFETY: we promise to only provide methods that need read-only access. NOTE(review): the derived `Clone` clones the wrapped `Document`; if that handle is `Rc`-backed, clones or drops on several threads would race on the non-atomic count - TODO confirm. 
+unsafe impl Sync for RoDocument {} +unsafe impl Send for RoDocument {} + +impl RoDocument { + /// Get the root element of the document as a read-only node (delegates to `Document::get_root_readonly`) + pub fn get_root_element(&self) -> Option<RoNode> { + self.0.get_root_readonly() + } +} diff --git a/src/readonly/tree.rs b/src/readonly/tree.rs index 06bcc8fd0..0b083981d 100644 --- a/src/readonly/tree.rs +++ b/src/readonly/tree.rs @@ -6,9 +6,10 @@ use std::str; use crate::bindings::*; use crate::c_helpers::*; +use crate::readonly::{RoContext, RoDocument}; +use crate::tree::Document; use crate::tree::namespace::Namespace; use crate::tree::nodetype::NodeType; -use crate::tree::Document; use crate::xpath::Context; /// Lightweight struct for read-only parallel processing @@ -518,6 +519,19 @@ impl RoNode { Ok(evaluated.get_readonly_nodes_as_vec()) } + /// find read-only nodes via xpath, at the specified node and a given document (builds a new `RoContext` for `owner` on every call) + pub fn findnodes_readonly(self, xpath: &str, owner: &RoDocument) -> Result<Vec<RoNode>, ()> { + let context = RoContext::new(owner)?; + let evaluated = context.node_evaluate_readonly(xpath, self)?; + Ok(evaluated.get_nodes_as_vec()) + } + + /// find String values via xpath, at a specified node and a given document (builds a new `RoContext` for `owner` on every call) + pub fn findvalues_readonly(&self, xpath: &str, owner: &RoDocument) -> Result<Vec<String>, ()> { + let context = RoContext::new(owner)?; + context.findvalues(xpath, Some(self)) + } + /// Read-only nodes are always linked pub fn is_unlinked(self) -> bool { false diff --git a/src/readonly/xpath.rs b/src/readonly/xpath.rs new file mode 100644 index 000000000..9cb7253a1 --- /dev/null +++ b/src/readonly/xpath.rs @@ -0,0 +1,35 @@ +use std::fmt; + +use crate::{readonly::RoNode, xpath::Object}; + +/// Read-only version of the xpath object. +#[derive(Debug)] +pub struct RoObject(pub(crate) Object); + +// SAFETY: we promise to only provide methods that need read-only access. 
+unsafe impl Sync for RoObject {} +unsafe impl Send for RoObject {} + +impl RoObject { + /// returns the result set as a vector of `RoNode` objects + pub fn get_nodes_as_vec(&self) -> Vec<RoNode> { + self.0.get_readonly_nodes_as_vec() + } + + /// returns the result set as a vector of Strings + pub fn get_nodes_as_str(&self) -> Vec<String> { + self.0.get_nodes_as_str() + } + + /// get the number of nodes in the result set + pub fn get_number_of_nodes(&self) -> usize { + self.0.get_number_of_nodes() + } +} + +impl fmt::Display for RoObject { + /// use if the XPath used was meant to return a string, such as string(//foo/@attr); forwards to the wrapped `Object`'s `fmt` + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} diff --git a/src/tree/document.rs b/src/tree/document.rs index ca33df530..b584fd85d 100644 --- a/src/tree/document.rs +++ b/src/tree/document.rs @@ -10,7 +10,7 @@ use std::rc::{Rc, Weak}; use std::str; use crate::bindings::*; -use crate::readonly::RoNode; +use crate::readonly::{RoDocument, RoNode}; use crate::tree::node::Node; pub(crate) type DocumentRef = Rc>; @@ -100,6 +100,11 @@ impl Document { } } + /// Consume this document and wrap it in a read-only `RoDocument`. + pub fn into_readonly(self) -> RoDocument { + RoDocument(self) + } + /// Obtain the underlying libxml2 `xmlDocPtr` for this Document pub fn doc_ptr(&self) -> xmlDocPtr { self.0.borrow().doc_ptr diff --git a/tests/xpath_readonly_tests.rs b/tests/xpath_readonly_tests.rs new file mode 100644 index 000000000..f55daa6b1 --- /dev/null +++ b/tests/xpath_readonly_tests.rs @@ -0,0 +1,230 @@ +//! read-only xpath module tests +//! 
+ +use libxml::parser::Parser; +use libxml::readonly::RoContext; + +#[test] +/// Test that evaluating an xpath expression yields the correct number of nodes +fn xpath_result_number_correct() { + let parser = Parser::default(); + let doc_result = parser.parse_file("tests/resources/file01.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + let context = RoContext::new(&doc).unwrap(); + + let result1 = context.evaluate("//child").unwrap(); + assert_eq!(result1.get_number_of_nodes(), 2); + assert_eq!(result1.get_nodes_as_vec().len(), 2); + + let result2 = context.evaluate("//nonexistent").unwrap(); + assert_eq!(result2.get_number_of_nodes(), 0); + assert_eq!(result2.get_nodes_as_vec().len(), 0); +} + +// #[test] +// /// Test xpath with namespaces (disabled; presumably because it needs `register_namespace` and `recursively_remove_namespaces`, which the read-only types used here may not offer - TODO confirm) +// fn xpath_with_namespaces() { +// let parser = Parser::default(); +// let doc_result = parser.parse_file("tests/resources/simple_namespaces.xml"); +// assert!(doc_result.is_ok()); + +// let doc = doc_result.unwrap().into_readonly(); +// let context = RoContext::new(&doc).unwrap(); +// assert!( +// context +// .register_namespace("h", "http://example.com/ns/hello") +// .is_ok() +// ); +// assert!( +// context +// .register_namespace("f", "http://example.com/ns/farewell") +// .is_ok() +// ); +// assert!( +// context +// .register_namespace("r", "http://example.com/ns/root") +// .is_ok() +// ); +// let result_h_td = context.evaluate("//h:td").unwrap(); +// assert_eq!(result_h_td.get_number_of_nodes(), 3); +// assert_eq!(result_h_td.get_nodes_as_vec().len(), 3); + +// let result_h_table = context.evaluate("//h:table").unwrap(); +// assert_eq!(result_h_table.get_number_of_nodes(), 2); +// assert_eq!(result_h_table.get_nodes_as_vec().len(), 2); + +// let result_f_footer = context.evaluate("//f:footer").unwrap(); +// assert_eq!(result_f_footer.get_number_of_nodes(), 2); +// assert_eq!(result_f_footer.get_nodes_as_vec().len(), 2); + +// let result_r = context.evaluate("//r:*").unwrap(); +// 
 assert_eq!(result_r.get_number_of_nodes(), 1); +// assert_eq!(result_r.get_nodes_as_vec().len(), 1); + +// let result_h = context.evaluate("//h:*").unwrap(); +// assert_eq!(result_h.get_number_of_nodes(), 7); +// assert_eq!(result_h.get_nodes_as_vec().len(), 7); + +// let result_f = context.evaluate("//f:*").unwrap(); +// assert_eq!(result_f.get_number_of_nodes(), 4); +// assert_eq!(result_f.get_nodes_as_vec().len(), 4); + +// let result_all = context.evaluate("//*").unwrap(); +// assert_eq!(result_all.get_number_of_nodes(), 12); +// assert_eq!(result_all.get_nodes_as_vec().len(), 12); + +// let result_h_table = context.evaluate("//table").unwrap(); +// assert_eq!(result_h_table.get_number_of_nodes(), 0); +// assert_eq!(result_h_table.get_nodes_as_vec().len(), 0); + +// assert!(doc.as_node().recursively_remove_namespaces().is_ok()); +// let result_h_table = context.evaluate("//table").unwrap(); +// assert_eq!(result_h_table.get_number_of_nodes(), 2); +// assert_eq!(result_h_table.get_nodes_as_vec().len(), 2); +// } + +#[test] +/// Test that an xpath expression finds the correct node and +/// that the class names are interpreted correctly. 
+fn class_names() { + let parser = Parser::default_html(); + let doc_result = parser.parse_file("tests/resources/file02.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + let context = RoContext::new(&doc).unwrap(); + + let p_result = context.evaluate("/html/body/p"); + assert!(p_result.is_ok()); + let p = p_result.unwrap(); + assert_eq!(p.get_number_of_nodes(), 1); + + let node = &p.get_nodes_as_vec()[0]; + let names = node.get_class_names(); + assert_eq!(names.len(), 2); + assert!(names.contains("paragraph")); + assert!(names.contains("important")); + assert!(!names.contains("nonsense")); +} + +#[test] +/// Test that an xpath string() function is processed correctly +fn xpath_string_function() { + let parser = Parser::default_html(); + let doc_result = parser.parse_file("tests/resources/file01.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + let context = RoContext::new(&doc).unwrap(); + + let p_result = context.evaluate("string(//root//child[1]/@attribute)"); + assert!(p_result.is_ok()); + let p = p_result.unwrap(); + // Not really a node: the string() result is expected to report zero nodes + assert_eq!(p.get_number_of_nodes(), 0); + let content = p.to_string(); + assert_eq!(content, "value"); +} + +#[test] +/// Test that the dual findnodes interfaces (on the context and on the node) are operational +fn findnodes_interfaces() { + let parser = Parser::default_html(); + let doc_result = parser.parse_file("tests/resources/file02.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + + // Xpath interface + let context = RoContext::new(&doc).unwrap(); + let body = context.evaluate("/html/body").unwrap().get_nodes_as_vec(); + let p_result = context.findnodes("p", body.first()); + assert!(p_result.is_ok()); + let p = p_result.unwrap(); + assert_eq!(p.len(), 1); + + // Node interface + let body_node = body.first().unwrap(); + let p2_result = body_node.findnodes_readonly("p", &doc); + assert!(p2_result.is_ok()); + let p2 = p2_result.unwrap(); + 
 assert_eq!(p2.len(), 1); +} + +#[test] +/// Test that a cloned RoContext can still evaluate xpath expressions +fn safe_context_clone() { + let parser = Parser::default_html(); + let doc_result = parser.parse_file("tests/resources/file02.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + + // Xpath interface + let context = RoContext::new(&doc).unwrap(); + let body = context.evaluate("/html/body").unwrap().get_nodes_as_vec(); + assert_eq!(body.len(), 1); + let context2 = context.clone(); + let body2 = context2.evaluate("/html/body").unwrap().get_nodes_as_vec(); + assert_eq!(body2.len(), 1); +} + +// #[test] (disabled: exercises the mutable `libxml::xpath::Context` and `unlink_node` rather than the read-only types) +// fn cleanup_safely_unlinked_xpath_nodes() { +// let p = Parser::default(); +// let doc_result = p.parse_string(r##" +// +// +// +// +// +// +// +// +// "##); +// assert!(doc_result.is_ok(), "successfully parsed SVG snippet"); +// let doc = doc_result.unwrap(); +// let mut xpath = libxml::xpath::Context::new(&doc).unwrap(); +// xpath +// .register_namespace("svg", "http://www.w3.org/2000/svg") +// .unwrap(); +// for mut k in xpath.findnodes("//svg:c", None).unwrap() { +// k.unlink_node(); +// } +// drop(xpath); +// drop(doc); +// assert!(true, "Drops went OK."); +// } + +#[test] // checks `findvalues` on the context and `findvalues_readonly` on the node against the same expected ids +fn xpath_find_string_values() { + let parser = Parser::default(); + let doc_result = parser.parse_file("tests/resources/ids.xml"); + assert!(doc_result.is_ok()); + let doc = doc_result.unwrap().into_readonly(); + let xpath = RoContext::new(&doc).unwrap(); + match doc.get_root_element() { + Some(root) => { + let tests = root.get_child_elements(); + let empty_test = &tests[0]; + let ids_test = &tests[1]; + let empty_values = xpath.findvalues(".//@xml:id", Some(empty_test)); + assert_eq!(empty_values, Ok(Vec::new())); + let ids_values = xpath.findvalues(".//@xml:id", Some(ids_test)); + let expected_ids = Ok(vec![ + String::from("start"), + String::from("mid"), + String::from("end"), + ]); + assert_eq!(ids_values, expected_ids); + let node_ids_values = 
 ids_test.findvalues_readonly(".//@xml:id", &doc); + assert_eq!(node_ids_values, expected_ids); + } + _ => { + panic!("Document fails to obtain root!"); + } + } +}