diff --git a/src/arena.rs b/src/arena.rs index 9695707..b29f9b1 100644 --- a/src/arena.rs +++ b/src/arena.rs @@ -1,46 +1,70 @@ -use crate::{Attrs, Expr, ExprKey, ExprPtr, ExprRef, Value}; -use rustc_hash::FxBuildHasher; +use crate::typing::{ArgsRef, Record, Type, TypeRef}; +use crate::{Attrs, ExprKey, ExprPtr, ExprRef, Field, RecRef, StrRef, Value, VecRef}; +use rustc_hash::{FxBuildHasher, FxHashMap}; use serde::Serialize; +use std::collections::hash_map::Entry; use std::hash::BuildHasher; -#[derive(Debug, Serialize)] -struct Slot { - attrs: Attrs, - value: Value, -} - -/// An arena-based allocator for EventQL expressions. +/// An arena-based allocator for interning strings. /// -/// The `ExprArena` provides a memory-efficient way to store and manage AST nodes -/// by using a flat vector and returning lightweight [`ExprRef`] handles. +/// Deduplicates strings by hash and returns lightweight [`StrRef`] handles for O(1) lookups. #[derive(Default, Serialize)] -pub struct ExprArena { +pub struct StringArena { #[serde(skip_serializing)] hasher: FxBuildHasher, - slots: Vec, + + cache: FxHashMap, + slots: Vec, +} + +impl StringArena { + /// Interns a string and returns its [`StrRef`]. Returns the existing reference if already interned. + pub fn alloc(&mut self, value: &str) -> StrRef { + match self.cache.entry(self.hasher.hash_one(value)) { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + let key = StrRef(self.slots.len()); + entry.insert(key); + self.slots.push(value.to_owned()); + + key + } + } + } + + /// Retrieves the string associated with the given [`StrRef`]. + pub fn get(&self, key: StrRef) -> &str { + &self.slots[key.0] + } + + /// Compares two interned strings for case-insensitive ASCII equality. + pub fn eq_ignore_ascii_case(&self, ka: StrRef, kb: StrRef) -> bool { + self.get(ka).eq_ignore_ascii_case(self.get(kb)) + } } -/// A view into a single node within an [`ExprArena`]. +/// An expression node stored in the [`ExprArena`]. /// -/// This struct provides access to the attributes and value of a node -/// without transferring ownership. It's typically obtained by calling [`ExprArena::get`]. -#[derive(Debug, Copy, Clone)] -pub struct Node<'a> { - /// Metadata about this expression (e.g., source position) +/// Combines the expression's metadata ([`Attrs`]) with its actual content ([`Value`]). +#[derive(Debug, Clone, Copy, Serialize)] +pub struct Expr { + /// Metadata including source position. pub attrs: Attrs, - /// The actual kind and value of the expression - pub value: &'a Value, - /// The stable reference to this node in the arena - pub node_ref: ExprRef, + /// The kind and content of this expression. + pub value: Value, } -impl<'a> Node<'a> { - pub fn as_expr(&self) -> Expr { - Expr { - attrs: self.attrs, - node_ref: self.node_ref, - } - } +/// An arena-based allocator for EventQL expressions. +/// +/// The `ExprArena` provides a memory-efficient way to store and manage AST nodes +/// by using a flat vector and returning lightweight [`ExprRef`] handles. +#[derive(Default, Serialize)] +pub struct ExprArena { + #[serde(skip_serializing)] + hasher: FxBuildHasher, + exprs: Vec, + vecs: Vec>, + recs: Vec>, } impl ExprArena { @@ -50,10 +74,10 @@ impl ExprArena { /// to create a stable [`ExprKey`], and stores it in the arena. It returns /// an [`ExprRef`] which can be used to retrieve the expression later. pub fn alloc(&mut self, attrs: Attrs, value: Value) -> ExprRef { - let key = ExprKey(self.hasher.hash_one(&value)); + let key = ExprKey(self.hasher.hash_one(value)); - let ptr = ExprPtr(self.slots.len()); - self.slots.push(Slot { attrs, value }); + let ptr = ExprPtr(self.exprs.len()); + self.exprs.push(Expr { attrs, value }); ExprRef { key, ptr } } @@ -64,12 +88,265 @@ impl ExprArena { /// /// Panics if the [`ExprRef`] contains an invalid pointer that is out of bounds /// of the arena's internal storage. - pub fn get(&self, node_ref: ExprRef) -> Node<'_> { - let slot = &self.slots[node_ref.ptr.0]; - Node { - attrs: slot.attrs, - value: &slot.value, - node_ref, + pub fn get(&self, node_ref: ExprRef) -> Expr { + self.exprs[node_ref.ptr.0] + } + + /// Allocates a vector of expression references and returns a [`VecRef`] handle. + pub fn alloc_vec(&mut self, values: Vec) -> VecRef { + let key = VecRef(self.vecs.len()); + self.vecs.push(values); + + key + } + + /// Allocates a vector of record fields and returns a [`RecRef`] handle. + pub fn alloc_rec(&mut self, values: Vec) -> RecRef { + let key = RecRef(self.recs.len()); + self.recs.push(values); + + key + } + + /// Returns the slice of expression references for the given [`VecRef`]. + pub fn vec(&self, ptr: VecRef) -> &[ExprRef] { + &self.vecs[ptr.0] + } + + /// Returns the expression reference at index `idx` within the given [`VecRef`]. + pub fn vec_get(&self, ptr: VecRef, idx: usize) -> ExprRef { + self.vecs[ptr.0][idx] + } + + /// Returns an iterator over valid indices for the given [`VecRef`]. + pub fn vec_idxes(&self, ptr: VecRef) -> impl Iterator + use<> { + 0..self.vec(ptr).len() + } + + /// Returns the vector of fields for the given [`RecRef`]. + pub fn rec(&self, ptr: RecRef) -> &Vec { + &self.recs[ptr.0] + } + + /// Returns the field at index `idx` within the given [`RecRef`]. + pub fn rec_get(&self, ptr: RecRef, idx: usize) -> Field { + self.recs[ptr.0][idx] + } + + /// Returns an iterator over valid indices for the given [`RecRef`]. + pub fn rec_idxes(&self, ptr: RecRef) -> impl Iterator + use<> { + 0..self.rec(ptr).len() + } +} + +/// An arena-based allocator for type information. +/// +/// Stores and deduplicates types, record definitions, and function argument lists. +/// Supports freezing to mark a baseline and freeing types allocated after the baseline. +#[derive(Default, Serialize)] +pub struct TypeArena { + #[serde(skip_serializing)] + args_hasher: FxBuildHasher, + + type_offset: usize, + rec_offset: usize, + + dedup_types: FxHashMap, + dedup_args: FxHashMap, + types: Vec, + pub(crate) records: Vec>, + pub(crate) args: Vec>, +} + +impl TypeArena { + /// Marks the current allocation state as the baseline. + /// + /// Subsequent calls to [`free_space`](TypeArena::free_space) will deallocate + /// only types and records allocated after this point. + pub fn freeze(&mut self) { + self.rec_offset = self.records.len(); + self.type_offset = self.types.len(); + } + + /// Frees types and records allocated after the last [`freeze`](TypeArena::freeze) call. + pub fn free_space(&mut self) { + for tpe in self.types.drain(self.type_offset..) { + self.dedup_types.remove(&tpe); + } + + for _ in self.records.drain(self.rec_offset..) {} + } + + /// Registers a type and returns a deduplicated [`TypeRef`]. Returns the existing reference if already registered. + pub fn register_type(&mut self, tpe: Type) -> TypeRef { + match self.dedup_types.entry(tpe) { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + let key = TypeRef(self.types.len()); + self.types.push(tpe); + entry.insert(key); + + key + } } } + + /// Allocates a fresh copy of a type. For records, this clones the record definition. + pub fn alloc_type(&mut self, tpe: Type) -> Type { + if let Type::Record(rec) = tpe { + let key = Record(self.records.len()); + // TODO: technically, a deep-clone is needed here, where properties that point to + // records should also be allocated as well. + self.records.push(self.records[rec.0].clone()); + + return Type::Record(key); + } + + tpe + } + + /// Creates an array type containing elements of the given type. + pub fn alloc_array_of(&mut self, tpe: Type) -> Type { + Type::Array(self.register_type(tpe)) + } + + /// Allocates a new record type from a map of field names to types. + pub fn alloc_record(&mut self, record: FxHashMap) -> Record { + let key = Record(self.records.len()); + self.records.push(record); + key + } + + /// Allocates a deduplicated list of function argument types and returns an [`ArgsRef`]. + pub fn alloc_args(&mut self, args: &[Type]) -> ArgsRef { + let hash = self.args_hasher.hash_one(args); + + match self.dedup_args.entry(hash) { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + let key = ArgsRef(self.args.len()); + entry.insert(key); + self.args.push(args.to_vec()); + + key + } + } + } + + /// Retrieves the type for the given [`TypeRef`]. + pub fn get_type(&self, key: TypeRef) -> Type { + self.types[key.0] + } + + /// Returns the field map for the given record. + pub fn get_record(&self, key: Record) -> &FxHashMap { + &self.records[key.0] + } + + /// Returns the argument type slice for the given [`ArgsRef`]. + pub fn get_args(&self, key: ArgsRef) -> &[Type] { + self.args[key.0].as_slice() + } + + /// Returns a mutable reference to the argument type slice for the given [`ArgsRef`]. + pub fn get_args_mut(&mut self, key: ArgsRef) -> &mut [Type] { + self.args[key.0].as_mut_slice() + } + + /// Returns an iterator over valid indices for the given [`ArgsRef`]. + pub fn args_idxes(&self, key: ArgsRef) -> impl Iterator + use<> { + 0..self.get_args(key).len() + } + + /// Returns the argument type at index `idx` for the given [`ArgsRef`]. + pub fn args_get(&self, key: ArgsRef, idx: usize) -> Type { + self.get_args(key)[idx] + } + + /// Returns the type of a field in the given record, or `None` if the field doesn't exist. + pub fn record_get(&self, record: Record, field: StrRef) -> Option { + self.records[record.0].get(&field).copied() + } + + /// Iterates over all (field name, type) pairs in the given record. + pub fn record_iter(&self, record: Record) -> impl Iterator { + self.records[record.0].iter().map(|(k, v)| (*k, *v)) + } + + /// Iterates over all field names in the given record. + pub fn record_keys(&self, record: Record) -> impl Iterator { + self.records[record.0].keys().copied() + } + + /// Checks whether two records have the exact same set of field names. + pub fn records_have_same_keys(&self, rec_a: Record, rec_b: Record) -> bool { + let rec_a = self.get_record(rec_a); + let rec_b = self.get_record(rec_b); + + if rec_a.is_empty() && rec_b.is_empty() { + return true; + } + + if rec_a.len() != rec_b.len() { + return false; + } + + for bk in rec_b.keys() { + if !rec_a.contains_key(bk) { + return false; + } + } + + true + } + + /// Creates an empty record type. + pub fn instantiate_record(&mut self) -> Record { + self.alloc_record(FxHashMap::default()) + } + + /// Returns `true` if the given field exists in the record. + pub fn record_field_exists(&self, record: Record, field: StrRef) -> bool { + self.records[record.0].contains_key(&field) + } + + /// Returns the hash map entry for a field in the given record, for in-place manipulation. + pub fn record_entry(&mut self, record: Record, key: StrRef) -> Entry<'_, StrRef, Type> { + self.records[record.0].entry(key) + } + + /// Sets the type of a field in the given record, inserting or updating as needed. + pub fn record_set(&mut self, record: Record, field: StrRef, value: Type) { + self.records[record.0].insert(field, value); + } + + /// Returns the number of fields in the given record. + pub fn record_len(&self, record: Record) -> usize { + self.records[record.0].len() + } + + /// Returns `true` if the given record has no fields. + pub fn record_is_empty(&self, record: Record) -> bool { + self.records[record.0].is_empty() + } +} + +/// Top-level arena that holds all memory pools for expressions, strings, and types. +#[derive(Default, Serialize)] +pub struct Arena { + pub(crate) exprs: ExprArena, + pub(crate) strings: StringArena, + pub(crate) types: TypeArena, +} + +impl Arena { + /// Freezes the type arena to mark the current state as baseline. + pub fn freeze(&mut self) { + self.types.freeze(); + } + + /// Frees types allocated after the last freeze, reclaiming memory for reuse. + pub fn free_space(&mut self) { + self.types.free_space(); + } } diff --git a/src/ast.rs b/src/ast.rs index 7b30c72..1a6c814 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -11,18 +11,10 @@ //! - [`Value`] - The various kinds of expression values (literals, operators, etc.) //! - [`Source`] - Data sources in FROM clauses //! -use crate::{ - error::AnalysisError, - token::{Operator, Token}, -}; +use crate::token::{Operator, Token}; use ordered_float::OrderedFloat; -use rustc_hash::FxHashMap; use serde::Serialize; use std::hash::{Hash, Hasher}; -use std::{ - fmt::{self, Display}, - mem, -}; /// Position information for source code locations. /// @@ -55,417 +47,6 @@ impl From> for Pos { } } -/// Represents function argument types with optional parameter support. -/// -/// This type allows defining functions that have both required and optional parameters. -/// The `needed` field specifies how many arguments are required, while `values` contains -/// all possible argument types (both required and optional). -/// -/// # Examples -/// -/// ``` -/// use eventql_parser::prelude::{FunArgs, Type}; -/// -/// // Function with all required parameters: (number, string) -/// let required = FunArgs::required(vec![Type::Number, Type::String]); -/// assert_eq!(required.needed, 2); -/// assert_eq!(required.values.len(), 2); -/// -/// // Function with optional parameters: (boolean, number?) -/// let optional = FunArgs { -/// values: vec![Type::Bool, Type::Number], -/// needed: 1, // Only first parameter is required -/// }; -/// assert!(optional.match_arg_count(1)); // Can call with just boolean -/// assert!(optional.match_arg_count(2)); // Can call with both -/// assert!(!optional.match_arg_count(3)); // Cannot call with 3 args -/// ``` -#[derive(Debug, Serialize, Clone)] -pub struct FunArgs { - /// All argument types, including both required and optional parameters - pub values: Vec, - /// Number of required arguments (must be <= values.len()) - pub needed: usize, -} - -impl FunArgs { - /// Creates a new `FunArgs` where all parameters are required. - /// - /// # Examples - /// - /// ``` - /// use eventql_parser::prelude::{FunArgs, Type}; - /// - /// let args = FunArgs::required(vec![Type::Number, Type::String]); - /// assert_eq!(args.needed, 2); - /// assert_eq!(args.values.len(), 2); - /// ``` - pub fn required(args: Vec) -> Self { - Self { - needed: args.len(), - values: args, - } - } - - /// Returns `true` if there are no argument types defined. - /// - /// # Examples - /// - /// ``` - /// use eventql_parser::prelude::{FunArgs, Type}; - /// - /// let empty = FunArgs::required(vec![]); - /// assert!(empty.is_empty()); - /// - /// let not_empty = FunArgs::required(vec![Type::Number]); - /// assert!(!not_empty.is_empty()); - /// ``` - pub fn is_empty(&self) -> bool { - self.values.is_empty() - } - - /// Checks if a given argument count is valid for this function signature. - /// - /// Returns `true` if the count is between `needed` (inclusive) and - /// `values.len()` (inclusive), meaning all required arguments are - /// provided and no extra arguments beyond the optional ones are given. - /// - /// # Examples - /// - /// ``` - /// use eventql_parser::prelude::{FunArgs, Type}; - /// - /// let args = FunArgs { - /// values: vec![Type::Bool, Type::Number, Type::String], - /// needed: 1, // Only first parameter is required - /// }; - /// - /// assert!(!args.match_arg_count(0)); // Missing required argument - /// assert!(args.match_arg_count(1)); // Required argument provided - /// assert!(args.match_arg_count(2)); // Required + one optional - /// assert!(args.match_arg_count(3)); // All arguments provided - /// assert!(!args.match_arg_count(4)); // Too many arguments - /// ``` - pub fn match_arg_count(&self, cnt: usize) -> bool { - cnt >= self.needed && cnt <= self.values.len() - } -} - -impl From> for FunArgs { - fn from(value: Vec) -> Self { - Self::required(value) - } -} - -/// Type information for expressions. -/// -/// This enum represents the type of an expression in the E -#[derive(Clone, Debug, Default, Serialize)] -pub enum Type { - /// Type has not been determined yet - #[default] - Unspecified, - /// Numeric type (f64) - Number, - /// String type - String, - /// Boolean type - Bool, - /// Array type - Array(Box), - /// Record (object) type - Record(FxHashMap), - /// Subject pattern type - Subject, - /// Function type with support for optional parameters. - /// - /// The `args` field uses [`FunArgs`] to support both required and optional parameters. - /// Optional parameters are indicated when `args.needed < args.values.len()`. - /// - /// # Examples - /// - /// ``` - /// use eventql_parser::prelude::{Type, FunArgs}; - /// - /// // Function with all required parameters: (number, string) -> boolean - /// let all_required = Type::App { - /// args: vec![Type::Number, Type::String].into(), - /// result: Box::new(Type::Bool), - /// aggregate: false, - /// }; - /// - /// // Aggregate function with optional parameter: (boolean?) => number - /// let with_optional = Type::App { - /// args: FunArgs { - /// values: vec![Type::Bool], - /// needed: 0, // All parameters are optional - /// }, - /// result: Box::new(Type::Number), - /// aggregate: true, - /// }; - /// ``` - App { - /// Function argument types, supporting optional parameters - args: FunArgs, - /// Return type of the function - result: Box, - /// Whether this is an aggregate function (operates on grouped data) - aggregate: bool, - }, - /// Date type (e.g., `2026-01-03`) - /// - /// Used when a field is explicitly converted to a date using the `AS DATE` syntax. - Date, - /// Time type (e.g., `13:45:39`) - /// - /// Used when a field is explicitly converted to a time using the `AS TIME` syntax. - Time, - /// DateTime type (e.g., `2026-01-01T13:45:39Z`) - /// - /// Used when a field is explicitly converted to a datetime using the `AS DATETIME` syntax. - DateTime, - /// Custom type not defined in the EventQL reference - /// - /// Used when a field is converted to a custom type registered in [`AnalysisOptions::custom_types`]. - /// The string contains the custom type name as it appears in the query. - /// - /// # Examples - /// - /// ``` - /// use eventql_parser::Session; - /// - /// let mut session = Session::builder() - /// .declare_custom_type("CustomTimestamp") - /// .build(); - /// let query = session.parse("FROM e IN events PROJECT INTO { ts: e.data.timestamp as CustomTimestamp }").unwrap(); - /// let typed_query = session.run_static_analysis(query).unwrap(); - /// ``` - Custom(String), -} - -/// Provides human-readable string formatting for types. -/// -/// Function types display optional parameters with a `?` suffix. For example, -/// a function with signature `(boolean, number?) -> string` accepts 1 or 2 arguments. -/// Aggregate functions use `=>` instead of `->` in their signature. -/// -/// # Examples -/// -/// ``` -/// use eventql_parser::prelude::{Type, FunArgs}; -/// -/// // Basic types -/// assert_eq!(Type::Number.to_string(), "number"); -/// assert_eq!(Type::String.to_string(), "string"); -/// assert_eq!(Type::Bool.to_string(), "boolean"); -/// -/// // Array type -/// let arr = Type::Array(Box::new(Type::Number)); -/// assert_eq!(arr.to_string(), "[]number"); -/// -/// // Function with all required parameters -/// let func = Type::App { -/// args: vec![Type::Number, Type::String].into(), -/// result: Box::new(Type::Bool), -/// aggregate: false, -/// }; -/// assert_eq!(func.to_string(), "(number, string) -> boolean"); -/// -/// // Function with optional parameters -/// let func_optional = Type::App { -/// args: FunArgs { -/// values: vec![Type::Bool, Type::Number], -/// needed: 1, -/// }, -/// result: Box::new(Type::String), -/// aggregate: false, -/// }; -/// assert_eq!(func_optional.to_string(), "(boolean, number?) -> string"); -/// -/// // Aggregate function -/// let agg = Type::App { -/// args: vec![Type::Number].into(), -/// result: Box::new(Type::Number), -/// aggregate: true, -/// }; -/// assert_eq!(agg.to_string(), "(number) => number"); -/// ``` -impl Display for Type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Type::Unspecified => write!(f, "any"), - Type::Number => write!(f, "number"), - Type::String => write!(f, "string"), - Type::Bool => write!(f, "boolean"), - Type::Array(tpe) => write!(f, "[]{tpe}"), - Type::Record(map) => { - write!(f, "{{ ")?; - - for (idx, (name, value)) in map.iter().enumerate() { - if idx != 0 { - write!(f, ", ")?; - } - - write!(f, "{name}: {value}")?; - } - - write!(f, " }}") - } - Type::Subject => write!(f, "subject"), - Type::App { - args, - result, - aggregate, - } => { - write!(f, "(")?; - - for (idx, arg) in args.values.iter().enumerate() { - if idx != 0 { - write!(f, ", ")?; - } - - write!(f, "{arg}")?; - - if idx + 1 > args.needed { - write!(f, "?")?; - } - } - - write!(f, ")")?; - - if *aggregate { - write!(f, " => ")?; - } else { - write!(f, " -> ")?; - } - - write!(f, "{result}") - } - Type::Date => write!(f, "date"), - Type::Time => write!(f, "time"), - Type::DateTime => write!(f, "datetime"), - Type::Custom(n) => write!(f, "{}", n.to_lowercase()), - } - } -} - -impl Type { - pub fn as_record_or_panic_mut(&mut self) -> &mut FxHashMap { - if let Self::Record(r) = self { - return r; - } - - panic!("expected record type, got {:?}", self); - } - - /// Checks if two types are the same. - /// - /// * If `self` is `Type::Unspecified` then `self` is updated to the more specific `Type`. - /// * If `self` is `Type::Subject` and is checked against a `Type::String` then `self` is updated to `Type::String` - pub fn check(self, attrs: Attrs, other: Type) -> Result { - match (self, other) { - (Self::Unspecified, other) => Ok(other), - (this, Self::Unspecified) => Ok(this), - (Self::Subject, Self::Subject) => Ok(Self::Subject), - - // Subjects are strings so there is no reason to reject a type - // when compared to a string. However, when it happens, we demote - // a subject to a string. - (Self::Subject, Self::String) => Ok(Self::String), - (Self::String, Self::Subject) => Ok(Self::String), - - (Self::Number, Self::Number) => Ok(Self::Number), - (Self::String, Self::String) => Ok(Self::String), - (Self::Bool, Self::Bool) => Ok(Self::Bool), - (Self::Date, Self::Date) => Ok(Self::Date), - (Self::Time, Self::Time) => Ok(Self::Time), - (Self::DateTime, Self::DateTime) => Ok(Self::DateTime), - - // `DateTime` can be implicitly cast to `Date` or `Time` - (Self::DateTime, Self::Date) => Ok(Self::Date), - (Self::Date, Self::DateTime) => Ok(Self::Date), - (Self::DateTime, Self::Time) => Ok(Self::Time), - (Self::Time, Self::DateTime) => Ok(Self::Time), - (Self::Custom(a), Self::Custom(b)) if a.eq_ignore_ascii_case(b.as_str()) => { - Ok(Self::Custom(a)) - } - (Self::Array(mut a), Self::Array(b)) => { - *a = a.as_ref().clone().check(attrs, *b)?; - Ok(Self::Array(a)) - } - - (Self::Record(mut a), Self::Record(b)) if a.len() == b.len() => { - if a.is_empty() { - return Ok(Self::Record(a)); - } - - for bk in b.keys() { - if !a.contains_key(bk) { - return Err(AnalysisError::TypeMismatch( - attrs.pos.line, - attrs.pos.col, - Self::Record(a), - Self::Record(b), - )); - } - } - - for (bk, bv) in b.into_iter() { - let av = a.get_mut(&bk).unwrap(); - let a = mem::take(av); - *av = a.check(attrs, bv)?; - } - - Ok(Self::Record(a)) - } - - ( - Self::App { - args: mut a_args, - result: mut a_res, - aggregate: a_agg, - }, - Self::App { - args: b_args, - result: b_res, - aggregate: b_agg, - }, - ) if a_args.values.len() == b_args.values.len() && a_agg == b_agg => { - if a_args.is_empty() { - let tmp = mem::take(a_res.as_mut()); - *a_res = tmp.check(attrs, *b_res)?; - return Ok(Self::App { - args: a_args, - result: a_res, - aggregate: a_agg, - }); - } - - for (a, b) in a_args.values.iter_mut().zip(b_args.values.into_iter()) { - let tmp = mem::take(a); - *a = tmp.check(attrs, b)?; - } - - let tmp = mem::take(a_res.as_mut()); - *a_res = tmp.check(attrs, *b_res)?; - - Ok(Self::App { - args: a_args, - result: a_res, - aggregate: a_agg, - }) - } - - (this, other) => Err(AnalysisError::TypeMismatch( - attrs.pos.line, - attrs.pos.col, - this, - other, - )), - } - } -} - /// Attributes attached to each expression node. /// /// These attributes provide metadata about an expression, including its @@ -489,6 +70,18 @@ impl<'a> From> for Attrs { } } +/// A reference to a string stored in the [`StringArena`](crate::arena::StringArena). +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize)] +pub struct StrRef(pub(crate) usize); + +/// A reference to a vector of expressions stored in the [`ExprArena`](crate::arena::ExprArena). +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize)] +pub struct VecRef(pub(crate) usize); + +/// A reference to a vector of record fields stored in the [`ExprArena`](crate::arena::ExprArena). +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize)] +pub struct RecRef(pub(crate) usize); + /// Internal pointer to an expression in the arena. #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Serialize)] pub struct ExprPtr(pub(crate) usize); @@ -497,7 +90,7 @@ pub struct ExprPtr(pub(crate) usize); #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize)] pub struct ExprKey(pub(crate) u64); -/// A reference to an expression stored in an [`ExprArena`]. +/// A reference to an expression stored in an [`ExprArena`](crate::arena::ExprArena). /// /// This is a lightweight handle that combines a hash key for fast comparison /// and a pointer for fast lookup. @@ -513,25 +106,6 @@ impl Hash for ExprRef { } } -/// An expression with metadata. -/// -/// This is the fundamental building block of the AST. Every expression -/// carries attributes (position, scope, type) and a value that determines -/// what kind of expression it is. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] -pub struct Expr { - /// Metadata about this expression - pub attrs: Attrs, - /// The value/kind of this expression - pub node_ref: ExprRef, -} - -impl Hash for Expr { - fn hash(&self, state: &mut H) { - self.node_ref.hash(state); - } -} - /// Field access expression (e.g., `e.data.price`). /// /// Represents accessing a field of a record or object using dot notation. @@ -541,12 +115,12 @@ impl Hash for Expr { /// /// In the query `WHERE e.data.user.id == 1`, the expression `e.data.user.id` /// is parsed as nested `Access` nodes. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] pub struct Access { /// The target expression being accessed pub target: ExprRef, /// The name of the field being accessed - pub field: String, + pub field: StrRef, } /// Function application (e.g., `sum(e.price)`, `count()`). @@ -556,23 +130,23 @@ pub struct Access { /// # Examples /// /// In the query `WHERE count(e.items) > 5`, the `count(e.items)` is an `App` node. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] pub struct App { /// Name of the function being called - pub func: String, + pub func: StrRef, /// Arguments passed to the function - pub args: Vec, + pub args: VecRef, } /// A field in a record literal (e.g., `{name: "Alice", age: 30}`). /// /// Represents a key-value pair in a record construction. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] pub struct Field { /// Field attributes pub attrs: Attrs, /// Field name - pub name: String, + pub name: StrRef, /// Field value expression pub expr: ExprRef, } @@ -615,20 +189,20 @@ pub struct Unary { /// /// This enum contains all the different types of expressions that can appear /// in an EventQL query, from simple literals to complex operations. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] pub enum Value { /// Numeric literal (e.g., `42`, `3.14`) Number(OrderedFloat), /// String literal (e.g., `"hello"`) - String(String), + String(StrRef), /// Boolean literal (`true` or `false`) Bool(bool), /// Identifier (e.g., variable name `e`, `x`) - Id(String), + Id(StrRef), /// Array literal (e.g., `[1, 2, 3]`) - Array(Vec), + Array(VecRef), /// Record literal (e.g., `{name: "Alice", age: 30}`) - Record(Vec), + Record(RecRef), /// Field access (e.g., `e.data.price`) Access(Access), /// Function application (e.g., `sum(e.price)`) diff --git a/src/error.rs b/src/error.rs index 220aa8a..7e2c3b3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -4,7 +4,7 @@ //! and parsing of EventQL queries. All errors include position information //! (line and column numbers) to help diagnose issues in query strings. -use crate::{Type, token::Symbol}; +use crate::token::Symbol; use serde::Serialize; use thiserror::Error; @@ -59,6 +59,9 @@ pub enum ParserError { #[error("{0}:{1}: expected identifier but got {2}")] ExpectedIdent(u32, u32, String), + /// The query is missing a required FROM statement. + /// + /// Fields: `(line, column)` #[error("{0}:{1}: missing FROM statement")] MissingFromStatement(u32, u32), @@ -132,7 +135,7 @@ pub enum AnalysisError { /// This occurs when an expression has a different type than what is /// required by its context (e.g., using a string where a number is expected). #[error("{0}:{1}: type mismatch: expected {2} but got {3} ")] - TypeMismatch(u32, u32, Type, Type), + TypeMismatch(u32, u32, String, String), /// A record field was accessed but doesn't exist in the record type. /// @@ -159,7 +162,7 @@ pub enum AnalysisError { /// This occurs when a record type is required (e.g., for field access) /// but a different type was found. #[error("{0}:{1}: expected record but got {2}")] - ExpectRecord(u32, u32, Type), + ExpectRecord(u32, u32, String), /// Expected a record or sourced-property but found a different type. /// @@ -168,7 +171,7 @@ pub enum AnalysisError { /// This occurs when checking a projection and the static analysis found /// out the project into clause doesn't return a record nor a sourced-based property. #[error("{0}:{1}: expected a record or a sourced-property but got {2}")] - ExpectRecordOrSourcedProperty(u32, u32, Type), + ExpectRecordOrSourcedProperty(u32, u32, String), /// Expected an array type but found a different type. /// @@ -176,7 +179,7 @@ pub enum AnalysisError { /// /// This occurs when an array type is required but a different type was found. #[error("{0}:{1}: expected an array but got {2}")] - ExpectArray(u32, u32, Type), + ExpectArray(u32, u32, String), /// Expected a field literal but found a different expression. /// diff --git a/src/lib.rs b/src/lib.rs index 458fcb0..f27f43f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,6 @@ //! This library provides a complete lexer and parser for EventQL (EQL), a query language //! designed for event sourcing systems. It allows you to parse EQL query strings into //! an abstract syntax tree (AST) that can be analyzed or executed. -mod analysis; pub mod arena; mod ast; mod error; @@ -12,10 +11,13 @@ mod parser; #[cfg(test)] mod tests; mod token; +mod typing; -use crate::arena::ExprArena; +use crate::arena::Arena; use crate::lexer::tokenize; -use crate::prelude::{AnalysisOptions, Typed, parse, static_analysis}; +use crate::prelude::{ + Analysis, AnalysisOptions, FunArgs, Type, Typed, display_type, name_to_type, parse, +}; use crate::token::Token; pub use ast::*; use rustc_hash::FxHashMap; @@ -26,13 +28,120 @@ use unicase::Ascii; /// This module provides a single import point for all the library's public API, /// including AST types, error types, lexer, parser, and token types. pub mod prelude { - pub use super::analysis::*; pub use super::ast::*; pub use super::error::*; pub use super::parser::*; pub use super::token::*; + pub use super::typing::analysis::*; + pub use super::typing::*; } +/// Builder for function argument specifications. +/// +/// Allows defining function signatures with both required and optional parameters. +/// When `required` equals the length of `args`, all parameters are required. +pub struct FunArgsBuilder<'a> { + args: &'a [Type], + required: usize, +} + +impl<'a> FunArgsBuilder<'a> { + /// Creates a new `FunArgsBuilder` with the given argument types and required count. + pub fn new(args: &'a [Type], required: usize) -> Self { + Self { args, required } + } +} + +impl<'a> From<&'a [Type]> for FunArgsBuilder<'a> { + fn from(args: &'a [Type]) -> Self { + Self { + args, + required: args.len(), + } + } +} + +impl<'a, const N: usize> From<&'a [Type; N]> for FunArgsBuilder<'a> { + fn from(value: &'a [Type; N]) -> Self { + Self { + args: value.as_slice(), + required: value.len(), + } + } +} + +/// Builder for configuring event type information on a [`SessionBuilder`]. +/// +/// Obtained by calling [`SessionBuilder::declare_event_type`]. Use [`record`](EventTypeBuilder::record) +/// to define a record-shaped event type or [`custom`](EventTypeBuilder::custom) for a named custom type. +pub struct EventTypeBuilder { + parent: SessionBuilder, +} + +impl EventTypeBuilder { + /// Starts building a record-shaped event type with named fields. + pub fn record(self) -> EventTypeRecordBuilder { + EventTypeRecordBuilder { + inner: self, + props: Default::default(), + } + } + + /// Declares a custom (non-record) event type by name. + pub fn custom(self, _name: &str) -> SessionBuilder { + todo!("deal with custom type later") + } +} + +/// Builder for defining the fields of a record-shaped event type. +/// +/// Obtained by calling [`EventTypeBuilder::record`]. Add fields with [`prop`](EventTypeRecordBuilder::prop) +/// and finalize with [`build`](EventTypeRecordBuilder::build) to return to the [`SessionBuilder`]. +pub struct EventTypeRecordBuilder { + inner: EventTypeBuilder, + props: FxHashMap, +} + +impl EventTypeRecordBuilder { + /// Conditionally adds a field to the event record type. + pub fn prop_when(mut self, test: bool, name: &str, tpe: Type) -> Self { + if test { + self.props + .insert(self.inner.parent.arena.strings.alloc(name), tpe); + } + + self + } + + /// Adds a field with the given name and type to the event record type. + pub fn prop(mut self, name: &str, tpe: Type) -> Self { + self.props + .insert(self.inner.parent.arena.strings.alloc(name), tpe); + self + } + + /// Conditionally adds a field with a custom type to the event record type. + pub fn prop_with_custom_when(mut self, test: bool, name: &str, tpe: &str) -> Self { + if test { + let tpe = self.inner.parent.arena.strings.alloc(tpe); + self.props.insert( + self.inner.parent.arena.strings.alloc(name), + Type::Custom(tpe), + ); + } + + self + } + + /// Finalizes the event record type and returns the [`SessionBuilder`]. + pub fn build(mut self) -> SessionBuilder { + let ptr = self.inner.parent.arena.types.alloc_record(self.props); + self.inner.parent.options.event_type_info = Type::Record(ptr); + self.inner.parent + } +} + +/// A specialized `Result` type for EventQL parser operations. pub type Result = std::result::Result; /// `SessionBuilder` is a builder for `Session` objects. @@ -41,6 +150,7 @@ pub type Result = std::result::Result; /// functions (both regular and aggregate), event types, and custom types, /// before building an `EventQL` parsing session. pub struct SessionBuilder { + arena: Arena, options: AnalysisOptions, } @@ -55,7 +165,12 @@ impl SessionBuilder { /// * `name` - The name of the function. /// * `args` - The arguments the function accepts, which can be converted into `FunArgs`. /// * `result` - The return type of the function. - pub fn declare_func(self, name: &str, args: impl Into, result: Type) -> Self { + pub fn declare_func<'a>( + self, + name: &'a str, + args: impl Into>, + result: Type, + ) -> Self { self.declare_func_when(true, name, args, result) } @@ -71,19 +186,25 @@ impl SessionBuilder { /// * `name` - The name of the function. /// * `args` - The arguments the function accepts, which can be converted into `FunArgs`. /// * `result` - The return type of the function. - pub fn declare_func_when( + pub fn declare_func_when<'a>( mut self, test: bool, - name: &str, - args: impl Into, + name: &'a str, + args: impl Into>, result: Type, ) -> Self { if test { + let builder = args.into(); + let args = self.arena.types.alloc_args(builder.args); + self.options.default_scope.entries.insert( name, Type::App { - args: args.into(), - result: Box::new(result), + args: FunArgs { + values: args, + needed: builder.required, + }, + result: self.arena.types.register_type(result), aggregate: false, }, ); @@ -102,7 +223,12 @@ impl SessionBuilder { /// * `name` - The name of the aggregate function. /// * `args` - The arguments the aggregate function accepts. /// * `result` - The return type of the aggregate function. - pub fn declare_agg_func(self, name: &str, args: impl Into, result: Type) -> Self { + pub fn declare_agg_func<'a>( + self, + name: &'a str, + args: impl Into>, + result: Type, + ) -> Self { self.declare_agg_func_when(true, name, args, result) } @@ -117,19 +243,25 @@ impl SessionBuilder { /// * `name` - The name of the aggregate function. /// * `args` - The arguments the aggregate function accepts. /// * `result` - The return type of the aggregate function. - pub fn declare_agg_func_when( + pub fn declare_agg_func_when<'a>( mut self, test: bool, - name: &str, - args: impl Into, + name: &'a str, + args: impl Into>, result: Type, ) -> Self { if test { + let builder = args.into(); + let args = self.arena.types.alloc_args(builder.args); + self.options.default_scope.entries.insert( name, Type::App { - args: args.into(), - result: Box::new(result), + args: FunArgs { + values: args, + needed: builder.required, + }, + result: self.arena.types.register_type(result), aggregate: true, }, ); @@ -164,9 +296,8 @@ impl SessionBuilder { /// # Arguments /// /// * `tpe` - The `Type` representing the structure of event records. - pub fn declare_event_type(mut self, tpe: Type) -> Self { - self.options.event_type_info = tpe; - self + pub fn declare_event_type(self) -> EventTypeBuilder { + EventTypeBuilder { parent: self } } /// Conditionally declares a custom type that can be used in EQL queries. @@ -212,88 +343,91 @@ impl SessionBuilder { /// `declare_func` and `declare_agg_func` for all standard library functions, /// and `declare_event_type` for the default event structure. pub fn use_stdlib(self) -> Self { - self.declare_func("ABS", vec![Type::Number], Type::Number) - .declare_func("CEIL", vec![Type::Number], Type::Number) - .declare_func("FLOOR", vec![Type::Number], Type::Number) - .declare_func("ROUND", vec![Type::Number], Type::Number) - .declare_func("COS", vec![Type::Number], Type::Number) - .declare_func("EXP", vec![Type::Number], Type::Number) - .declare_func("POW", vec![Type::Number, Type::Number], Type::Number) - .declare_func("SQRT", vec![Type::Number], Type::Number) - .declare_func("RAND", vec![], Type::Number) - .declare_func("PI", vec![Type::Number], Type::Number) - .declare_func("LOWER", vec![Type::String], Type::String) - .declare_func("UPPER", vec![Type::String], Type::String) - .declare_func("TRIM", vec![Type::String], Type::String) - .declare_func("LTRIM", vec![Type::String], Type::String) - .declare_func("RTRIM", vec![Type::String], Type::String) - .declare_func("LEN", vec![Type::String], Type::Number) - .declare_func("INSTR", vec![Type::String], Type::Number) + self.declare_func("ABS", &[Type::Number], Type::Number) + .declare_func("CEIL", &[Type::Number], Type::Number) + .declare_func("FLOOR", &[Type::Number], Type::Number) + .declare_func("ROUND", &[Type::Number], Type::Number) + .declare_func("COS", &[Type::Number], Type::Number) + .declare_func("EXP", &[Type::Number], Type::Number) + .declare_func("POW", &[Type::Number, Type::Number], Type::Number) + .declare_func("SQRT", &[Type::Number], Type::Number) + .declare_func("RAND", &[], Type::Number) + .declare_func("PI", &[Type::Number], Type::Number) + .declare_func("LOWER", &[Type::String], Type::String) + .declare_func("UPPER", &[Type::String], Type::String) + .declare_func("TRIM", &[Type::String], Type::String) + .declare_func("LTRIM", &[Type::String], Type::String) + .declare_func("RTRIM", &[Type::String], Type::String) + .declare_func("LEN", &[Type::String], Type::Number) + .declare_func("INSTR", &[Type::String], Type::Number) .declare_func( "SUBSTRING", - vec![Type::String, Type::Number, Type::Number], + &[Type::String, Type::Number, Type::Number], Type::String, ) .declare_func( "REPLACE", - vec![Type::String, Type::String, Type::String], + &[Type::String, Type::String, Type::String], Type::String, ) - .declare_func("STARTSWITH", vec![Type::String, Type::String], Type::Bool) - .declare_func("ENDSWITH", vec![Type::String, Type::String], Type::Bool) - .declare_func("NOW", vec![], Type::DateTime) - .declare_func("YEAR", vec![Type::Date], Type::Number) - .declare_func("MONTH", vec![Type::Date], Type::Number) - .declare_func("DAY", vec![Type::Date], Type::Number) - .declare_func("HOUR", vec![Type::Time], Type::Number) - .declare_func("MINUTE", vec![Type::Time], Type::Number) - .declare_func("SECOND", vec![Type::Time], Type::Number) - .declare_func("WEEKDAY", vec![Type::Date], Type::Number) + .declare_func("STARTSWITH", &[Type::String, Type::String], Type::Bool) + .declare_func("ENDSWITH", &[Type::String, Type::String], Type::Bool) + .declare_func("NOW", &[], Type::DateTime) + .declare_func("YEAR", &[Type::Date], Type::Number) + .declare_func("MONTH", &[Type::Date], Type::Number) + .declare_func("DAY", &[Type::Date], Type::Number) + .declare_func("HOUR", &[Type::Time], Type::Number) + .declare_func("MINUTE", &[Type::Time], Type::Number) + .declare_func("SECOND", &[Type::Time], Type::Number) + .declare_func("WEEKDAY", &[Type::Date], Type::Number) .declare_func( "IF", - vec![Type::Bool, Type::Unspecified, Type::Unspecified], + &[Type::Bool, Type::Unspecified, Type::Unspecified], Type::Unspecified, ) .declare_agg_func( "COUNT", - FunArgs { - values: vec![Type::Bool], - needed: 0, + FunArgsBuilder { + args: &[Type::Bool], + required: 0, }, Type::Number, ) - .declare_agg_func("SUM", vec![Type::Number], Type::Number) - .declare_agg_func("AVG", vec![Type::Number], Type::Number) - .declare_agg_func("MIN", vec![Type::Number], Type::Number) - .declare_agg_func("MAX", vec![Type::Number], Type::Number) - .declare_agg_func("MEDIAN", vec![Type::Number], Type::Number) - .declare_agg_func("STDDEV", vec![Type::Number], Type::Number) - .declare_agg_func("VARIANCE", vec![Type::Number], Type::Number) - .declare_agg_func("UNIQUE", vec![Type::Unspecified], Type::Unspecified) - .declare_event_type(Type::Record(FxHashMap::from_iter([ - ("specversion".to_owned(), Type::String), - ("id".to_owned(), Type::String), - ("time".to_owned(), Type::DateTime), - ("source".to_owned(), Type::String), - ("subject".to_owned(), Type::Subject), - ("type".to_owned(), Type::String), - ("datacontenttype".to_owned(), Type::String), - ("data".to_owned(), Type::Unspecified), - ("predecessorhash".to_owned(), Type::String), - ("hash".to_owned(), Type::String), - ("traceparent".to_owned(), Type::String), - ("tracestate".to_owned(), Type::String), - ("signature".to_owned(), Type::String), - ]))) + .declare_agg_func("SUM", &[Type::Number], Type::Number) + .declare_agg_func("AVG", &[Type::Number], Type::Number) + .declare_agg_func("MIN", &[Type::Number], Type::Number) + .declare_agg_func("MAX", &[Type::Number], Type::Number) + .declare_agg_func("MEDIAN", &[Type::Number], Type::Number) + .declare_agg_func("STDDEV", &[Type::Number], Type::Number) + .declare_agg_func("VARIANCE", &[Type::Number], Type::Number) + .declare_agg_func("UNIQUE", &[Type::Unspecified], Type::Unspecified) + .declare_event_type() + .record() + .prop("specversion", Type::String) + .prop("id", Type::String) + .prop("time", Type::DateTime) + .prop("source", Type::String) + .prop("subject", Type::Subject) + .prop("type", Type::String) + .prop("datacontenttype", Type::String) + .prop("data", Type::Unspecified) + .prop("predecessorhash", Type::String) + .prop("hash", Type::String) + .prop("traceparent", Type::String) + .prop("tracestate", Type::String) + .prop("signature", Type::String) + .build() } /// Builds the `Session` object with the configured analysis options. /// /// This consumes the `SessionBuilder` and returns a `Session` instance /// ready for tokenizing, parsing, and analyzing EventQL queries. - pub fn build(self) -> Session { + pub fn build(mut self) -> Session { + self.arena.types.freeze(); + Session { - arena: ExprArena::default(), + arena: self.arena, options: self.options, } } @@ -302,6 +436,7 @@ impl SessionBuilder { impl Default for SessionBuilder { fn default() -> Self { Self { + arena: Default::default(), options: AnalysisOptions::empty(), } } @@ -312,7 +447,7 @@ impl Default for SessionBuilder { /// It holds the necessary context, such as the expression arena and analysis options, /// to perform lexical analysis, parsing, and static analysis of EQL query strings. pub struct Session { - arena: ExprArena, + arena: Arena, options: AnalysisOptions, } @@ -387,7 +522,49 @@ impl Session { /// # Returns /// /// Returns a typed query on success, or an `AnalysisError` if type checking fails. - pub fn run_static_analysis(&self, query: Query) -> Result> { - Ok(static_analysis(&self.arena, &self.options, query)?) + pub fn run_static_analysis(&mut self, query: Query) -> Result> { + let mut analysis = self.analysis(); + Ok(analysis.analyze_query(query)?) + } + + /// Converts a type name string to its corresponding [`Type`] variant. + /// + /// This function performs case-insensitive matching for built-in type names and checks + /// against custom types defined in the analysis options. + /// + /// # Returns + /// + /// * `Some(Type)` - If the name matches a built-in type or custom type + /// * `None` - If the name doesn't match any known type + /// + /// # Built-in Type Mappings + /// + /// The following type names are recognized (case-insensitive): + /// - `"string"` → [`Type::String`] + /// - `"int"` or `"float64"` → [`Type::Number`] + /// - `"boolean"` → [`Type::Bool`] + /// - `"date"` → [`Type::Date`] + /// - `"time"` → [`Type::Time`] + /// - `"datetime"` → [`Type::DateTime`] + pub fn get_type_from_name(&mut self, name: &str) -> Option { + let str_ref = self.arena.strings.alloc(name); + name_to_type(&self.arena, &self.options, str_ref) + } + + /// Provides human-readable string formatting for types. + /// + /// Function types display optional parameters with a `?` suffix. For example, + /// a function with signature `(boolean, number?) -> string` accepts 1 or 2 arguments. + /// Aggregate functions use `=>` instead of `->` in their signature. + pub fn display_type(&self, tpe: &Type) -> String { + display_type(&self.arena, *tpe) + } + + /// Creates an [`Analysis`] instance for fine-grained control over static analysis. + /// + /// Use this when you need to analyze individual expressions or manage scopes manually, + /// rather than using [`run_static_analysis`](Session::run_static_analysis) for whole queries. + pub fn analysis(&mut self) -> Analysis<'_> { + Analysis::new(&mut self.arena, &self.options) } } diff --git a/src/parser.rs b/src/parser.rs index 7d181a4..16dc0c9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -8,14 +8,14 @@ //! //! - [`parse`] - Convert a slice of tokens into a Query AST -use crate::arena::ExprArena; +use crate::arena::Arena; use crate::ast::{ Access, App, Attrs, Binary, Field, Limit, Order, OrderBy, Query, Source, SourceKind, Unary, Value, }; use crate::error::ParserError; use crate::token::{Operator, Sym, Symbol, Token}; -use crate::{Binding, ExprRef, GroupBy, Raw}; +use crate::{Binding, ExprRef, GroupBy, Raw, StrRef}; /// Result type for parser operations. /// @@ -28,7 +28,7 @@ pub type ParseResult = Result; /// representing the structure of the EventQL query or expression. pub struct Parser<'a> { input: &'a [Token<'a>], - arena: &'a mut ExprArena, + arena: &'a mut Arena, offset: usize, } @@ -43,7 +43,7 @@ impl<'a> Parser<'a> { /// let session = Session::builder().build(); /// let tokens = session.tokenize("1 + 2").unwrap(); /// ``` - pub fn new(arena: &'a mut ExprArena, input: &'a [Token<'a>]) -> Self { + pub fn new(arena: &'a mut Arena, input: &'a [Token<'a>]) -> Self { Self { arena, input, @@ -65,11 +65,11 @@ impl<'a> Parser<'a> { res } - fn parse_ident(&mut self) -> ParseResult { + fn parse_ident(&mut self) -> ParseResult { let token = self.shift(); if let Sym::Id(id) = token.sym { - return Ok(id.to_owned()); + return Ok(self.arena.strings.alloc(id)); } Err(ParserError::ExpectedIdent( @@ -261,34 +261,36 @@ impl<'a> Parser<'a> { } expect_symbol(self.shift(), Symbol::CloseParen)?; + let args = self.arena.exprs.alloc_vec(args); Value::App(App { - func: name.to_owned(), + func: self.arena.strings.alloc(name), args, }) } else if matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) { self.shift(); let attrs = token.into(); + let name = self.arena.strings.alloc(name); let mut access = Access { - target: self.arena.alloc(attrs, Value::Id(name.to_owned())), + target: self.arena.exprs.alloc(attrs, Value::Id(name)), field: self.parse_ident()?, }; while matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) { self.shift(); access = Access { - target: self.arena.alloc(attrs, Value::Access(access)), + target: self.arena.exprs.alloc(attrs, Value::Access(access)), field: self.parse_ident()?, }; } Value::Access(access) } else { - Value::Id(name.to_owned()) + Value::Id(self.arena.strings.alloc(name)) } } - Sym::String(s) => Value::String(s.to_owned()), + Sym::String(s) => Value::String(self.arena.strings.alloc(s)), Sym::Number(n) => Value::Number(n.into()), Sym::Symbol(Symbol::OpenParen) => { @@ -312,7 +314,7 @@ impl<'a> Parser<'a> { expect_symbol(self.shift(), Symbol::CloseBracket)?; - Value::Array(elems) + Value::Array(self.arena.exprs.alloc_vec(elems)) } Sym::Symbol(Symbol::OpenBrace) => { @@ -348,7 +350,7 @@ impl<'a> Parser<'a> { expect_symbol(self.shift(), Symbol::CloseBrace)?; - Value::Record(fields) + Value::Record(self.arena.exprs.alloc_rec(fields)) } Sym::Operator(op) if matches!(op, Operator::Add | Operator::Sub | Operator::Not) => { @@ -369,12 +371,12 @@ impl<'a> Parser<'a> { let attrs = token.into(); - Ok(self.arena.alloc(attrs, value)) + Ok(self.arena.exprs.alloc(attrs, value)) } fn parse_binary(&mut self, min_bind: u64) -> ParseResult { let mut lhs = self.parse_primary()?; - let lhs_attrs = self.arena.get(lhs).attrs; + let lhs_attrs = self.arena.exprs.get(lhs).attrs; loop { let token = self.peek(); @@ -392,7 +394,7 @@ impl<'a> Parser<'a> { self.shift(); let rhs = self.parse_binary(rhs_bind)?; - let node = self.arena.get(rhs); + let node = self.arena.exprs.get(rhs); if matches!(operator, Operator::As) && !matches!(node.value, Value::Id(_)) { return Err(ParserError::ExpectedType( @@ -403,6 +405,7 @@ impl<'a> Parser<'a> { lhs = self .arena + .exprs .alloc(lhs_attrs, Value::Binary(Binary { lhs, operator, rhs })); } @@ -563,10 +566,7 @@ fn binding_pow(op: Operator) -> (u64, u64) { /// 3. Additive (`+`, `-`) /// 4. Comparison (`<`, `<=`, `>`, `>=`, `==`, `!=`) /// 5. Logical (`AND`, `OR`, `XOR`) -pub(crate) fn parse<'a>( - arena: &'a mut ExprArena, - input: &'a [Token<'a>], -) -> ParseResult> { +pub(crate) fn parse<'a>(arena: &'a mut Arena, input: &'a [Token<'a>]) -> ParseResult> { let mut parser = Parser::new(arena, input); parser.parse_query() diff --git a/src/tests/analysis.rs b/src/tests/analysis.rs index 9539ebd..112929a 100644 --- a/src/tests/analysis.rs +++ b/src/tests/analysis.rs @@ -1,8 +1,5 @@ -use crate::{ - Session, Type, - parser::Parser, - prelude::{Analysis, AnalysisContext}, -}; +use crate::typing::analysis::AnalysisContext; +use crate::{Session, Type, parser::Parser}; #[test] fn test_infer_wrong_where_clause_1() { @@ -207,12 +204,13 @@ fn test_typecheck_datetime_contravariance_1() { .parse_expr() .unwrap(); - let mut analysis = Analysis::new(&session.arena, &session.options); + let event_type = session.options.event_type_info; + let mut analysis = session.analysis(); analysis .scope_mut() .entries - .insert("e".to_string(), session.options.event_type_info.clone()); + .insert("e".to_string(), event_type); // `e.time` is a `Type::DateTime` but it will typecheck if a `Type::Date` is expected insta::assert_yaml_snapshot!(analysis.analyze_expr( @@ -230,7 +228,7 @@ fn test_typecheck_datetime_contravariance_2() { .parse_expr() .unwrap(); - let mut analysis = Analysis::new(&session.arena, &session.options); + let mut analysis = session.analysis(); // `NOW()` is a `Type::DateTime` but it will typecheck if a `Type::Time` is expected insta::assert_yaml_snapshot!(analysis.analyze_expr( @@ -248,7 +246,7 @@ fn test_typecheck_datetime_contravariance_3() { .parse_expr() .unwrap(); - let mut analysis = Analysis::new(&session.arena, &session.options); + let mut analysis = session.analysis(); insta::assert_yaml_snapshot!(analysis.analyze_expr( &mut AnalysisContext::default(), @@ -265,7 +263,7 @@ fn test_typecheck_datetime_contravariance_4() { .parse_expr() .unwrap(); - let mut analysis = Analysis::new(&session.arena, &session.options); + let mut analysis = session.analysis(); insta::assert_yaml_snapshot!(analysis.analyze_expr( &mut AnalysisContext::default(), diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 0f5bb8b..ad63a45 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,9 +1,12 @@ -use crate::arena::ExprArena; +use crate::arena::Arena; use crate::ast::{Binding, Limit, Order, Query}; +use crate::prelude::{Type, Typed}; use crate::token::Operator; -use crate::{Attrs, ExprRef, SourceKind, Value}; +use crate::{Attrs, ExprRef, Raw, SourceKind, Value}; use ordered_float::OrderedFloat; use serde::Serialize; +use std::collections::BTreeMap; +use std::fmt::Debug; mod analysis; mod lexer; @@ -107,32 +110,53 @@ pub struct OrderByView { } impl ExprRef { - pub fn view(self, arena: &ExprArena) -> ExprView { - let node = arena.get(self); + pub fn view(self, arena: &Arena) -> ExprView { + let node = arena.exprs.get(self); let value = match node.value { - Value::Number(n) => ValueView::Number(*n), - Value::String(s) => ValueView::String(s.clone()), - Value::Bool(b) => ValueView::Bool(*b), - Value::Id(id) => ValueView::Id(id.clone()), - Value::Array(arr) => ValueView::Array(arr.iter().map(|e| e.view(arena)).collect()), - Value::Record(fields) => ValueView::Record( - fields - .iter() - .map(|f| FieldView { - attrs: f.attrs, - name: f.name.clone(), - value: f.expr.view(arena), + Value::Number(n) => ValueView::Number(n), + Value::String(s) => ValueView::String(arena.strings.get(s).to_owned()), + Value::Bool(b) => ValueView::Bool(b), + Value::Id(id) => ValueView::Id(arena.strings.get(id).to_owned()), + Value::Array(arr) => { + let mut values = Vec::with_capacity(arena.exprs.vec(arr).len()); + for idx in arena.exprs.vec_idxes(arr) { + let expr = arena.exprs.vec_get(arr, idx); + values.push(expr.view(arena)); + } + + ValueView::Array(values) + } + Value::Record(fields) => { + let mut values = Vec::with_capacity(arena.exprs.rec(fields).len()); + + for idx in arena.exprs.rec_idxes(fields) { + let field = arena.exprs.rec_get(fields, idx); + values.push(FieldView { + attrs: field.attrs, + name: arena.strings.get(field.name).to_owned(), + value: field.expr.view(arena), }) - .collect(), - ), + } + + ValueView::Record(values) + } Value::Access(access) => ValueView::Access(AccessView { target: Box::new(access.target.view(arena)), - field: access.field.clone(), - }), - Value::App(app) => ValueView::App(AppView { - func: app.func.clone(), - args: app.args.iter().map(|e| e.view(arena)).collect(), + field: arena.strings.get(access.field).to_owned(), }), + Value::App(app) => { + let mut args = Vec::with_capacity(arena.exprs.vec(app.args).len()); + + for idx in arena.exprs.vec_idxes(app.args) { + let expr = arena.exprs.vec_get(app.args, idx); + args.push(expr.view(arena)); + } + + ValueView::App(AppView { + func: arena.strings.get(app.func).to_owned(), + args, + }) + } Value::Binary(binary) => ValueView::Binary(BinaryView { lhs: Box::new(binary.lhs.view(arena)), operator: binary.operator, @@ -150,7 +174,10 @@ impl ExprRef { } impl Query { - pub fn view(self, arena: &ExprArena) -> QueryView { + pub fn view(self, arena: &Arena) -> QueryView<::View> + where + A: ProjectMeta, + { QueryView { attrs: self.attrs, sources: self @@ -178,8 +205,107 @@ impl Query { }), limit: self.limit, projection: self.projection.view(arena), - meta: self.meta, + meta: self.meta.project_meta(arena), distinct: self.distinct, } } } + +#[derive(Debug, Serialize)] +pub enum TypeView { + Unspecified, + Number, + String, + Bool, + Subject, + Date, + Time, + DateTime, + Custom(String), + Array(Box), + Record(BTreeMap), + App { + args: Vec, + result: Box, + aggregate: bool, + }, +} + +pub trait ProjectMeta { + type View: Debug + Serialize; + fn project_meta(&self, arena: &Arena) -> Self::View; +} + +impl ProjectMeta for Raw { + type View = Raw; + + fn project_meta(&self, _arena: &Arena) -> Self::View { + *self + } +} + +#[derive(Debug, Serialize)] +pub struct MetaView { + project: TypeView, + aggregate: bool, +} + +impl ProjectMeta for Typed { + type View = MetaView; + + fn project_meta(&self, arena: &Arena) -> Self::View { + MetaView { + project: project_type(arena, self.project), + aggregate: self.aggregate, + } + } +} + +fn project_type(arena: &Arena, tpe: Type) -> TypeView { + match tpe { + Type::Unspecified => TypeView::Unspecified, + Type::Number => TypeView::Number, + Type::String => TypeView::String, + Type::Bool => TypeView::Bool, + Type::Subject => TypeView::Subject, + Type::Date => TypeView::Date, + Type::Time => TypeView::Time, + Type::DateTime => TypeView::DateTime, + Type::Custom(key) => TypeView::Custom(arena.strings.get(key).to_owned()), + + Type::Array(arr) => { + TypeView::Array(Box::new(project_type(arena, arena.types.get_type(arr)))) + } + + Type::Record(rec) => { + let mut props = BTreeMap::new(); + + for (key, val) in arena.types.get_record(rec) { + props.insert( + arena.strings.get(*key).to_owned(), + project_type(arena, *val), + ); + } + + TypeView::Record(props) + } + + Type::App { + args, + result, + aggregate, + } => { + let mut args_view = Vec::new(); + + for val in arena.types.get_args(args.values) { + args_view.push(project_type(arena, *val)); + } + + TypeView::App { + args: args_view, + result: Box::new(project_type(arena, arena.types.get_type(result))), + aggregate, + } + } + } +} diff --git a/src/analysis.rs b/src/typing/analysis.rs similarity index 57% rename from src/analysis.rs rename to src/typing/analysis.rs index abde3c9..a9e359c 100644 --- a/src/analysis.rs +++ b/src/typing/analysis.rs @@ -1,13 +1,13 @@ use case_insensitive_hashmap::CaseInsensitiveHashMap; use rustc_hash::FxHashMap; use serde::{Serialize, ser::SerializeMap}; -use std::collections::hash_map::Entry; use std::{borrow::Cow, collections::HashSet, mem}; use unicase::Ascii; -use crate::arena::ExprArena; +use crate::arena::Arena; +use crate::typing::{Record, Type}; use crate::{ - App, Attrs, Binary, ExprRef, Field, FunArgs, Query, Raw, Source, SourceKind, Type, Value, + App, Attrs, Binary, ExprRef, Field, Query, Raw, RecRef, Source, SourceKind, StrRef, Value, error::AnalysisError, token::Operator, }; @@ -43,7 +43,7 @@ pub struct Typed { /// /// This is a convenience type alias for `Result` used throughout /// the static analysis module. -pub type AnalysisResult = std::result::Result; +pub type AnalysisResult = Result; /// Configuration options for static analysis. /// @@ -60,15 +60,6 @@ pub struct AnalysisOptions { /// This set allows users to register custom type names that can be used /// in type conversion expressions (e.g., `field AS CustomType`). Custom /// type names are case-insensitive. - /// - /// # Examples - /// - /// ``` - /// use eventql_parser::prelude::AnalysisOptions; - /// - /// let options = AnalysisOptions::default() - /// .add_custom_type("Foobar"); - /// ``` pub custom_types: HashSet>, } @@ -86,16 +77,6 @@ impl AnalysisOptions { /// # Returns /// /// Returns `self` to allow for method chaining. - /// - /// # Examples - /// - /// ``` - /// use eventql_parser::prelude::AnalysisOptions; - /// - /// let options = AnalysisOptions::default() - /// .add_custom_type("Timestamp") - /// .add_custom_type("UUID"); - /// ``` pub fn add_custom_type<'a>(mut self, value: impl Into>) -> Self { match value.into() { Cow::Borrowed(t) => self.custom_types.insert(Ascii::new(t.to_owned())), @@ -105,6 +86,7 @@ impl AnalysisOptions { self } + /// Creates empty analysis options with no functions, no event type, and no custom types. pub fn empty() -> Self { Self { default_scope: Scope::default(), @@ -114,379 +96,6 @@ impl AnalysisOptions { } } -impl Default for AnalysisOptions { - fn default() -> Self { - Self { - default_scope: Scope { - entries: CaseInsensitiveHashMap::from_iter([ - ( - "ABS", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "CEIL", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "FLOOR", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "ROUND", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "COS", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "EXP", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "POW", - Type::App { - args: vec![Type::Number, Type::Number].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "SQRT", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "RAND", - Type::App { - args: vec![].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "PI", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "LOWER", - Type::App { - args: vec![Type::String].into(), - result: Box::new(Type::String), - aggregate: false, - }, - ), - ( - "UPPER", - Type::App { - args: vec![Type::String].into(), - result: Box::new(Type::String), - aggregate: false, - }, - ), - ( - "TRIM", - Type::App { - args: vec![Type::String].into(), - result: Box::new(Type::String), - aggregate: false, - }, - ), - ( - "LTRIM", - Type::App { - args: vec![Type::String].into(), - result: Box::new(Type::String), - aggregate: false, - }, - ), - ( - "RTRIM", - Type::App { - args: vec![Type::String].into(), - result: Box::new(Type::String), - aggregate: false, - }, - ), - ( - "LEN", - Type::App { - args: vec![Type::String].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "INSTR", - Type::App { - args: vec![Type::String].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "SUBSTRING", - Type::App { - args: vec![Type::String, Type::Number, Type::Number].into(), - result: Box::new(Type::String), - aggregate: false, - }, - ), - ( - "REPLACE", - Type::App { - args: vec![Type::String, Type::String, Type::String].into(), - result: Box::new(Type::String), - aggregate: false, - }, - ), - ( - "STARTSWITH", - Type::App { - args: vec![Type::String, Type::String].into(), - result: Box::new(Type::Bool), - aggregate: false, - }, - ), - ( - "ENDSWITH", - Type::App { - args: vec![Type::String, Type::String].into(), - result: Box::new(Type::Bool), - aggregate: false, - }, - ), - ( - "NOW", - Type::App { - args: vec![].into(), - result: Box::new(Type::DateTime), - aggregate: false, - }, - ), - ( - "YEAR", - Type::App { - args: vec![Type::Date].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "MONTH", - Type::App { - args: vec![Type::Date].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "DAY", - Type::App { - args: vec![Type::Date].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "HOUR", - Type::App { - args: vec![Type::Time].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "MINUTE", - Type::App { - args: vec![Type::Time].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "SECOND", - Type::App { - args: vec![Type::Time].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "WEEKDAY", - Type::App { - args: vec![Type::Date].into(), - result: Box::new(Type::Number), - aggregate: false, - }, - ), - ( - "IF", - Type::App { - args: vec![Type::Bool, Type::Unspecified, Type::Unspecified].into(), - result: Box::new(Type::Unspecified), - aggregate: false, - }, - ), - ( - "COUNT", - Type::App { - args: FunArgs { - values: vec![Type::Bool], - needed: 0, - }, - result: Box::new(Type::Number), - aggregate: true, - }, - ), - ( - "SUM", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: true, - }, - ), - ( - "AVG", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: true, - }, - ), - ( - "MIN", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: true, - }, - ), - ( - "MAX", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: true, - }, - ), - ( - "MEDIAN", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: true, - }, - ), - ( - "STDDEV", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: true, - }, - ), - ( - "VARIANCE", - Type::App { - args: vec![Type::Number].into(), - result: Box::new(Type::Number), - aggregate: true, - }, - ), - ( - "UNIQUE", - Type::App { - args: vec![Type::Unspecified].into(), - result: Box::new(Type::Unspecified), - aggregate: true, - }, - ), - ]), - }, - event_type_info: Type::Record(FxHashMap::from_iter([ - ("specversion".to_owned(), Type::String), - ("id".to_owned(), Type::String), - ("time".to_owned(), Type::DateTime), - ("source".to_owned(), Type::String), - ("subject".to_owned(), Type::Subject), - ("type".to_owned(), Type::String), - ("datacontenttype".to_owned(), Type::String), - ("data".to_owned(), Type::Unspecified), - ("predecessorhash".to_owned(), Type::String), - ("hash".to_owned(), Type::String), - ("traceparent".to_owned(), Type::String), - ("tracestate".to_owned(), Type::String), - ("signature".to_owned(), Type::String), - ])), - custom_types: HashSet::default(), - } - } -} - -/// Performs static analysis on an EventQL query. -/// -/// This function takes a raw (untyped) query and performs type checking and -/// variable scoping analysis. It validates that: -/// - All variables are properly declared -/// - Types match expected types in expressions and operations -/// - Field accesses are valid for their record types -/// - Function calls have the correct argument types -/// - Aggregate functions are only used in PROJECT INTO clauses -/// - Aggregate functions are not mixed with source-bound fields in projections -/// - Aggregate function arguments are source-bound fields (not constants or function results) -/// - Record literals are non-empty in projection contexts -/// -/// # Arguments -/// -/// * `options` - Configuration containing type information and default scope -/// * `query` - The raw query to analyze -/// -/// # Returns -/// -/// Returns a typed query on success, or an `AnalysisError` if type checking fails. -pub(crate) fn static_analysis( - arena: &ExprArena, - options: &AnalysisOptions, - query: Query, -) -> AnalysisResult> { - let mut analysis = Analysis::new(arena, options); - - analysis.analyze_query(query) -} - /// Represents a variable scope during static analysis. /// /// A scope tracks the variables and their types that are currently in scope @@ -548,7 +157,7 @@ pub struct AnalysisContext { /// This struct maintains the analysis state including scopes and type information. /// It can be used to perform type checking on individual expressions or entire queries. pub struct Analysis<'a> { - arena: &'a ExprArena, + arena: &'a mut Arena, /// The analysis options containing type information for functions and event types. options: &'a AnalysisOptions, /// Stack of previous scopes for nested scope handling. @@ -559,7 +168,7 @@ pub struct Analysis<'a> { impl<'a> Analysis<'a> { /// Creates a new analysis instance with the given options. - pub fn new(arena: &'a ExprArena, options: &'a AnalysisOptions) -> Self { + pub fn new(arena: &'a mut Arena, options: &'a AnalysisOptions) -> Self { Self { arena, options, @@ -648,7 +257,7 @@ impl<'a> Analysis<'a> { } if let Some(group_by) = &query.group_by { - let node = self.arena.get(group_by.expr); + let node = self.arena.exprs.get(group_by.expr); if !matches!(node.value, Value::Access(_)) { return Err(AnalysisError::ExpectFieldLiteral( node.attrs.pos.line, @@ -659,11 +268,12 @@ impl<'a> Analysis<'a> { self.analyze_expr(&mut ctx, group_by.expr, Type::Unspecified)?; if let Some(expr) = group_by.predicate.as_ref().copied() { - let node = self.arena.get(expr); ctx.allow_agg_func = true; ctx.use_agg_funcs = true; self.analyze_expr(&mut ctx, expr, Type::Bool)?; + + let node = self.arena.exprs.get(expr); if !self.expect_agg_expr(expr)? { return Err(AnalysisError::ExpectAggExpr( node.attrs.pos.line, @@ -680,7 +290,7 @@ impl<'a> Analysis<'a> { if let Some(order_by) = &query.order_by { self.analyze_expr(&mut ctx, order_by.expr, Type::Unspecified)?; - let node = self.arena.get(order_by.expr); + let node = self.arena.exprs.get(order_by.expr); if query.group_by.is_none() && !matches!(node.value, Value::Access(_)) { return Err(AnalysisError::ExpectFieldLiteral( node.attrs.pos.line, @@ -713,7 +323,9 @@ impl<'a> Analysis<'a> { fn analyze_source(&mut self, source: Source) -> AnalysisResult> { let kind = self.analyze_source_kind(source.kind)?; let tpe = match &kind { - SourceKind::Name(_) | SourceKind::Subject(_) => self.options.event_type_info.clone(), + SourceKind::Name(_) | SourceKind::Subject(_) => { + self.arena.types.alloc_type(self.options.event_type_info) + } SourceKind::Subquery(query) => self.projection_type(query), }; @@ -752,10 +364,10 @@ impl<'a> Analysis<'a> { ctx: &mut AnalysisContext, expr: ExprRef, ) -> AnalysisResult { - let node = self.arena.get(expr); + let node = self.arena.exprs.get(expr); match node.value { Value::Record(record) => { - if record.is_empty() { + if self.arena.exprs.rec(record).is_empty() { return Err(AnalysisError::EmptyRecord( node.attrs.pos.line, node.attrs.pos.col, @@ -763,20 +375,20 @@ impl<'a> Analysis<'a> { } ctx.allow_agg_func = true; - let tpe = self.analyze_expr(ctx, node.node_ref, Type::Unspecified)?; + let tpe = self.analyze_expr(ctx, expr, Type::Unspecified)?; let mut chk_ctx = CheckContext { use_agg_func: ctx.use_agg_funcs, ..Default::default() }; - self.check_projection_on_record(&mut chk_ctx, record.as_slice())?; + self.check_projection_on_record(&mut chk_ctx, record)?; Ok(tpe) } Value::App(app) => { ctx.allow_agg_func = true; - let tpe = self.analyze_expr(ctx, node.node_ref, Type::Unspecified)?; + let tpe = self.analyze_expr(ctx, expr, Type::Unspecified)?; if ctx.use_agg_funcs { let mut chk_ctx = CheckContext { @@ -786,7 +398,7 @@ impl<'a> Analysis<'a> { self.check_projection_on_field_expr(&mut chk_ctx, expr)?; } else { - self.reject_constant_func(node.attrs, app)?; + self.reject_constant_func(node.attrs, &app)?; } Ok(tpe) @@ -798,13 +410,13 @@ impl<'a> Analysis<'a> { )), Value::Id(id) => { - if let Some(tpe) = self.scope.entries.get(id.as_str()).cloned() { + if let Some(tpe) = self.scope.entries.get(self.arena.strings.get(id)).cloned() { Ok(tpe) } else { Err(AnalysisError::VariableUndeclared( node.attrs.pos.line, node.attrs.pos.col, - id.clone(), + self.arena.strings.get(id).to_owned(), )) } } @@ -815,23 +427,27 @@ impl<'a> Analysis<'a> { )), Value::Access(access) => { - let mut current = self.arena.get(access.target); + let mut current = self.arena.exprs.get(access.target); loop { match current.value { Value::Id(name) => { - if !self.scope.entries.contains_key(name.as_str()) { + if !self + .scope + .entries + .contains_key(self.arena.strings.get(name)) + { return Err(AnalysisError::VariableUndeclared( current.attrs.pos.line, current.attrs.pos.col, - name.clone(), + self.arena.strings.get(name).to_owned(), )); } break; } - Value::Access(next) => current = self.arena.get(next.target), + Value::Access(next) => current = self.arena.exprs.get(next.target), _ => unreachable!(), } } @@ -839,21 +455,27 @@ impl<'a> Analysis<'a> { self.analyze_expr(ctx, expr, Type::Unspecified) } - _ => Err(AnalysisError::ExpectRecordOrSourcedProperty( - node.attrs.pos.line, - node.attrs.pos.col, - self.project_type(expr), - )), + _ => { + let tpe = self.project_type(expr); + + Err(AnalysisError::ExpectRecordOrSourcedProperty( + node.attrs.pos.line, + node.attrs.pos.col, + display_type(self.arena, tpe), + )) + } } } fn check_projection_on_record( &mut self, ctx: &mut CheckContext, - record: &[Field], + record: RecRef, ) -> AnalysisResult<()> { - for field in record { - self.check_projection_on_field(ctx, field)?; + for idx in 0..self.arena.exprs.rec(record).len() { + let field = self.arena.exprs.rec_get(record, idx); + + self.check_projection_on_field(ctx, &field)?; } Ok(()) @@ -872,12 +494,12 @@ impl<'a> Analysis<'a> { ctx: &mut CheckContext, expr: ExprRef, ) -> AnalysisResult<()> { - let node = self.arena.get(expr); + let node = self.arena.exprs.get(expr); match node.value { Value::Number(_) | Value::String(_) | Value::Bool(_) => Ok(()), Value::Id(id) => { - if self.scope.entries.contains_key(id.as_str()) { + if self.scope.entries.contains_key(self.arena.strings.get(id)) { if ctx.use_agg_func { return Err(AnalysisError::UnallowedAggFuncUsageWithSrcField( node.attrs.pos.line, @@ -892,7 +514,9 @@ impl<'a> Analysis<'a> { } Value::Array(exprs) => { - for expr in exprs.iter().copied() { + for idx in self.arena.exprs.vec_idxes(exprs) { + let expr = self.arena.exprs.vec_get(exprs, idx); + self.check_projection_on_field_expr(ctx, expr)?; } @@ -900,8 +524,10 @@ impl<'a> Analysis<'a> { } Value::Record(fields) => { - for field in fields { - self.check_projection_on_field(ctx, field)?; + for idx in self.arena.exprs.rec_idxes(fields) { + let field = self.arena.exprs.rec_get(fields, idx); + + self.check_projection_on_field(ctx, &field)?; } Ok(()) @@ -910,8 +536,11 @@ impl<'a> Analysis<'a> { Value::Access(access) => self.check_projection_on_field_expr(ctx, access.target), Value::App(app) => { - if let Some(Type::App { aggregate, .. }) = - self.options.default_scope.entries.get(app.func.as_str()) + if let Some(Type::App { aggregate, .. }) = self + .options + .default_scope + .entries + .get(self.arena.strings.get(app.func)) { ctx.use_agg_func |= *aggregate; @@ -926,7 +555,9 @@ impl<'a> Analysis<'a> { return self.expect_agg_func(expr); } - for arg in app.args.iter().copied() { + for idx in self.arena.exprs.vec_idxes(app.args) { + let arg = self.arena.exprs.vec_get(app.args, idx); + self.invalidate_agg_func_usage(arg)?; } } @@ -940,18 +571,24 @@ impl<'a> Analysis<'a> { } Value::Unary(unary) => self.check_projection_on_field_expr(ctx, unary.expr), - Value::Group(expr) => self.check_projection_on_field_expr(ctx, *expr), + Value::Group(expr) => self.check_projection_on_field_expr(ctx, expr), } } fn expect_agg_func(&self, expr: ExprRef) -> AnalysisResult<()> { - let node = self.arena.get(expr); + let node = self.arena.exprs.get(expr); if let Value::App(app) = node.value && let Some(Type::App { aggregate: true, .. - }) = self.options.default_scope.entries.get(app.func.as_str()) + }) = self + .options + .default_scope + .entries + .get(self.arena.strings.get(app.func)) { - for arg in app.args.iter().copied() { + for idx in 0..self.arena.exprs.vec(app.args).len() { + let arg = self.arena.exprs.vec_get(app.args, idx); + self.ensure_agg_param_is_source_bound(arg)?; self.invalidate_agg_func_usage(arg)?; } @@ -966,10 +603,10 @@ impl<'a> Analysis<'a> { } fn expect_agg_expr(&self, expr: ExprRef) -> AnalysisResult { - let node = self.arena.get(expr); + let node = self.arena.exprs.get(expr); match node.value { Value::Id(id) => { - if self.scope.entries.contains_key(id.as_str()) { + if self.scope.entries.contains_key(self.arena.strings.get(id)) { return Err(AnalysisError::UnallowedAggFuncUsageWithSrcField( node.attrs.pos.line, node.attrs.pos.col, @@ -978,7 +615,7 @@ impl<'a> Analysis<'a> { Ok(false) } - Value::Group(expr) => self.expect_agg_expr(*expr), + Value::Group(expr) => self.expect_agg_expr(expr), Value::Binary(binary) => { let lhs = self.expect_agg_expr(binary.lhs)?; let rhs = self.expect_agg_expr(binary.rhs)?; @@ -1003,13 +640,19 @@ impl<'a> Analysis<'a> { } fn ensure_agg_param_is_source_bound(&self, expr: ExprRef) -> AnalysisResult<()> { - let node = self.arena.get(expr); + let node = self.arena.exprs.get(expr); match node.value { - Value::Id(id) if !self.options.default_scope.entries.contains_key(id.as_str()) => { + Value::Id(id) + if !self + .options + .default_scope + .entries + .contains_key(self.arena.strings.get(id)) => + { Ok(()) } Value::Access(access) => self.ensure_agg_param_is_source_bound(access.target), - Value::Binary(binary) => self.ensure_agg_binary_op_is_source_bound(node.attrs, *binary), + Value::Binary(binary) => self.ensure_agg_binary_op_is_source_bound(node.attrs, binary), Value::Unary(unary) => self.ensure_agg_param_is_source_bound(unary.expr), _ => Err(AnalysisError::ExpectSourceBoundProperty( @@ -1037,44 +680,60 @@ impl<'a> Analysis<'a> { } fn ensure_agg_binary_op_branch_is_source_bound(&self, expr: ExprRef) -> bool { - let node = self.arena.get(expr); + let node = self.arena.exprs.get(expr); match node.value { - Value::Id(id) => !self.options.default_scope.entries.contains_key(id.as_str()), + Value::Id(id) => !self + .options + .default_scope + .entries + .contains_key(self.arena.strings.get(id)), Value::Array(exprs) => { - if exprs.is_empty() { + if self.arena.exprs.vec(exprs).is_empty() { return false; } - exprs - .iter() - .copied() - .all(|expr| self.ensure_agg_binary_op_branch_is_source_bound(expr)) + for idx in 0..self.arena.exprs.vec(exprs).len() { + let expr = self.arena.exprs.vec_get(exprs, idx); + + if !self.ensure_agg_binary_op_branch_is_source_bound(expr) { + return false; + } + } + + true } Value::Record(fields) => { - if fields.is_empty() { + if self.arena.exprs.rec(fields).is_empty() { return false; } - fields - .iter() - .all(|field| self.ensure_agg_binary_op_branch_is_source_bound(field.expr)) + for idx in 0..self.arena.exprs.rec(fields).len() { + let field = self.arena.exprs.rec_get(fields, idx); + + if !self.ensure_agg_binary_op_branch_is_source_bound(field.expr) { + return false; + } + } + + true } + Value::Access(access) => { self.ensure_agg_binary_op_branch_is_source_bound(access.target) } Value::Binary(binary) => self - .ensure_agg_binary_op_is_source_bound(node.attrs, *binary) + .ensure_agg_binary_op_is_source_bound(node.attrs, binary) .is_ok(), Value::Unary(unary) => self.ensure_agg_binary_op_branch_is_source_bound(unary.expr), - Value::Group(expr) => self.ensure_agg_binary_op_branch_is_source_bound(*expr), + Value::Group(expr) => self.ensure_agg_binary_op_branch_is_source_bound(expr), Value::Number(_) | Value::String(_) | Value::Bool(_) | Value::App(_) => false, } } fn invalidate_agg_func_usage(&self, expr: ExprRef) -> AnalysisResult<()> { - let node = self.arena.get(expr); + let node = self.arena.exprs.get(expr); match node.value { Value::Number(_) | Value::String(_) @@ -1083,7 +742,9 @@ impl<'a> Analysis<'a> { | Value::Access(_) => Ok(()), Value::Array(exprs) => { - for expr in exprs.iter().copied() { + for idx in 0..self.arena.exprs.vec(exprs).len() { + let expr = self.arena.exprs.vec_get(exprs, idx); + self.invalidate_agg_func_usage(expr)?; } @@ -1091,7 +752,9 @@ impl<'a> Analysis<'a> { } Value::Record(fields) => { - for field in fields { + for idx in 0..self.arena.exprs.rec(fields).len() { + let field = self.arena.exprs.rec_get(fields, idx); + self.invalidate_agg_func_usage(field.expr)?; } @@ -1099,18 +762,22 @@ impl<'a> Analysis<'a> { } Value::App(app) => { - if let Some(Type::App { aggregate, .. }) = - self.options.default_scope.entries.get(app.func.as_str()) + if let Some(Type::App { aggregate, .. }) = self + .options + .default_scope + .entries + .get(self.arena.strings.get(app.func)) && *aggregate { return Err(AnalysisError::WrongAggFunUsage( node.attrs.pos.line, node.attrs.pos.col, - app.func.clone(), + self.arena.strings.get(app.func).to_owned(), )); } - for arg in app.args.iter().copied() { + for idx in 0..self.arena.exprs.vec(app.args).len() { + let arg = self.arena.exprs.vec_get(app.args, idx); self.invalidate_agg_func_usage(arg)?; } @@ -1123,12 +790,12 @@ impl<'a> Analysis<'a> { } Value::Unary(unary) => self.invalidate_agg_func_usage(unary.expr), - Value::Group(expr) => self.invalidate_agg_func_usage(*expr), + Value::Group(expr) => self.invalidate_agg_func_usage(expr), } } fn reject_constant_func(&self, attrs: Attrs, app: &App) -> AnalysisResult<()> { - if app.args.is_empty() { + if self.arena.exprs.vec(app.args).is_empty() { return Err(AnalysisError::ConstantExprInProjectIntoClause( attrs.pos.line, attrs.pos.col, @@ -1136,7 +803,9 @@ impl<'a> Analysis<'a> { } let mut errored = None; - for arg in app.args.iter().copied() { + for idx in 0..self.arena.exprs.vec(app.args).len() { + let arg = self.arena.exprs.vec_get(app.args, idx); + if let Err(e) = self.reject_constant_expr(arg) { if errored.is_none() { errored = Some(e); @@ -1153,13 +822,15 @@ impl<'a> Analysis<'a> { } fn reject_constant_expr(&self, expr: ExprRef) -> AnalysisResult<()> { - let node = self.arena.get(expr); + let node = self.arena.exprs.get(expr); match node.value { - Value::Id(id) if self.scope.entries.contains_key(id.as_str()) => Ok(()), + Value::Id(id) if self.scope.entries.contains_key(self.arena.strings.get(id)) => Ok(()), Value::Array(exprs) => { let mut errored = None; - for expr in exprs.iter().copied() { + for idx in 0..self.arena.exprs.vec(exprs).len() { + let expr = self.arena.exprs.vec_get(exprs, idx); + if let Err(e) = self.reject_constant_expr(expr) { if errored.is_none() { errored = Some(e); @@ -1177,7 +848,9 @@ impl<'a> Analysis<'a> { Value::Record(fields) => { let mut errored = None; - for field in fields { + for idx in 0..self.arena.exprs.rec(fields).len() { + let field = self.arena.exprs.rec_get(fields, idx); + if let Err(e) = self.reject_constant_expr(field.expr) { if errored.is_none() { errored = Some(e); @@ -1198,9 +871,9 @@ impl<'a> Analysis<'a> { .or_else(|e| self.reject_constant_expr(binary.rhs).map_err(|_| e)), Value::Access(access) => self.reject_constant_expr(access.target), - Value::App(app) => self.reject_constant_func(node.attrs, app), + Value::App(app) => self.reject_constant_func(node.attrs, &app), Value::Unary(unary) => self.reject_constant_expr(unary.expr), - Value::Group(expr) => self.reject_constant_expr(*expr), + Value::Group(expr) => self.reject_constant_expr(expr), _ => Err(AnalysisError::ConstantExprInProjectIntoClause( node.attrs.pos.line, @@ -1242,53 +915,66 @@ impl<'a> Analysis<'a> { expr: ExprRef, mut expect: Type, ) -> AnalysisResult { - let node = self.arena.get(expr); + let node = self.arena.exprs.get(expr); match node.value { - Value::Number(_) => expect.check(node.attrs, Type::Number), - Value::String(_) => expect.check(node.attrs, Type::String), - Value::Bool(_) => expect.check(node.attrs, Type::Bool), + Value::Number(_) => self.arena.type_check(node.attrs, expect, Type::Number), + Value::String(_) => self.arena.type_check(node.attrs, expect, Type::String), + Value::Bool(_) => self.arena.type_check(node.attrs, expect, Type::Bool), Value::Id(id) => { - if let Some(tpe) = self.options.default_scope.entries.get(id.as_str()) { - expect.check(node.attrs, tpe.clone()) - } else if let Some(tpe) = self.scope.entries.get_mut(id.as_str()) { - let tmp = mem::take(tpe); - *tpe = tmp.check(node.attrs, expect)?; + if let Some(tpe) = self + .options + .default_scope + .entries + .get(self.arena.strings.get(id)) + .copied() + { + self.arena.type_check(node.attrs, expect, tpe) + } else if let Some(tpe) = self.scope.entries.get_mut(self.arena.strings.get(id)) { + *tpe = self.arena.type_check(node.attrs, mem::take(tpe), expect)?; - Ok(tpe.clone()) + Ok(*tpe) } else { Err(AnalysisError::VariableUndeclared( node.attrs.pos.line, node.attrs.pos.col, - id.to_owned(), + self.arena.strings.get(id).to_owned(), )) } } Value::Array(exprs) => { if matches!(expect, Type::Unspecified) { - for expr in exprs.iter().copied() { + for idx in self.arena.exprs.vec_idxes(exprs) { + let expr = self.arena.exprs.vec_get(exprs, idx); + expect = self.analyze_expr(ctx, expr, expect)?; } - return Ok(Type::Array(Box::new(expect))); + return Ok(self.arena.types.alloc_array_of(expect)); } match expect { - Type::Array(mut expect) => { - for expr in exprs.iter().copied() { - *expect = self.analyze_expr(ctx, expr, expect.as_ref().clone())?; + Type::Array(expect) => { + let mut expect = self.arena.types.get_type(expect); + for idx in 0..self.arena.exprs.vec(exprs).len() { + let expr = self.arena.exprs.vec_get(exprs, idx); + expect = self.analyze_expr(ctx, expr, expect)?; } - Ok(Type::Array(expect)) + Ok(self.arena.types.alloc_array_of(expect)) } - expect => Err(AnalysisError::TypeMismatch( - node.attrs.pos.line, - node.attrs.pos.col, - expect, - self.project_type(expr), - )), + expect => { + let tpe = self.project_type(expr); + + Err(AnalysisError::TypeMismatch( + node.attrs.pos.line, + node.attrs.pos.col, + display_type(self.arena, expect), + display_type(self.arena, tpe), + )) + } } } @@ -1296,89 +982,108 @@ impl<'a> Analysis<'a> { if matches!(expect, Type::Unspecified) { let mut record = FxHashMap::default(); - for field in fields { + for idx in 0..self.arena.exprs.rec(fields).len() { + let field = self.arena.exprs.rec_get(fields, idx); + record.insert( - field.name.clone(), + field.name, self.analyze_expr(ctx, field.expr, Type::Unspecified)?, ); } - return Ok(Type::Record(record)); + return Ok(Type::Record(self.arena.types.alloc_record(record))); } - match expect { - Type::Record(mut types) if fields.len() == types.len() => { - for field in fields { - if let Some(tpe) = types.remove(field.name.as_str()) { - types.insert( - field.name.clone(), - self.analyze_expr(ctx, field.expr, tpe)?, - ); - } else { - return Err(AnalysisError::FieldUndeclared( - field.attrs.pos.line, - field.attrs.pos.col, - field.name.clone(), - )); - } + if let Type::Record(rec) = expect + && self.arena.types.record_len(rec) == self.arena.exprs.rec(fields).len() + { + for idx in self.arena.exprs.rec_idxes(fields) { + let field = self.arena.exprs.rec_get(fields, idx); + + if let Some(tpe) = self.arena.types.record_get(rec, field.name) { + let new_tpe = self.analyze_expr(ctx, field.expr, tpe)?; + self.arena.types.record_set(rec, field.name, new_tpe); + continue; } - Ok(Type::Record(types)) + return Err(AnalysisError::FieldUndeclared( + field.attrs.pos.line, + field.attrs.pos.col, + self.arena.strings.get(field.name).to_owned(), + )); } - expect => Err(AnalysisError::TypeMismatch( - node.attrs.pos.line, - node.attrs.pos.col, - expect, - self.project_type(expr), - )), + return Ok(expect); } + + let tpe = self.project_type(expr); + + Err(AnalysisError::TypeMismatch( + node.attrs.pos.line, + node.attrs.pos.col, + display_type(self.arena, expect), + display_type(self.arena, tpe), + )) } - Value::Access(_) => Ok(self.analyze_access(node.attrs, node.node_ref, expect)?), + Value::Access(_) => Ok(self.analyze_access(node.attrs, expr, expect)?), Value::App(app) => { - if let Some(tpe) = self.options.default_scope.entries.get(app.func.as_str()) + if let Some(tpe) = self + .options + .default_scope + .entries + .get(self.arena.strings.get(app.func)) + .copied() && let Type::App { args, result, aggregate, } = tpe { - if !args.match_arg_count(app.args.len()) { + let args_actual_len = self.arena.exprs.vec(app.args).len(); + let args_decl_len = self.arena.types.get_args(args.values).len(); + + if !(args_actual_len >= args.needed && args_actual_len <= args_decl_len) { return Err(AnalysisError::FunWrongArgumentCount( node.attrs.pos.line, node.attrs.pos.col, - app.func.clone(), + self.arena.strings.get(app.func).to_owned(), )); } - if *aggregate && !ctx.allow_agg_func { + if aggregate && !ctx.allow_agg_func { return Err(AnalysisError::WrongAggFunUsage( node.attrs.pos.line, node.attrs.pos.col, - app.func.clone(), + self.arena.strings.get(app.func).to_owned(), )); } - if *aggregate && ctx.allow_agg_func { + if aggregate && ctx.allow_agg_func { ctx.use_agg_funcs = true; } - for (arg, tpe) in app.args.iter().copied().zip(args.values.iter().cloned()) { + let arg_types = self.arena.types.args_idxes(args.values); + let args_idxes = self.arena.exprs.vec_idxes(app.args); + for (val_idx, tpe_idx) in args_idxes.zip(arg_types) { + let arg = self.arena.exprs.vec_get(app.args, val_idx); + let tpe = self.arena.types.args_get(args.values, tpe_idx); + self.analyze_expr(ctx, arg, tpe)?; } if matches!(expect, Type::Unspecified) { - Ok(result.as_ref().clone()) + Ok(self.arena.types.get_type(result)) } else { - expect.check(node.attrs, result.as_ref().clone()) + self.arena + .type_check(node.attrs, expect, self.arena.types.get_type(result)) } } else { Err(AnalysisError::FuncUndeclared( node.attrs.pos.line, node.attrs.pos.col, - app.func.clone(), + self.arena.strings.get(app.func).to_owned(), )) } } @@ -1387,7 +1092,7 @@ impl<'a> Analysis<'a> { Operator::Add | Operator::Sub | Operator::Mul | Operator::Div => { self.analyze_expr(ctx, binary.lhs, Type::Number)?; self.analyze_expr(ctx, binary.rhs, Type::Number)?; - expect.check(node.attrs, Type::Number) + self.arena.type_check(node.attrs, expect, Type::Number) } Operator::Eq @@ -1397,7 +1102,7 @@ impl<'a> Analysis<'a> { | Operator::Gt | Operator::Gte => { let lhs_expect = self.analyze_expr(ctx, binary.lhs, Type::Unspecified)?; - let rhs_expect = self.analyze_expr(ctx, binary.rhs, lhs_expect.clone())?; + let rhs_expect = self.analyze_expr(ctx, binary.rhs, lhs_expect)?; // If the left side didn't have enough type information while the other did, // we replay another typecheck pass on the left side if the right side was conclusive @@ -1407,58 +1112,55 @@ impl<'a> Analysis<'a> { self.analyze_expr(ctx, binary.lhs, rhs_expect)?; } - expect.check(node.attrs, Type::Bool) + self.arena.type_check(node.attrs, expect, Type::Bool) } Operator::Contains => { - let lhs_expect = self.analyze_expr( - ctx, - binary.lhs, - Type::Array(Box::new(Type::Unspecified)), - )?; + let new_expect = self.arena.types.alloc_array_of(Type::Unspecified); + let lhs_expect = self.analyze_expr(ctx, binary.lhs, new_expect)?; let lhs_assumption = match lhs_expect { - Type::Array(inner) => *inner, + Type::Array(inner) => self.arena.types.get_type(inner), other => { return Err(AnalysisError::ExpectArray( node.attrs.pos.line, node.attrs.pos.col, - other, + display_type(self.arena, other), )); } }; - let rhs_expect = self.analyze_expr(ctx, binary.rhs, lhs_assumption.clone())?; + let rhs_expect = self.analyze_expr(ctx, binary.rhs, lhs_assumption)?; // If the left side didn't have enough type information while the other did, // we replay another typecheck pass on the left side if the right side was conclusive if matches!(lhs_assumption, Type::Unspecified) && !matches!(rhs_expect, Type::Unspecified) { - self.analyze_expr(ctx, binary.lhs, Type::Array(Box::new(rhs_expect)))?; + let new_expect = self.arena.types.alloc_array_of(rhs_expect); + self.analyze_expr(ctx, binary.lhs, new_expect)?; } - expect.check(node.attrs, Type::Bool) + self.arena.type_check(node.attrs, expect, Type::Bool) } Operator::And | Operator::Or | Operator::Xor => { self.analyze_expr(ctx, binary.lhs, Type::Bool)?; self.analyze_expr(ctx, binary.rhs, Type::Bool)?; - - expect.check(node.attrs, Type::Bool) + self.arena.type_check(node.attrs, expect, Type::Bool) } Operator::As => { - let rhs = self.arena.get(binary.rhs); + let rhs = self.arena.exprs.get(binary.rhs); if let Value::Id(name) = rhs.value { - return if let Some(tpe) = name_to_type(self.options, name) { + return if let Some(tpe) = name_to_type(self.arena, self.options, name) { // NOTE - we could check if it's safe to convert the left branch to that type Ok(tpe) } else { Err(AnalysisError::UnsupportedCustomType( rhs.attrs.pos.line, rhs.attrs.pos.col, - name.clone(), + self.arena.strings.get(name).to_owned(), )) }; } @@ -1474,18 +1176,18 @@ impl<'a> Analysis<'a> { Value::Unary(unary) => match unary.operator { Operator::Add | Operator::Sub => { self.analyze_expr(ctx, unary.expr, Type::Number)?; - expect.check(node.attrs, Type::Number) + self.arena.type_check(node.attrs, expect, Type::Number) } Operator::Not => { self.analyze_expr(ctx, unary.expr, Type::Bool)?; - expect.check(node.attrs, Type::Bool) + self.arena.type_check(node.attrs, expect, Type::Bool) } _ => unreachable!(), }, - Value::Group(expr) => Ok(self.analyze_expr(ctx, *expr, expect)?), + Value::Group(expr) => Ok(self.analyze_expr(ctx, expr, expect)?), } } @@ -1495,15 +1197,15 @@ impl<'a> Analysis<'a> { access: ExprRef, expect: Type, ) -> AnalysisResult { - struct State { + struct State { depth: u8, /// When true means we are into dynamically type object. dynamic: bool, - definition: Def, + definition: Def, } - impl State { - fn new(definition: Def) -> Self { + impl State { + fn new(definition: Def) -> Self { Self { depth: 0, dynamic: false, @@ -1512,29 +1214,74 @@ impl<'a> Analysis<'a> { } } - enum Def { - User(A), - System(B), + #[derive(Copy, Clone)] + struct Parent { + record: Record, + field: Option, + } + + enum Def { + User { parent: Parent, tpe: Type }, + System(Type), } - fn go<'a>( - scope: &'a mut Scope, - arena: &'a ExprArena, - sys: &'a AnalysisOptions, + fn go<'global>( + scope: &mut Scope, + arena: &'global mut Arena, + sys: &'global AnalysisOptions, expr: ExprRef, - ) -> AnalysisResult> { - let node = arena.get(expr); + ) -> AnalysisResult { + let node = arena.exprs.get(expr); match node.value { Value::Id(id) => { - if let Some(tpe) = sys.default_scope.entries.get(id.as_str()) { - Ok(State::new(Def::System(tpe))) - } else if let Some(tpe) = scope.entries.get_mut(id.as_str()) { - Ok(State::new(Def::User(tpe))) + if let Some(tpe) = sys + .default_scope + .entries + .get(arena.strings.get(id)) + .copied() + { + if matches!(tpe, Type::Record(_)) { + Ok(State::new(Def::System(tpe))) + } else { + Err(AnalysisError::ExpectRecord( + node.attrs.pos.line, + node.attrs.pos.col, + display_type(arena, tpe), + )) + } + } else if let Some(tpe) = scope.entries.get(arena.strings.get(id)).copied() { + if matches!(tpe, Type::Unspecified) { + let record = arena.types.instantiate_record(); + scope + .entries + .insert(arena.strings.get(id), Type::Record(record)); + Ok(State::new(Def::User { + parent: Parent { + record, + field: None, + }, + tpe: Type::Record(record), + })) + } else if let Type::Record(record) = tpe { + Ok(State::new(Def::User { + parent: Parent { + record, + field: None, + }, + tpe, + })) + } else { + Err(AnalysisError::ExpectRecord( + node.attrs.pos.line, + node.attrs.pos.col, + display_type(arena, tpe), + )) + } } else { Err(AnalysisError::VariableUndeclared( node.attrs.pos.line, node.attrs.pos.col, - id.clone(), + arena.strings.get(id).to_owned(), )) } } @@ -1542,7 +1289,8 @@ impl<'a> Analysis<'a> { let mut state = go(scope, arena, sys, access.target)?; // TODO - we should consider make that field and depth configurable. - let is_data_field = state.depth == 0 && access.field == "data"; + let is_data_field = + state.depth == 0 && arena.strings.get(access.field) == "data"; // TODO - we should consider make that behavior configurable. // the `data` property is where the JSON payload is located, which means @@ -1552,57 +1300,81 @@ impl<'a> Analysis<'a> { } match state.definition { - Def::User(tpe) => { + Def::User { parent, tpe } => { if matches!(tpe, Type::Unspecified) && state.dynamic { - *tpe = Type::Record(FxHashMap::from_iter([( - access.field.clone(), - Type::Unspecified, - )])); + let record = arena.types.instantiate_record(); + arena + .types + .record_set(record, access.field, Type::Unspecified); + + // TODO - this is impossible. Should return a proper error instead of panicking + if let Some(field) = parent.field { + arena.types.record_set( + parent.record, + field, + Type::Record(record), + ); + } + return Ok(State { depth: state.depth + 1, - definition: Def::User( - tpe.as_record_or_panic_mut() - .get_mut(access.field.as_str()) - .unwrap(), - ), + definition: Def::User { + parent: Parent { + record, + field: Some(access.field), + }, + tpe: Type::Unspecified, + }, ..state }); - } - - if let Type::Record(fields) = tpe { - return match fields.entry(access.field.clone()) { - Entry::Vacant(entry) => { - if state.dynamic || is_data_field { - return Ok(State { - depth: state.depth + 1, - definition: Def::User( - entry.insert(Type::Unspecified), - ), - ..state - }); - } - - Err(AnalysisError::FieldUndeclared( - node.attrs.pos.line, - node.attrs.pos.col, - access.field.clone(), - )) - } - - Entry::Occupied(entry) => { + } else if let Type::Record(record) = tpe { + return if let Some(tpe) = + arena.types.record_get(record, access.field) + { + Ok(State { + depth: state.depth + 1, + definition: Def::User { + parent: Parent { + record, + field: Some(access.field), + }, + tpe, + }, + ..state + }) + } else { + // TODO - that test seems useless because it can't be the data field and not be dynamic + if state.dynamic || is_data_field { + arena.types.record_set( + record, + access.field, + Type::Unspecified, + ); return Ok(State { depth: state.depth + 1, - definition: Def::User(entry.into_mut()), + definition: Def::User { + parent: Parent { + record, + field: Some(access.field), + }, + tpe: Type::Unspecified, + }, ..state }); } + + Err(AnalysisError::FieldUndeclared( + node.attrs.pos.line, + node.attrs.pos.col, + arena.strings.get(access.field).to_owned(), + )) }; } Err(AnalysisError::ExpectRecord( node.attrs.pos.line, node.attrs.pos.col, - tpe.clone(), + display_type(arena, tpe), )) } @@ -1610,13 +1382,13 @@ impl<'a> Analysis<'a> { if matches!(tpe, Type::Unspecified) && state.dynamic { return Ok(State { depth: state.depth + 1, - definition: Def::System(&Type::Unspecified), + definition: Def::System(Type::Unspecified), ..state }); } - if let Type::Record(fields) = tpe { - if let Some(field) = fields.get(access.field.as_str()) { + if let Type::Record(rec) = tpe { + if let Some(field) = arena.types.record_get(rec, access.field) { return Ok(State { depth: state.depth + 1, definition: Def::System(field), @@ -1627,14 +1399,14 @@ impl<'a> Analysis<'a> { return Err(AnalysisError::FieldUndeclared( node.attrs.pos.line, node.attrs.pos.col, - access.field.clone(), + arena.strings.get(access.field).to_owned(), )); } Err(AnalysisError::ExpectRecord( node.attrs.pos.line, node.attrs.pos.col, - tpe.clone(), + display_type(arena, tpe), )) } } @@ -1654,31 +1426,42 @@ impl<'a> Analysis<'a> { let state = go(&mut self.scope, self.arena, self.options, access)?; match state.definition { - Def::User(tpe) => { - let tmp = mem::take(tpe); - *tpe = tmp.check(attrs, expect)?; + Def::User { parent, tpe } => { + let new_tpe = self.arena.type_check(attrs, tpe, expect)?; - Ok(tpe.clone()) + if let Some(field) = parent.field { + self.arena.types.record_set(parent.record, field, new_tpe); + } + + Ok(new_tpe) } - Def::System(tpe) => tpe.clone().check(attrs, expect), + Def::System(tpe) => self.arena.type_check(attrs, tpe, expect), } } - fn projection_type(&self, query: &Query) -> Type { + fn projection_type(&mut self, query: &Query) -> Type { self.project_type(query.projection) } - fn project_type(&self, node: ExprRef) -> Type { - match self.arena.get(node).value { + fn project_type(&mut self, node: ExprRef) -> Type { + match self.arena.exprs.get(node).value { Value::Number(_) => Type::Number, Value::String(_) => Type::String, Value::Bool(_) => Type::Bool, Value::Id(id) => { - if let Some(tpe) = self.options.default_scope.entries.get(id.as_str()) { - tpe.clone() - } else if let Some(tpe) = self.scope.entries.get(id.as_str()) { - tpe.clone() + if let Some(tpe) = self + .options + .default_scope + .entries + .get(self.arena.strings.get(id)) + .copied() + { + tpe + } else if let Some(tpe) = + self.scope.entries.get(self.arena.strings.get(id)).copied() + { + tpe } else { Type::Unspecified } @@ -1686,7 +1469,8 @@ impl<'a> Analysis<'a> { Value::Array(exprs) => { let mut project = Type::Unspecified; - for expr in exprs.iter().copied() { + for idx in self.arena.exprs.vec_idxes(exprs) { + let expr = self.arena.exprs.vec_get(exprs, idx); let tmp = self.project_type(expr); if !matches!(tmp, Type::Unspecified) { @@ -1695,20 +1479,25 @@ impl<'a> Analysis<'a> { } } - Type::Array(Box::new(project)) + self.arena.types.alloc_array_of(project) + } + Value::Record(fields) => { + let mut props = FxHashMap::default(); + + for idx in self.arena.exprs.rec_idxes(fields) { + let field = self.arena.exprs.rec_get(fields, idx); + let tpe = self.project_type(field.expr); + props.insert(field.name, tpe); + } + + Type::Record(self.arena.types.alloc_record(props)) } - Value::Record(fields) => Type::Record( - fields - .iter() - .map(|field| (field.name.clone(), self.project_type(field.expr))) - .collect(), - ), Value::Access(access) => { let tpe = self.project_type(access.target); - if let Type::Record(fields) = tpe { - fields - .get(access.field.as_str()) - .cloned() + if let Type::Record(record) = tpe { + self.arena + .types + .record_get(record, access.field) .unwrap_or_default() } else { Type::Unspecified @@ -1718,14 +1507,14 @@ impl<'a> Analysis<'a> { .options .default_scope .entries - .get(app.func.as_str()) - .cloned() + .get(self.arena.strings.get(app.func)) + .copied() .unwrap_or_default(), Value::Binary(binary) => match binary.operator { Operator::Add | Operator::Sub | Operator::Mul | Operator::Div => Type::Number, Operator::As => { - if let Value::Id(n) = self.arena.get(binary.rhs).value - && let Some(tpe) = name_to_type(self.options, n.as_str()) + if let Value::Id(n) = self.arena.exprs.get(binary.rhs).value + && let Some(tpe) = name_to_type(self.arena, self.options, n) { tpe } else { @@ -1761,7 +1550,125 @@ impl<'a> Analysis<'a> { | Operator::Contains | Operator::As => unreachable!(), }, - Value::Group(expr) => self.project_type(*expr), + Value::Group(expr) => self.project_type(expr), + } + } +} + +impl Arena { + /// Checks if two types are the same. + /// + /// * If `this` is `Type::Unspecified` then `self` is updated to the more specific `Type`. + /// * If `this` is `Type::Subject` and is checked against a `Type::String` then `self` is updated to `Type::String` + fn type_check(&mut self, attrs: Attrs, this: Type, other: Type) -> Result { + match (this, other) { + (Type::Unspecified, other) => Ok(other), + (this, Type::Unspecified) => Ok(this), + (Type::Subject, Type::Subject) => Ok(Type::Subject), + + // Subjects are strings so there is no reason to reject a type + // when compared to a string. However, when it happens, we demote + // a subject to a string. + (Type::Subject, Type::String) => Ok(Type::String), + (Type::String, Type::Subject) => Ok(Type::String), + + (Type::Number, Type::Number) => Ok(Type::Number), + (Type::String, Type::String) => Ok(Type::String), + (Type::Bool, Type::Bool) => Ok(Type::Bool), + (Type::Date, Type::Date) => Ok(Type::Date), + (Type::Time, Type::Time) => Ok(Type::Time), + (Type::DateTime, Type::DateTime) => Ok(Type::DateTime), + + // `DateTime` can be implicitly cast to `Date` or `Time` + (Type::DateTime, Type::Date) => Ok(Type::Date), + (Type::Date, Type::DateTime) => Ok(Type::Date), + (Type::DateTime, Type::Time) => Ok(Type::Time), + (Type::Time, Type::DateTime) => Ok(Type::Time), + (Type::Custom(a), Type::Custom(b)) if self.strings.eq_ignore_ascii_case(a, b) => { + Ok(Type::Custom(a)) + } + (Type::Array(a), Type::Array(b)) => { + let a = self.types.get_type(a); + let b = self.types.get_type(b); + let tpe = self.type_check(attrs, a, b)?; + + Ok(self.types.alloc_array_of(tpe)) + } + + (Type::Record(a), Type::Record(b)) if self.types.records_have_same_keys(a, b) => { + let mut map_a = mem::take(&mut self.types.records[a.0]); + let mut map_b = mem::take(&mut self.types.records[b.0]); + + for (bk, bv) in map_b.iter_mut() { + let av = map_a.get_mut(bk).unwrap(); + let new_tpe = self.type_check(attrs, *av, *bv)?; + + *av = new_tpe; + *bv = new_tpe; + } + + self.types.records[a.0] = map_a; + self.types.records[b.0] = map_b; + + Ok(Type::Record(a)) + } + + ( + Type::App { + args: a_args, + result: a_res, + aggregate: a_agg, + }, + Type::App { + args: b_args, + result: b_res, + aggregate: b_agg, + }, + ) if self.types.get_args(a_args.values).len() + == self.types.get_args(b_args.values).len() + && a_agg == b_agg => + { + if self.types.get_args(a_args.values).is_empty() { + let a = self.types.get_type(a_res); + let b = self.types.get_type(b_res); + let new_res = self.type_check(attrs, a, b)?; + + return Ok(Type::App { + args: a_args, + result: self.types.register_type(new_res), + aggregate: a_agg, + }); + } + + let mut vec_a = mem::take(&mut self.types.args[a_args.values.0]); + let mut vec_b = mem::take(&mut self.types.args[b_args.values.0]); + + for (a, b) in vec_a.iter_mut().zip(vec_b.iter_mut()) { + let new_tpe = self.type_check(attrs, *a, *b)?; + *a = new_tpe; + *b = new_tpe; + } + + self.types.args[a_args.values.0] = vec_a; + self.types.args[b_args.values.0] = vec_b; + + let res_a = self.types.get_type(a_res); + let res_b = self.types.get_type(b_res); + let new_tpe = self.type_check(attrs, res_a, res_b)?; + + Ok(Type::App { + args: a_args, + result: self.types.register_type(new_tpe), + aggregate: a_agg, + }) + } + + (this, other) => Err(AnalysisError::TypeMismatch( + attrs.pos.line, + attrs.pos.col, + display_type(self, this), + display_type(self, other), + )), } } } @@ -1785,19 +1692,13 @@ impl<'a> Analysis<'a> { /// - `"date"` → [`Type::Date`] /// - `"time"` → [`Type::Time`] /// - `"datetime"` → [`Type::DateTime`] -/// -/// # Examples -/// -/// ``` -/// use eventql_parser::Type; -/// use eventql_parser::prelude::{AnalysisOptions, name_to_type}; -/// -/// let opts = AnalysisOptions::default(); -/// assert!(matches!(name_to_type(&opts, "String"), Some(Type::String))); -/// assert!(matches!(name_to_type(&opts, "INT"), Some(Type::Number))); -/// assert!(name_to_type(&opts, "unknown").is_none()); -/// ``` -pub fn name_to_type(opts: &AnalysisOptions, name: &str) -> Option { +pub(crate) fn name_to_type( + arena: &Arena, + opts: &AnalysisOptions, + name_ref: StrRef, +) -> Option { + let name = arena.strings.get(name_ref); + if name.eq_ignore_ascii_case("string") { Some(Type::String) } else if name.eq_ignore_ascii_case("int") || name.eq_ignore_ascii_case("float64") { @@ -1812,8 +1713,85 @@ pub fn name_to_type(opts: &AnalysisOptions, name: &str) -> Option { Some(Type::DateTime) } else if opts.custom_types.contains(&Ascii::new(name.to_owned())) { // ^ Sad we have to allocate here for no reason - Some(Type::Custom(name.to_owned())) + + Some(Type::Custom(name_ref)) } else { None } } + +pub(crate) fn display_type(arena: &Arena, tpe: Type) -> String { + fn go(buffer: &mut String, arena: &Arena, tpe: Type) { + match tpe { + Type::Unspecified => buffer.push_str("Any"), + Type::Number => buffer.push_str("Number"), + Type::String => buffer.push_str("String"), + Type::Bool => buffer.push_str("Bool"), + Type::Subject => buffer.push_str("Subject"), + Type::Date => buffer.push_str("Date"), + Type::Time => buffer.push_str("Time"), + Type::DateTime => buffer.push_str("DateTime"), + Type::Custom(n) => buffer.push_str(arena.strings.get(n)), + + Type::Array(tpe) => { + buffer.push_str("[]"); + go(buffer, arena, arena.types.get_type(tpe)); + } + + Type::Record(map) => { + let map = arena.types.get_record(map); + + buffer.push_str("{ "); + + for (idx, (name, value)) in map.iter().enumerate() { + if idx != 0 { + buffer.push_str(", "); + } + + buffer.push_str(arena.strings.get(*name)); + buffer.push_str(": "); + + go(buffer, arena, *value); + } + + buffer.push_str(" }"); + } + + Type::App { + args, + result, + aggregate, + } => { + let fun_args = arena.types.get_args(args.values); + buffer.push('('); + + for (idx, arg) in fun_args.iter().copied().enumerate() { + if idx != 0 { + buffer.push_str(", "); + } + + go(buffer, arena, arg); + + if idx + 1 > args.needed { + buffer.push('?'); + } + } + + buffer.push(')'); + + if aggregate { + buffer.push_str(" => "); + } else { + buffer.push_str(" -> "); + } + + go(buffer, arena, arena.types.get_type(result)); + } + } + } + + let mut buffer = String::new(); + go(&mut buffer, arena, tpe); + + buffer +} diff --git a/src/typing/mod.rs b/src/typing/mod.rs new file mode 100644 index 0000000..4305319 --- /dev/null +++ b/src/typing/mod.rs @@ -0,0 +1,104 @@ +use crate::StrRef; +use serde::Serialize; + +pub mod analysis; + +/// A reference to a type stored in the [`TypeArena`](crate::arena::TypeArena). +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize)] +pub struct TypeRef(pub(crate) usize); + +/// A reference to a record definition stored in the [`TypeArena`](crate::arena::TypeArena). +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize)] +pub struct Record(pub(crate) usize); + +/// A reference to a function argument type list stored in the [`TypeArena`](crate::arena::TypeArena). +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize)] +pub struct ArgsRef(pub(crate) usize); +/// Represents function argument types with optional parameter support. +/// +/// This type allows defining functions that have both required and optional parameters. +/// The `needed` field specifies how many arguments are required, while `values` contains +/// all possible argument types (both required and optional). +/// +#[derive(Debug, Serialize, PartialEq, Eq, Hash, Clone, Copy)] +pub struct FunArgs { + /// All argument types, including both required and optional parameters + pub values: ArgsRef, + /// Number of required arguments (must be <= values.len()) + pub needed: usize, +} + +/// Type information for expressions. +/// +/// This enum represents the type of expressions in the EventQL type system. +/// Types can be inferred during semantic analysis or left as `Unspecified`. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, Serialize)] +pub enum Type { + /// Type has not been determined yet + #[default] + Unspecified, + /// Numeric type (f64) + Number, + /// String type + String, + /// Boolean type + Bool, + /// Array type + Array(TypeRef), + /// Record (object) type + Record(Record), + /// Subject pattern type + Subject, + /// Function type with support for optional parameters. + /// + /// The `args` field uses [`FunArgs`] to support both required and optional parameters. + /// Optional parameters are indicated when `args.needed < args.values.len()`. + App { + /// Function argument types, supporting optional parameters + args: FunArgs, + /// Return type of the function + result: TypeRef, + /// Whether this is an aggregate function (operates on grouped data) + aggregate: bool, + }, + /// Date type (e.g., `2026-01-03`) + /// + /// Used when a field is explicitly converted to a date using the `AS DATE` syntax. + Date, + /// Time type (e.g., `13:45:39`) + /// + /// Used when a field is explicitly converted to a time using the `AS TIME` syntax. + Time, + /// DateTime type (e.g., `2026-01-01T13:45:39Z`) + /// + /// Used when a field is explicitly converted to a datetime using the `AS DATETIME` syntax. + DateTime, + /// Custom type not defined in the EventQL reference + /// + /// Used when a field is converted to a custom type registered in the analysis options. + /// The string contains the custom type name as it appears in the query. + /// + /// # Examples + /// + /// ``` + /// use eventql_parser::Session; + /// + /// let mut session = Session::builder() + /// .declare_custom_type("CustomTimestamp") + /// .build(); + /// let query = session.parse("FROM e IN events PROJECT INTO { ts: e.data.timestamp as CustomTimestamp }").unwrap(); + /// let typed_query = session.run_static_analysis(query).unwrap(); + /// ``` + Custom(StrRef), +} + +impl Type { + /// Returns the inner [`Record`] reference, panicking if this is not a `Type::Record`. + pub fn as_record_or_panic(&self) -> Record { + if let Self::Record(r) = self { + return *r; + } + + panic!("expected record type, got {:?}", self); + } +}