Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
b1e7e58
feat(string): implement RopeString with thin vtable and lazy flattening
akash-R-A-J Mar 11, 2026
10e9fce
fix: cross-platform path resolution and clippy warnings
akash-R-A-J Mar 12, 2026
e1c8c53
docs: fix broken intra-doc links in boa_string
akash-R-A-J Mar 12, 2026
8c117b4
feat(string): implement robust rope rebalancing and fix memory safety…
akash-R-A-J Mar 12, 2026
5bafd8e
fix(string): resolve seg fault, memory leak, and recursion limits
akash-R-A-J Mar 13, 2026
388c014
refactor(string): refine rope implementation and cleanup vtable integ…
akash-R-A-J Mar 14, 2026
6145e31
fix(string): escape brackets in rope doc comments
akash-R-A-J Mar 14, 2026
c12a38e
perf(string): switch OnceLock to OnceCell
akash-R-A-J Mar 14, 2026
6024453
fix(string): revert OnceCell to OnceLock due to reentrancy panic
akash-R-A-J Mar 14, 2026
3b0d198
fix(string): resolve rope SIGSEGV via raw-buffer cache
akash-R-A-J Mar 15, 2026
0d31e83
fix(string): ensure 32-bit compatibility for rope thresholds
akash-R-A-J Mar 15, 2026
b8cdbd0
refactor(string): unify JsStr as enum and support unflattened ropes
akash-R-A-J Mar 15, 2026
76acd1e
perf(string): optimize rope performance with leaf sinking
akash-R-A-J Mar 16, 2026
951b4da
docs: fix rustdoc intra-doc links for private items
akash-R-A-J Mar 16, 2026
fe779dc
fix(string): stabilize rope implementation and address edge cases
akash-R-A-J Mar 17, 2026
a0b8538
Merge branch 'main' into feat/rope-strings
akash-R-A-J Mar 26, 2026
0024baf
refactor(string): maintain rope laziness in date, number and regexp b…
akash-R-A-J Apr 1, 2026
f157f37
Merge branch 'feat/rope-strings' of https://github.com/akash-R-A-J/bo…
akash-R-A-J Apr 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions cli/src/debug/string.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use boa_engine::{
Context, JsNativeError, JsObject, JsResult, JsValue, NativeFunction, js_string,
object::ObjectInitializer, property::Attribute, string::JsStrVariant,
object::ObjectInitializer, property::Attribute, string::JsStr,
};

fn storage(_: &JsValue, args: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
Expand Down Expand Up @@ -34,9 +34,10 @@ fn encoding(_: &JsValue, args: &[JsValue], _: &mut Context) -> JsResult<JsValue>
};

let str = string.as_str();
let encoding = match str.variant() {
JsStrVariant::Latin1(_) => "latin1",
JsStrVariant::Utf16(_) => "utf16",
let encoding = match str {
JsStr::Latin1(_) => "latin1",
JsStr::Utf16(_) => "utf16",
JsStr::Rope(_) => "rope",
};
Ok(js_string!(encoding).into())
}
Expand All @@ -55,9 +56,10 @@ fn summary(_: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsV
};

let storage = if string.is_static() { "static" } else { "heap" };
let encoding = match string.as_str().variant() {
JsStrVariant::Latin1(_) => "latin1",
JsStrVariant::Utf16(_) => "utf16",
let encoding = match string.as_str() {
JsStr::Latin1(_) => "latin1",
JsStr::Utf16(_) => "utf16",
JsStr::Rope(_) => "rope",
};

let summary = ObjectInitializer::new(context)
Expand Down
15 changes: 10 additions & 5 deletions core/engine/src/builtins/date/utils.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::{JsStr, JsString, context::HostHooks, js_string, value::IntegerOrInfinity};
use boa_macros::js_str;
use boa_string::JsStrVariant;
use std::slice::Iter;
use std::str;
use std::{borrow::Cow, iter::Peekable};
Expand Down Expand Up @@ -753,22 +752,28 @@ pub(super) fn pad_six(t: u32, output: &mut [u8; 6]) -> JsStr<'_> {
/// [spec-format]: https://tc39.es/ecma262/#sec-date-time-string-format
pub(super) fn parse_date(date: &JsString, hooks: &dyn HostHooks) -> Option<i64> {
// All characters must be ASCII so we can return early if we find a non-ASCII character.
let owned_js_str = date.as_str();
let date = match owned_js_str.variant() {
JsStrVariant::Latin1(s) => {
let date = match date.as_str() {
JsStr::Latin1(s) => {
if !s.is_ascii() {
return None;
}
// SAFETY: Since all characters are ASCII we can safely convert this into str.
Cow::Borrowed(unsafe { str::from_utf8_unchecked(s) })
}
JsStrVariant::Utf16(s) => {
JsStr::Utf16(s) => {
let date = String::from_utf16(s).ok()?;
if !date.is_ascii() {
return None;
}
Cow::Owned(date)
}
JsStr::Rope(_) => {
let date = date.as_str().to_std_string().ok()?;
if !date.is_ascii() {
return None;
}
Cow::Owned(date)
}
};

// Date Time String Format: 'YYYY-MM-DDTHH:mm:ss.sssZ'
Expand Down
2 changes: 1 addition & 1 deletion core/engine/src/builtins/intl/segmenter/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ impl SegmentIterator {
.downcast_ref::<Segmenter>()
.js_expect("segment iterator object should contain a segmenter")
.ok()?;
let mut segments = segmenter.native.segment(string.variant());
let mut segments = segmenter.native.segment(string.as_str());
// the first elem is always 0.
segments.next();
segments
Expand Down
15 changes: 8 additions & 7 deletions core/engine/src/builtins/intl/segmenter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::{
string::StaticJsStrings,
};
use boa_gc::{Finalize, Trace};
use boa_string::JsStrVariant;
use boa_string::JsStr;
use icu_collator::provider::CollationDiacriticsV1;
use icu_locale::Locale;
use icu_segmenter::{
Expand Down Expand Up @@ -63,12 +63,9 @@ impl NativeSegmenter {

/// Segment the passed string, returning an iterator with the index boundaries
/// of the segments.
pub(crate) fn segment<'l, 's>(
&'l self,
input: JsStrVariant<'s>,
) -> NativeSegmentIterator<'l, 's> {
pub(crate) fn segment<'l, 's>(&'l self, input: JsStr<'s>) -> NativeSegmentIterator<'l, 's> {
match input {
JsStrVariant::Latin1(input) => match self {
JsStr::Latin1(input) => match self {
Self::Grapheme(g) => {
NativeSegmentIterator::GraphemeLatin1(g.as_borrowed().segment_latin1(input))
}
Expand All @@ -79,7 +76,7 @@ impl NativeSegmenter {
NativeSegmentIterator::SentenceLatin1(s.as_borrowed().segment_latin1(input))
}
},
JsStrVariant::Utf16(input) => match self {
JsStr::Utf16(input) => match self {
Self::Grapheme(g) => {
NativeSegmentIterator::GraphemeUtf16(g.as_borrowed().segment_utf16(input))
}
Expand All @@ -90,6 +87,10 @@ impl NativeSegmenter {
NativeSegmentIterator::SentenceUtf16(s.as_borrowed().segment_utf16(input))
}
},
JsStr::Rope(rope) => {
let flat = boa_string::vtable::rope::flatten_rope(rope.header);
self.segment(flat.as_str())
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion core/engine/src/builtins/intl/segmenter/segments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl Segments {
// 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
// 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
let (range, is_word_like) = {
let mut segments = segmenter.native.segment(segments.string.variant());
let mut segments = segmenter.native.segment(segments.string.as_str());
std::iter::from_fn(|| segments.next().map(|i| (i, segments.is_word_like())))
.tuple_windows()
.find(|((i, _), (j, _))| (*i..*j).contains(&n))
Expand Down
11 changes: 7 additions & 4 deletions core/engine/src/builtins/number/globals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use crate::{
};

use boa_macros::js_str;
use boa_string::JsStrVariant;

/// Builtin javascript 'isFinite(number)' function.
///
Expand Down Expand Up @@ -359,13 +358,17 @@ pub(crate) fn parse_float(
return Ok(JsValue::nan());
}

let value = match trimmed_string.variant() {
JsStrVariant::Latin1(s) => fast_float2::parse_partial::<f64, _>(s),
JsStrVariant::Utf16(s) => {
let value = match input_string.as_str() {
JsStr::Latin1(s) => fast_float2::parse_partial::<f64, _>(s),
JsStr::Utf16(s) => {
// TODO: Explore adding direct UTF-16 parsing support to fast_float2.
let s = String::from_utf16_lossy(s);
fast_float2::parse_partial::<f64, _>(s.as_bytes())
}
JsStr::Rope(_) => {
let s = input_string.as_str().to_std_string_lossy();
fast_float2::parse_partial::<f64, _>(s.as_bytes())
}
};

Ok(value.map_or_else(
Expand Down
26 changes: 19 additions & 7 deletions core/engine/src/builtins/regexp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use crate::{
object::{CONSTRUCTOR, JsObject, internal_methods::get_prototype_from_constructor},
property::Attribute,
realm::Realm,
string::{CodePoint, CommonJsStringBuilder, JsStrVariant, StaticJsStrings},
string::{CodePoint, CommonJsStringBuilder, JsStr, StaticJsStrings},
symbol::JsSymbol,
value::JsValue,
};
Expand Down Expand Up @@ -1157,20 +1157,32 @@ impl RegExp {

// 13.b. Let inputIndex be the index into input of the character that was obtained from element lastIndex of S.
// 13.c. Let r be matcher(input, inputIndex).
let r: Option<regress::Match> = match (full_unicode, input.as_str().variant()) {
(true | false, JsStrVariant::Latin1(_)) => {
let r: Option<regress::Match> = match (full_unicode, input.as_str()) {
(true | false, JsStr::Latin1(input)) => {
// TODO: Currently regress does not support latin1 encoding.
let input = input.to_vec();

let input = input.iter().map(|&b| u16::from(b)).collect::<Vec<u16>>();
// NOTE: We can use the faster ucs2 variant since there will never be two byte unicode.
matcher.find_from_ucs2(&input, last_index as usize).next()
}
(true, JsStrVariant::Utf16(input)) => {
(true, JsStr::Utf16(input)) => {
matcher.find_from_utf16(input, last_index as usize).next()
}
(false, JsStrVariant::Utf16(input)) => {
(false, JsStr::Utf16(input)) => {
matcher.find_from_ucs2(input, last_index as usize).next()
}
(_, JsStr::Rope(_)) => {
// Collect rope into Vec<u16> via iterator (same pattern as Latin1)
let input_vec: Vec<u16> = input.iter().collect();
if full_unicode {
matcher
.find_from_utf16(&input_vec, last_index as usize)
.next()
} else {
matcher
.find_from_ucs2(&input_vec, last_index as usize)
.next()
}
}
};

let Some(match_value) = r else {
Expand Down
7 changes: 4 additions & 3 deletions core/engine/src/builtins/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -653,18 +653,19 @@ impl String {
let this = this.require_object_coercible()?;

// 2. Let S be ? ToString(O).
let mut string = this.to_string(context)?;
let mut strings = Vec::with_capacity(args.len() + 1);
strings.push(this.to_string(context)?);

// 3. Let R be S.
// 4. For each element next of args, do
for arg in args {
// a. Let nextString be ? ToString(next).
// b. Set R to the string-concatenation of R and nextString.
string = js_string!(&string, &arg.to_string(context)?);
strings.push(arg.to_string(context)?);
}

// 5. Return R.
Ok(JsValue::new(string))
Ok(JsValue::new(JsString::concat_array_strings(&strings)))
}

/// `String.prototype.repeat( count )`
Expand Down
4 changes: 3 additions & 1 deletion core/engine/src/module/loader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ use std::cell::RefCell;
use std::path::{Component, Path, PathBuf};
use std::rc::Rc;

#[cfg(target_family = "windows")]
use cow_utils::CowUtils;
use dynify::dynify;
use rustc_hash::FxHashMap;

Expand Down Expand Up @@ -62,7 +64,7 @@ pub fn resolve_module_specifier(

// On Windows, also replace `/` with `\`. JavaScript imports use `/` as path separator.
#[cfg(target_family = "windows")]
let specifier = cow_utils::CowUtils::cow_replace(&*specifier, '/', "\\");
let specifier = specifier.cow_replace('/', "\\");

let short_path = Path::new(&*specifier);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this change have to do with rope strings?


Expand Down
10 changes: 5 additions & 5 deletions core/engine/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ macro_rules! js_string {
$crate::string::JsString::from($s)
};
( $x:expr, $y:expr ) => {
$crate::string::JsString::concat($crate::string::JsStr::from($x), $crate::string::JsStr::from($y))
$crate::string::JsString::concat(&$crate::string::JsString::from($x), &$crate::string::JsString::from($y))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Recursively calling `js_string!` here has the benefit of creating a static string instead of two dynamic strings (e.g. `js_string!(s, ";")`).

Suggested change
$crate::string::JsString::concat(&$crate::string::JsString::from($x), &$crate::string::JsString::from($y))
$crate::string::JsString::concat(&$crate::js_string!($x), &$crate::js_string!($y))

};
( $( $s:expr ),+ ) => {
$crate::string::JsString::concat_array(&[ $( $crate::string::JsStr::from($s) ),+ ])
$crate::string::JsString::concat_array_strings(&[ $( $crate::string::JsString::from($s) ),+ ])
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
$crate::string::JsString::concat_array_strings(&[ $( $crate::string::JsString::from($s) ),+ ])
$crate::string::JsString::concat_array_strings(&[ $( $crate::js_string!($s) ),+ ])

};
}

Expand Down Expand Up @@ -166,15 +166,15 @@ mod tests {
let x = js_string!("hello");
let z = js_string!("world");

let xy = js_string!(&x, &JsString::from(Y));
let xy = js_string!(x.clone(), JsString::from(Y));
assert_eq!(&xy, utf16!("hello, "));
assert_eq!(xy.refcount(), Some(1));

let xyz = js_string!(&xy, &z);
let xyz = js_string!(xy.clone(), z.clone());
assert_eq!(&xyz, utf16!("hello, world"));
assert_eq!(xyz.refcount(), Some(1));

let xyzw = js_string!(&xyz, &JsString::from(W));
let xyzw = js_string!(xyz.clone(), JsString::from(W));
assert_eq!(&xyzw, utf16!("hello, world!"));
assert_eq!(xyzw.refcount(), Some(1));
}
Expand Down
2 changes: 1 addition & 1 deletion core/engine/src/vm/opcode/concat/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ impl ConcatToString {
let val = context.vm.get_register(value.into()).clone();
strings.push(val.to_string(context)?);
}
let s = JsString::concat_array(&strings.iter().map(JsString::as_str).collect::<Vec<_>>());
let s = JsString::concat_array_strings(&strings);
context.vm.set_register(string.into(), s.into());
Ok(())
}
Expand Down
18 changes: 18 additions & 0 deletions core/runtime/src/text/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,17 @@ pub(crate) mod utf8 {
}

pub(crate) mod utf16le {
use boa_engine::string::JsStr;
use boa_engine::{JsString, js_string};

/// Encodes `input` as UTF-16LE: every code unit becomes two bytes, low byte first.
///
/// The output byte order is little-endian regardless of the host's endianness.
// NOTE(review): presumably not called from anywhere yet — confirm, and drop the
// `allow` once a caller exists.
#[allow(dead_code)]
pub(crate) fn encode(input: &JsString) -> Vec<u8> {
    match input.as_str() {
        // Each Latin-1 byte widens to a code unit whose high byte is zero.
        JsStr::Latin1(l) => l.iter().flat_map(|c| [*c, 0]).collect(),
        // Convert unit by unit instead of reinterpreting the `u16` buffer as bytes:
        // a raw cast yields native-endian bytes, which is wrong on big-endian hosts.
        JsStr::Utf16(s) => s.iter().flat_map(|c| c.to_le_bytes()).collect(),
        // Ropes have no contiguous buffer to borrow here, so walk the code units
        // of the whole string.
        JsStr::Rope(_) => input.iter().flat_map(u16::to_le_bytes).collect(),
    }
}
pub(crate) fn decode(mut input: &[u8], strip_bom: bool) -> JsString {
if strip_bom {
input = input.strip_prefix(&[0xFF, 0xFE]).unwrap_or(input);
Expand All @@ -48,8 +57,17 @@ pub(crate) mod utf16le {
}

pub(crate) mod utf16be {
use boa_engine::string::JsStr;
use boa_engine::{JsString, js_string};

/// Encodes `input` as UTF-16BE: every code unit becomes two bytes, high byte first.
#[allow(dead_code)]
pub(crate) fn encode(input: &JsString) -> Vec<u8> {
    match input.as_str() {
        // Latin-1 bytes widen to code units with a zero high byte, emitted first.
        JsStr::Latin1(bytes) => bytes.iter().copied().flat_map(|byte| [0, byte]).collect(),
        // Flat UTF-16 storage: serialise each unit big-endian.
        JsStr::Utf16(units) => units.iter().copied().flat_map(u16::to_be_bytes).collect(),
        // Rope storage: walk the code units of the whole string.
        JsStr::Rope(_) => input.iter().flat_map(|unit| unit.to_be_bytes()).collect(),
    }
}
pub(crate) fn decode(mut input: Vec<u8>, strip_bom: bool) -> JsString {
if strip_bom && input.starts_with(&[0xFE, 0xFF]) {
input.drain(..2);
Expand Down
Loading
Loading