Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 14 additions & 21 deletions tools/grammar/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,14 @@ pub enum ExpressionKind {
/// `// Single line comment.`
Comment(String),
/// ``[`A`-`Z` `_` LF]``
Charset(Vec<Characters>),
///
/// This should only contain expressions that are valid inside brackets
/// (`Terminal`, `Nt`, and `CharacterRange`).
Charset(Vec<Expression>),
/// `` `A`-`Z` `` used in a character set.
///
/// This should only appear inside a `Charset`.
CharacterRange(Character, Character),
/// ``~[` ` LF]``
NegExpression(Box<Expression>),
/// `^ A B C`
Expand All @@ -108,16 +115,6 @@ impl Display for RangeLimit {
}
}

/// A single element of a character set, i.e. one item between the brackets
/// in ``[`A`-`Z` `_` LF]``.
#[derive(Clone, Debug)]
pub enum Characters {
/// A bare name, e.g. `LF`.
Named(String),
/// A backtick-quoted terminal, e.g. `` `_` ``.
Terminal(String),
/// A range between two characters, e.g. `` `A`-`Z` ``.
Range(Character, Character),
}

#[derive(Clone, Debug)]
pub enum Character {
Char(char),
Expand Down Expand Up @@ -176,27 +173,23 @@ impl Expression {
| ExpressionKind::Cut(e) => {
e.visit_nt(callback);
}
ExpressionKind::Alt(es) | ExpressionKind::Sequence(es) => {
ExpressionKind::Alt(es)
| ExpressionKind::Sequence(es)
| ExpressionKind::Charset(es) => {
for e in es {
e.visit_nt(callback);
}
}

ExpressionKind::Nt(nt) => {
callback(&nt);
}
ExpressionKind::Terminal(_)
| ExpressionKind::Prose(_)
| ExpressionKind::Break(_)
| ExpressionKind::Comment(_)
| ExpressionKind::Unicode(_) => {}
ExpressionKind::Charset(set) => {
for ch in set {
match ch {
Characters::Named(s) => callback(s),
Characters::Terminal(_) | Characters::Range(_, _) => {}
}
}
}
| ExpressionKind::Unicode(_)
| ExpressionKind::CharacterRange(..) => {}
}
}

Expand Down
42 changes: 21 additions & 21 deletions tools/grammar/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! A parser of the EBNF-like grammar.

use super::{Character, Characters, Expression, ExpressionKind, Grammar, Production, RangeLimit};
use super::{Character, Expression, ExpressionKind, Grammar, Production, RangeLimit};
use std::fmt;
use std::fmt::Display;
use std::path::Path;
Expand Down Expand Up @@ -309,7 +309,7 @@ impl Parser<'_> {
let Some(ch) = self.parse_characters()? else {
break;
};
characters.push(ch);
characters.push(Expression::new_kind(ch));
}
if characters.is_empty() {
bail!(self, "expected at least one character in character group");
Expand All @@ -321,24 +321,24 @@ impl Parser<'_> {

/// Parse an element of a character class, e.g.
/// `` `a`-`b` `` | `` `term` `` | `` NonTerminal ``.
fn parse_characters(&mut self) -> Result<Option<Characters>> {
fn parse_characters(&mut self) -> Result<Option<ExpressionKind>> {
if let Some(a) = self.parse_character()? {
if self.take_str("-") {
let Some(b) = self.parse_character()? else {
bail!(self, "expected character in range");
};
Ok(Some(Characters::Range(a, b)))
Ok(Some(ExpressionKind::CharacterRange(a, b)))
} else {
//~^ Parse terminal in backticks.
let t = match a {
Character::Char(ch) => ch.to_string(),
Character::Unicode(_) => bail!(self, "unicode not supported"),
};
Ok(Some(Characters::Terminal(t)))
Ok(Some(ExpressionKind::Terminal(t)))
}
} else if let Some(name) = self.parse_name() {
//~^ Parse nonterminal identifier.
Ok(Some(Characters::Named(name)))
Ok(Some(ExpressionKind::Nt(name)))
} else {
Ok(None)
}
Expand Down Expand Up @@ -573,7 +573,7 @@ fn translate_position(input: &str, index: usize) -> (&str, usize, usize) {
#[cfg(test)]
mod tests {
use crate::parser::{parse_grammar, translate_position};
use crate::{Character, Characters, ExpressionKind, Grammar, RangeLimit};
use crate::{Character, ExpressionKind, Grammar, RangeLimit};
use std::path::Path;

#[test]
Expand Down Expand Up @@ -824,8 +824,8 @@ mod tests {
panic!("expected Charset inside lookahead, got {:?}", inner.kind);
};
assert_eq!(chars.len(), 2);
assert!(matches!(&chars[0], Characters::Terminal(t) if t == "e"));
assert!(matches!(&chars[1], Characters::Terminal(t) if t == "E"));
assert!(matches!(&chars[0].kind, ExpressionKind::Terminal(t) if t == "e"));
assert!(matches!(&chars[1].kind, ExpressionKind::Terminal(t) if t == "E"));
}

#[test]
Expand Down Expand Up @@ -977,7 +977,7 @@ mod tests {
panic!("expected Charset, got {:?}", rule.expression.kind);
};
assert_eq!(chars.len(), 1);
let Characters::Range(a, b) = &chars[0] else {
let ExpressionKind::CharacterRange(a, b) = &chars[0].kind else {
panic!("expected Range, got {:?}", chars[0]);
};
assert!(matches!(a, Character::Unicode((ch, _)) if *ch == '\0'));
Expand All @@ -996,7 +996,7 @@ mod tests {
panic!("expected Charset, got {:?}", rule.expression.kind);
};
assert_eq!(chars.len(), 1);
let Characters::Range(a, b) = &chars[0] else {
let ExpressionKind::CharacterRange(a, b) = &chars[0].kind else {
panic!("expected Range, got {:?}", chars[0]);
};
assert!(matches!(a, Character::Char(ch) if *ch == 'a'));
Expand All @@ -1012,7 +1012,7 @@ mod tests {
panic!("expected Charset, got {:?}", rule.expression.kind);
};
assert_eq!(chars.len(), 1);
let Characters::Range(a, b) = &chars[0] else {
let ExpressionKind::CharacterRange(a, b) = &chars[0].kind else {
panic!("expected Range, got {:?}", chars[0]);
};
assert!(matches!(a, Character::Char(ch) if *ch == 'a'));
Expand All @@ -1031,12 +1031,12 @@ mod tests {
panic!("expected Charset, got {:?}", rule.expression.kind);
};
assert_eq!(chars.len(), 2);
let Characters::Range(a1, b1) = &chars[0] else {
let ExpressionKind::CharacterRange(a1, b1) = &chars[0].kind else {
panic!("expected Range, got {:?}", chars[0]);
};
assert!(matches!(a1, Character::Unicode((ch, _)) if *ch == '\0'));
assert!(matches!(b1, Character::Unicode((ch, _)) if *ch == '\u{D7FF}'));
let Characters::Range(a2, b2) = &chars[1] else {
let ExpressionKind::CharacterRange(a2, b2) = &chars[1].kind else {
panic!("expected Range, got {:?}", chars[1]);
};
assert!(matches!(a2, Character::Unicode((ch, _)) if *ch == '\u{E000}'));
Expand All @@ -1052,9 +1052,9 @@ mod tests {
panic!("expected Charset, got {:?}", rule.expression.kind);
};
assert_eq!(chars.len(), 3);
assert!(matches!(&chars[0], Characters::Terminal(t) if t == "a"));
assert!(matches!(&chars[1], Characters::Terminal(t) if t == "b"));
assert!(matches!(&chars[2], Characters::Named(n) if n == "Foo"));
assert!(matches!(&chars[0].kind, ExpressionKind::Terminal(t) if t == "a"));
assert!(matches!(&chars[1].kind, ExpressionKind::Terminal(t) if t == "b"));
assert!(matches!(&chars[2].kind, ExpressionKind::Nt(n) if n == "Foo"));
}

// --- Negative lookahead combined with charset ---
Expand All @@ -1076,9 +1076,9 @@ mod tests {
panic!("expected Charset, got {:?}", inner.kind);
};
assert_eq!(chars.len(), 3);
assert!(matches!(&chars[0], Characters::Terminal(t) if t == "x"));
assert!(matches!(&chars[1], Characters::Terminal(t) if t == "y"));
assert!(matches!(&chars[2], Characters::Named(n) if n == "LF"));
assert!(matches!(&chars[0].kind, ExpressionKind::Terminal(t) if t == "x"));
assert!(matches!(&chars[1].kind, ExpressionKind::Terminal(t) if t == "y"));
assert!(matches!(&chars[2].kind, ExpressionKind::Nt(n) if n == "LF"));
}

// --- Negative lookahead combined with Unicode ---
Expand All @@ -1098,7 +1098,7 @@ mod tests {
panic!("expected Charset, got {:?}", inner.kind);
};
assert_eq!(chars.len(), 1);
let Characters::Range(a, b) = &chars[0] else {
let ExpressionKind::CharacterRange(a, b) = &chars[0].kind else {
panic!("expected Range, got {:?}", chars[0]);
};
assert!(matches!(a, Character::Unicode((ch, _)) if *ch == '\0'));
Expand Down
81 changes: 35 additions & 46 deletions tools/mdbook-spec/src/grammar/render_markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use super::RenderCtx;
use crate::grammar::Grammar;
use anyhow::bail;
use grammar::{Character, Characters, Expression, ExpressionKind, Production};
use grammar::{Character, Expression, ExpressionKind, Production};
use regex::Regex;
use std::borrow::Cow;
use std::fmt::Write;
Expand Down Expand Up @@ -79,6 +79,7 @@ fn last_expr(expr: &Expression) -> &ExpressionKind {
| ExpressionKind::Break(_)
| ExpressionKind::Comment(_)
| ExpressionKind::Charset(_)
| ExpressionKind::CharacterRange(..)
| ExpressionKind::NegExpression(_)
| ExpressionKind::Cut(_)
| ExpressionKind::Unicode(_) => &expr.kind,
Expand Down Expand Up @@ -178,6 +179,20 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) {
write!(output, "<span class=\"grammar-comment\">// {s}</span>").unwrap();
}
ExpressionKind::Charset(set) => charset_render_markdown(cx, set, output),
ExpressionKind::CharacterRange(start, end) => {
let write_ch = |ch: &Character, output: &mut String| match ch {
Character::Char(ch) => write!(
output,
"<span class=\"grammar-literal\">{}</span>",
markdown_escape(&ch.to_string())
)
.unwrap(),
Character::Unicode((_, s)) => write!(output, "U+{s}").unwrap(),
};
write_ch(start, output);
output.push('-');
write_ch(end, output);
}
ExpressionKind::NegExpression(e) => {
output.push('~');
render_expression(e, cx, output);
Expand All @@ -203,47 +218,18 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) {
}
}

fn charset_render_markdown(cx: &RenderCtx, set: &[Characters], output: &mut String) {
fn charset_render_markdown(cx: &RenderCtx, set: &[Expression], output: &mut String) {
output.push_str("\\[");
let mut iter = set.iter().peekable();
while let Some(chars) = iter.next() {
render_characters(chars, cx, output);
while let Some(expr) = iter.next() {
render_expression(expr, cx, output);
if iter.peek().is_some() {
output.push(' ');
}
}
output.push(']');
}

/// Renders a single element of a character set as markdown into `output`.
///
/// - `Named` is written as a markdown link; the destination comes from
///   `cx.md_link_map`, or the literal text `missing` when the name has no entry.
/// - `Terminal` is written as a `grammar-literal` span with its text escaped.
/// - `Range` is written as its two endpoints joined by `-`.
fn render_characters(chars: &Characters, cx: &RenderCtx, output: &mut String) {
    /// Writes one endpoint of a range: a literal span for a plain character,
    /// or `U+` followed by the stored string for a Unicode endpoint.
    fn write_endpoint(endpoint: &Character, output: &mut String) {
        match endpoint {
            Character::Char(ch) => {
                let text = ch.to_string();
                write!(
                    output,
                    "<span class=\"grammar-literal\">{}</span>",
                    markdown_escape(&text)
                )
                .unwrap();
            }
            Character::Unicode((_, s)) => {
                write!(output, "U+{s}").unwrap();
            }
        }
    }

    match chars {
        Characters::Named(s) => {
            // Unknown names still render as a link, pointing at `missing`.
            let dest = match cx.md_link_map.get(s) {
                Some(d) => d.as_str(),
                None => "missing",
            };
            write!(output, "[{s}]({dest})").unwrap();
        }
        Characters::Terminal(s) => {
            let escaped = markdown_escape(s);
            write!(output, "<span class=\"grammar-literal\">{}</span>", escaped).unwrap();
        }
        Characters::Range(a, b) => {
            write_endpoint(a, output);
            output.push('-');
            write_endpoint(b, output);
        }
    }
}

/// Escapes characters that markdown would otherwise interpret.
fn markdown_escape(s: &str) -> Cow<'_, str> {
static ESC_RE: LazyLock<Regex> =
Expand Down Expand Up @@ -304,8 +290,8 @@ mod tests {
fn lookahead_charset() {
let result = render(ExpressionKind::NegativeLookahead(Box::new(
Expression::new_kind(ExpressionKind::Charset(vec![
Characters::Terminal("e".to_string()),
Characters::Terminal("E".to_string()),
Expression::new_kind(ExpressionKind::Terminal("e".to_string())),
Expression::new_kind(ExpressionKind::Terminal("E".to_string())),
])),
)));
assert!(result.starts_with("!"), "should start with `!`");
Expand Down Expand Up @@ -351,9 +337,11 @@ mod tests {

#[test]
fn charset_unicode_range() {
let result = render(ExpressionKind::Charset(vec![Characters::Range(
Character::Unicode(('\0', "0000".to_string())),
Character::Unicode(('\u{007F}', "007F".to_string())),
let result = render(ExpressionKind::Charset(vec![Expression::new_kind(
ExpressionKind::CharacterRange(
Character::Unicode(('\0', "0000".to_string())),
Character::Unicode(('\u{007F}', "007F".to_string())),
),
)]));
assert!(result.contains("\\["));
assert!(result.contains("U+0000"));
Expand All @@ -363,9 +351,8 @@ mod tests {

#[test]
fn charset_char_range() {
let result = render(ExpressionKind::Charset(vec![Characters::Range(
Character::Char('a'),
Character::Char('z'),
let result = render(ExpressionKind::Charset(vec![Expression::new_kind(
ExpressionKind::CharacterRange(Character::Char('a'), Character::Char('z')),
)]));
assert!(result.contains("\\["));
assert!(result.contains("grammar-literal"));
Expand All @@ -375,9 +362,11 @@ mod tests {
#[test]
fn charset_mixed_range() {
// [`a`-U+007A]
let result = render(ExpressionKind::Charset(vec![Characters::Range(
Character::Char('a'),
Character::Unicode(('\u{007A}', "007A".to_string())),
let result = render(ExpressionKind::Charset(vec![Expression::new_kind(
ExpressionKind::CharacterRange(
Character::Char('a'),
Character::Unicode(('\u{007A}', "007A".to_string())),
),
)]));
assert!(result.contains("grammar-literal"));
assert!(result.contains("U+007A"));
Expand All @@ -400,8 +389,8 @@ mod tests {
#[test]
fn neg_expression_rendering() {
let result = render(ExpressionKind::NegExpression(Box::new(
Expression::new_kind(ExpressionKind::Charset(vec![Characters::Terminal(
"a".to_string(),
Expression::new_kind(ExpressionKind::Charset(vec![Expression::new_kind(
ExpressionKind::Terminal("a".to_string()),
)])),
)));
assert!(
Expand Down
Loading