diff --git a/.gitignore b/.gitignore index e7ad679..e909882 100644 --- a/.gitignore +++ b/.gitignore @@ -8,13 +8,20 @@ # sbt specific .cache .history -.lib/ -dist/* -target/ -lib_managed/ -src_managed/ -project/boot/ -project/plugins/project/ - +.lib +dist +target +lib_managed +src_managed +project +.scala-build +# Tooling +.project +.metals +.bsp +.bloop +.vagrant +# Testing +test.png diff --git a/.scalafmt.conf b/.scalafmt.conf new file mode 100644 index 0000000..54f7147 --- /dev/null +++ b/.scalafmt.conf @@ -0,0 +1,8 @@ +version = "3.7.15" +runner.dialect = scala3 + +assumeStandardLibraryStripMargin = true +align.stripMargin = true + +binPack.unsafeCallSite = true +binPack.literalArgumentLists = true \ No newline at end of file diff --git a/Vagrantfile b/Vagrantfile index 293a4da..b4805af 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -15,22 +15,21 @@ Vagrant.configure(2) do |config| config.vm.provision "shell", inline: <<-SHELL - # Refresh sources + # Refresh system sudo apt-get update -y + sudo apt-get upgrade -y # Graphviz for printing parsers to graphs sudo apt-get install -y graphviz - # Java - sudo apt-get install -y openjdk-7-jdk - # Sbt - sudo mkdir -p /home/vagrant/bin - pushd /home/vagrant/bin/ - sudo wget https://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/0.13.8/sbt-launch.jar - sudo cp /home/vagrant/configs/sbt.sh /home/vagrant/bin/sbt - sudo chmod u+x /home/vagrant/bin/sbt - sudo chmod +x /home/vagrant/bin/sbt + sudo apt-get install -y apt-transport-https curl gnupg + echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list + echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee /etc/apt/sources.list.d/sbt_old.list + curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo -H gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/scalasbt-release.gpg --import + sudo chmod 644 
/etc/apt/trusted.gpg.d/scalasbt-release.gpg + sudo apt-get update -y + sudo apt-get install -y sbt popd SHELL diff --git a/artifact/build.sbt b/artifact/build.sbt index 157aac1..5d5e47f 100644 --- a/artifact/build.sbt +++ b/artifact/build.sbt @@ -1,21 +1,20 @@ name := "first-class-derivatives" -version := "1.0" +version := "2.0.0" -scalaVersion := "2.11.7" +scalaVersion := "3.7.4" scalacOptions ++= Seq("-feature", "-deprecation") -libraryDependencies += "org.scalatest" %% "scalatest" % "2.2.4" % "test" +libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.19" % "test" -resolvers += "Sonatype OSS Snapshots" at - "https://oss.sonatype.org/content/repositories/releases" +resolvers += "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/releases" -parallelExecution in Test := true +Test / parallelExecution := true -libraryDependencies += "org.scala-lang.modules" % "scala-xml_2.11" % "1.0.5" +libraryDependencies += "org.scala-lang.modules" %% "scala-xml" % "2.4.0" -initialCommands in console := """import fcd._; import fcd.DerivativeParsers._""" +console / initialCommands := """import fcd._; import fcd.DerivativeParsers._""" // For VM users on windows systems, please uncomment the following line: // target := file("/home/vagrant/target/") diff --git a/artifact/project/build.properties b/artifact/project/build.properties deleted file mode 100644 index 19623ba..0000000 --- a/artifact/project/build.properties +++ /dev/null @@ -1 +0,0 @@ -sbt.version = 0.13.8 diff --git a/artifact/src/main/scala/examples/PythonAst.scala b/artifact/src/main/scala/examples/PythonAst.scala index c8779d9..7bcb6c9 100644 --- a/artifact/src/main/scala/examples/PythonAst.scala +++ b/artifact/src/main/scala/examples/PythonAst.scala @@ -9,25 +9,24 @@ trait PythonAst { case class Decorated(decorators: Seq[Decorator], el: Any) extends Tree trait Def extends Tree - case class FuncDef(name: Any, params: Any, retAnnot: Option[Any], body: Any) extends Def - - trait Stmt 
extends Tree - case class Simple(small: Seq[Any]) extends Stmt - - case class Del(exprs: Seq[Any]) extends Stmt - case object Pass extends Stmt - case object Break extends Stmt - case object Continue extends Stmt - case class Return(expr: Option[Any]) extends Stmt - case class Raise(expr: Option[Any]) extends Stmt - case class ExprStmt(expr: Any) extends Stmt - case class Import(names: Any, from: Option[Any] = None) extends Stmt - - case class Global(ids: Seq[Any]) extends Stmt - case class Nonlocal(ids: Seq[Any]) extends Stmt - case class Assert(tests: Seq[Any]) extends Stmt - - case class For(exprs: Seq[Any], in: Any, body: Any, default: Any) extends Stmt + case class FuncDef(name: Any, params: Any, retAnnot: Option[Any], body: Any) + extends Def + + enum Stmt extends Tree { + case Simple(small: Seq[Any]) + case Del(exprs: Seq[Any]) + case Pass + case Break + case Continue + case Return(expr: Option[Any]) + case Raise(expr: Option[Any]) + case ExprStmt(expr: Any) + case Import(names: Any, from: Option[Any] = None) + case Global(ids: Seq[Any]) + case Nonlocal(ids: Seq[Any]) + case Assert(tests: Seq[Any]) + case For(exprs: Seq[Any], in: Any, body: Any, default: Any) + } trait Expr extends Tree case class BinOp(l: Any, op: Any, r: Any) extends Expr diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index 46762b2..2889a5c 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -2,128 +2,133 @@ package fcd import scala.language.implicitConversions -/** - * Additional Case Study: Python Parser - * ==================================== - * This file contains an additional python parser implementation to support - * the claims in our paper: - * - * Brachthäuser, Rendel, Ostermann. - * Parsing with First-Class Derivatives - * Submitted to OOPSLA 2016. - * - * The parser is implemented on top of a very simple lexer. 
The lexer is - * completely indentation unaware and for instance should lex: - * - * while␣(True):\n - * ␣␣a␣*=␣a\n - * - * as - * - * KW("while"), WS, Punct("("), KW("True"), Punct(")"), Punct(":"), NL, - * WS, WS, Id("a"), Punct("*="), WS, Id("a"), NL - * - * Multiline strings should be lexed as instance of Str, with `value` including - * all of the spaces and newlines that appear in the multiline string. - * - * Python programs are then parsed with the parser `preprocess(file_input)`, - * where `preprocess` in turn is a parser combinator composed of the following - * three separately defined "stream preprocessing" parser combinators: - * - * 1. stripComments Removes all comment lexemes from the stream - * 2. explicitJoin Implements explicit line joining by dropping all - * NL tokens that are preceded by a Punct("\\") - * 3. implicitJoin Implements implicit line joining by dropping all - * NL tokens that occur inside pairs of parenthesis. - * - * Interestingly, `implicitJoin` itself is defined from components in the - * following way: - * - * 1. The Dyck language of balanced parenthesis is defined (`dyck`) - * 2. The input to `dyck` is transformed to filter out all non-parenthesis - * tokens (`extDyck`) - * 3. implicitJoin now delegates *all* tokens while it awaits an opening - * parenthesis. After seeing such opening parenthesis it filters out - * NL when delegating until `extDyck` is successful and thus all pairs of - * parens are closed. - * - * Indentation senstivity itself is handled in nonterminal `suite` the way it is - * described in the paper. 
- * - * The python grammar itself is a straightforward translation of: - * https://docs.python.org/3.5/reference/grammar.html - */ -trait PythonLexemes { self: Parsers with DerivedOps with Syntax => - - trait Lexeme - case class Str(value: String) extends Lexeme - case class Num(value: String) extends Lexeme - case class KW(name: String) extends Lexeme - case class Id(name: String) extends Lexeme - // Punctuation - case class Punct(sym: String) extends Lexeme - case object NL extends Lexeme - case object WS extends Lexeme // whitespace - case class Comment(content: String) extends Lexeme - case object EOS extends Lexeme +/** Additional Case Study: Python Parser + * + * This file contains an additional python parser implementation to support the + * claims in our paper. + * + * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives + * Submitted to OOPSLA 2016. + * + * The parser is implemented on top of a very simple lexer. The lexer is + * completely indentation unaware and for instance should lex: + * + * while␣(True):\n ␣␣a␣*=␣a\n + * + * as + * + * KW("while"), WS, Punct("("), KW("True"), Punct(")"), Punct(":"), NL, WS, WS, + * Id("a"), Punct("*="), WS, Id("a"), NL + * + * Multiline strings should be lexed as instance of Str, with `value` including + * all of the spaces and newlines that appear in the multiline string. + * + * Python programs are then parsed with the parser `preprocess(file_input)`, + * where `preprocess` in turn is a parser combinator composed of the following + * three separately defined "stream preprocessing" parser combinators: + * + * 1. stripComments Removes all comment lexemes from the stream 2. + * explicitJoin Implements explicit line joining by dropping all NL tokens + * that are preceded by a Punct("\\") 3. implicitJoin Implements implicit + * line joining by dropping all NL tokens that occur inside pairs of + * parenthesis. 
+ * + * Interestingly, `implicitJoin` itself is defined from components in the + * following way: + * + * 1. The Dyck language of balanced parenthesis is defined (`dyck`) 2. The + * input to `dyck` is transformed to filter out all non-parenthesis tokens + * (`extDyck`) 3. implicitJoin now delegates *all* tokens while it awaits + * an opening parenthesis. After seeing such opening parenthesis it + * filters out NL when delegating until `extDyck` is successful and thus + * all pairs of parens are closed. + * + * Indentation senstivity itself is handled in nonterminal `suite` the way it + * is described in the paper. + * + * The python grammar itself is a straightforward translation of: + * https://docs.python.org/3.5/reference/grammar.html + */ +trait PythonLexemes { self: Parsers & DerivedOps & Syntax => + enum Lexeme { + case Str(value: String) + case Num(value: String) + case KW(name: String) + case Id(name: String) + case Punct(sym: String) + case Comment(content: String) + case NL + case WS + case EOS + } + + import Lexeme._ type Elem = Lexeme - implicit def lex(lex: Elem): Parser[Elem] = accept(lex) - implicit def kw(kw: Symbol): Parser[Elem] = accept(KW(kw.name)) - implicit def punct(p: String): Parser[Elem] = accept(Punct(p)) + given lex: Conversion[Elem, Parser[Elem]] = accept(_) + given kw: Conversion[String, Parser[Elem]] = kw => accept(KW(kw)) + given punct: Conversion[Char, Parser[Elem]] = p => accept(Punct(p.toString)) - lazy val string: Parser[Str] = any flatMap { + lazy val string = any >> { case s: Str => succeed(s) - case _ => fail + case _ => fail } - lazy val number: Parser[Num] = any flatMap { + lazy val number = any >> { case n: Num => succeed(n) - case _ => fail + case _ => fail } - lazy val id: Parser[Id] = any flatMap { + lazy val id = any >> { case id: Id => succeed(id) - case _ => fail + case _ => fail } - lazy val comment: Parser[Comment] = any flatMap { + lazy val comment = any >> { case c: Comment => succeed(c) - case _ => fail + case _ 
=> fail } def isComment: Lexeme => Boolean = _.isInstanceOf[Comment] def isNL: Lexeme => Boolean = _ == NL } -trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Syntax with DerivedOps => +trait PythonParsers extends PythonLexemes, PythonAst { + self: Parsers & Syntax & DerivedOps => + + import Stmt._ // general toolbox def no(els: Elem*): Parser[Elem] = acceptIf(el => !(els contains el)) - def no(els: Iterable[Elem]): Parser[Elem] = no(els.toSeq : _*) - def switch[T](p: Elem => Boolean, thn: Elem => Parser[T], els: Elem => Parser[T]): Parser[T] = - eat { c => if (p(c)) thn(c) else els(c) } + def no(els: Iterable[Elem]): Parser[Elem] = no(els.toSeq*) + def switch[T]( + p: Elem => Boolean, + thn: Elem => Parser[T], + els: Elem => Parser[T] + ) = eat { c => if (p(c)) thn(c) else els(c) } // Simply preprocesses the input stream and strips out comments - def stripComments[T]: Parser[T] => Parser[T] = { p => + def stripComments[T](p: Parser[T]): Parser[T] = { lazy val stripped: Parser[T] = done(p) | switch(isComment, _ => stripped, c => stripComments(p << c)) stripped } + import Lexeme._ + val pairs = Map[Elem, Elem]( Punct("(") -> Punct(")"), Punct("[") -> Punct("]"), - Punct("{") -> Punct("}")) + Punct("{") -> Punct("}") + ) val (opening, closing) = (pairs.keys, pairs.values) - def enclosed[T]: (=> Parser[T]) => Parser[T] = - p => oneOf(opening) >> { o => p <~ pairs(o) } + def enclosed[T](p: => Parser[T]) = oneOf(opening) >> { o => p <~ pairs(o) } // non empty Dyck language on these pairs lazy val dyck: Parser[Any] = enclosed(many(dyck)) // the repetition of enclosed is unfortunate - lazy val extDyck: Parser[Any] = enclosed(always) &> + lazy val extDyck = enclosed(always) &> filter((opening ++ closing).toSeq contains _)(dyck) // From the python reference manual: @@ -132,9 +137,8 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy // over more than one physical line without using backslashes. // [...] 
Implicitly continued lines can carry comments. def implicitJoin[T]: Parser[T] => Parser[T] = repeat[T] { p => - ( extDyck &> filter(_ != NL)(delegate(p)) - | noneOf(opening ++ closing) &> delegate(p) - ) + (extDyck &> filter(_ != NL)(delegate(p)) + | noneOf(opening ++ closing) &> delegate(p)) } // Strips out newlines if they are preceeded by a backslash punctuation @@ -147,21 +151,21 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy // backslash that is not part of a string literal or comment, it is joined // with the following forming a single logical line, deleting the backslash // and the following end-of-line character. - def explicitJoin[T]: Parser[T] => Parser[T] = p => { + def explicitJoin[T](p: Parser[T]): Parser[T] = { lazy val join: NT[T] = - done(p) | switch(_ == Punct("\\"), - bs => switch(_ == NL, - _ => join, - c => explicitJoin(p << bs << c)), - c => explicitJoin(p << c)) + done(p) | switch( + _ == Punct("\\"), + bs => switch(_ == NL, _ => join, c => explicitJoin(p << bs << c)), + c => explicitJoin(p << c) + ) join } - val line = many(no(NL)) ~ NL + val line = many(no(NL)) ~ NL val emptyLine = many(WS) ~ NL - def indentBy[T](indentation: Parser[Any]): Parser[T] => Parser[T] = repeat[T] { p => + def indentBy[T](indentation: Parser[Any]) = repeat[T] { p => // here we use (locally) biased choice to prevent ambiguities - biasedAlt ( + biasedAlt( // pass empty lines as NL to p emptyLine ^^ { _ => p << NL }, // first consume `n` spaces, then delegate to p @@ -169,33 +173,35 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy ) } - def indented[T](p: Parser[T]): Parser[T] = - consumed(some(WS)) >> { i => not(prefix(WS)) &> indentBy(acceptSeq(i))(p) <<< i } - - def preprocess[T] = stripComments[T] compose explicitJoin[T] compose implicitJoin[T] + def indented[T](p: Parser[T]) = + consumed(some(WS)) >> { i => + not(prefix(WS)) &> indentBy(acceptSeq(i))(p) <<< i + } + def preprocess[T] = + 
stripComments[T] compose explicitJoin[T] compose implicitJoin[T] - def binOp[T, S](p: Parser[T], op: Parser[S], f: (T, S, T) => T): Parser[T] = { - lazy val ps: Parser[T] = nonterminal((p ␣ op ␣ ps) ^^ { case l ~ op ~ r => f(l, op, r) } | p) + def binOp[T, S](p: Parser[T], op: Parser[S], f: (T, S, T) => T) = { + lazy val ps: Parser[T] = nonterminal((p ␣ op ␣ ps) ^^ { case ((l, op), r) => + f(l, op, r) + } | p) ps } // --- Space Helpers --- - lazy val whitespace = WS - lazy val linebreak = NL - lazy val space = whitespace | linebreak + val whitespace = WS + val linebreak = NL + val space = alt(whitespace, linebreak) lazy val spaces = many(whitespace) - implicit class SpaceHelpers[T, P <% Parser[T]](p: P) { - def ␣[U](q: => Parser[U]): Parser[T ~ U] = - p ~ (spaces ~> q) - def <␣[U](q: => Parser[U]): Parser[T] = - p <~ (spaces ~ q) - def ␣>[U](q: => Parser[U]): Parser[U] = - p ~> (spaces ~> q) + extension [T](p: Parser[T]) { + def ␣[U](q: => Parser[U]) = p ~ (spaces ~> q) + def <␣[U](q: => Parser[U]) = p <~ (spaces ~ q) + def ␣>[U](q: => Parser[U]) = p ~> (spaces ~> q) } - def listOf[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = + + def listOf[T](p: Parser[T], sep: Parser[Any]) = someSep(p, spaces ~ sep ~ spaces) <~ opt(spaces ~ sep) def optList[T](p: Parser[List[T]]) = p | succeed(Nil) @@ -205,201 +211,210 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy // --- Python Grammar --- // see: https://docs.python.org/3.5/reference/grammar.html - lazy val file_input: NT[Program] = emptyLine.* ~> many(stmt <~ emptyLine.*) <~ EOS ^^ Program + lazy val file_input: NT[Program] = + emptyLine.* ~> many(stmt <~ emptyLine.*) <~ EOS ^^ Program.apply - lazy val decorator: Parser[Decorator] = - "@" ~> dotted_name ~ ("(" ~> optArgs <~ ")" | succeed(Nil)) <~ NL ^^ Decorator + lazy val decorator: Parser[Decorator] = + '@' ~> dotted_name ~ ('(' ~> optArgs <~ ')' | succeed( + Nil + )) <~ NL ^^ Decorator.apply lazy val decorators: 
Parser[List[Decorator]] = some(decorator) - lazy val decorated: Parser[Decorated] = - decorators ~ (classdef | funcdef | async_funcdef) ^^ Decorated - + lazy val decorated: Parser[Decorated] = + decorators ~ (classdef | funcdef | async_funcdef) ^^ Decorated.apply // --- Functions --- - lazy val async_funcdef: Parser[FuncDef] = 'async ␣> funcdef + lazy val async_funcdef: Parser[FuncDef] = "async" ␣> funcdef lazy val funcdef: Parser[FuncDef] = - 'def ␣> (id ␣ parameters ~ spacedOpt("->" ␣> test)) ␣ (":" ␣> suite) ^^ FuncDef - - lazy val parameters: Parser[Any] = "(" ~> spacedOpt(typedargslist) <␣ ")" + "def" ␣> (id ␣ parameters ~ spacedOpt( + Punct("->") ␣> test + )) ␣ (':' ␣> suite) ^^ FuncDef.apply + lazy val parameters = '(' ~> spacedOpt(typedargslist) <␣ ')' // ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef] def fpdef(p: Parser[Any]): Parser[Any] = - ( "*" ~ spacedOpt(p) - ~ spacedOpt("," ␣> testdefs(p)) - ~ spacedOpt("," ␣> ("**" ␣ p)) - | "**" ␣ p - ) - def testdefs(p: Parser[Any]): Parser[Any] = someSep(p ~ spacedOpt("=" ␣> test), ",") + ('*' ~ spacedOpt(p) + ~ spacedOpt(',' ␣> testdefs(p)) + ~ spacedOpt(',' ␣> (Punct("**") ␣ p)) + | Punct("**") ␣ p) + def testdefs(p: Parser[Any]) = someSep(p ~ spacedOpt('=' ␣> test), ',') - lazy val typedargslist: Parser[Any] = - testdefs(tfpdef) ~ spacedOpt("," ␣> fpdef(tfpdef)) | fpdef(tfpdef) + lazy val typedargslist = + testdefs(tfpdef) ~ spacedOpt(',' ␣> fpdef(tfpdef)) | fpdef(tfpdef) - lazy val varargslist: Parser[Any] = - testdefs(vfpdef) ~ spacedOpt("," ␣> fpdef(vfpdef)) | fpdef(vfpdef) + lazy val varargslist = + testdefs(vfpdef) ~ spacedOpt(',' ␣> fpdef(vfpdef)) | fpdef(vfpdef) - lazy val tfpdef: Parser[Any] = id ~ spacedOpt(":" ␣> test) - lazy val vfpdef: Parser[Any] = id + lazy val tfpdef = id ~ spacedOpt(':' ␣> test) + lazy val vfpdef = id // --- Statements --- - lazy val stmt: NT[Any] = simple_stmt | compound_stmt - lazy val simple_stmt: Parser[Any] = listOf(small_stmt, ";") <␣ NL ^^ 
Simple - lazy val small_stmt: Parser[Any] = - ( expr_stmt | del_stmt - | pass_stmt | flow_stmt | import_stmt - | global_stmt | nonlocal_stmt | assert_stmt - ) - - lazy val expr_stmt: Parser[Any] = - ( testlist_star_expr - | testlist_star_expr ␣ augassign ␣ ( yield_expr | testlist ) - | testlist_star_expr ~ some(spaces ~> "=" ␣> ( yield_expr | testlist_star_expr )) - ) ^^ ExprStmt - - lazy val testlist_star_expr: Parser[Any] = listOf(test | star_expr, ",") - - lazy val augassign: Parser[Any] = ( "+=" | "-=" | "*=" | "@=" | "/=" | "%=" - | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" - | "//=" - ) - lazy val del_stmt: Parser[Stmt] = 'del ␣> exprlist ^^ Del - lazy val pass_stmt: Parser[Stmt] = 'pass ^^^ Pass - lazy val flow_stmt: Parser[Stmt] = break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt - lazy val break_stmt: Parser[Stmt] = 'break ^^^ Break - lazy val continue_stmt: Parser[Stmt] = 'continue ^^^ Continue - lazy val return_stmt: Parser[Stmt] = 'return ~> spacedOpt(testlist) ^^ Return - lazy val yield_stmt: Parser[Stmt] = yield_expr ^^ ExprStmt - lazy val raise_stmt: Parser[Stmt] = 'raise ~> spacedOpt(test ~ spacedOpt('from ␣ test)) ^^ Raise - lazy val import_stmt: Parser[Any] = import_name | import_from - lazy val import_name: Parser[Any] = 'import ␣> dotted_as_names ^^ { n => Import(n) } + lazy val stmt: NT[Any] = simple_stmt | compound_stmt + lazy val simple_stmt = + listOf(small_stmt, ';') <␣ NL ^^ Simple.apply + lazy val small_stmt = + (expr_stmt | del_stmt + | pass_stmt | flow_stmt | import_stmt + | global_stmt | nonlocal_stmt | assert_stmt) + + lazy val expr_stmt = + (testlist_star_expr + | testlist_star_expr ␣ augassign ␣ (yield_expr | testlist) + | testlist_star_expr ~ some( + spaces ~> '=' ␣> (yield_expr | testlist_star_expr) + )) ^^ ExprStmt.apply + + lazy val testlist_star_expr = listOf(test | star_expr, ',') + + lazy val augassign = (Punct("+=") | Punct("-=") | Punct("*=") | Punct( + "@=") | Punct("/=") | Punct("%=") + | Punct("&=") | 
Punct("|=") | Punct("^=") | Punct("<<=") | Punct( + ">>=") | Punct("**=") | Punct("//=")) + lazy val del_stmt = "del" ␣> exprlist ^^ Del.apply + lazy val pass_stmt = "pass" ^^^ Pass + lazy val flow_stmt = + break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt + lazy val break_stmt = "break" ^^^ Break + lazy val continue_stmt = "continue" ^^^ Continue + lazy val return_stmt = "return" ~> spacedOpt(testlist) ^^ Return.apply + lazy val yield_stmt = yield_expr ^^ ExprStmt.apply + lazy val raise_stmt = + "raise" ~> spacedOpt(test ~ spacedOpt("from" ␣ test)) ^^ Raise.apply + lazy val import_stmt = import_name | import_from + lazy val import_name = "import" ␣> dotted_as_names ^^ { Import(_) } // # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS - lazy val import_from: Parser[Any] = - ('from ~> (spacedMany("." | "...") ~ dotted_name | some("." | "...")) ␣ - 'import ␣> ("*" | "(" ␣> import_as_names <␣ ")" | import_as_names)) ^^ { - case (from, names) => Import(names, Some(from)) + lazy val import_from = + ("from" ~> (spacedMany('.' | Punct("...")) ~ dotted_name | some( + '.' 
| Punct("...") + )) ␣ + "import" + ␣> ('*' | '(' ␣> import_as_names <␣ ')' | import_as_names)) ^^ { + case (from, names) => Import(names, Some(from)) } - lazy val import_as_name: Parser[Any] = id ~ spacedOpt('as ␣ id) - lazy val dotted_as_name: Parser[Any] = dotted_name ~ spacedOpt('as ␣ id) - lazy val import_as_names: Parser[Any] = listOf(test | import_as_name, ",") - lazy val dotted_as_names: Parser[Any] = someSep(dotted_as_name, ",") - lazy val dotted_name: Parser[Any] = someSep(id, ".") - - lazy val global_stmt: Parser[Any] = 'global ␣> someSep(id, ",") ^^ Global - lazy val nonlocal_stmt: Parser[Any] = 'nonlocal ␣> someSep(id, ",") ^^ Nonlocal - lazy val assert_stmt: Parser[Any] = 'assert ␣> someSep(test, ",") ^^ Assert + lazy val import_as_name = id ~ spacedOpt("as" ␣ id) + lazy val dotted_as_name = dotted_name ~ spacedOpt("as" ␣ id) + lazy val import_as_names = listOf(test | import_as_name, ',') + lazy val dotted_as_names = someSep(dotted_as_name, ',') + lazy val dotted_name = someSep(id, '.') + lazy val global_stmt = "global" ␣> someSep(id, ',') ^^ Global.apply + lazy val nonlocal_stmt = "nonlocal" ␣> someSep(id, ',') ^^ Nonlocal.apply + lazy val assert_stmt = "assert" ␣> someSep(test, ',') ^^ Assert.apply - lazy val compound_stmt: Parser[Any] = + lazy val compound_stmt = if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt - lazy val async_stmt: Parser[Any] = 'async ␣> (funcdef | with_stmt | for_stmt) - lazy val if_stmt: Parser[Any] = - 'if ␣> test ␣ (":" ␣> suite ~ - spacedMany('elif ␣> test ␣ (":" ␣> suite)) ~ - spacedOpt(('else ␣ ":") ␣> suite)) - lazy val while_stmt: Parser[Any] = - 'while ␣> test ␣ (":" ␣> suite ~ spacedOpt(('else ␣ ":") ␣> suite)) - lazy val for_stmt: Parser[Any] = - 'for ␣> exprlist ␣ ('in ␣> testlist ␣ (":" ␣> suite ~ spacedOpt(('else ␣> ":") ␣> suite))) ^^ { - case (exprs ~ (tests ~ (body ~ default))) => For(exprs, tests, body, default) + lazy val async_stmt = + "async" ␣> (funcdef | 
with_stmt | for_stmt) + lazy val if_stmt = + "if" ␣> test ␣ (':' ␣> suite ~ + spacedMany("elif" ␣> test ␣ (':' ␣> suite)) ~ + spacedOpt(("else" ␣ ':') ␣> suite)) + lazy val while_stmt = + "while" ␣> test ␣ (':' ␣> suite ~ spacedOpt(("else" ␣ ':') ␣> suite)) + lazy val for_stmt = + "for" ␣> exprlist ␣ ("in" ␣> testlist ␣ (':' ␣> suite ~ spacedOpt( + ("else" ␣> ':') ␣> suite + ))) ^^ { case (exprs, (tests, (body, default))) => + For(exprs, tests, body, default) } - lazy val try_stmt: Parser[Any] = - ('try ␣ ":") ␣> suite ␣ (some(except_clause ␣ (":" ␣> suite)) ~ - spacedOpt(('else ␣ ":") ␣> suite) ~ - spacedOpt(('finally ␣ ":") ␣> suite) - | (('finally ␣ ":") ␣> suite) - ) - lazy val with_stmt: Parser[Any] = 'with ␣> someSep(with_item, ",") ␣ (":" ␣> suite) - lazy val with_item: Parser[Any] = test ~ spacedOpt('as ␣> expr) + lazy val try_stmt = + ("try" ␣ ':') ␣> suite ␣ (some(except_clause ␣ (':' ␣> suite)) ~ + spacedOpt(("else" ␣ ':') ␣> suite) ~ + spacedOpt(("finally" ␣ ':') ␣> suite) + | (("finally" ␣ ':') ␣> suite)) + lazy val with_stmt = "with" ␣> someSep(with_item, ',') ␣ (':' ␣> suite) + lazy val with_item = test ~ spacedOpt("as" ␣> expr) // # NB compile.c makes sure that the default except clause is last - lazy val except_clause: Parser[Any] = 'except ~> spacedOpt(test ␣ opt('as ␣> id)) - + lazy val except_clause = "except" ~> spacedOpt(test ␣ opt("as" ␣> id)) // INDENTATION // changed to also allow empty lines - lazy val suite: Parser[Any] = simple_stmt | NL ~> indented(some(many(emptyLine) ~> stmt)) + lazy val suite = simple_stmt | NL ~> indented(some(many(emptyLine) ~> stmt)) // --- Expressions --- - lazy val test: NT[Any] = ( or_test ~ spacedOpt('if ␣> or_test ␣ ('else ␣> test)) - | lambdef - ) - lazy val test_nocond: NT[Any] = or_test | lambdef_nocond - lazy val lambdef: NT[Any] = 'lambda ~> spacedOpt(varargslist) ␣ (":" ␣> test) - lazy val lambdef_nocond: NT[Any] = 'lambda ~> spacedOpt(varargslist) ␣ (":" ␣> test_nocond) - lazy val or_test: NT[Any] = 
someSep(and_test, 'or) - lazy val and_test: NT[Any] = someSep(not_test, 'and) - lazy val not_test: NT[Any] = 'not ␣> not_test | comparison - lazy val comparison: NT[Any] = someSep(expr, comp_op) + lazy val test: NT[Any] = + (or_test ~ spacedOpt("if" ␣> or_test ␣ ("else" ␣> test)) + | lambdef) + lazy val test_nocond: NT[Any] = or_test | lambdef_nocond + lazy val lambdef: NT[Any] = "lambda" ~> spacedOpt(varargslist) ␣ (':' ␣> test) + lazy val lambdef_nocond: NT[Any] = + "lambda" ~> spacedOpt(varargslist) ␣ (':' ␣> test_nocond) + lazy val or_test: NT[Any] = someSep(and_test, "or") + lazy val and_test: NT[Any] = someSep(not_test, "and") + lazy val not_test: NT[Any] = "not" ␣> not_test | comparison + lazy val comparison: NT[Any] = someSep(expr, comp_op) // # <> isn't actually a valid comparison operator in Python. It's here for the // # sake of a __future__ import described in PEP 401 (which really works :-) - lazy val comp_op: Parser[Any] = ( "<" | ">" | "==" | ">=" | "<=" | "<>" | "!=" - |'in | 'not ␣ 'in | 'is | 'is ␣ 'not - ) - - lazy val expr: NT[Any] = binOp(xor_expr, "|", BinOp) - lazy val xor_expr: NT[Any] = binOp(and_expr, "^", BinOp) - lazy val and_expr: NT[Any] = binOp(shift_expr, "&", BinOp) - lazy val shift_expr: NT[Any] = binOp(arith_expr, "<<" | ">>", BinOp) - lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", BinOp) - lazy val term: NT[Any] = binOp(factor, "*" | "@" | "/" | "%" | "//", BinOp) - lazy val factor: NT[Any] = ("+" | "-" | "~") ␣ factor | power - lazy val power: NT[Any] = atom_expr | atom_expr ␣ "**" ␣ factor - - lazy val atom_expr: Parser[Any] = opt('await ~ spaces) ~> atom ~ spacedMany(trailer) - lazy val atom: Parser[Any] = ( "(" ␣> ( yield_expr | testlist_comp) <␣ ")" - | "[" ~> spacedOpt(testlist_comp) <␣ "]" - | "{" ~> spacedOpt(dictorsetmaker) <␣ "}" - | id | number | some(string) | "..." 
- | 'None | 'True | 'False - ) - - - lazy val star_expr: Parser[Any] = "*" ␣ expr - lazy val yield_expr: Parser[Any] = 'yield ~ spacedOpt('from ␣ test | testlist) - - lazy val testlist_comp: Parser[Any] = ( listOf(test | star_expr, ",") - | (test | star_expr) ␣ comp_for - ) - - lazy val trailer: Parser[Any] = ( "(" ␣> optArgs <␣ ")" - | "[" ␣> subscriptlist <␣ "]" - | "." ␣> id - ) - lazy val subscriptlist: Parser[Any] = listOf(subscript, ",") - lazy val subscript: Parser[Any] = test | spacedOpt(test) ~ ":" ~ spacedOpt(test) ~ spacedOpt(":" ~> spacedOpt(test)) - lazy val exprlist: Parser[List[Any]] = listOf(expr | star_expr, ",") - lazy val testlist: Parser[Any] = listOf(test, ",") - - lazy val dictorsetmaker: Parser[Any] = - ( ( listOf(test ␣ (":" ␣> test) | "**" ␣> expr, ",") - | (test ␣ (":" ␣> test) | "**" ␣> expr) ␣ comp_for - ) - | ( listOf(test | star_expr, ",") - | (test | star_expr) ␣ comp_for - ) + lazy val comp_op = + Punct("<") | ">" | Punct("==") | Punct(">=") | Punct("<=") | Punct( + "<>") | Punct("!=") | "in" | "not" ␣ "in" | "is" | "is" ␣ "not" + + lazy val expr: NT[Any] = binOp(xor_expr, '|', BinOp.apply) + lazy val xor_expr: NT[Any] = binOp(and_expr, '^', BinOp.apply) + lazy val and_expr: NT[Any] = binOp(shift_expr, '&', BinOp.apply) + lazy val shift_expr: NT[Any] = + binOp(arith_expr, Punct("<<") | Punct(">>"), BinOp.apply) + lazy val arith_expr: NT[Any] = binOp(term, Punct("+") | "-", BinOp.apply) + lazy val term: NT[Any] = + binOp(factor, Punct("*") | '@' | '/' | '%' | Punct("//"), BinOp.apply) + lazy val factor: NT[Any] = (Punct("+") | '-' | '~') ␣ factor | power + lazy val power: NT[Any] = atom_expr | atom_expr ␣ Punct("**") ␣ factor + lazy val atom_expr = + opt("await" ~ spaces) ~> atom ~ spacedMany(trailer) + lazy val atom = ('(' ␣> (yield_expr | testlist_comp) <␣ ')' + | '[' ~> spacedOpt(testlist_comp) <␣ ']' + | '{' ~> spacedOpt(dictorsetmaker) <␣ '}' + | id | number | some(string) | Punct("...") + | "None" | "True" | "False") + + lazy 
val star_expr = '*' ␣ expr + lazy val yield_expr = "yield" ~ spacedOpt("from" ␣ test | testlist) + + lazy val testlist_comp = (listOf(test | star_expr, ',') + | (test | star_expr) ␣ comp_for) + + lazy val trailer = ('(' ␣> optArgs <␣ ')' + | '[' ␣> subscriptlist <␣ ']' + | '.' ␣> id) + lazy val subscriptlist = listOf(subscript, ',') + lazy val subscript = + test | spacedOpt(test) ~ ':' ~ spacedOpt(test) ~ spacedOpt( + ':' ~> spacedOpt(test) ) + lazy val exprlist = listOf(expr | star_expr, ',') + lazy val testlist = listOf(test, ',') + lazy val dictorsetmaker = + ((listOf(test ␣ (':' ␣> test) | Punct("**") ␣> expr, ',') + | (test ␣ (':' ␣> test) | Punct("**") ␣> expr) ␣ comp_for) + | (listOf(test | star_expr, ',') + | (test | star_expr) ␣ comp_for)) - lazy val classdef: Parser[Any] = - 'class ␣> (id ~ spacedOpt("(" ␣> optArgs <␣ ")" )) ␣ (":" ␣> suite) + lazy val classdef = + "class" ␣> (id ~ spacedOpt('(' ␣> optArgs <␣ ')')) ␣ (':' ␣> suite) - lazy val arglist: Parser[List[Any]] = listOf(argument, ",") - lazy val optArgs: Parser[List[Any]] = arglist | succeed(Nil) + lazy val arglist = listOf(argument, ',') + lazy val optArgs = arglist | succeed(Nil) - lazy val argument: Parser[Any] = - ( test ~ spacedOpt(comp_for) - | test ␣ "=" ␣ test - | "**" ␣ test - | "*" ␣ test - ) + lazy val argument = (test ~ spacedOpt(comp_for) + | test ␣ '=' ␣ test + | Punct("**") ␣ test + | '*' ␣ test) - lazy val comp_iter: NT[Any] = comp_for | comp_if - lazy val comp_for: NT[Any] = 'for ␣> exprlist ␣ ('in ␣> or_test ~ spacedOpt(comp_iter)) - lazy val comp_if: Parser[Any] = 'if ␣> test_nocond ~ spacedOpt(comp_iter) + lazy val comp_iter: NT[Any] = comp_for | comp_if + lazy val comp_for = + "for" ␣> exprlist ␣ ("in" ␣> or_test ~ spacedOpt(comp_iter)) + lazy val comp_if = "if" ␣> test_nocond ~ spacedOpt(comp_iter) } -object PythonParsers extends PythonParsers with DerivedOps with DerivativeParsers with Syntax { - override def accept(t: Elem): Parser[Elem] = acceptIf(_ == t) +object 
PythonParsers + extends PythonParsers, + DerivedOps, + DerivativeParsers, + Syntax { + override def accept(t: Elem) = acceptIf(_ == t) } diff --git a/artifact/src/main/scala/examples/paper/Paper.scala b/artifact/src/main/scala/examples/paper/Paper.scala index 6f4d40a..e4bacee 100644 --- a/artifact/src/main/scala/examples/paper/Paper.scala +++ b/artifact/src/main/scala/examples/paper/Paper.scala @@ -1,16 +1,14 @@ package fcd -/** - * This object instantiates the examples from section 3, 4 and 7 and makes them - * available in the REPL via: - * - * > import paper._ - */ -object paper extends Section3 with Section4 with Section7 { +/** This object instantiates the examples from section 3, 4 and 7 and makes them + * available in the REPL via: + * + * > import paper._ + */ - // Use the derivative based parsers for examples in the paper - type Parsers = DerivativeParsers.type - def _parsers: DerivativeParsers.type = DerivativeParsers - override lazy val parsers: DerivativeParsers.type = _parsers - -} +object paper + extends RichParsers + with DerivativeParsers + with Section3 + with Section4 + with Section7 diff --git a/artifact/src/main/scala/examples/paper/Section3.scala b/artifact/src/main/scala/examples/paper/Section3.scala index 2319e73..ecc70a2 100644 --- a/artifact/src/main/scala/examples/paper/Section3.scala +++ b/artifact/src/main/scala/examples/paper/Section3.scala @@ -1,47 +1,36 @@ package fcd -/** - * Section 3 - First-class Derivatives: Gaining - * Fine Grained Control over the Input Stream - * =========================================== - * This file contains all code examples from section 3 of our paper: - * - * Brachthäuser, Rendel, Ostermann. - * Parsing with First-Class Derivatives - * To appear in OOPSLA 2016. - * - * The examples are grouped by subsections. For every subsection with - * examples we introduced a corresponding Scala object below. 
- * - * You can experiment with the examples of this file in the REPL by: - * - * > console - * scala> import paper.section_3_2._ - * scala> number.parse("42") - * res0: Results[Int] = List(42) - * - * You can reach the Scala console by entering 'console' at the - * sbt prompt. - * - * Additional note: All examples are parametrized by the parser combinator - * library to allow experimenting with different implementations. This should - * also support future research and alternate implementations. - * - * All the traits containing paper examples are eventually combined and - * instantiated to an object `paper` in `Paper.scala`. - */ - -trait Section3 extends ParserUsage { - - // Require a library implementation that also supports the derived combinators - type Parsers <: RichParsers - - // import all symbols from the library - import parsers._ - - /** - * Section 3.2 First-Class Derivatives - */ +import scala.language.implicitConversions + +/** Section 3 – Gaining Fine Grained Control over the Input Stream + * + * This file contains all code examples from section 3 of our paper. + * + * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives To + * appear in OOPSLA 2016. + * + * The examples are grouped by subsections. For every subsection with examples + * we introduced a corresponding Scala object below. + * + * You can experiment with the examples of this file in the REPL by: + * + * > console scala> import paper.section_3_2._ scala> number.parse("42") res0: + * Results[Int] = List(42) + * + * You can reach the Scala console by entering 'console' at the sbt prompt. + * + * Additional note: All examples are parametrized by the parser combinator + * library to allow experimenting with different implementations. This should + * also support future research and alternate implementations. + * + * All the traits containing paper examples are eventually combined and + * instantiated to an object `paper` in `Paper.scala`. 
+ */ + +trait Section3 { self: RichParsers => + + /** Section 3.2 First-Class Derivatives + */ object section_3_2 { // ### Example of Subsection 3.2: First-Class Derivatives (<<) @@ -89,7 +78,6 @@ trait Section3 extends ParserUsage { // This requires that you have graphviz installed on your computer. val q: Parser[List[Char]] = many('a') - // ### Example of Subsection 3.2: Combinator "nt" // // Difference: The combinator `nt` in the paper is called `nonterminal` in @@ -107,12 +95,13 @@ trait Section3 extends ParserUsage { // // The implicit conversions that wrap the production into `nonterminal` // calls are defined in the file Syntax.scala - val digit: Parser[Int] = acceptIf(_.isDigit) ^^ { s => Integer.valueOf(s.toString) } + val digit: Parser[Int] = acceptIf(_.isDigit) ^^ { s => + Integer.valueOf(s.toString) + } val number: Parser[Int] = - nonterminal( number ~ digit ^^ { case (n, d) => (n * 10) + d } - | digit - ) + nonterminal(number ~ digit ^^ { case (n, d) => (n * 10) + d } + | digit) // To get an overview of the available parser combinator refer to: // @@ -123,25 +112,23 @@ trait Section3 extends ParserUsage { } - /** - * Section 3.4 Implementation using First-Class Derivatives - */ + /** Section 3.4 Implementation using First-Class Derivatives + */ object section_3_4 { // Figure 4a. Definition of the combinator indented(p) in terms of <<. - def indented[T](p: Parser[T]): Parser[T] = + def indented[T](p: Parser[T]) = done(p) | (space ~ space) ~> readLine(p) def readLine[T](p: Parser[T]): Parser[T] = - ( no('\n') >> { c => readLine(p << c) } - | accept('\n') >> { c => indented(p << c) } - ) + (no('\n') >> { c => readLine(p << c) } + | accept('\n') >> { c => indented(p << c) }) // To inspect the virtual input stream of some parser `p` in `indented(p)` // one can use the following parser as kind of "mock-parser" // // It will accept all words and return the input stream it has processed. 
- val collect = consumed(many(any)) map (_.mkString) + val collect = consumed(many(any)) ^^ { _.mkString } // for instance, you can try the following in the REPL // @@ -158,27 +145,24 @@ trait Section3 extends ParserUsage { // please note the use of combinator `manyN(n, space)` which recognizes // n-many spaces. - def indentBy[T](n: Int): Parser[T] => Parser[T] = p => + def indentBy[T](n: Int)(p: Parser[T]) = done(p) | manyN(n, space) ~> readLine(n)(p) // Only change: pass the level of indentation as parameter around def readLine[T](n: Int)(p: Parser[T]): Parser[T] = - ( no('\n') >> { c => readLine(n)(p << c) } - | accept('\n') >> { c => indentBy(n)(p << c) } - ) + (no('\n') >> { c => readLine(n)(p << c) } + | accept('\n') >> { c => indentBy(n)(p << c) }) // Here we first read some spaces (at least one) and then invoke // `indentBy`. - def indented[T](p: Parser[T]): Parser[T] = consumed(some(space)) >> { case s => + def indented[T](p: Parser[T]) = consumed(some(space)) >> { s => // this simulates lookahead for greedy matching no(' ') >> { c => indentBy(s.size)(p) <<< s << c } } } - - /** - * Derived Combinators - */ + /** Derived Combinators + */ object section_3_5 { // Section 3.5 introduces `delegate` and `repeat`. The implementation of @@ -198,7 +182,6 @@ trait Section3 extends ParserUsage { def injectA[T](p: Parser[T]): Parser[T] = ((any ~ any) &> delegate(p)) >> { p2 => 'a' ~> p2 } - // Not in the paper: Example for usage of combinator `repeat`. // every two tokens recognize an intermediate token 'a'. // @@ -209,13 +192,13 @@ trait Section3 extends ParserUsage { // Please note, that since we repeatedly delimit with `any ~ any` the // resulting parser can only recognize words in { (xxa)* | x ∈ Σ } def injectAs[T] = repeat[T] { p => - ((any ~ any) &> delegate(p)) <~ 'a' + ((any ~ any) &> delegate(p)) <~ 'a' } // Figure 5b. Definition of the combinator `indented(p)` in terms of `delegate`. 
lazy val line = many(no('\n')) <~ '\n' def indented[T]: Parser[T] => Parser[T] = repeat[T] { p => - (space ~ space) ~> (line &> delegate(p)) + (space ~ space) ~> (line &> delegate(p)) } // To experiment with this implementation of indented you can selectively @@ -226,11 +209,9 @@ trait Section3 extends ParserUsage { // involving the indentation combinator. } - - /** - * Symmetrical to section_3_4 and section_3_4_improved we can define flexible - * indentation using delegate and repeat. - */ + /** Symmetrical to section_3_4 and section_3_4_improved we can define flexible + * indentation using delegate and repeat. + */ object section_3_5_improved { lazy val line = many(no('\n')) <~ '\n' @@ -238,7 +219,7 @@ trait Section3 extends ParserUsage { manyN(n, space) ~> (line &> delegate(p)) } - def indented[T](p: Parser[T]): Parser[T] = consumed(some(space)) >> { case s => + def indented[T](p: Parser[T]) = consumed(some(space)) >> { s => no(' ') >> { c => indentBy(s.size)(p) <<< s << c } } } diff --git a/artifact/src/main/scala/examples/paper/Section4.scala b/artifact/src/main/scala/examples/paper/Section4.scala index 7fe7c8a..855549a 100644 --- a/artifact/src/main/scala/examples/paper/Section4.scala +++ b/artifact/src/main/scala/examples/paper/Section4.scala @@ -1,29 +1,21 @@ package fcd -/** - * Section 4 - Applications - * ========================== - * This file contains all code examples from section 5 of our paper: - * - * Brachthäuser, Rendel, Ostermann. - * Parsing with First-Class Derivatives - * To appear in OOPSLA 2016. - * - * Section 4 gives additional applications and use cases where our approach - * results in a modular solution. 
- */ -trait Section4 extends ParserUsage { self: Section3 => - - // Require a library implementation that also supports the derived combinators - type Parsers <: RichParsers - - // import all symbols from the library - import parsers._ - - - /** - * Section 4.1 - Increased Reuuse through Parser Selection - */ +import scala.language.implicitConversions + +/** Section 4 – Applications + * + * This file contains all code examples from section 4 of our paper. + * + * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives To + * appear in OOPSLA 2016. + * + * Section 4 gives additional applications and use cases where our approach + * results in a modular solution. + */ +trait Section4 { self: Section3 & RichParsers => + + /** Section 4.1 - Increased Reuuse through Parser Selection + */ object section_4_1 { // very simplified grammar to illustrate parser selection @@ -31,8 +23,7 @@ trait Section4 extends ParserUsage { self: Section3 => lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block - | some('x') ~ '\n' - ) + | some('x') ~ '\n') lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) @@ -51,11 +42,10 @@ trait Section4 extends ParserUsage { self: Section3 => lazy val untilStmt = "until" ~> (stmt <<< "while") } - /** - * Section 4.2 Modular Definitions as Combinators - */ + /** Section 4.2 Modular Definitions as Combinators + */ object section_4_2 { - def unescChar(c: Char): String = StringContext treatEscapes s"\\$c" + def unescChar(c: Char) = StringContext processEscapes s"\\$c" // ### Example. 
Preprocessor that unescapes backslash escaped characters // @@ -64,9 +54,10 @@ trait Section4 extends ParserUsage { self: Section3 => // unescape(many("\n" | "a")) parse "\\na\\n\\naaa" def unescape[T](p: Parser[T]): Parser[T] = done(p) | eat { - case '\\' => char >> { c => - unescape( p <<< unescChar(c) ) - } + case '\\' => + char >> { c => + unescape(p <<< unescChar(c)) + } case c => unescape(p << c) } @@ -74,19 +65,17 @@ trait Section4 extends ParserUsage { self: Section3 => // ### Example Figure 6a. Combinators for interleaved parsing of fenced code // blocks. - val marker: Parser[Any] = lineEnd ~ "~~~" ~ lineEnd + val marker = lineEnd ~ "~~~" ~ lineEnd // We have two states: Inside the code block and outside the code block def inCode[R, S](text: Parser[R], code: Parser[S]): NT[(R, S)] = - ( marker ~> inText(text, code) - | eat { c => inCode(text, code << c) } - ) + (marker ~> inText(text, code) + | eat { c => inCode(text, code << c) }) def inText[R, S](text: Parser[R], code: Parser[S]): NT[(R, S)] = - ( done(text & code) - | marker ~> inCode(text, code) - | eat { c => inText(text << c, code) } - ) + (done(text & code) + | marker ~> inCode(text, code) + | eat { c => inText(text << c, code) }) // Simple variant of balanced parenthesis lazy val parens: NT[Any] = '(' ~ parens ~ ')' | succeed(()) @@ -98,7 +87,7 @@ trait Section4 extends ParserUsage { self: Section3 => // // aaaaa // aaaaa - val as: Parser[Any] = some(many('a') <~ lineEnd) + val as = some(many('a') <~ lineEnd) // Now we can retroactively combine the two parsers `parens` and `as` by // The resulting parser can parse for instance words like @@ -128,12 +117,11 @@ trait Section4 extends ParserUsage { self: Section3 => // arbitrary positions. 
// // We will use this combinator in the following example - def spaced[T]: Parser[T] => Parser[T] = p => - done(p) | eat { - case ' ' => spaced(p) - case '\n' => spaced(p) - case c => spaced(p << c) - } + def spaced[T](p: Parser[T]): Parser[T] = done(p) | eat { + case ' ' => spaced(p) + case '\n' => spaced(p) + case c => spaced(p << c) + } // ### Example Figure 6c. Modular definition of a parser combinator for // ASCII-tables. @@ -146,25 +134,31 @@ trait Section4 extends ParserUsage { self: Section3 => (head <~ lineEnd) >> { layout => body(layout, cell) } // a parser computing the table layout - def head: Parser[Layout] = some('+'~> manyCount('-')) <~ '+' + def head: Parser[Layout] = some('+' ~> manyCount('-')) <~ '+' - def body[T](layout: Layout, cell: Parser[T]): Parser[List[List[T]]] = + def body[T](layout: Layout, cell: Parser[T]) = many(rowLine(layout, layout.map(n => cell)) <~ rowSeparator(layout)) // given a layout, creates a parser for row separators - def rowSeparator(layout: Layout): Parser[Any] = - layout.map { n => ("-" * n) + "+" }.foldLeft("+")(_+_) ~ lineEnd + def rowSeparator(layout: Layout) = + layout + .map { n => List.fill(n)('-').mkString + "+" } + .foldLeft("+")(_ + _) ~ lineEnd // either read another rowLine or quit cell parsers and collect results def rowLine[T](layout: Layout, cells: List[Parser[T]]): Parser[List[T]] = - ( ('|' ~> distr(delegateCells(layout, cells)) <~ lineEnd) >> { cs => rowLine(layout, cs) } - | collect(cells) - ) + (('|' ~> distr(delegateCells(layout, cells)) <~ lineEnd) >> { cs => + rowLine(layout, cs) + } + | collect(cells)) // first feed n tokens to every cell parser, then feed newline and read a pipe - def delegateCells[T](layout: Layout, cells: List[Parser[T]]): List[Parser[Parser[T]]] = - layout.zip(cells).map { - case (n, p) => delegateN(n, p).map(_ << '\n') <~ '|' + def delegateCells[T]( + layout: Layout, + cells: List[Parser[T]] + ) = + layout.zip(cells).map { case (n, p) => + map(delegateN(n, p), (_ << '\n')) 
<~ '|' } // We can use the table combinator recursively to parse nested tables. @@ -184,7 +178,7 @@ trait Section4 extends ParserUsage { self: Section3 => // |~~~ | // |aaaa| // +----+ - lazy val combined: NT[Any] = inText(asAndTables, spaced(parens)) + lazy val combined: NT[Any] = inText(asAndTables, spaced(parens)) lazy val asAndTables: NT[Any] = as | table(combined) // Again, some more examples of words that are recognized by `combined` can diff --git a/artifact/src/main/scala/examples/paper/Section7.scala b/artifact/src/main/scala/examples/paper/Section7.scala index 151478a..25ac506 100644 --- a/artifact/src/main/scala/examples/paper/Section7.scala +++ b/artifact/src/main/scala/examples/paper/Section7.scala @@ -1,55 +1,46 @@ package fcd -/** - * Section 7 - Implementation - * ========================== - * This file contains all code examples from section 7 of our paper: - * - * Brachthäuser, Rendel, Ostermann. - * Parsing with First-Class Derivatives - * To appear in OOPSLA 2016. - * - * Section 7 introduces the implementation of our parser combinator library. In - * addition to repeating the few examples from the paper in this file we explain - * the relation between the implementation in the paper and in the artifact. - * - * As described in the paper, the core of the implementation builds on - * derivative based parsing as described by Matt Might et al, translated to an - * object oriented setting. - */ -trait Section7 extends ParserUsage { +import scala.language.implicitConversions - // Require a library implementation that also supports the derived combinators - type Parsers <: RichParsers +/** Section 7 – Implementation This file contains all code examples from section + * 7 of our paper. + * + * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives To + * appear in OOPSLA 2016. + * + * Section 7 introduces the implementation of our parser combinator library. 
In + * addition to repeating the few examples from the paper in this file we + * explain the relation between the implementation in the paper and in the + * artifact. + * + * As described in the paper, the core of the implementation builds on + * derivative based parsing as described by Matt Might et al, translated to an + * object oriented setting. + */ - // import all symbols from the library - import parsers._ +trait Section7 { self: RichParsers => - /** - * Section 7.1, introduces the concrete type of a parser as - * - * trait P[+R] { - * def results: Res[R] - * def derive: Elem => P[R] - * } - * - * The corresponding concrete type of this artifact can be found in - * `DerivativeParsers.scala` (corresponding to Figure 10) which contains the - * implementation of the interface defined in `Parsers.scala` - * (corresponding to Figure 1a.). - * - * Please note the following important differences: - * - `derive` is called `consume` in this artifact. - * - the trait `Parser[+R]` has default implementations for the various - * combinators. This corresponds to the later developments in Section 7.4 - * "Compaction by Dynamic Dispatch". - * - Instead of anonymous subclasses (such as `def fail[R] = new P[R] {...}`) - * the various combinators are implemented by named classes / objects - * (that is, `object Fail extends P[Nothing] { ... }`). - * - We added a special primitive parser `always` which is bisimilar to - * `many(any)` and thus dual (in some sense) to `fail`. Having it as a - * primitive gives rise to some optimizations. - */ + /** Section 7.1, introduces the concrete type of a parser as + * + * trait P[+R] { def results: Res[R] def derive: Elem => P[R] } + * + * The corresponding concrete type of this artifact can be found in + * `DerivativeParsers.scala` (corresponding to Figure 10) which contains the + * implementation of the interface defined in `Parsers.scala` (corresponding + * to Figure 1a.). 
+ * + * Please note the following important differences: + * - `derive` is called `consume` in this artifact. + * - the trait `Parser[+R]` has default implementations for the various + * combinators. This corresponds to the later developments in Section 7.4 + * "Compaction by Dynamic Dispatch". + * - Instead of anonymous subclasses (such as `def fail[R] = new P[R] + * {...}`) the various combinators are implemented by named classes / + * objects (that is, `object Fail extends P[Nothing] { ... }`). + * - We added a special primitive parser `always` which is bisimilar to + * `many(any)` and thus dual (in some sense) to `fail`. Having it as a + * primitive gives rise to some optimizations. + */ object section_7 { // ### Example. Derivative of some(a) @@ -59,7 +50,6 @@ trait Section7 extends ParserUsage { // > (as << 'a').printToFile("as_derive_a.png") val as = some('a') - // ### Example. Derivative with compaction // // You can observe the result of derivation an compaction by comparing @@ -85,5 +75,4 @@ trait Section7 extends ParserUsage { // grammars. Thus, it might be instructive to also inspect the tests in // `test/scala/LeftrecTests.scala`. } - } diff --git a/artifact/src/main/scala/library/Attributed.scala b/artifact/src/main/scala/library/Attributed.scala index ed83272..6c66055 100644 --- a/artifact/src/main/scala/library/Attributed.scala +++ b/artifact/src/main/scala/library/Attributed.scala @@ -1,161 +1,155 @@ package might -/** -============================================================================================= -The contents of this file are taken (adapted) from Matt Might's implementation of -parsing with derivatives. The original implementation can be found online at: +/* + The contents of this file are taken (adapted) from Matt Might's + implementation of parsing with derivatives. 
The original implementation can + be found online at: - http://matt.might.net/articles/parsing-with-derivatives/ - -============================================================================================= -*/ - -/** - A collection of attributes which must be computed by iteration to a fixed point. + http://matt.might.net/articles/parsing-with-derivatives/ */ -trait Attributed { - private var generation = -1 ; - private var stabilized = false ; - /** - An attribute computable by fixed point. - - @param bottom the bottom of the attribute's lattice. - @param join the lub operation on the lattice. - @param wt the partial order on the lattice. - - */ - abstract class Attribute[A](bottom : A, join : (A,A) => A, wt : (A,A) => Boolean) - { - private var currentValue : A = bottom - private var compute : () => A = null +/** A collection of attributes which must be computed by iteration to a fixed + * point. + */ +trait Attributed { + private var generation = -1 + private var stabilized = false + + /** An attribute computable by fixed point. + * + * @param bottom + * the bottom of the attribute's lattice. + * @param join + * the lub operation on the lattice. + * @param wt + * the partial order on the lattice. + */ + abstract class Attribute[A]( + bottom: A, + join: (A, A) => A, + wt: (A, A) => Boolean + ) { + private var currentValue: A = bottom + private var compute: () => A = null private var fixed = false - /** - Sets the computation the updates this attribute. - - @param computation the computation that updates this attribute. - */ - def := (computation : => A) { - compute = (() => computation) - } - - /** - Permanently fixes the value of this attribute. - - @param value the value of this attribute. - - */ - def :== (value : A) { + /** Sets the computation the updates this attribute. + * + * @param computation + * the computation that updates this attribute. 
+ */ + def :=(computation: => A) = { compute = (() => computation) } + + /** Permanently fixes the value of this attribute. + * + * @param value + * the value of this attribute. + */ + def :==(value: A) = { currentValue = value fixed = true } - /** - Recomputes the value of this attribute. - */ - def update() { - if (fixed) - return ; + /** Recomputes the value of this attribute. + */ + def update(): Unit = { + if (fixed) return val old = currentValue val newValue = compute() - if (!wt(newValue,currentValue)) { - currentValue = join(newValue,currentValue) + if (!wt(newValue, currentValue)) { + currentValue = join(newValue, currentValue) FixedPoint.changed = true } } - /** - The current value of this attribute. - */ - def value : A = { - // When the value of this attribute is requested, there are - // three possible cases: - // - // (1) It's already been computed (this.stabilized); - // (2) It's been manually set (this.fixed); or - // (3) It needs to be computed (generation < FixedPoint.generation). - if (fixed || stabilized || (generation == FixedPoint.generation)) + /** The current value of this attribute. + */ + def value: A = { + /* + When the value of this attribute is requested, there are + three possible cases: + (1) It's already been computed (this.stabilized); + (2) It's been manually set (this.fixed); or + (3) It needs to be computed (generation < FixedPoint.generation). 
+ */ + if (fixed || stabilized || generation == FixedPoint.generation) return currentValue - else - // Run or continue the fixed-point computation: - fix() - - if (FixedPoint.stabilized) - stabilized = true - return currentValue + fix() + if (FixedPoint.stabilized) stabilized = true + currentValue } } // Subsumption tests for attributes: - protected[this] def implies (a : Boolean, b : Boolean) = (!a) || b - protected[this] def follows (a : Boolean, b : Boolean) = (!b) || a - protected[this] def updateAttributes(): Unit + protected def implies(a: Boolean, b: Boolean) = !a || b + protected def follows(a: Boolean, b: Boolean) = !b || a + protected def updateAttributes(): Unit - private def fix() { + private def fix() = { this.generation = FixedPoint.generation if (FixedPoint.master eq null) { - FixedPoint.master = this ; - do { + FixedPoint.master = this + + FixedPoint.generation += 1 + FixedPoint.changed = false + updateAttributes() + while (FixedPoint.changed) { FixedPoint.generation += 1 FixedPoint.changed = false updateAttributes() - } while (FixedPoint.changed) ; - FixedPoint.stabilized = true ; + } + + FixedPoint.stabilized = true FixedPoint.generation += 1 updateAttributes() FixedPoint.reset() - } else { - updateAttributes() - } + } else updateAttributes() } } - -/** - FixedPoint tracks the state of a fixed point algorithm for the attributes of a grammar. - - In case there are fixed points running in multiple threads, each attribute is thread-local. - */ - +/** FixedPoint tracks the state of a fixed point algorithm for the attributes of + * a grammar. + * + * In case there are fixed points running in multiple threads, each attribute + * is thread-local. 
+ */ private object FixedPoint { - private val _stabilized = new ThreadLocal[Boolean] + private val _stabilized = ThreadLocal[Boolean]() _stabilized.set(false) - def stabilized = _stabilized.get ; - def stabilized_= (v : Boolean) { _stabilized.set(v) } + def stabilized = _stabilized.get + def stabilized_=(v: Boolean) = { _stabilized.set(v) } - private val _running = new ThreadLocal[Boolean] + private val _running = ThreadLocal[Boolean]() _running.set(false) - def running = _running.get ; - def running_= (v : Boolean) { _running.set(v) } + def running = _running.get + def running_=(v: Boolean) = { _running.set(v) } - private val _changed = new ThreadLocal[Boolean] + private val _changed = ThreadLocal[Boolean]() _changed.set(false) - def changed = _changed.get ; - def changed_= (v : Boolean) { _changed.set(v) } + def changed = _changed.get + def changed_=(v: Boolean) = { _changed.set(v) } - private val _generation = new ThreadLocal[Int] + private val _generation = ThreadLocal[Int]() _generation.set(0) - def generation = _generation.get ; - def generation_= (v : Int) { _generation.set(v) } + def generation = _generation.get + def generation_=(v: Int) = { _generation.set(v) } - private val _master = new ThreadLocal[Object] + private val _master = ThreadLocal[Object]() _master.set(null) - def master = _master.get ; - def master_= (v : Object) { _master.set(v) } - - /** - Resets all of the fixed point variables for this thread. - */ - def reset () { - this.stabilized = false ; - this.running = false ; - this.master = null ; - this.changed = false ; - this.generation = 0 ; + def master = _master.get + def master_=(v: Object) = { _master.set(v) } + + /** Resets all of the fixed point variables for this thread. 
+ */ + def reset() = { + this.stabilized = false + this.running = false + this.master = null + this.changed = false + this.generation = 0 } } diff --git a/artifact/src/main/scala/library/CharSyntax.scala b/artifact/src/main/scala/library/CharSyntax.scala index a1ed600..d42dfc2 100644 --- a/artifact/src/main/scala/library/CharSyntax.scala +++ b/artifact/src/main/scala/library/CharSyntax.scala @@ -1,61 +1,44 @@ package fcd -import language.implicitConversions - -trait CharSyntax { self: Parsers with DerivedOps with Syntax => - +trait CharSyntax { self: Parsers & DerivedOps & Syntax => type Elem = Char - implicit def charParser(c: Char): Parser[Char] = accept(c) - def notChar(c: Char): Parser[Char] = acceptIf(_ != c) - val char = any - val letter = acceptIf(_.isLetter) - val upper = acceptIf(_.isUpper) - val lower = acceptIf(_.isLower) - val whitespace = acceptIf(_.isWhitespace) - val digit = acceptIf(_.isDigit) + val char = any + val letter = acceptIf(_.isLetter) + val upper = acceptIf(_.isUpper) + val lower = acceptIf(_.isLower) + val whitespace = acceptIf(_.isWhitespace) + val digit = acceptIf(_.isDigit) val letterOrDigit = acceptIf(_.isLetterOrDigit) - val space = acceptIf(_.isSpaceChar) - val spaces = many(space) - val newline = acceptIf(_ == '\n') + val space = acceptIf(_.isSpaceChar) + val spaces = many(space) + val newline = acceptIf(_ == '\n') - def charRange(from: Char, to: Char) = acceptIf { c => c >= from && c <= to } + def charRange(from: Char, to: Char) = acceptIf(c => c >= from && c <= to) - val asciiLetter = charRange('a', 'z') | charRange('A', 'Z') + val asciiLetter = charRange('a', 'z') | charRange('A', 'Z') - def string(s: String): Parser[String] = (acceptSeq(s) map (_.mkString)) + def string(s: String): Parser[String] = acceptSeq(s) ^^ (_.mkString) - sealed trait Stringable[T] { - def apply: T => String - } - object Stringable { - implicit val char: Stringable[Char] = new Stringable[Char] { - def apply = _.toString - } - implicit val charList: 
Stringable[List[Char]] = new Stringable[List[Char]] { - def apply = _.mkString - } - implicit val string: Stringable[String] = new Stringable[String] { - def apply = identity - } - implicit val stringList: Stringable[List[String]] = new Stringable[List[String]] { - def apply = _.mkString - } - implicit def seq[T: Stringable, U: Stringable]: Stringable[T ~ U] = new Stringable[T ~ U] { - def apply = { case l ~ r => - implicitly[Stringable[T]].apply(l) + implicitly[Stringable[U]].apply(r) - } - } + sealed trait Stringable[T] { def apply: T => String } + + given Stringable[Char] { def apply = _.toString } + given Stringable[List[Char]] { def apply = _.mkString } + given Stringable[String] { def apply = identity } + given stringList: Stringable[List[String]] { def apply = _.mkString } + given [T, U](using st: Stringable[T], su: Stringable[U]): Stringable[(T, U)] + with { + def apply = { case (l, r) => st.apply(l) ++ su.apply(r) } } - implicit def liftString(s: String): Parser[String] = string(s) + given Conversion[String, Parser[String]] = string + given Conversion[List[Char], String] = _.mkString - implicit def charString(cs: List[Char]): String = cs.mkString + given [T](using st: Stringable[T]): Conversion[Parser[T], Parser[String]] = _ ^^ st.apply - implicit def stringParser[T: Stringable](p: Parser[T]): Parser[String] = - p map { v => implicitly[Stringable[T]].apply(v) } + given Conversion[Char, Parser[Char]] = accept def noneOf(s: String): Parser[Char] = acceptIf(t => !(s contains t)) } diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 47b02fa..6916fde 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -16,89 +16,82 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def accepts: Boolean def failed: Boolean - def alt[U >: R](q: Parser[U]): Parser[U] = q alt2 p - def alt2[U >: R](q: 
Parser[U]): Parser[U] = new Alt(q, p) - def and[U](q: Parser[U]): Parser[(R, U)] = q and2 p - def and2[U](q: Parser[U]): Parser[(U, R)] = new And(q, p) - def seq[U](q: Parser[U]): Parser[R ~ U] = q seq2 p - def seq2[U](q: Parser[U]): Parser[U ~ R] = new Seq(q, p) - def flatMap[U](f: R => Parser[U]): Parser[U] = new FlatMap(p, f) + infix def alt[U >: R](q: Parser[U]): Parser[U] = Alt(p, q) + infix def and[U](q: Parser[U]): Parser[(R, U)] = And(p, q) + infix def seq[U](q: Parser[U]): Parser[(R, U)] = new Seq(p, q) + infix def flatMap[U](f: R => Parser[U]): Parser[U] = FlatMap(p, f) def done: Parser[R] = if (accepts) Succeed(p.results) else fail - def not: Parser[Unit] = new Not(p) + def not: Parser[Unit] = Not(p) // the map family - def mapResults[U](f: (=> Results[R]) => Results[U]): Parser[U] = new MapResults(p, f) - def map[U](f: R => U): Parser[U] = p mapResults { ress => ress map f } - def withResults[U](res: List[U]): Parser[U] = mapResults(_ => res) + infix def mapResults[U](f: (=> Results[R]) => Results[U]): Parser[U] = + MapResults(p, f) + infix def map[U](f: R => U): Parser[U] = p mapResults { ress => ress map f } + infix def withResults[U](res: List[U]): Parser[U] = mapResults(_ => res) // for optimization of biased choice def prefix: Parser[Unit] = { - if (accepts) { - always - } else { - eat { el => (p consume el).prefix } - } + if (accepts) always + else eat { el => (p consume el).prefix } } } object Fail extends NullaryPrintable("∅") with Parser[Nothing] { - override def results = List.empty - override def failed = true + override def results = List() + override def failed = true override def accepts = false - override def consume: Elem => this.type = in => this - - override def alt[U >: Nothing](q: Parser[U]): q.type = q - override def alt2[U >: Nothing](q: Parser[U]): q.type = q - override def seq[U](q: Parser[U]): this.type = this - override def seq2[U](q: Parser[U]): this.type = this - override def and[U](q: Parser[U]): this.type = this - override 
def and2[U](q: Parser[U]): this.type = this - override def map[U](f: Nothing => U): this.type = this - override def flatMap[U](g: Nothing => Parser[U]): this.type = this - override def mapResults[U](f: (=> Results[Nothing]) => Results[U]): this.type = this + override def consume = _ => this + + override def alt[U >: Nothing](q: Parser[U]) = q + override def seq[U](q: Parser[U]) = this + override def and[U](q: Parser[U]) = this + override def map[U](f: Nothing => U) = this + override def flatMap[U](g: Nothing => Parser[U]) = this + override def mapResults[U]( + f: (=> Results[Nothing]) => Results[U] + ) = this override def done = this - override def not: Parser[Unit] = Always + override def not = Always override def prefix = this override def toString: String = "∅" } object Always extends NullaryPrintable("∞") with Parser[Unit] { override def results = List(()) - override def failed = false + override def failed = false override def accepts = true - override def consume = in => Always - override def not: Parser[Unit] = fail - override def and[U](q: Parser[U]): Parser[(Unit, U)] = q map { r => ((), r) } - override def and2[U](q: Parser[U]): Parser[(U, Unit)] = q map { r => (r, ()) } + override def consume = _ => this + override def not = Fail + override def and[U](q: Parser[U]) = q map { ((), _) } // this is a valid optimization, however it almost never occurs. 
override def alt[U >: Unit](q: Parser[U]) = this - override def alt2[U >: Unit](q: Parser[U]) = this override def toString = "always" } - case class Succeed[R](ress: Results[R]) extends NullaryPrintable("ε") with Parser[R] { p => + case class Succeed[R](ress: Results[R]) + extends NullaryPrintable("ε") + with Parser[R] { p => override def results = ress - override def failed = false + override def failed = false override def accepts = true override def consume = (in: Elem) => fail override def toString = s"ε($ress)" override def done: Parser[R] = this - override def mapResults[T](f: (=> Results[R]) => Results[T]): Parser[T] = Succeed(f(ress)) + override def mapResults[T](f: (=> Results[R]) => Results[T]): Parser[T] = + Succeed(f(ress)) override def seq[U](q: Parser[U]): Parser[R ~ U] = q mapResults { ress2 => for (r <- ress; r2 <- ress2) yield (r, r2) } - override def seq2[U](q: Parser[U]): Parser[U ~ R] = q mapResults { ress2 => - for (r <- ress; r2 <- ress2) yield (r2, r) - } - override def flatMap[U](f: R => Parser[U]): Parser[U] = ress.map(f).reduce(_ alt _) + override def flatMap[U](f: R => Parser[U]): Parser[U] = + ress.map(f).reduce(_ alt _) } case class Accept(elem: Elem) extends Parser[Elem] { - def results = List.empty - def failed = false + def results = List() + def failed = false def accepts = false def consume = (in: Elem) => if (in == elem) { @@ -109,12 +102,15 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => lazy val name = "'" + escape(elem) + "'" def printNode = s"""$id [label="$name", shape=circle]""" - private def escape(c: Elem): String = c.toString.replace("\\", "\\\\").replace("\"", "\\\"") + private def escape(c: Elem): String = + c.toString.replace("\\", "\\\\").replace("\"", "\\\"") } - class AcceptIf(f: Elem => Boolean) extends NullaryPrintable("acceptIf") with Parser[Elem] { - def results = List.empty - def failed = false + class AcceptIf(f: Elem => Boolean) + extends NullaryPrintable("acceptIf") + with Parser[Elem] { + 
def results = List() + def failed = false def accepts = false def consume = (in: Elem) => if (f(in)) { @@ -124,18 +120,22 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => } } - class Not[R](val p: Parser[R]) extends UnaryPrintable("not", p) with Parser[Unit] { - def results = (if (p.results.isEmpty) List(()) else List.empty) - def failed = false // we never know, this is a conservative approx. + class Not[R](val p: Parser[R]) + extends UnaryPrintable("not", p) + with Parser[Unit] { + def results = (if (p.results.isEmpty) List(()) else List()) + def failed = false // we never know, this is a conservative approx. def accepts = !p.accepts def consume: Elem => Parser[Unit] = in => (p consume in).not override def not = p withResults List(()) override def toString = s"not($p)" } - class Alt[R, U >: R](val p: Parser[R], val q: Parser[U]) extends BinaryPrintable("|", p, q) with Parser[U] { + class Alt[R, U >: R](val p: Parser[R], val q: Parser[U]) + extends BinaryPrintable("|", p, q) + with Parser[U] { def results = (p.results ++ q.results).distinct - def failed = p.failed && q.failed + def failed = p.failed && q.failed def accepts = p.accepts || q.accepts def consume = (in: Elem) => (p consume in) alt (q consume in) @@ -144,42 +144,50 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def toString = s"($p | $q)" } - class Seq[R, U](val p: Parser[R], val q: Parser[U]) extends BinaryPrintable("~", p, q) with Parser[R ~ U] { + class Seq[R, U](val p: Parser[R], val q: Parser[U]) + extends BinaryPrintable("~", p, q) + with Parser[R ~ U] { - def results = (for { r <- p.results; u <- q.results } yield (new ~(r, u))).distinct + def results = (for { r <- p.results; u <- q.results } yield (r, u)).distinct // q.failed forces q, which might not terminate for grammars with // infinite many nonterminals, like: // def foo(p) = 'a' ~ foo(p << 'a') // so we approximate similar to flatmap. 
- def failed = p.failed // || q.failed + def failed = p.failed // || q.failed def accepts = p.accepts && q.accepts - def consume = (in: Elem) => ((p consume in) seq q) alt (p.done seq (q consume in)) + def consume = (in: Elem) => + ((p consume in) seq q) alt (p.done seq (q consume in)) override def toString = s"($p ~ $q)" // canonicalization rule (1) from PLDI 2016 override def seq[T](r: Parser[T]): Parser[(R ~ U) ~ T] = - (p seq (q seq r)) map { - case (rr ~ (ru ~ rt)) => ((rr, ru), rt) - } + (p seq (q seq r)) map { case (rr, (ru, rt)) => ((rr, ru), rt) } } - class Done[R](val p: Parser[R]) extends UnaryPrintable(s"done", p) with Parser[R] { + class Done[R](val p: Parser[R]) + extends UnaryPrintable(s"done", p) + with Parser[R] { def results = p.results - def failed = p.failed + def failed = p.failed def accepts = p.accepts def consume = (el: Elem) => fail override def done = this override def toString = s"done($p)" } - class MapResults[R, U](val p: Parser[R], f: (=> Results[R]) => Results[U]) extends UnaryPrintable(s"mapResults", p) with Parser[U] { + class MapResults[R, U](val p: Parser[R], f: (=> Results[R]) => Results[U]) + extends UnaryPrintable(s"mapResults", p) + with Parser[U] { // preserve whether p actually has results (f might ignore its argument...) def results = if (p.results.isEmpty) List() else f(p.results).distinct - def failed = p.failed + def failed = p.failed def accepts = p.accepts def consume = (el: Elem) => (p consume el) mapResults f - override def mapResults[T](g: (=> Results[U]) => Results[T]): Parser[T] = p mapResults { res => g(f(res)) } - override def map[T](g: U => T): Parser[T] = p mapResults { res => f(res) map g } + override def mapResults[T](g: (=> Results[U]) => Results[T]): Parser[T] = + p mapResults { res => g(f(res)) } + override def map[T](g: U => T): Parser[T] = p mapResults { res => + f(res) map g + } override def done = p.done mapResults f // we can forget the results here. 
@@ -189,36 +197,34 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => // canonicalization rule (2) from PLDI 2016 // allows for instance rewriting (always.map(f) & p) -> p.map(...f...) override def seq[S](q: Parser[S]): Parser[U ~ S] = - (p seq q).mapResults(rss => rss.unzip match { - case (us, ss) => f(us) zip ss - }) - override def seq2[S](q: Parser[S]): Parser[S ~ U] = - (p seq2 q).mapResults(rss => rss.unzip match { - case (ss, us) => ss zip f(us) - }) + (p seq q).mapResults(rss => + rss.unzip match { case (us, ss) => f(us) zip ss } + ) override def and[S](q: Parser[S]): Parser[(U, S)] = - (p and q).mapResults(rss => rss.unzip match { - case (us, ss) => f(us) zip ss - }) - override def and2[S](q: Parser[S]): Parser[(S, U)] = - (p and2 q).mapResults(rss => rss.unzip match { - case (ss, us) => ss zip f(us) - }) + (p and q).mapResults(rss => + rss.unzip match { case (us, ss) => f(us) zip ss } + ) } - class And[R, U](val p: Parser[R], val q: Parser[U]) extends BinaryPrintable("&", p, q) with Parser[(R, U)] { - def results = (for { r <- p.results; u <- q.results } yield ((r, u))).distinct - def failed = p.failed || q.failed + class And[R, U](val p: Parser[R], val q: Parser[U]) + extends BinaryPrintable("&", p, q) + with Parser[(R, U)] { + def results = + (for { r <- p.results; u <- q.results } yield ((r, u))).distinct + def failed = p.failed || q.failed def accepts = p.accepts && q.accepts def consume = (in: Elem) => (p consume in) and (q consume in) override def not = p.not alt q.not override def toString = s"($p & $q)" } - class FlatMap[R, U](val p: Parser[R], f: R => Parser[U]) extends UnaryPrintable("flatMap", p) with Parser[U] { - def results = ((p.results map f) flatMap (_.results)).distinct //res().distinct + class FlatMap[R, U](val p: Parser[R], f: R => Parser[U]) + extends UnaryPrintable("flatMap", p) + with Parser[U] { + def results = + ((p.results map f) flatMap (_.results)).distinct // res().distinct def accepts = !results.isEmpty - def 
failed = p.failed // that's the best we know + def failed = p.failed // that's the best we know def consume: Elem => Parser[U] = in => { val next = (p consume in) flatMap f @@ -228,49 +234,50 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def toString = "flatMap" } - class Nonterminal[+R](_p: => Parser[R]) extends Parser[R] { + class Nonterminal[R](_p: => Parser[R]) extends Parser[R] { lazy val p = _p - def accepts: Boolean = propertiesFix.nullable.value - def failed: Boolean = propertiesFix.empty.value + def accepts: Boolean = propertiesFix.nullable.value + def failed: Boolean = propertiesFix.empty.value def results: Results[R] = resultsFix.results.value // This separation into two fixed points is essential to // prevent excessive recomputation. - protected[this] object propertiesFix extends Attributed { - object nullable extends Attribute[Boolean](false,_ || _,implies) - object empty extends Attribute[Boolean](true,_ && _,follows) + private object propertiesFix extends Attributed { + object nullable extends Attribute[Boolean](false, _ || _, implies) + object empty extends Attribute[Boolean](true, _ && _, follows) - empty := p.failed - nullable := p.accepts + empty := p.failed + nullable := p.accepts - override protected[this] def updateAttributes() { + override protected def updateAttributes() = { empty.update() nullable.update() } } - protected[this] object resultsFix extends Attributed { - object results extends Attribute[List[R]]( - List.empty, - (nw, ol) => (nw ++ ol).distinct, - (nw, ol) => nw.toSet.subsetOf(ol.toSet)) + private object resultsFix extends Attributed { + object results + extends Attribute[List[R]]( + List(), + (nw, ol) => (nw ++ ol).distinct, + (nw, ol) => nw.toSet.subsetOf(ol.toSet) + ) results := p.results - override protected[this] def updateAttributes() { - results.update() - } + override protected def updateAttributes() = results.update() } - private[this] val cache: mutable.ListMap[Elem, Parser[R]] = 
mutable.ListMap.empty + private val cache: mutable.HashMap[Elem, Parser[R]] = mutable.HashMap() // Wrapping in `nonterminal` is cecessary for left-recursive // grammars and for grammars like "DerivativeParsers / preprocessor" // that recursively derive. Optimizing the nonterminal node away causes // divergence on these grammars. Worse, in the latter case // forcing `next` will already cause divergence. override def consume: Elem => Parser[R] = el => - cache.getOrElseUpdate(el, + cache.getOrElseUpdate( + el, if (p.failed) fail else @@ -282,7 +289,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => this } var name = "nt" - private val rec = new DynamicVariable[Boolean](false) + private val rec = DynamicVariable[Boolean](false) override def toString = if (rec.value) s"nt(${System.identityHashCode(this)})" @@ -304,7 +311,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => val fail: Parser[Nothing] = Fail val always: Parser[Unit] = Always def succeed[R](res: R): Parser[R] = Succeed(List(res)) - def acceptIf(cond: Elem => Boolean): Parser[Elem] = new AcceptIf(cond) + def acceptIf(cond: Elem => Boolean): Parser[Elem] = AcceptIf(cond) // combinators with parser arguments def not[R](p: Parser[R]): Parser[Unit] = p.not @@ -313,7 +320,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def alt[R, U >: R](p: Parser[R], q: Parser[U]) = p alt q def seq[R, U](p: Parser[R], q: Parser[U]) = p seq q - def and[R, U](p: Parser[R], q: Parser[U]): Parser[(R, U)] = p and q + def and[R, U](p: Parser[R], q: Parser[U]) = p and q def feed[R](in: Elem, p: => Parser[R]) = p consume in @@ -321,11 +328,14 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def done[T](p: Parser[T]): Parser[T] = p.done - override def nonterminal[R](_p: => Parser[R]): Nonterminal[R] = new Nonterminal(_p) - def nonterminal[R](name: String)(_p: => Parser[R]): Nonterminal[R] = new Nonterminal(_p).named(name) + override def nonterminal[R](_p: => Parser[R]): 
Nonterminal[R] = + Nonterminal(_p) + def nonterminal[R](name: String)(_p: => Parser[R]): Nonterminal[R] = + Nonterminal(_p).named(name) def feed[R](p: Parser[R], in: Elem) = p.consume(in) - def parse[R](p: Parser[R], in: Iterable[Elem]): Results[R] = feedAll(p, in).results + def parse[R](p: Parser[R], in: Iterable[Elem]): Results[R] = + feedAll(p, in).results // for testing override def isSuccess[R](p: Parser[R]): Boolean = p.accepts @@ -333,9 +343,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => // optimization: Once p accepts, p as a prefix will always accept. // often used to implement biased choice: (not(prefix(p)) &> q - override def prefix: Parser[Any] => Parser[Unit] = p => p.prefix + override def prefix: Parser[Any] => Parser[Unit] = _.prefix } -object DerivativeParsers extends RichParsers with DerivativeParsers { - override type Elem = Char -} +object DerivativeParsers extends RichParsers with DerivativeParsers diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index 37ed254..9c4329d 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -1,6 +1,8 @@ package fcd -trait DerivedOps { self: Parsers with Syntax => +import scala.language.implicitConversions + +trait DerivedOps { self: Parsers & Syntax => val any: Parser[Elem] = acceptIf(_ => true) @@ -8,19 +10,20 @@ trait DerivedOps { self: Parsers with Syntax => def no(t: Elem): Parser[Elem] = acceptIf(_ != t) - def acceptSeq[ES <% Iterable[Elem]](es: ES): Parser[List[Elem]] = + def acceptSeq(es: Iterable[Elem]): Parser[List[Elem]] = es.foldRight[Parser[List[Elem]]](succeed(Nil)) { (x, pxs) => - accept(x) ~ pxs map mkList + accept(x) ~ pxs ^^ mkList } def some[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) map { case p ~ ps => p :: ps } + lazy val some_v = seq(p, 
many_v) ^^ mkList some_v } + def many[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) map { case p ~ ps => p :: ps } + lazy val some_v = seq(p, many_v) ^^ mkList many_v } @@ -29,25 +32,25 @@ trait DerivedOps { self: Parsers with Syntax => // def always[T](t: T): Parser[T] = // many(any) map { _ => t } - def oneOf[ES <% Iterable[Elem]](s: ES): Parser[Elem] = acceptIf { - t => s.exists(_ == t) + def oneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { t => + s.exists(_ == t) } - def noneOf[ES <% Iterable[Elem]](s: ES): Parser[Elem] = acceptIf { - t => s.forall(_ != t) + def noneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { t => + s.forall(_ != t) } def opt[T](p: Parser[T]): Parser[Option[T]] = - alt(p map { r => Some(r) }, succeed(None)) + alt(p ^^ { r => Some(r) }, succeed(None)) def manyN[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else p ~ manyN(n - 1, p) map { case r ~ rs => r :: rs } + else p ~ manyN(n - 1, p) ^^ mkList } def atMost[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else (p ~ atMost(n - 1, p) map { case r ~ rs => r :: rs }) | succeed(Nil) + else (p ~ atMost(n - 1, p) ^^ mkList) | succeed(Nil) } def manySep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { @@ -57,45 +60,36 @@ trait DerivedOps { self: Parsers with Syntax => // same optimization as above for many and some def someSep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(sep ~> some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) map { case p ~ ps => p :: ps } + lazy val some_v = seq(p, many_v) ^^ mkList some_v } - def manyCount(p: Parser[Any]): Parser[Int] = - many(p) map { _.size } - - def someCount(p: Parser[Any]): Parser[Int] = - some(p) map { _.size } + def manyCount(p: Parser[Any]): Parser[Int] = many(p) ^^ { _.size } + def someCount(p: Parser[Any]): Parser[Int] = 
some(p) ^^ { _.size } // distributive law - chains a list of parsers // --> in Haskell one would use `traverse` def distr[T](ps: List[Parser[T]]): Parser[List[T]] = - ps.foldRight(succeed[List[T]](Nil)) { (p, l) => - (p ~ l) map { case a ~ b => a :: b } - } + ps.foldRight(succeed[List[T]](Nil)) { (p, l) => (p ~ l) ^^ mkList } - def join[T](p: Parser[Parser[T]]): Parser[T] = p flatMap done + def join[T](p: Parser[Parser[T]]): Parser[T] = p >> done // A parser that captures the tokens consumed by `p` - def consumed[T](p: Parser[T]): Parser[List[Elem]] = - many(any) <& p + def consumed[T](p: Parser[T]): Parser[List[Elem]] = many(any) <& p - def eat[R](f: Elem => Parser[R]): Parser[R] = - any >> f + def eat[R](f: Elem => Parser[R]): Parser[R] = any >> f def delegate[T](p: Parser[T]): Parser[Parser[T]] = succeed(p) | eat { c => delegate(p << c) } def delegateN[T](n: Int, p: Parser[T]): Parser[Parser[T]] = - if (n <= 0) - succeed(p) - else - eat { c => delegateN(n - 1, p << c) } + if (n <= 0) succeed(p) + else eat { c => delegateN(n - 1, p << c) } // collects the results of parsers def collect[T](ps: List[Parser[T]]): Parser[List[T]] = ps.foldRight(succeed[List[T]](Nil)) { (p, l) => - done(p) >> { r => l.map(r :: _) } + done(p) >> { r => l ^^ (r :: _) } } def includes[T](p: Parser[T]): Parser[T] = @@ -105,20 +99,22 @@ trait DerivedOps { self: Parsers with Syntax => // described by the function `f`. 
def repeat[T](f: Parser[T] => Parser[Parser[T]]): Parser[T] => Parser[T] = { val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => cache.getOrElseUpdate(p, { - done(p) | nonterminal(f(p) >> rec) - }) + def rec(p: Parser[T]): Parser[T] = + cache.getOrElseUpdate( + p, + { done(p) | nonterminal(f(p) >> rec) } + ) rec } // repeat is just an instance of repeatAll - def repeatAll[T](f: List[Parser[T]] => Parser[List[Parser[T]]]): List[Parser[T]] => Parser[List[T]] = ps => - collect(ps) | f(ps) >> repeatAll(f) + def repeatAll[T](f: List[Parser[T]] => Parser[List[Parser[T]]])( + ps: List[Parser[T]] + ): Parser[List[T]] = collect(ps) | f(ps) >> repeatAll(f) - private def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } + private def mkList[T](xs: (T, List[T])) = xs._1 :: xs._2 - val succeedForever: NT[Unit] = - succeed(()) | (any ~> succeedForever) + lazy val succeedForever: NT[Unit] = succeed(()) | (any ~> succeedForever) def rightDerivative[R](p: Parser[R], elem: Elem): Parser[R] = done(p << elem) | eat { c => rightDerivative(p << c, elem) } @@ -132,12 +128,10 @@ trait DerivedOps { self: Parsers with Syntax => def lookahead[T](p: Parser[Any], q: Parser[T]): Parser[T] = not(prefix(p)) &> q - //consumed(p) >> { in => q <<< in } - + // consumed(p) >> { in => q <<< in } // some extension point for optimization - def prefix: Parser[Any] => Parser[Unit] = p => p ~> always - + def prefix: Parser[Any] => Parser[Unit] = _ ~> always // per-element action performed on p def rep[T](f: Elem => Parser[T] => Parser[T]) = @@ -147,8 +141,7 @@ trait DerivedOps { self: Parsers with Syntax => def filter[T](pred: Elem => Boolean): Parser[T] => Parser[T] = rep(el => p => if (pred(el)) (p << el) else p) - def skip[T]: Parser[T] => Parser[T] = - rep(el => p => p) + def skip[T]: Parser[T] => Parser[T] = rep(el => p => p) def mapIn[T](f: Elem => Elem): Parser[T] => Parser[T] = rep(el => p => p << f(el)) @@ -156,37 
+149,39 @@ trait DerivedOps { self: Parsers & Syntax => def mapInPartial[T](f: PartialFunction[Elem, Elem]): Parser[T] => Parser[T] = mapIn(f orElse { case x => x }) - def inRegion[T](region: Parser[Any], f: Parser[Parser[T]] => Parser[Parser[T]]): Parser[T] => Parser[T] = { + def inRegion[T]( + region: Parser[Any], + f: Parser[Parser[T]] => Parser[Parser[T]] + ): Parser[T] => Parser[T] = { - // to prevent accessive re-parsing we introduce some caching on this - // parser combinator here. - val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] - - def rec: Parser[T] => Parser[T] = p => cache.getOrElseUpdate(p, { - - lazy val dp = delegate(p) - nonterminal ( - done(p) | biasedAlt( - region &> f(dp) >> rec, - (any &> dp) >> rec)) - }) - rec - } + // to prevent excessive re-parsing we introduce some caching on this + // parser combinator here. + val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] + def rec(p: Parser[T]): Parser[T] = + cache.getOrElseUpdate( + p, { + lazy val dp = delegate(p) + nonterminal( + done(p) | biasedAlt(region &> f(dp) >> rec, (any &> dp) >> rec) + ) + } + ) + rec + } // Greedy repetition - def greedyMany[T](p: Parser[T]): Parser[List[T]] = greedySome(p) | succeed(Nil) + def greedyMany[T](p: Parser[T]) = greedySome(p) | succeed(Nil) // Instead of a class use a closure: def greedySome[T]: Parser[T] => NT[List[T]] = { p => - def withNext(p: Parser[T], ps: Parser[List[T]]): Parser[List[T]] = - done(p) ~ ps ^^ { case t ~ ts => t :: ts } + def withNext(p: Parser[T], ps: Parser[List[T]]) = + done(p) ~ ps ^^ mkList def forceRead(curr: Parser[T]): Parser[List[T]] = withNext(curr, succeed(Nil)) | eat { el => - biasedAlt( forceRead(curr << el), - withNext(curr, greedySome(p) << el)) + biasedAlt(forceRead(curr << el), withNext(curr, greedySome(p) << el)) } forceRead(p) diff --git a/artifact/src/main/scala/library/Parsers.scala b/artifact/src/main/scala/library/Parsers.scala index 14c0bdb..d5fb305 100644 
--- a/artifact/src/main/scala/library/Parsers.scala +++ b/artifact/src/main/scala/library/Parsers.scala @@ -1,7 +1,5 @@ package fcd -import language.higherKinds - trait Parsers { // the token type (`Elem`) and the type of the results are left abstract @@ -42,19 +40,12 @@ trait Parsers { // For testing def isSuccess[R](p: Parser[R]): Boolean = !isFailure(p) def isFailure[R](p: Parser[R]): Boolean = !isSuccess(p) - def accepts[R, ES <% Iterable[Elem]](p: Parser[R], s: ES): Boolean = isSuccess(feedAll(p, s)) + def accepts[R](p: Parser[R], s: Iterable[Elem]): Boolean = isSuccess( + feedAll(p, s) + ) // As optimization def always: Parser[Unit] } -trait RichParsers extends Parsers with Syntax with DerivedOps with CharSyntax - -// A trait to bake parsers in a nested cake -trait ParserUsage { - // Override _parsers in concrete tests suites with the - // appropriate parser implementation. - type Parsers - def _parsers: Parsers - lazy val parsers: Parsers = _parsers -} +trait RichParsers extends Parsers, Syntax, DerivedOps, CharSyntax diff --git a/artifact/src/main/scala/library/Printable.scala b/artifact/src/main/scala/library/Printable.scala index 52651b6..abd8654 100644 --- a/artifact/src/main/scala/library/Printable.scala +++ b/artifact/src/main/scala/library/Printable.scala @@ -18,7 +18,6 @@ trait Printable { results{results.toSet.mkString(", ")} - private lazy val printGraph: String = s"""strict digraph G { | ${printNode} @@ -26,7 +25,7 @@ trait Printable { |""".stripMargin('|') def printToFile(path: String): Unit = { - val is = new ByteArrayInputStream(printGraph.getBytes("UTF-8")) + val is = ByteArrayInputStream(printGraph.getBytes("UTF-8")) (s"dot -Tpng -o $path" #< is) ! 
} @@ -39,7 +38,8 @@ abstract class NullaryPrintable(val name: String) extends Printable { def printNode = s"""$id [label="$name", shape=circle]""" } -abstract class UnaryPrintable(val name: String, _p: => Printable) extends Printable { +abstract class UnaryPrintable(val name: String, _p: => Printable) + extends Printable { private lazy val p = _p def printNode = s""" ${id} [shape=none, fontsize=8, fontname=mono, label=<$table>]; @@ -47,7 +47,8 @@ abstract class UnaryPrintable(val name: String, _p: => Printable) extends Printa |${p.printNode}""".stripMargin('|') } -abstract class BinaryPrintable(val name: String, p: Printable, q: Printable) extends Printable { +abstract class BinaryPrintable(val name: String, p: Printable, q: Printable) + extends Printable { def printNode = s""" ${id} [shape=none, fontsize=8, fontname=mono, label=<$table>]; | ${id}:sw -> ${p.id} diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 1a59b9e..9ff53a8 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -1,62 +1,67 @@ package fcd -import language.implicitConversions - -trait Syntax { self: Parsers with DerivedOps => - - implicit class ParserOps[R, P <% Parser[R]](p: P) { - def <<(in: Elem): Parser[R] = self.feed(p, in) - def <<<(in: Seq[Elem]): Parser[R] = self.feedAll(p, in) - def parse(s: Seq[Elem]) = self.parse(p, s) - - def map[U](f: R => U): Parser[U] = self.map(p, f) - def flatMap[U](f: R => Parser[U]): Parser[U] = self.flatMap(p, f) - +trait Syntax { self: Parsers & DerivedOps => + extension [R](p: Parser[R]) { + def <<(in: Elem) = feed(p, in) + def <<<(in: Seq[Elem]) = feedAll(p, in) def ~[U](q: Parser[U]) = seq(p, q) - def ~>[U](q: Parser[U]) = seq(p, q) map { case (a, b) => b } - def <~[U](q: Parser[U]) = seq(p, q) map { case (a, b) => a } - + def <~[U](q: Parser[U]) = map(seq(p, q), _._1) + def ~>[U](q: Parser[U]) = map(seq(p, q), _._2) def |[U >: R](q: 
Parser[U]) = alt(p, q) - def &[U](q: Parser[U]) = and(p, q) - def <&[U](q: Parser[U]) = and(p, q) map { _._1 } - def &>[U](q: Parser[U]) = and(p, q) map { _._2 } + def <&[U](q: Parser[U]) = map(and(p, q), _._1) + def &>[U](q: Parser[U]) = map(and(p, q), _._2) // biased Alternative def <|[U >: R](q: Parser[U]) = biasedAlt(p, q) def |>[U >: R](q: Parser[U]) = biasedAlt(q, p) - def ^^[U](f: R => U): Parser[U] = p map f - def ^^^[U](u: => U): Parser[U] = p map { _ => u } - - def >>[U](f: R => Parser[U]): Parser[U] = p flatMap f + def ^^[U](f: R => U) = map(p, f) + def ^^^[U](u: => U) = map(p, _ => u) + def >>[U](f: R => Parser[U]) = flatMap(p, f) def ? = opt(p) def * = many(p) def + = some(p) } - implicit def liftToParsers[R, U](p: Parser[R])(implicit conv: R => U): Parser[U] = - p map { conv } + given liftToParser[R, U](using + conv: R => U + ): Conversion[Parser[R], Parser[U]] = map(_, conv) // tag nonterminals - this allows automatic insertion of nt-markers final case class NT[+R](parser: Parser[R]) - implicit def toParser[R](nt: NT[R]): Parser[R] = nt.parser + given [R]: Conversion[NT[R], Parser[R]] = _.parser + + import scala.language.implicitConversions implicit def toNT[R](parser: => Parser[R]): NT[R] = NT(nonterminal(parser)) - implicit def tupleSeq2[T1, T2, O](f: (T1, T2) => O): (T1 ~ T2) => O = { - case t1 ~ t2 => f(t1, t2) - } - implicit def tupleSeq3[T1, T2, T3, O](f: (T1, T2, T3) => O): (T1 ~ T2 ~ T3) => O = { - case t1 ~ t2 ~ t3 => f(t1, t2, t3) + given tupleSeq3[T1, T2, T3, O] + : Conversion[(T1, T2, T3) => O, (T1 ~ T2 ~ T3) => O] with { + def apply(f: (T1, T2, T3) => O) = { case ((t1, t2), t3) => f(t1, t2, t3) } } - implicit def tupleSeq4[T1, T2, T3, T4, O](f: (T1, T2, T3, T4) => O): (T1 ~ T2 ~ T3 ~ T4) => O = { - case t1 ~ t2 ~ t3 ~ t4 => f(t1, t2, t3, t4) + + given tupleSeq4[T1, T2, T3, T4, O] + : Conversion[(T1, T2, T3, T4) => O, (T1 ~ T2 ~ T3 ~ T4) => O] with { + def apply(f: (T1, T2, T3, T4) => O) = { case (((t1, t2), t3), t4) => + f(t1, t2, t3, 
t4) + } } - implicit def tupleSeq5[T1, T2, T3, T4, T5, O](f: (T1, T2, T3, T4, T5) => O): (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O = { - case t1 ~ t2 ~ t3 ~ t4 ~ t5 => f(t1, t2, t3, t4, t5) + + given tupleSeq5[T1, T2, T3, T4, T5, O] + : Conversion[(T1, T2, T3, T4, T5) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O] + with { + def apply(f: (T1, T2, T3, T4, T5) => O) = { + case ((((t1, t2), t3), t4), t5) => f(t1, t2, t3, t4, t5) + } } - implicit def tupleSeq6[T1, T2, T3, T4, T5, T6, O](f: (T1, T2, T3, T4, T5, T6) => O): (T1 ~ T2 ~ T3 ~ T4 ~ T5 ~ T6) => O = { - case t1 ~ t2 ~ t3 ~ t4 ~ t5 ~ t6 => f(t1, t2, t3, t4, t5, t6) + + given tupleSeq6[T1, T2, T3, T4, T5, T6, O]: Conversion[ + (T1, T2, T3, T4, T5, T6) => O, + (T1 ~ T2 ~ T3 ~ T4 ~ T5 ~ T6) => O + ] with { + def apply(f: (T1, T2, T3, T4, T5, T6) => O) = { + case (((((t1, t2), t3), t4), t5), t6) => f(t1, t2, t3, t4, t5, t6) + } } } diff --git a/artifact/src/test/scala/BasicCombinatorsTest.scala b/artifact/src/test/scala/BasicCombinatorsTest.scala index b6706f7..3ef7466 100644 --- a/artifact/src/test/scala/BasicCombinatorsTest.scala +++ b/artifact/src/test/scala/BasicCombinatorsTest.scala @@ -1,11 +1,13 @@ package fcd package test -import org.scalatest._ +import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec -trait BasicCombinatorTests extends CustomMatchers { self: FunSpec with Matchers => +trait BasicCombinatorTests { + self: AnyFunSpec & CustomMatchers[RichParsers] => - import parsers._ + import parsers.{succeed as succ, *} describe("parser \"abc\"") { val p = 'a' ~ 'b' ~ 'c' @@ -24,7 +26,7 @@ trait BasicCombinatorTests extends CustomMatchers { self: FunSpec with Matchers } describe("parser \"baaa | ba\"") { - val p: Parser[_] = ('b' ~ 'a' ~ 'a' ~ 'a') | 'b' ~ 'a' + val p = ('b' ~ 'a' ~ 'a' ~ 'a') | 'b' ~ 'a' p shouldParse "baaa" p shouldParse "ba" ((p ~ 'c' ~ 'o') | (p ~ 'c')) shouldParse "bac" @@ -32,29 +34,29 @@ trait BasicCombinatorTests extends CustomMatchers { self: FunSpec with Matchers } 
describe("parser \"(baaa | ba) aa\"") { - val p: Parser[_] = ("baaa" | "ba") ~ "aa" + val p = ("baaa" | "ba") ~ "aa" p shouldParse "baaaaa" p shouldParse "baaa" } describe("parser \"succeed(a) b\"") { - val p = succeed('a') ~ 'b' + val p = succ('a') ~ 'b' p shouldParse "b" p shouldNotParse "" } describe("parser \"succeed(a) succeed(b)\"") { - val p = succeed('a') ~ succeed('b') + val p = succ('a') ~ succ('b') p shouldParse "" } describe("parser \"succeed(a) | succeed(b)\"") { - val p = succeed('a') | succeed('b') + val p = succ('a') | succ('b') p shouldParse "" } describe("parser \"(a a a | a a)+") { - val p: Parser[_] = 'a' ~ 'a' ~ 'a' | 'a' ~ 'a' + val p = 'a' ~ 'a' ~ 'a' | 'a' ~ 'a' describe("some(_)") { some(p) shouldParse "aaaa" } describe("_ ~ 'b'") { (p ~ 'b') shouldParse "aaab" } describe("some(_) ~ 'b'") { @@ -71,14 +73,14 @@ trait BasicCombinatorTests extends CustomMatchers { self: FunSpec with Matchers describe("parser \"'a'+\"") { val p = some('a') - val largeInput = "a" * 100 + val largeInput = List.fill(100)('a').mkString p shouldParse "a" p shouldParse "aaaaaa" p shouldParse largeInput p shouldNotParse "" - p shouldNotParse ("b" + largeInput) - p shouldNotParse (largeInput + "b") + p shouldNotParse "b" + largeInput + p shouldNotParse largeInput + "b" } } diff --git a/artifact/src/test/scala/CustomMatchers.scala b/artifact/src/test/scala/CustomMatchers.scala index d5b0f79..73d775d 100644 --- a/artifact/src/test/scala/CustomMatchers.scala +++ b/artifact/src/test/scala/CustomMatchers.scala @@ -1,40 +1,38 @@ package fcd package test -import org.scalatest._ -import org.scalatest.matchers._ +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.matchers.{BeMatcher, MatchResult} +import org.scalatest.Tag -trait CustomMatchers { self: FunSpec with Matchers => +trait CustomMatchers[+P <: Parsers](val parsers: P) extends Matchers { + self: AnyFunSpec => - // Due to initialization problems we have to use 
this pattern - // of def and lazy val. - // - // Override _parsers in concrete tests suites with the - // appropriate parser implementation. - type Parsers = RichParsers - def _parsers: RichParsers - lazy val parsers = _parsers - import parsers.{ Results, isSuccess, Parser, accepts, Elem } + import parsers.{Elem, Parser, accepts, isSuccess, parse} - implicit class ParserTests[T, P <% Parser[T]](p: => P) { - def shouldParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should parse "$s" """, tags:_*) { + extension [T](p: => Parser[T]) { + infix def shouldParse(s: Iterable[Elem], tags: Tag*) = + it(s"""should parse "$s" """, tags*) { accepts(p, s) shouldBe true } - def shouldNotParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should not parse "$s" """, tags:_*) { + infix def shouldNotParse(s: Iterable[Elem], tags: Tag*) = + it(s"""should not parse "$s" """, tags*) { accepts(p, s) shouldBe false } + // for unambiguous parses + infix def shouldParseWith(s: Iterable[Elem], result: T) = + it(s"""should parse "$s" with correct result""") { + parse(p, s) shouldBe List(result) + } } - class SuccessMatcher extends BeMatcher[Parser[_]] { - def apply(left: Parser[_]) = + class SuccessMatcher[T] extends BeMatcher[Parser[T]] { + def apply(left: Parser[T]) = MatchResult( isSuccess(left), left.toString + " was not successful", left.toString + " was successful" ) } - lazy val successful = new SuccessMatcher - lazy val failure = not (successful) } diff --git a/artifact/src/test/scala/DerivativeParsersTests.scala b/artifact/src/test/scala/DerivativeParsersTests.scala index c1fc23d..41ccd5b 100644 --- a/artifact/src/test/scala/DerivativeParsersTests.scala +++ b/artifact/src/test/scala/DerivativeParsersTests.scala @@ -1,24 +1,25 @@ package fcd package test -import org.scalatest._ import scala.language.higherKinds import language.implicitConversions +import org.scalatest.funspec.AnyFunSpec -class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers 
+class DerivativeParsersTests + extends AnyFunSpec + with CustomMatchers(paper) with BasicCombinatorTests with NegationTests - with LeftrecTests - with Section3 with Section4 with Section7 { - - def _parsers: DerivativeParsers.type = DerivativeParsers - override lazy val parsers: DerivativeParsers.type = _parsers - - import parsers._ - - // it is necessary to rename some combinators since names are already - // bound by scala test. - import parsers.{ fail => err, noneOf => nonOf, oneOf => one, not => neg } + with LeftrecTests { + + import parsers.{ + succeed as succ, + not as neg, + fail as err, + noneOf as nonOf, + oneOf as onOf, + * + } // This test illustrates how to write graph representations of the // parsers to a file. (To execute it replace `ignore` by `describe` and @@ -26,7 +27,7 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers describe("printing graph representations of parsers") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ A A.printToFile("test.png") } @@ -45,8 +46,7 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block - | some('x') ~ '\n' - ) + | some('x') ~ '\n') lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) @@ -63,8 +63,7 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block - | some('x') ~ '\n' - ) + | some('x') ~ '\n') lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) @@ -77,20 +76,18 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers lazy val xs = many(some('x') ~ '\n') - table(xs) shouldParse """+---+ - ^|xxx| - ^+---+ - ^""".stripMargin('^') - - table(xs) shouldParse """+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - 
^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') + table(xs) shouldParse "+---+\n|xxx|\n+---+\n" + + table(xs) shouldParse + """+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') } describe("Table parser with delegation") { @@ -98,92 +95,99 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers type Layout = List[Int] // A parser computing the table layout - lazy val head: Parser[Layout] = some('+'~> manyCount('-')) <~ '+' <~ '\n' - - - def table[T](content: Parser[T]): Parser[List[List[T]]] = head >> { layout => - // After knowing the layout the row-separators are fixed - val rowSeparator = layout.map { n => ("-" * n) + "+" }.foldLeft("+")(_+_) ~ '\n' - val initCells = layout.map { _ => content } - - // one line of a cell, given a fixed width. - def cell: Int => Parser[T] => Parser[Parser[T]] = width => p => - (delegateN(width, p) <~ '|') ^^ { p => p << '\n' } - - // repeatAll is like repeat, but with a list of parsers as the state. - val row = repeatAll[T] { ps => - '|' ~> distr(zipWith(layout map cell, ps)) <~ '\n' - } - - some(row(initCells) <~ rowSeparator) + lazy val head: Parser[Layout] = some('+' ~> manyCount('-')) <~ '+' <~ '\n' + + def table[T](content: Parser[T]): Parser[List[List[T]]] = head >> { + layout => + // After knowing the layout the row-separators are fixed + val rowSeparator = + layout + .map { n => List.fill(n)('-').mkString + "+" } + .foldLeft("+")(_ + _) ~ '\n' + val initCells = layout.map { _ => content } + + // one line of a cell, given a fixed width. 
+ def cell: Int => Parser[T] => Parser[Parser[T]] = + width => p => (delegateN(width, p) <~ '|') ^^ { p => p << '\n' } + + // repeatAll is like repeat, but with a list of parsers as the state. + val row = repeatAll[T] { ps => + '|' ~> distr(zipWith(layout map cell, ps)) <~ '\n' + } + + some(row(initCells) <~ rowSeparator) } lazy val xs = many(some('x') ~ '\n') - table(xs) shouldParse """+---+ - ^|xxx| - ^+---+ - ^""".stripMargin('^') - - table(xs) shouldParse """+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - - table(xs) shouldNotParse """+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---x--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - + table(xs) shouldParse + """+---+ + ^|xxx| + ^+---+ + ^""".stripMargin('^') + + table(xs) shouldParse + """+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') + + table(xs) shouldNotParse + """+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---x--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') lazy val nestedTables: NT[Any] = table(xs | nestedTables) - nestedTables shouldParse """+---+--------+------------+ - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^|xxx||x|xxxx||xxxxxxxxxxxx| - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - 
^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - - nestedTables shouldNotParse """+---+--------+------------+ - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^|xxx||x|oxxx||xxxxxxxxxxxx| - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - + nestedTables shouldParse + """+---+--------+------------+ + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^|xxx||x|xxxx||xxxxxxxxxxxx| + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') + + nestedTables shouldNotParse + """+---+--------+------------+ + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^|xxx||x|oxxx||xxxxxxxxxxxx| + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') // helper that should be in the stdlib - def zipWith[A,B](l1: List[A => B], l2: List[A]): List[B] = + def zipWith[A, B](l1: List[A => B], l2: List[A]): List[B] = (l1 zip l2).map { case (f, x) => f(x) } } describe("flatMap uses fixed point computation") { - lazy val fm: NT[Int] = succeed(1) | fm.flatMap { n => if (n < 5) succeed(n + 1) else err } + lazy val fm: NT[Int] = succ(1) | fm.flatMap { n => + if (n < 5) succ(n + 1) else err + } - fm.results.toSet shouldBe Set(1,2,3,4,5) + fm.results.toSet shouldBe Set(1, 2, 3, 4, 5) } - describe("Stream preprocessing") { - lazy val ones: NT[Any] = succeed(()) | '1' ~ ones - lazy val zeros: NT[Any] = succeed(()) | '0' ~ zeros + lazy val ones: NT[Any] = succ(()) | '1' ~ ones + lazy val zeros: NT[Any] = succ(()) | '0' ~ zeros lazy val oneszeros: Parser[Any] = '1' ~ '1' ~ '0' ~ '0' @@ -200,15 +204,14 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers bin(oneszeros) shouldParse "aabb" bin(oneszeros) shouldNotParse "aabbb" 
- bin(ones) shouldNotParse ("b" * 50) + bin(ones) shouldNotParse "b".repeat(50) } - describe("Results of ambiguous parses") { lazy val A: NT[Any] = (A <~ '+') ~ A | digit - def shouldParseWith(str: String)(expected: Set[Any]) { - (A <<< str).results.toSet should be (expected) + def shouldParseWith(str: String)(expected: Set[Any]) = { + (A <<< str).results.toSet should be(expected) } shouldParseWith("3") { Set('3') } @@ -216,7 +219,6 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers shouldParseWith("3+2+1") { Set(('3', ('2', '1')), (('3', '2'), '1')) } } - // Usecase // ------- // Standard example from data dependent parsing papers (like "One parser to rule them all", @@ -227,7 +229,9 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers // input stream. Benefit of our approach: Body parser never sees more than N characters. describe("IMAP") { - val number = consumed(charRange('1', '9') ~ many(digit) | '0').map { _.mkString.toInt } + val number = consumed(charRange('1', '9') ~ many(digit) | '0').map { + _.mkString.toInt + } val header: Parser[Int] = ('{' ~ space) ~> number <~ (space ~ '}') @@ -250,13 +254,11 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers IMAP(many('a')) shouldNotParse "{ 7 }" } - - // Usecase. 
interleaving parsers def interleave[T, S](p: Parser[T], q: Parser[S]): Parser[(T, S)] = - (done(p) & done(q)) | eat { c => - interleave(q, (p << c)) map { case (s, t) => (t, s) } - } + (done(p) & done(q)) | eat { c => + interleave(q, (p << c)) map { case (s, t) => (t, s) } + } describe("interleaving two parsers") { val p = 'a' ~ 'a' ~ 'a' @@ -277,12 +279,15 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers else { readLine(p << c) } } - done(p) | // do not indent and p can accept - (space ~ space) ~> readLine(p) | // indent by 2 and read one line, then recurse - (many(space) ~ newline) >> { _ => indent(p) } // skip lines with whitespace only, then recurse + done(p) | // do not indent and p can accept + (space ~ space) ~> readLine( + p + ) | // indent by 2 and read one line, then recurse + (many(space) ~ newline) >> { _ => + indent(p) + } // skip lines with whitespace only, then recurse } - describe("indenting parsers") { val xs = many(some('x') ~ '\n') @@ -291,24 +296,25 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers indent(xs) shouldParse " xx\n" indent(xs) shouldParse " xxxxx\n" indent(xs) shouldParse " xxxxx\n xxxxxxx\n" - indent(xs) shouldParse """ xxxxx - | xxxxxxx - | xxxxxxxx - | xxxxxxxxx - | xxxxxxxxxx - | xxxxxxxxxxx - | xxxxxxxxxx - | xxxxxxxxx - | xxxxxxxxxx - | xxxxxxxxxxx - | xxxxxxxxxxxx - | xxxxxxxxxxxxx - | xxxxxxxxxxxxxx - | xxxxxxxxxxxxxxx - | xxxxxxxxxxxxxxxx - | xxxxxxxxxxxxxxx - | xxxxxxxxxxxxxx - |""".stripMargin('|') + indent(xs) shouldParse + """ xxxxx + | xxxxxxx + | xxxxxxxx + | xxxxxxxxx + | xxxxxxxxxx + | xxxxxxxxxxx + | xxxxxxxxxx + | xxxxxxxxx + | xxxxxxxxxx + | xxxxxxxxxxx + | xxxxxxxxxxxx + | xxxxxxxxxxxxx + | xxxxxxxxxxxxxx + | xxxxxxxxxxxxxxx + | xxxxxxxxxxxxxxxx + | xxxxxxxxxxxxxxx + | xxxxxxxxxxxxxx + |""".stripMargin('|') indent(indent(xs)) shouldParse " xx\n" indent(indent(xs)) shouldParse " xxxxx\n" @@ -330,7 +336,7 @@ class DerivativeParsersTests extends 
FunSpec with Matchers with CustomMatchers } describe("Retroactively, allow spaces in arbitrary positions") { - import section_4_2.{ spaced, parens } + import section_4_2.{spaced, parens} val sp = spaced(parens) sp shouldParse "((()))" @@ -349,36 +355,37 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers as shouldParse "aa\naa\n" both shouldParse "a\n" - both shouldParse """aaa - |~~~ - |() - |~~~ - |aaaaa - |""".stripMargin('|') + both shouldParse + """aaa + |~~~ + |() + |~~~ + |aaaaa + |""".stripMargin('|') both shouldParse "a \n\n~~~ \n()\n~~~\naaa\n" - both shouldNotParse """aaa - |~~~ - |( - |~~~ - |aaaaa - |""".stripMargin('|') - - both shouldParse """aaa - |~~~ - |((()) - |~~~ - |aaaaa - | - |~~~ - |) - |~~~ - |""".stripMargin('|') + both shouldNotParse + """aaa + |~~~ + |( + |~~~ + |aaaaa + |""".stripMargin('|') + + both shouldParse + """aaa + |~~~ + |((()) + |~~~ + |aaaaa + | + |~~~ + |) + |~~~ + |""".stripMargin('|') } - - describe("Unescape") { import section_4_2._ @@ -390,42 +397,34 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers describe("Combined examples") { import section_4_2._ - combined shouldParse """aaa - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aaaa| - ^+----+ - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aa | - ^+----+ - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aaaa| - ^|~~~ | - ^|(())| - ^|~~~ | - ^|aaaa| - ^+----+ - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aa | - ^|aaaa| - ^+----+ - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aa | - ^|~~~ | - ^|(())| - ^|~~~ | - ^|aaaa| - ^+----+ - ^""".stripMargin('^') + combined shouldParse + """aaa + ^""".stripMargin('^') + + combined shouldParse "+----+\n|aaaa|\n+----+\n" + combined shouldParse "+----+\n|aa |\n+----+\n" + + combined shouldParse + """+----+ + ^|aaaa| + ^|~~~ | + ^|(())| + ^|~~~ | + ^|aaaa| + ^+----+ + ^""".stripMargin('^') + + 
combined shouldParse "+----+\n|aa |\n|aaaa|\n+----+\n" + + combined shouldParse + """+----+ + ^|aa | + ^|~~~ | + ^|(())| + ^|~~~ | + ^|aaaa| + ^+----+ + ^""".stripMargin('^') } @@ -445,31 +444,38 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers describe("Greedy repitition") { - it ("should return only the result of the longest match") { - greedySome(some('a')) parse "" shouldBe List() - greedyMany(some('a')) parse "" shouldBe List(List()) - greedySome(some('a')) parse "a" shouldBe List(List(List('a'))) - greedySome(some('a')) parse "aaa" shouldBe List(List(List('a', 'a', 'a'))) + it("should return only the result of the longest match") { + parse(greedySome(some('a')), "") shouldBe List() + parse(greedyMany(some('a')), "") shouldBe List(List()) + parse(greedySome(some('a')), "a") shouldBe List(List(List('a'))) + parse(greedySome(some('a')), "aaa") shouldBe + List(List(List('a', 'a', 'a'))) } - it ("should also return longest match if other parser succeeded first") { + it("should also return longest match if other parser succeeded first") { lazy val p = some("ab") | some("a") | some("b") - greedySome(p) parse "ab" shouldBe List(List(List("ab"))) - greedySome(p) parse "abab" shouldBe List(List(List("ab", "ab"))) - greedySome(p) parse "abbab" shouldBe List(List(List("ab"), List("b"), List("ab"))) - greedySome(p) parse "abbaab" shouldBe List(List(List("ab"), List("b"), List("a", "a"), List("b"))) - greedySome(p) parse "aaaab" shouldBe List(List(List("a", "a", "a", "a"), List("b"))) + parse(greedySome(p), "ab") shouldBe List(List(List("ab"))) + parse(greedySome(p), "abab") shouldBe List(List(List("ab", "ab"))) + parse(greedySome(p), "abbab") shouldBe List( + List(List("ab"), List("b"), List("ab")) + ) + parse(greedySome(p), "abbaab") shouldBe List( + List(List("ab"), List("b"), List("a", "a"), List("b")) + ) + parse(greedySome(p), "aaaab") shouldBe List( + List(List("a", "a", "a", "a"), List("b")) + ) lazy val q = "ab" | "a" | "b" - 
greedySome(q) parse "ab" shouldBe List(List("ab")) - greedySome(q) parse "abab" shouldBe List(List("ab", "ab")) - greedySome(q) parse "abbab" shouldBe List(List("ab", "b", "ab")) - greedySome(q) parse "abbaab" shouldBe List(List("ab", "b", "a", "ab")) - greedySome(q) parse "aaaab" shouldBe List(List("a", "a", "a", "ab")) + parse(greedySome(q), "ab") shouldBe List(List("ab")) + parse(greedySome(q), "abab") shouldBe List(List("ab", "ab")) + parse(greedySome(q), "abbab") shouldBe List(List("ab", "b", "ab")) + parse(greedySome(q), "abbaab") shouldBe List(List("ab", "b", "a", "ab")) + parse(greedySome(q), "aaaab") shouldBe List(List("a", "a", "a", "ab")) } // This shows that our implementation is only locally greedy - println(greedySome("ab" | "a") ~ "b" parse "abab") + println(parse(greedySome("ab" | "a") ~ "b", "abab")) } describe("how to locally rewrite biased choice") { @@ -521,35 +527,43 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers // regions inside skip will not be treated by f. // `region` and `skip` should not have an intersection. - def transform[T](region: Parser[Any], skip: Parser[Any], f: Parser[Parser[T]] => Parser[Parser[T]]): Parser[T] => Parser[T] = { + def transform[T]( + region: Parser[Any], + skip: Parser[Any], + f: Parser[Parser[T]] => Parser[Parser[T]] + ): Parser[T] => Parser[T] = { // to prevent accessive re-parsing we introduce some caching on this // parser combinator here. 
val cache = mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => cache.getOrElseUpdate(p, { - - lazy val dp = delegate(p) - nonterminal ( - done(p) | biasedAlt( - ( skip &> dp - | region &> f(dp) - ) >> rec, - (any &> dp) >> rec)) - }) + def rec(p: Parser[T]): Parser[T] = + cache.getOrElseUpdate( + p, { + lazy val dp = delegate(p) + nonterminal( + done(p) | biasedAlt( + (skip &> dp | region &> f(dp)) >> rec, + (any &> dp) >> rec + ) + ) + } + ) rec } // parsers as input transformers def filterNewlines[T] = filter[T](_ != '\n') - def mask[T] = mapInPartial[T] { case '\n' => '↩' } - def toSpace[T] = mapInPartial[T] { case '\n' => ' ' } - def unmask[T] = mapInPartial[T] { case '↩' => '\n' } + def mask[T] = mapInPartial[T] { case '\n' => '↩' } + def toSpace[T] = mapInPartial[T] { case '\n' => ' ' } + def unmask[T] = mapInPartial[T] { case '↩' => '\n' } // some lexers - val singleString: Parser[String] = consumed('"' ~ many(nonOf("\"\n")) ~ '"') - val comment: Parser[String] = consumed('#' ~ many(nonOf("\n")) ~ '\n') - val multilineString: Parser[String] = consumed("'''" ~ neg(always ~ prefix("'''")) ~ "'''") + val singleString: Parser[String] = + consumed('"' ~ many(nonOf("\"\n")) ~ '"') + val comment: Parser[String] = consumed('#' ~ many(nonOf("\n")) ~ '\n') + val multilineString: Parser[String] = + consumed("'''" ~ neg(always ~ prefix("'''")) ~ "'''") singleString shouldParse "\"hello world\"" singleString shouldNotParse "\"hello\nworld\"" @@ -560,14 +574,30 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers val collect = consumed(always) ^^ { x => x.mkString } // for now just filter newlines - val p = transform[String](multilineString, singleString | comment, filterNewlines)(collect) + val p = transform[String]( + multilineString, + singleString | comment, + filterNewlines + )(collect) it("should only filter newlines in multiline strings") { - (p parse "hello '''foo\n\"bar''' test\n foo \" bar'''foo 
\"\n") should be (List("hello '''foo\"bar''' test\n foo \" bar'''foo \"\n")) + parse( + p, + "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n" + ) should be( + List("hello '''foo\"bar''' test\n foo \" bar'''foo \"\n") + ) } // here we can already observe performance problems (about 400ms): - p shouldParse "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n some content that is not a program, but could be one \n. # ''' some comment \nIt contains newlines \n, \"and some Strings\". Even Multiline strings with '''newlines\n'''." - + p shouldParse + """hello '''foo + |"bar''' test + | foo " bar'''foo " + | some content that is not a program, but could be one + |. # ''' some comment + |It contains newlines + |, "and some Strings". Even Multiline strings with '''newlines + |'''.""".stripMargin lazy val noText: Parser[Any] = comment | singleString | multilineString @@ -579,16 +609,19 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers val pairs = Map[Elem, Elem]('(' -> ')', '[' -> ']', '{' -> '}') val (opening, closing) = (pairs.keys.toList, pairs.values.toList) - - lazy val dyck: NT[Any] = one(opening) >> { paren => many(dyck) ~ pairs(paren) } - //'(' ~> many(dyck) <~ ')' + lazy val dyck: NT[Any] = onOf(opening) >> { paren => + many(dyck) ~ pairs(paren) + } + // '(' ~> many(dyck) <~ ')' // within comments and strings filter out everything val parens = // we need to intersect with the outermost parenthesis to prevent // parsing something like "aaa()aaa" - (one(opening) >> { paren => always ~ pairs(paren) }) &> - transform[Any](noText | nonOf(opening) & nonOf(closing) , err, skip)(dyck) + (onOf(opening) >> { paren => always ~ pairs(paren) }) &> + transform[Any](noText | nonOf(opening) & nonOf(closing), err, skip)( + dyck + ) parens shouldParse "()" parens shouldParse "(())" @@ -617,21 +650,34 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers // reusing some definition of `indented` import 
section_3_5_improved._ - def joiningIndent[T]: Parser[T] => Parser[T] = p => - ilj(elj(mlj(indented(unmask(p))))) - + def joiningIndent[T]: Parser[T] => Parser[T] = + p => ilj(elj(mlj(indented(unmask(p))))) it("should mask perform line joining before checking indentation") { - (joiningIndent(collect) parse " foo'''a \n a'''\n bar\n ( \n )\n") should be ( + parse( + joiningIndent( + collect + ), + " foo'''a \n a'''\n bar\n ( \n )\n" + ) should be( List("foo'''a \n a'''\nbar\n( \n )\n") ) - (joiningIndent(collect) parse " '''some \n multiline \n'''\n ( # comment (\n ) hello\n test and \\\n escaped\n") should be ( - List("'''some \n multiline \n'''\n( # comment (\n ) hello\ntest and \\\n escaped\n") + parse( + joiningIndent( + collect + ), + " '''some \n multiline \n'''\n ( # comment (\n ) hello\n test and \\\n escaped\n" + ) should be( + List( + "'''some \n multiline \n'''\n( # comment (\n ) hello\ntest and \\\n escaped\n" + ) ) } - joiningIndent(collect) shouldParse " '''some \n multiline \n'''\n ( # comment (\n )\n" - joiningIndent(collect) shouldNotParse " '''some \n multiline \n''\n ( # comment (\n )\n" - + joiningIndent( + collect) shouldParse " '''some \n multiline \n'''\n ( # comment (\n )\n" + joiningIndent( + collect + ) shouldNotParse " '''some \n multiline \n''\n ( # comment (\n )\n" val WS: Parser[Any] = ' ' val spacesNoNl = some(WS) @@ -643,31 +689,37 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers // Python Parser Skeleton - lazy val expr: NT[Any] = id | singleString | multilineString | "(" ~> spaces ~> expr <~ spaces <~ ")" | "[" ~> spaces ~> opt(someSep(expr, spaces ~ "," ~ spaces) ~ spaces) <~ "]" - lazy val stmt: NT[Any] = expr <~ lineEnd | "def" ~> spacesNoNl ~> id ~ ("():" ~> suite) + lazy val expr: NT[Any] = + id | singleString | multilineString | "(" ~> spaces ~> expr <~ spaces <~ ")" | "[" ~> spaces ~> opt( + someSep(expr, spaces ~ "," ~ spaces) ~ spaces + ) <~ "]" + lazy val stmt: NT[Any] = + expr <~ lineEnd | 
"def" ~> spacesNoNl ~> id ~ ("():" ~> suite) lazy val stmts: NT[Any] = someSep(stmt, spaces) lazy val suite: NT[Any] = lineEnd ~> joiningIndent(stmts) - stmt shouldParse "def foo():\n '''hello\n '''\n" + stmt shouldParse "def foo():\n '''hello\n '''\n" stmt shouldNotParse "def foo():\n \"'''hello\n '''\"\n" - stmt shouldParse "def foo():\n '''hello\n ''' # some comment \n" + stmt shouldParse "def foo():\n '''hello\n ''' # some comment \n" stmt shouldNotParse "def foo():\n # '''hello\n ''' some comment \n" - stmt shouldParse "def foo():\n []\n" - stmt shouldParse "def foo():\n [foo, bar]\n" - stmt shouldParse "def foo():\n [foo, \nbar]\n" + stmt shouldParse "def foo():\n []\n" + stmt shouldParse "def foo():\n [foo, bar]\n" + stmt shouldParse "def foo():\n [foo, \nbar]\n" stmt shouldNotParse "def foo():\n \"[foo, \nbar]\"\n" - stmt shouldParse "def foo():\n \"[foo, bar]\"\n" - stmt shouldParse "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" - stmt shouldParse "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" + stmt shouldParse "def foo():\n \"[foo, bar]\"\n" + stmt shouldParse "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" + stmt shouldParse "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" } - describe("Regression: `not` should preserve invariant `p.results.isEmpty != p.accepts`") { + describe( + "Regression: `not` should preserve invariant `p.results.isEmpty != p.accepts`" + ) { val p = neg("a" | "b") val p_a = p <<< "a" val p_b = p <<< "b" val p_c = p <<< "c" - it ("should preserve the invariant when performing optimization rewrites") { + it("should preserve the invariant when performing optimization rewrites") { p_a.accepts shouldBe false p_a.accepts shouldBe (!p_a.results.isEmpty) p_b.accepts shouldBe false @@ -676,5 +728,4 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers p_c.accepts shouldBe (!p_c.results.isEmpty) } } - } diff --git a/artifact/src/test/scala/LeftrecTests.scala 
b/artifact/src/test/scala/LeftrecTests.scala index 6314a55..1ba0ed3 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -1,10 +1,13 @@ package fcd package test -import org.scalatest._ -trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => +import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec - import parsers._ +trait LeftrecTests { + self: AnyFunSpec & CustomMatchers[RichParsers] => + + import parsers.{succeed as succ, *} describe("lazyness of alt") { @@ -14,26 +17,26 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => } describe("p = p ~ . | .") { - lazy val p: NT[_] = p ~ any | any + lazy val p: NT[Any] = p ~ any | any p shouldParse "a" } describe("p = . | p ~ .") { - lazy val p: NT[_] = any | p ~ any + lazy val p: NT[Any] = any | p ~ any p shouldParse "a" } describe("p = (. | .) >> { (. | p) ^^ id }") { - lazy val p: NT[Any] = (p | any) flatMap { _ => (any | p) map identity } - p.shouldParse("aa") - p.shouldParse("aaaaa") + lazy val p: NT[Any] = (p | any) >> { _ => (any | p) ^^ identity } + p shouldParse "aa" + p shouldParse "aaaaa" } describe("p = (. 
| p) >> { a }") { - lazy val p: NT[Any] = (any | p) flatMap { _ => 'a' } - p.shouldParse("aa") - p.shouldParse("aaa") - p.shouldParse("aaaaaa") + lazy val p: NT[Any] = (any | p) >> { _ => 'a' } + p shouldParse "aa" + p shouldParse "aaa" + p shouldParse "aaaaaa" } } @@ -53,7 +56,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => describe("left recursion") { describe("A = A ~ a | empty") { - lazy val A: NT[_] = A ~ 'a' | succeed(42) + lazy val A: NT[Any] = A ~ 'a' | succ(42) A shouldParse "" A shouldParse "a" @@ -61,7 +64,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => } describe("A = empty | A ~ a ") { - lazy val A: NT[_] = succeed(42) | A ~ 'a' + lazy val A: NT[Any] = succ(42) | A ~ 'a' A shouldParse "" A shouldParse "a" @@ -73,7 +76,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => describe("one level indirect leftrecursion") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ A // A shouldParse "1" // A shouldParse "12" @@ -89,8 +92,8 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => describe("two levels indirect leftrecursion") { lazy val num: Parser[Any] = some(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ C ~ '+' ~ num - lazy val C: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ C ~ '+' ~ num + lazy val C: NT[Any] = succ(()) ~ A A shouldParse "1" A shouldParse "12" @@ -106,7 +109,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => // From "Packrat parsers can support left-recursion" describe("super linear parse time") { - lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succeed(()) + lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succ(()) lazy val ones: NT[Any] = ones ~ '1' | '1' start shouldParse "" @@ -121,7 +124,7 @@ trait LeftrecTests extends 
CustomMatchers { self: FunSpec with Matchers => } describe("A = A ~ b | c") { - lazy val A: NT[_] = A ~ 'b' | 'c' + lazy val A: NT[Any] = A ~ 'b' | 'c' A shouldParse "c" A shouldParse "cb" @@ -131,7 +134,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => } describe("A = empty ~ A ~ b | empty") { - lazy val A: NT[Any] = succeed("done") ~ A ~ 'b' | succeed("done") + lazy val A: NT[Any] = succ("done") ~ A ~ 'b' | succ("done") A shouldParse "" A shouldParse "b" A shouldParse "bb" @@ -140,7 +143,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => // should parse at most as many 'd's as it parses 'b's. describe("A = B ~ A ~ b | c\n B = d | empty") { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' - lazy val B: NT[_] = charParser('d') | succeed("done") + lazy val B: NT[Any] = 'd' | succ("done") A shouldParse "c" A shouldParse "cb" @@ -155,11 +158,11 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val p = many(some('a')) p shouldParse "" - p shouldParse("a") - p shouldParse("aaa") - p shouldParse("aaaaaaaaaa") - p shouldNotParse("b") - p shouldNotParse("aaab") + p shouldParse "a" + p shouldParse "aaa" + p shouldParse "aaaaaaaaaa" + p shouldNotParse "b" + p shouldNotParse "aaab" } describe("del(ones)") { @@ -176,8 +179,8 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val rr: NT[String] = "1" ~> rr | "1" lazy val ll: NT[String] = ll <~ "1" | "1" - ll shouldParse ("1" * 40) - rr shouldParse ("1" * 41) + ll shouldParse "1".repeat(40) + rr shouldParse "1".repeat(41) } // Grammar from Tillmann Rendel's GLL library @@ -187,38 +190,25 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => A shouldParse "a" A shouldParse "aa" A shouldParse "aaa" - A shouldParse ("a" * 100) + A shouldParse "a".repeat(100) lazy val A2: Parser[Any] = some(some('a')) - A2 shouldParse ("a" * 1000) + A2 shouldParse "a".repeat(1000) } describe("mixed mutual 
recursion") { lazy val expression: NT[Any] = - ( literal ~ '+' - | condExpr - ) - - lazy val condExpr: NT[Any] = - ( condExpr ~ '?' - | eqExpr - ) - - lazy val eqExpr: NT[Any] = - ( eqExpr ~ '*' - | literal - ) - - lazy val literal: NT[Any] = - ( many('a') - | '[' ~ arrayEl - ) - - lazy val arrayEl: NT[Any] = - ( expression - | succeed ("undefined") - ) + (literal ~ '+' + | condExpr) + + lazy val condExpr: NT[Any] = condExpr ~ '?' | eqExpr + + lazy val eqExpr: NT[Any] = eqExpr ~ '*' | literal + + lazy val literal: NT[Any] = many('a') | '[' ~ arrayEl + + lazy val arrayEl: NT[Any] = expression | succ("undefined") expression shouldParse "" expression shouldParse "a" @@ -230,22 +220,22 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => } describe("terms") { + enum Term { + case BinOp(lhs: Term, op: String, rhs: Term) + case Num(n: Int) + } - trait Term - case class BinOp(lhs: Term, op: String, rhs: Term) extends Term - case class Num(n: Int) extends Term + import Term._ lazy val term: NT[Term] = - ( term ~ "+" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | term ~ "-" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | fact - ) + (term ~ "+" ~ fact ^^ { case ((l, op), r) => BinOp(l, op, r) } + | term ~ "-" ~ fact ^^ { case ((l, op), r) => BinOp(l, op, r) } + | fact) lazy val fact: NT[Term] = - ( fact ~ "*" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | fact ~ "/" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | num - ) + (fact ~ "*" ~ num ^^ { case ((l, op), r) => BinOp(l, op, r) } + | fact ~ "/" ~ num ^^ { case ((l, op), r) => BinOp(l, op, r) } + | num) lazy val num: Parser[Num] = some(digit) ^^ (ns => Num(ns.mkString.toInt)) @@ -258,7 +248,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => describe("balanced smileys") { lazy val az: NT[Any] = acceptIf(c => c >= 'a' && c <= 'z') lazy val S: NT[Any] = many(az | ' ' | ':' | ':' ~ P | '(' ~ S ~ ')') - lazy val P: NT[Any] = charParser('(') | ')' + lazy 
val P: NT[Any] = alt('(', ')') S shouldParse "" S shouldNotParse ":((" @@ -289,7 +279,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => S shouldParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:))))))))))" S shouldParse "((((((((((((:))))))))))((((((((((:())))))))))))" S shouldNotParse "(((((((((()))))))))))" - S shouldNotParse "(((((((((((((((((((()))))))))))))))))))))" + S shouldNotParse "(((((((((((((((((((()))))))))))))))))))))" S shouldParse "((((((((((:)))))))))))" S shouldParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:)))))))))))))))))))))))))))))))))))))))))))))))))))" S shouldParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))" @@ -304,8 +294,8 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => // taken from Tillmann Rendel's GLL library describe("grammar with hidden left recursion") { lazy val S: NT[Any] = C ~ 'a' | 'd' - lazy val B: NT[Any] = succeed(()) | 'a' - lazy val C: NT[Any] = charParser('b') | B ~ C ~ 'b' | 'b' ~ 'b' + lazy val B: NT[Any] = succ(()) | 'a' + lazy val C: NT[Any] = 'b' | B ~ C ~ 'b' | 'b' ~ 'b' S shouldNotParse "" S shouldNotParse "aba" diff --git a/artifact/src/test/scala/NegationTests.scala b/artifact/src/test/scala/NegationTests.scala index 0cf39e0..1cb92e5 100644 --- a/artifact/src/test/scala/NegationTests.scala +++ b/artifact/src/test/scala/NegationTests.scala @@ -1,12 +1,13 @@ package fcd package test -import org.scalatest._ +import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec -trait NegationTests extends CustomMatchers { self: FunSpec with Matchers => +trait NegationTests { + self: AnyFunSpec & CustomMatchers[RichParsers] => - import parsers._ - import parsers.{ not => neg } + import parsers.{not 
as neg, *} describe("parser \"not(aa)\"") { val p = neg("aa") @@ -49,7 +50,7 @@ trait NegationTests extends CustomMatchers { self: FunSpec with Matchers => } describe("parser \"not((baaa | ba) ~ aa ~ .*) & lower*\"") { - val p: Parser[_] = neg(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) + val p = neg(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) p shouldNotParse "baaa" p shouldNotParse "baaaxx" p shouldParse "" diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index fe7cdb6..d325005 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -1,286 +1,236 @@ package fcd package test -import org.scalatest._ import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec -class PythonParserTests extends FunSpec with Matchers { +class PythonParserTests + extends AnyFunSpec + with CustomMatchers[PythonParsers.type](PythonParsers) { - val parsers = PythonParsers import parsers._ + import parsers.given + import Lexeme._ - describe ("indented python parser (lexeme based)") { + describe("indented python parser (lexeme based)") { indented(many(many(Id("A")) <~ NL)) shouldParseWith ( - List(WS, WS, Id("A"), Id("A"), NL, - WS, WS, Id("A"), NL), - List(List(Id("A"), Id("A")), List(Id("A")))) + List(WS, WS, Id("A"), Id("A"), NL, WS, WS, Id("A"), NL), + List(List(Id("A"), Id("A")), List(Id("A"))) + ) } - describe ("implicit line joining") { - - implicit def keyword(kw: Symbol): Lexeme = KW(kw.name) - implicit def punctuation(p: String): Lexeme = Punct(p) - - val p = many(WS | id | "(" | ")" | "[" | "]") - val a = Id("A") - val BS = Punct("\\") - - - dyck shouldParse List[Lexeme]("(", "(", ")", ")") - dyck shouldNotParse List[Lexeme]("(", "(", ")") - extDyck shouldParse List[Lexeme]("(", a, "(", a, NL, a, ")", a, ")") - extDyck shouldNotParse List[Lexeme](a, "(", a, "(", a, NL, a, ")", a, ")", a) - - implicitJoin(p) shouldParse 
List[Lexeme](a, a, a, a, a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, a, NL, a, a) - implicitJoin(p) shouldParse List[Lexeme](a, a, "(", a, NL, a, ")", a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, "(", a, NL, a, a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, "(", a, "(", NL, a, ")", a) - implicitJoin(p) shouldParse List[Lexeme](a, a, "(", a, "(", NL, a, ")", ")", a) - implicitJoin(p) shouldParse List[Lexeme](a, a, "(", a, "[", NL, a, "]", ")", a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, "(", a, "[", NL, a, ")", "]", a) - - explicitJoin(p) shouldParse List[Lexeme](a, a, a, BS, NL, a, a) - explicitJoin(p) shouldParse List[Lexeme](a, a, a, BS, NL, a, a, BS, NL, a, a) - - val input = List[Lexeme]( - a, NL, - Comment("Hey!!"), a, BS, NL, - a, a, "(", a, "[", a, BS, NL, - a, NL, - a, "]", ")", a) - - val inputWithoutComments = List[Lexeme]( - a, NL, - a, BS, NL, - a, a, "(", a, "[", a, BS, NL, - a, NL, - a, "]", ")", a) - - val inputWithoutExplicit = List[Lexeme]( - a, NL, - a, - a, a, "(", a, "[", a, - a, NL, - a, "]", ")", a) - - val inputResult = List[Lexeme]( - a, NL, - a, - a, a, "(", a, "[", a, - a, - a, "]", ")", a) + describe("implicit line joining") { + given kw: Conversion[String, Lexeme] = KW(_) + given punct: Conversion[Char, Lexeme] = p => Punct(p.toString) + + val p = many(WS | id | '(' | ')' | '[' | ']') + val a = Id("A") + val BS = Punct("\\") + + dyck shouldParse List[Lexeme]('(', '(', ')', ')') + dyck shouldNotParse List[Lexeme]('(', '(', ')') + extDyck shouldParse List('(', a, '(', a, NL, a, ')', a, ')') + extDyck shouldNotParse List(a, '(', a, '(', a, NL, a, ')', a, ')', a) + + implicitJoin(p) shouldParse List(a, a, a, a, a) + implicitJoin(p) shouldNotParse List(a, a, a, NL, a, a) + implicitJoin(p) shouldParse List(a, a, '(', a, NL, a, ')', a) + implicitJoin(p) shouldNotParse List(a, a, '(', a, NL, a, a) + implicitJoin(p) shouldNotParse List(a, a, '(', a, '(', NL, a, ')', a) + implicitJoin(p) shouldParse List(a, 
a, '(', a, '(', NL, a, ')', ')', a) + implicitJoin(p) shouldParse List(a, a, '(', a, '[', NL, a, ']', ')', a) + implicitJoin(p) shouldNotParse List(a, a, '(', a, '[', NL, a, ')', ']', a) + + explicitJoin(p) shouldParse List(a, a, a, BS, NL, a, a) + explicitJoin(p) shouldParse List(a, a, a, BS, NL, a, a, BS, NL, a, a) + + val input = List[Lexeme](a, NL, Comment("Hey!!"), a, BS, NL, a, a, '(', a, + '[', a, BS, NL, a, NL, a, ']', ')', a) + + val inputWithoutComments = List[Lexeme](a, NL, a, BS, NL, a, a, '(', a, '[', + a, BS, NL, a, NL, a, ']', ')', a) + + val inputWithoutExplicit = + List[Lexeme](a, NL, a, a, a, '(', a, '[', a, a, NL, a, ']', ')', a) + + val inputResult = + List[Lexeme](a, NL, a, a, a, '(', a, '[', a, a, a, ']', ')', a) val collect = consumed(many(any)) - stripComments(collect) shouldParseWith (input, inputWithoutComments) - explicitJoin(collect) shouldParseWith (inputWithoutComments, inputWithoutExplicit) - implicitJoin(collect) shouldParseWith (inputWithoutExplicit, inputResult) + stripComments(collect) shouldParseWith (input, inputWithoutComments) + explicitJoin(collect) shouldParseWith + (inputWithoutComments, inputWithoutExplicit) + implicitJoin(collect) shouldParseWith (inputWithoutExplicit, inputResult) - preprocess(file_input) shouldParse List[Lexeme]( - a, ";", a, "=", 'yield, 'from, a, "=", a, ";", NL, - NL, - a, ";", a, NL, - EOS) + preprocess(file_input) shouldParse List(a, ';', a, '=', "yield", "from", a, + '=', a, ';', NL, NL, a, ';', a, NL, EOS) - preprocess(file_input) shouldParse List[Lexeme]( - a, "=", a, ">>", a, "*", a, NL, - EOS) + preprocess(file_input) shouldParse + List(a, '=', a, Punct(">>"), a, '*', a, NL, EOS) - val sampleProg = List[Lexeme]( - 'def, WS, Id("fun"), "(", WS, a, WS, ")", ":", NL, - WS, WS, a, "+=", WS, a, NL, - WS, WS, a, "*=", a, NL, - EOS) + val sampleProg = List[Lexeme]("def", WS, Id("fun"), '(', WS, a, WS, ')', + ':', NL, WS, WS, a, Punct("+="), WS, a, NL, WS, WS, a, Punct("*="), a, NL, EOS) - 
(stripComments(collect) parse sampleProg) shouldBe List(sampleProg) - (explicitJoin(collect) parse sampleProg) shouldBe List(sampleProg) - (implicitJoin(collect) parse sampleProg) shouldBe List(sampleProg) + parse(stripComments(collect), sampleProg) shouldBe List(sampleProg) + parse(explicitJoin(collect), sampleProg) shouldBe List(sampleProg) + parse(implicitJoin(collect), sampleProg) shouldBe List(sampleProg) preprocess(file_input) shouldParse sampleProg - val sampleProg2 = List[Lexeme]( - 'def, WS, Id("fun"), "(", NL, - WS, a, WS, - NL, ")", ":", NL, - WS, WS, a, "+=", Comment("Test"), BS, NL, - WS, a, NL, - WS, WS, a, "*=", a, NL, - EOS) - - (preprocess(collect) parse sampleProg2) shouldBe List(sampleProg) + val sampleProg2 = List[Lexeme]("def", WS, Id("fun"), '(', NL, WS, a, WS, NL, + ')', ':', NL, WS, WS, a, Punct("+="), Comment("Test"), BS, NL, WS, a, NL, WS, WS, + a, Punct("*="), a, NL, EOS) + parse(preprocess(collect), sampleProg2) shouldBe List(sampleProg) preprocess(file_input) shouldParse sampleProg2 // https://en.wikibooks.org/wiki/Python_Programming/Decorators + // format: off val traceProg = List[Lexeme]( - Comment("define the Trace class that will be "), NL, - Comment("invoked using decorators"), NL, - 'class, WS, Id("Trace"), "(", Id("object"), ")", ":", NL, - WS, WS, WS, WS, 'def, WS, Id("__init__"), "(", Id("self"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, - WS, WS, WS, WS, NL, - WS, WS, WS, WS, WS, WS, 'def, WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: 
{1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'return, WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, - EOS + Comment("define the Trace class that will be "), NL, + Comment("invoked using decorators"), NL, + "class", WS, Id("Trace"), '(', Id("object"), ')', ':', NL, + WS, WS, WS, WS, "def", WS, Id("__init__"), '(', Id("self"), ')', ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), '.', Id("f"), WS, '=', WS, Id("f"), NL, + WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, "def", WS, Id("__call__"), '(', Id("self"), WS, ',', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), '(', Str("entering function "), WS, '+', WS, Id("self"), '.', Id("f"), '.', Id("__name__"), ')', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), '=', Num("0"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), '(', Str("arg {0}: {1}"), '.', Id("format"), '(', Id("i"), ',', Id("arg"), ')', ')', NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), '=', Id("i"), '+', Num("1"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), '.', Id("f"), '(', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', NL, + EOS ) - - argument shouldParse List[Lexeme]("*", Id("kwargs")) - argument shouldParse List[Lexeme]("**", Id("kwargs")) - arglist shouldParse List[Lexeme]("**", Id("kwargs2")) - arglist shouldParse List[Lexeme](Id("kwargs"), ",", WS, Id("kwargs")) - arglist shouldParse List[Lexeme]("*", Id("kwargs"), ",", "*", Id("kwargs")) - arglist shouldParse List[Lexeme]("**", Id("kwargs"), ",", "**", Id("kwargs")) - arglist shouldParse List[Lexeme]("*", 
Id("kwargs"), ",", WS, "*", Id("kwargs")) - arglist shouldParse List[Lexeme]("**", Id("kwargs"), ",", WS, "**", Id("kwargs")) - - arglist shouldParse List[Lexeme]("(", Id("args"), ",", WS, Id("kwargs"), ")") - arglist shouldParse List[Lexeme]("(", "*", Id("args"), ",", WS, Id("kwargs"), ")") - arglist shouldParse List[Lexeme]("(", "*", Id("args"), ",", WS, "*", Id("kwargs"), ")") - test shouldParse List[Lexeme](Id("f"), "(", Id("args"), ",", WS, Id("kwargs"), ")") - test shouldParse List[Lexeme](Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")") - - test shouldParse List[Lexeme](Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")") + // format: on + + argument shouldParse List('*', Id("kwargs")) + argument shouldParse List(Punct("**"), Id("kwargs")) + arglist shouldParse List(Punct("**"), Id("kwargs2")) + arglist shouldParse List(Id("kwargs"), ',', WS, Id("kwargs")) + arglist shouldParse List('*', Id("kwargs"), ',', '*', Id("kwargs")) + arglist shouldParse List(Punct("**"), Id("kwargs"), ',', Punct("**"), + Id("kwargs")) + arglist shouldParse List('*', Id("kwargs"), ',', WS, '*', Id("kwargs")) + arglist shouldParse List(Punct("**"), Id("kwargs"), ',', WS, Punct("**"), + Id("kwargs")) + arglist shouldParse List('(', Id("args"), ',', WS, Id("kwargs"), ')') + arglist shouldParse List('(', '*', Id("args"), ',', WS, Id("kwargs"), ')') + + arglist shouldParse + List('(', '*', Id("args"), ',', WS, '*', Id("kwargs"), ')') + + test shouldParse + List(Id("f"), '(', Id("args"), ',', WS, Id("kwargs"), ')') + + test shouldParse + List(Id("f"), '(', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), + ')') + + test shouldParse List(Id("print"), '(', Str("entering function "), WS, '+', + WS, Id("self"), '.', Id("f"), '.', Id("__name__"), ')') // TODO is already ambiguous - // (stmt parse List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size shouldBe 1 + // (stmt `parse` 
List[Lexeme](Id("self"), '.', Id("f"), WS, '=', WS, Id("f"), NL)).size shouldBe 1 // preprocess(file_input) shouldParse traceProg - // (stmt parse List[Lexeme]( - // 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, + // (stmt `parse` List[Lexeme]( + // "for", WS, Id("arg"), WS, "in", WS, Id("args"), ':', NL, // WS, WS, Id("print"), NL)).size shouldBe 1 - stmt shouldNotParse List[Lexeme]( - 'def, WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - WS, WS, WS, WS, Id("print"), NL, - // this line is indented too far - WS, WS, WS, WS, WS, WS, Id("print"), NL) + // format: off + stmt shouldNotParse List( + "def", WS, Id("__call__"), '(', Id("self"), WS, ',', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', ':', NL, + WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ':', NL, + WS, WS, WS, WS, Id("print"), NL, // this line is indented too far + WS, WS, WS, WS, WS, WS, Id("print"), NL + ) + // format: on // with empty lines + // format: off val traceProg2 = List[Lexeme]( - Comment("define the Trace class that will be "), NL, - Comment("invoked using decorators"), NL, - 'class, WS, Id("Trace"), "(", Id("object"), ")", ":", NL, - WS, WS, WS, WS, 'def, WS, Id("__init__"), "(", Id("self"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, - NL, - WS, WS, WS, WS, 'def, WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", 
Id("arg"), ")", ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, - WS, WS, NL, - NL, - NL, - NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'return, WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, - EOS + Comment("define the Trace class that will be "), NL, + Comment("invoked using decorators"), NL, + "class", WS, Id("Trace"), '(', Id("object"), ')', ':', NL, WS, + WS, WS, WS, "def", WS, Id("__init__"), '(', Id("self"), ')', ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), '.', Id("f"), WS, '=', WS, Id("f"), NL, + NL, + WS, WS, WS, WS, "def", WS, Id("__call__"), '(', Id("self"), WS, ',', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), '(', Str("entering function "), WS, '+', WS, Id("self"), '.', Id("f"), '.', Id("__name__"), + ')', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), '=', Num("0"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), '(', Str("arg {0}: {1}"), '.', Id("format"), '(', Id("i"), ',', Id("arg"), ')', ')', NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), '=', Id("i"), '+', Num("1"), NL, + WS, WS, NL, + NL, + NL, + NL, + WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), '.', Id("f"), '(', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', NL, + EOS ) + // format: on preprocess(file_input) shouldParse traceProg2 - (preprocess(file_input) parse traceProg2).size shouldBe 1 + parse(preprocess(file_input), traceProg2).size shouldBe 1 - // suite should parse this: - val dummyin = List[Lexeme](NL, - WS, 'def, WS, Id("f"), "(", ")", ":", NL, - WS, WS, 'def, WS, Id("f"), "(", ")", ":", NL, - WS, WS, WS, Id("print"), NL, - WS, WS, WS, Id("print"), NL, - WS, WS, WS, Id("i"), NL) + // suite should `parse` this: + // format: off + val dummyin = List( + NL, + WS, "def", WS, Id("f"), '(', ')', 
':', NL, + WS, WS, "def", WS, Id("f"), '(', ')', ':', NL, + WS, WS, WS, Id("print"), NL, + WS, WS, WS, Id("print"), NL, + WS, WS, WS, Id("i"), NL + ) + // format: on - //println((suite parse dummyin) mkString "\n\n") + // println((suite `parse` dummyin) mkString "\n\n") - stmt shouldNotParse List[Lexeme](WS, WS, WS, Id("i"), NL) - atom shouldNotParse List[Lexeme](WS, WS, WS, Id("i")) + stmt shouldNotParse List(WS, WS, WS, Id("i"), NL) + atom shouldNotParse List(WS, WS, WS, Id("i")) // This is the skeleton of the python parsers (and it is unambiguous) - lazy val aStmt: NT[Any] = aSimpleStmt | 'def ~> aBlock + lazy val aStmt: NT[Any] = aSimpleStmt | "def" ~> aBlock lazy val aSimpleStmt = a <~ NL - lazy val aBlock = aSimpleStmt | NL ~> indented(some(many(emptyLine) ~> aStmt)) + lazy val aBlock = + aSimpleStmt | NL ~> indented(some(many(emptyLine) ~> aStmt)) lazy val aInput: NT[Any] = NL.* ~> many(aStmt <~ NL.*) <~ EOS + // format: off val dummyin2 = List[Lexeme]( - 'def, NL, - WS, a, NL, - WS, a, NL, - WS, 'def, NL, - WS, WS, a, NL, - WS, WS, a, NL, - WS, WS, a, NL, - NL, - 'def, NL, - WS, a, NL, - WS, a, NL, - WS, 'def, NL, - WS, WS,WS,WS,WS,WS, a, NL, - WS, WS,WS,WS,WS,WS, a, NL, - WS, WS,WS,WS,WS,WS, a, NL, - EOS) - - aInput shouldParse List[Lexeme]( - 'def, NL, - WS, WS, a, NL, - WS, WS, a, NL, - EOS - ) - - aInput shouldNotParse List[Lexeme]( - 'def, NL, - WS, WS, a, NL, + "def", NL, WS, a, NL, - EOS - ) - - aInput shouldParse List[Lexeme]( - 'def, NL, + WS, a, NL, + WS, "def", NL, WS, WS, a, NL, - NL, WS, WS, a, NL, - EOS - ) - - aInput shouldNotParse List[Lexeme]( - 'def, NL, WS, WS, a, NL, - NL, + NL, "def", NL, WS, a, NL, + WS, a, NL, + WS, "def", NL, + WS, WS, WS, WS, WS, WS, a, NL, + WS, WS, WS, WS, WS, WS, a, NL, + WS, WS, WS, WS, WS, WS, a, NL, EOS ) + // format: on - indentBy(WS ~ WS)(collect) shouldParseWith ( - List[Lexeme](WS, WS, a, NL), - List[Lexeme](a, NL)) + aInput shouldParse List("def", NL, WS, WS, a, NL, WS, WS, a, NL, EOS) + aInput 
shouldNotParse List("def", NL, WS, WS, a, NL, WS, a, NL, EOS) + aInput shouldParse List("def", NL, WS, WS, a, NL, NL, WS, WS, a, NL, EOS) + aInput shouldNotParse List("def", NL, WS, WS, a, NL, NL, WS, a, NL, EOS) - indentBy(WS ~ WS)(collect) shouldParseWith ( - List[Lexeme](WS, WS, NL, NL, WS, WS, a, NL), - List[Lexeme](NL, NL, a, NL)) + indentBy(WS ~ WS)(collect) shouldParseWith + (List(WS, WS, a, NL), List(a, NL)) - (aInput parse dummyin2).size shouldBe 1 - } + indentBy(WS ~ WS)(collect) shouldParseWith + (List(WS, WS, NL, NL, WS, WS, a, NL), List(NL, NL, a, NL)) - // Helpers to allow writing more concise tests. - private implicit class ParserTests[T, P <% Parser[T]](p: => P) { - def shouldParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should parse "$s" """, tags:_*) { - accepts(p, s) shouldBe true - } - def shouldNotParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should not parse "$s" """, tags:_*) { - accepts(p, s) shouldBe false - } - // for unambiguous parses - def shouldParseWith[ES <% Iterable[Elem]](s: ES, result: T) = - it (s"""should parse "$s" with correct result""") { - parse(p, s) shouldBe List(result) - } + parse(aInput, dummyin2).size shouldBe 1 } }