From 2a375912099f1b738d7ab9879d433bea885ab7ac Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 20 Oct 2025 04:54:04 +0200 Subject: [PATCH 01/95] Start migration to more recent Scala & Sbt versions. --- .gitignore | 5 ++--- artifact/build.sbt | 2 +- artifact/project/build.properties | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index e7ad679..0a55727 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,5 @@ lib_managed/ src_managed/ project/boot/ project/plugins/project/ - - - +.bsp +.metals \ No newline at end of file diff --git a/artifact/build.sbt b/artifact/build.sbt index 157aac1..7d2182c 100644 --- a/artifact/build.sbt +++ b/artifact/build.sbt @@ -2,7 +2,7 @@ name := "first-class-derivatives" version := "1.0" -scalaVersion := "2.11.7" +scalaVersion := "3.7.3" scalacOptions ++= Seq("-feature", "-deprecation") diff --git a/artifact/project/build.properties b/artifact/project/build.properties index 19623ba..5e6884d 100644 --- a/artifact/project/build.properties +++ b/artifact/project/build.properties @@ -1 +1 @@ -sbt.version = 0.13.8 +sbt.version=1.11.6 From 974d83249dbdaa42eeff88d0327145cf00798820 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 17:11:41 +0100 Subject: [PATCH 02/95] Remove build properties --- artifact/project/build.properties | 1 - 1 file changed, 1 deletion(-) delete mode 100644 artifact/project/build.properties diff --git a/artifact/project/build.properties b/artifact/project/build.properties deleted file mode 100644 index 5e6884d..0000000 --- a/artifact/project/build.properties +++ /dev/null @@ -1 +0,0 @@ -sbt.version=1.11.6 From e90acd3b69625af2e57491d393282eb46c430854 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 17:11:58 +0100 Subject: [PATCH 03/95] Update gitignore. 
--- .gitignore | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 0a55727..c2841aa 100644 --- a/.gitignore +++ b/.gitignore @@ -8,12 +8,13 @@ # sbt specific .cache .history -.lib/ -dist/* -target/ -lib_managed/ -src_managed/ -project/boot/ -project/plugins/project/ +.lib +.bloop +dist +target +lib_managed +src_managed +project .bsp -.metals \ No newline at end of file +.metals +.scala-build \ No newline at end of file From 1ac14cdbf1bc5b2506a82e347b81fa4df653256b Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 17:12:43 +0100 Subject: [PATCH 04/95] Migrate to Scala3, Parsers. --- artifact/src/main/scala/library/Parsers.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/artifact/src/main/scala/library/Parsers.scala b/artifact/src/main/scala/library/Parsers.scala index 14c0bdb..bd7d2f0 100644 --- a/artifact/src/main/scala/library/Parsers.scala +++ b/artifact/src/main/scala/library/Parsers.scala @@ -1,7 +1,5 @@ package fcd -import language.higherKinds - trait Parsers { // the token type (`Elem`) and the type of the results are left abstract @@ -42,13 +40,13 @@ trait Parsers { // For testing def isSuccess[R](p: Parser[R]): Boolean = !isFailure(p) def isFailure[R](p: Parser[R]): Boolean = !isSuccess(p) - def accepts[R, ES <% Iterable[Elem]](p: Parser[R], s: ES): Boolean = isSuccess(feedAll(p, s)) + def accepts[R](p: Parser[R], s: Iterable[Elem]): Boolean = isSuccess(feedAll(p, s)) // As optimization def always: Parser[Unit] } -trait RichParsers extends Parsers with Syntax with DerivedOps with CharSyntax +trait RichParsers extends Parsers, Syntax, DerivedOps, CharSyntax // A trait to bake parsers in a nested cake trait ParserUsage { From 923ee41024889a291b4514eb2f2bba1914657d3e Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 17:13:19 +0100 Subject: [PATCH 05/95] Migrate to Scala3, DerivedOps. 
--- .../src/main/scala/library/DerivedOps.scala | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index 37ed254..ebaac62 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -1,6 +1,6 @@ package fcd -trait DerivedOps { self: Parsers with Syntax => +trait DerivedOps { self: Parsers & Syntax => val any: Parser[Elem] = acceptIf(_ => true) @@ -8,7 +8,7 @@ trait DerivedOps { self: Parsers with Syntax => def no(t: Elem): Parser[Elem] = acceptIf(_ != t) - def acceptSeq[ES <% Iterable[Elem]](es: ES): Parser[List[Elem]] = + def acceptSeq(es: Iterable[Elem]): Parser[List[Elem]] = es.foldRight[Parser[List[Elem]]](succeed(Nil)) { (x, pxs) => accept(x) ~ pxs map mkList } @@ -29,16 +29,15 @@ trait DerivedOps { self: Parsers with Syntax => // def always[T](t: T): Parser[T] = // many(any) map { _ => t } - def oneOf[ES <% Iterable[Elem]](s: ES): Parser[Elem] = acceptIf { + def oneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { t => s.exists(_ == t) } - def noneOf[ES <% Iterable[Elem]](s: ES): Parser[Elem] = acceptIf { + def noneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { t => s.forall(_ != t) } - def opt[T](p: Parser[T]): Parser[Option[T]] = - alt(p map { r => Some(r) }, succeed(None)) + def opt[T](p: Parser[T]): Parser[Option[T]] = alt(p map { r => Some(r) }, succeed(None)) def manyN[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) @@ -61,11 +60,8 @@ trait DerivedOps { self: Parsers with Syntax => some_v } - def manyCount(p: Parser[Any]): Parser[Int] = - many(p) map { _.size } - - def someCount(p: Parser[Any]): Parser[Int] = - some(p) map { _.size } + def manyCount(p: Parser[Any]): Parser[Int] = many(p) map { _.size } + def someCount(p: Parser[Any]): Parser[Int] = some(p) map { _.size } // distributive law - chains a list of parsers // --> in Haskell 
one would use `traverse` @@ -80,17 +76,14 @@ trait DerivedOps { self: Parsers with Syntax => def consumed[T](p: Parser[T]): Parser[List[Elem]] = many(any) <& p - def eat[R](f: Elem => Parser[R]): Parser[R] = - any >> f + def eat[R](f: Elem => Parser[R]): Parser[R] = any >> f def delegate[T](p: Parser[T]): Parser[Parser[T]] = succeed(p) | eat { c => delegate(p << c) } def delegateN[T](n: Int, p: Parser[T]): Parser[Parser[T]] = - if (n <= 0) - succeed(p) - else - eat { c => delegateN(n - 1, p << c) } + if (n <= 0) succeed(p) + else eat { c => delegateN(n - 1, p << c) } // collects the results of parsers def collect[T](ps: List[Parser[T]]): Parser[List[T]] = @@ -117,8 +110,7 @@ trait DerivedOps { self: Parsers with Syntax => private def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } - val succeedForever: NT[Unit] = - succeed(()) | (any ~> succeedForever) + val succeedForever: NT[Unit] = succeed(()) | (any ~> succeedForever) def rightDerivative[R](p: Parser[R], elem: Elem): Parser[R] = done(p << elem) | eat { c => rightDerivative(p << c, elem) } @@ -147,8 +139,7 @@ trait DerivedOps { self: Parsers with Syntax => def filter[T](pred: Elem => Boolean): Parser[T] => Parser[T] = rep(el => p => if (pred(el)) (p << el) else p) - def skip[T]: Parser[T] => Parser[T] = - rep(el => p => p) + def skip[T]: Parser[T] => Parser[T] = rep(el => p => p) def mapIn[T](f: Elem => Elem): Parser[T] => Parser[T] = rep(el => p => p << f(el)) @@ -178,7 +169,7 @@ trait DerivedOps { self: Parsers with Syntax => def greedyMany[T](p: Parser[T]): Parser[List[T]] = greedySome(p) | succeed(Nil) // Instead of a class use a closure: - def greedySome[T]: Parser[T] => NT[List[T]] = { p => + def greedySome[T]: Parser[T] => Parser[List[T]] = { p => def withNext(p: Parser[T], ps: Parser[List[T]]): Parser[List[T]] = done(p) ~ ps ^^ { case t ~ ts => t :: ts } From 14648f78ae1da89af6b853bddb9a5de1d1d112e9 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 20:50:34 +0100 
Subject: [PATCH 06/95] Migrate CharSnytax. --- .../src/main/scala/library/CharSyntax.scala | 68 ++++++++----------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/artifact/src/main/scala/library/CharSyntax.scala b/artifact/src/main/scala/library/CharSyntax.scala index a1ed600..2f5b6f0 100644 --- a/artifact/src/main/scala/library/CharSyntax.scala +++ b/artifact/src/main/scala/library/CharSyntax.scala @@ -1,61 +1,47 @@ package fcd -import language.implicitConversions - -trait CharSyntax { self: Parsers with DerivedOps with Syntax => - +trait CharSyntax { self: Parsers & DerivedOps & Syntax => type Elem = Char - implicit def charParser(c: Char): Parser[Char] = accept(c) - def notChar(c: Char): Parser[Char] = acceptIf(_ != c) - val char = any - val letter = acceptIf(_.isLetter) - val upper = acceptIf(_.isUpper) - val lower = acceptIf(_.isLower) - val whitespace = acceptIf(_.isWhitespace) - val digit = acceptIf(_.isDigit) + val char = any + val letter = acceptIf(_.isLetter) + val upper = acceptIf(_.isUpper) + val lower = acceptIf(_.isLower) + val whitespace = acceptIf(_.isWhitespace) + val digit = acceptIf(_.isDigit) val letterOrDigit = acceptIf(_.isLetterOrDigit) - val space = acceptIf(_.isSpaceChar) - val spaces = many(space) - val newline = acceptIf(_ == '\n') + val space = acceptIf(_.isSpaceChar) + val spaces = many(space) + val newline = acceptIf(_ == '\n') def charRange(from: Char, to: Char) = acceptIf { c => c >= from && c <= to } - val asciiLetter = charRange('a', 'z') | charRange('A', 'Z') + val asciiLetter = charRange('a', 'z') | charRange('A', 'Z') def string(s: String): Parser[String] = (acceptSeq(s) map (_.mkString)) - sealed trait Stringable[T] { - def apply: T => String - } - object Stringable { - implicit val char: Stringable[Char] = new Stringable[Char] { - def apply = _.toString - } - implicit val charList: Stringable[List[Char]] = new Stringable[List[Char]] { - def apply = _.mkString - } - implicit val string: Stringable[String] = new 
Stringable[String] { - def apply = identity - } - implicit val stringList: Stringable[List[String]] = new Stringable[List[String]] { - def apply = _.mkString - } - implicit def seq[T: Stringable, U: Stringable]: Stringable[T ~ U] = new Stringable[T ~ U] { - def apply = { case l ~ r => - implicitly[Stringable[T]].apply(l) + implicitly[Stringable[U]].apply(r) - } - } + sealed trait Stringable[T] { def apply: T => String } + + given Stringable[Char] with { def apply = _.toString } + given Stringable[List[Char]] with { def apply = _.mkString } + given Stringable[String] with { def apply = identity } + given stringList: Stringable[List[String]] with { def apply = _.mkString } + given [T, U](using st: Stringable[T], su: Stringable[U]): Stringable[(T, U)] + with { + def apply = { case (l, r) => st.apply(l) ++ su.apply(r) } } - implicit def liftString(s: String): Parser[String] = string(s) + given Conversion[String, Parser[String]] = string + given Conversion[List[Char], String] = _.mkString - implicit def charString(cs: List[Char]): String = cs.mkString + given [T](using st: Stringable[T]): Conversion[Parser[T], Parser[String]] + with { + def apply(p: Parser[T]) = p map st.apply + } - implicit def stringParser[T: Stringable](p: Parser[T]): Parser[String] = - p map { v => implicitly[Stringable[T]].apply(v) } + given Conversion[Char, Parser[Char]] = accept def noneOf(s: String): Parser[Char] = acceptIf(t => !(s contains t)) } From b9cda64d23dcd6c79069103e64815cf0e9967815 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 20:56:00 +0100 Subject: [PATCH 07/95] Fix comment, rem semicolons. 
--- .../src/main/scala/library/Attributed.scala | 214 +++++++++--------- 1 file changed, 104 insertions(+), 110 deletions(-) diff --git a/artifact/src/main/scala/library/Attributed.scala b/artifact/src/main/scala/library/Attributed.scala index ed83272..96c0381 100644 --- a/artifact/src/main/scala/library/Attributed.scala +++ b/artifact/src/main/scala/library/Attributed.scala @@ -1,161 +1,155 @@ package might -/** -============================================================================================= -The contents of this file are taken (adapted) from Matt Might's implementation of -parsing with derivatives. The original implementation can be found online at: +/* + The contents of this file are taken (adapted) from Matt Might's + implementation of parsing with derivatives. The original implementation can + be found online at: - http://matt.might.net/articles/parsing-with-derivatives/ - -============================================================================================= -*/ - -/** - A collection of attributes which must be computed by iteration to a fixed point. + http://matt.might.net/articles/parsing-with-derivatives/ */ -trait Attributed { - private var generation = -1 ; - private var stabilized = false ; - /** - An attribute computable by fixed point. - - @param bottom the bottom of the attribute's lattice. - @param join the lub operation on the lattice. - @param wt the partial order on the lattice. - - */ - abstract class Attribute[A](bottom : A, join : (A,A) => A, wt : (A,A) => Boolean) - { - private var currentValue : A = bottom - private var compute : () => A = null +/** A collection of attributes which must be computed by iteration to a fixed + * point. + */ +trait Attributed { + private var generation = -1 + private var stabilized = false + + /** An attribute computable by fixed point. + * + * @param bottom + * the bottom of the attribute's lattice. + * @param join + * the lub operation on the lattice. 
+ * @param wt + * the partial order on the lattice. + */ + abstract class Attribute[A]( + bottom: A, + join: (A, A) => A, + wt: (A, A) => Boolean + ) { + private var currentValue: A = bottom + private var compute: () => A = null private var fixed = false - /** - Sets the computation the updates this attribute. - - @param computation the computation that updates this attribute. - */ - def := (computation : => A) { - compute = (() => computation) - } - - /** - Permanently fixes the value of this attribute. - - @param value the value of this attribute. - - */ - def :== (value : A) { + /** Sets the computation the updates this attribute. + * + * @param computation + * the computation that updates this attribute. + */ + def :=(computation: => A) = { compute = (() => computation) } + + /** Permanently fixes the value of this attribute. + * + * @param value + * the value of this attribute. + */ + def :==(value: A) = { currentValue = value fixed = true } - /** - Recomputes the value of this attribute. - */ - def update() { - if (fixed) - return ; + /** Recomputes the value of this attribute. + */ + def update(): Unit = { + if (fixed) return val old = currentValue val newValue = compute() - if (!wt(newValue,currentValue)) { - currentValue = join(newValue,currentValue) + if (!wt(newValue, currentValue)) { + currentValue = join(newValue, currentValue) FixedPoint.changed = true } } - /** - The current value of this attribute. - */ - def value : A = { - // When the value of this attribute is requested, there are - // three possible cases: - // - // (1) It's already been computed (this.stabilized); - // (2) It's been manually set (this.fixed); or - // (3) It needs to be computed (generation < FixedPoint.generation). + /** The current value of this attribute. 
+ */ + def value: A = { + /* + When the value of this attribute is requested, there are + three possible cases: + (1) It's already been computed (this.stabilized); + (2) It's been manually set (this.fixed); or + (3) It needs to be computed (generation < FixedPoint.generation). + */ if (fixed || stabilized || (generation == FixedPoint.generation)) return currentValue - else - // Run or continue the fixed-point computation: - fix() - - if (FixedPoint.stabilized) - stabilized = true - return currentValue + fix() + if (FixedPoint.stabilized) stabilized = true + currentValue } } // Subsumption tests for attributes: - protected[this] def implies (a : Boolean, b : Boolean) = (!a) || b - protected[this] def follows (a : Boolean, b : Boolean) = (!b) || a - protected[this] def updateAttributes(): Unit + protected def implies(a: Boolean, b: Boolean) = (!a) || b + protected def follows(a: Boolean, b: Boolean) = (!b) || a + protected def updateAttributes(): Unit - private def fix() { + private def fix() = { this.generation = FixedPoint.generation if (FixedPoint.master eq null) { - FixedPoint.master = this ; - do { + FixedPoint.master = this + + FixedPoint.generation += 1 + FixedPoint.changed = false + updateAttributes() + while (FixedPoint.changed) { FixedPoint.generation += 1 FixedPoint.changed = false updateAttributes() - } while (FixedPoint.changed) ; - FixedPoint.stabilized = true ; + } + + FixedPoint.stabilized = true FixedPoint.generation += 1 updateAttributes() FixedPoint.reset() - } else { - updateAttributes() - } + } else updateAttributes() } } - -/** - FixedPoint tracks the state of a fixed point algorithm for the attributes of a grammar. - - In case there are fixed points running in multiple threads, each attribute is thread-local. - */ - +/** FixedPoint tracks the state of a fixed point algorithm for the attributes of + * a grammar. + * + * In case there are fixed points running in multiple threads, each attribute + * is thread-local. 
+ */ private object FixedPoint { - private val _stabilized = new ThreadLocal[Boolean] + private val _stabilized = ThreadLocal[Boolean]() _stabilized.set(false) - def stabilized = _stabilized.get ; - def stabilized_= (v : Boolean) { _stabilized.set(v) } + def stabilized = _stabilized.get + def stabilized_=(v: Boolean) = { _stabilized.set(v) } - private val _running = new ThreadLocal[Boolean] + private val _running = ThreadLocal[Boolean]() _running.set(false) - def running = _running.get ; - def running_= (v : Boolean) { _running.set(v) } + def running = _running.get + def running_=(v: Boolean) = { _running.set(v) } - private val _changed = new ThreadLocal[Boolean] + private val _changed = ThreadLocal[Boolean]() _changed.set(false) - def changed = _changed.get ; - def changed_= (v : Boolean) { _changed.set(v) } + def changed = _changed.get + def changed_=(v: Boolean) = { _changed.set(v) } - private val _generation = new ThreadLocal[Int] + private val _generation = ThreadLocal[Int]() _generation.set(0) - def generation = _generation.get ; - def generation_= (v : Int) { _generation.set(v) } + def generation = _generation.get + def generation_=(v: Int) = { _generation.set(v) } - private val _master = new ThreadLocal[Object] + private val _master = ThreadLocal[Object]() _master.set(null) - def master = _master.get ; - def master_= (v : Object) { _master.set(v) } - - /** - Resets all of the fixed point variables for this thread. - */ - def reset () { - this.stabilized = false ; - this.running = false ; - this.master = null ; - this.changed = false ; - this.generation = 0 ; + def master = _master.get + def master_=(v: Object) = { _master.set(v) } + + /** Resets all of the fixed point variables for this thread. 
+ */ + def reset() = { + this.stabilized = false + this.running = false + this.master = null + this.changed = false + this.generation = 0 } } From 370326fa2a016ef830d544d6fc12346740d142cf Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 21:10:28 +0100 Subject: [PATCH 08/95] Update gitignore. --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c2841aa..30aadb5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Scala *.class *.log +.scalafmt.conf # Mac .DS_Store @@ -17,4 +18,4 @@ src_managed project .bsp .metals -.scala-build \ No newline at end of file +.scala-build From 18b6c08f1cef8e604a05eedf738b6d07d842a2a0 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 21:12:55 +0100 Subject: [PATCH 09/95] Update build.sbt --- artifact/build.sbt | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/artifact/build.sbt b/artifact/build.sbt index 7d2182c..f22061b 100644 --- a/artifact/build.sbt +++ b/artifact/build.sbt @@ -6,16 +6,15 @@ scalaVersion := "3.7.3" scalacOptions ++= Seq("-feature", "-deprecation") -libraryDependencies += "org.scalatest" %% "scalatest" % "2.2.4" % "test" +libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.19" % "test" -resolvers += "Sonatype OSS Snapshots" at - "https://oss.sonatype.org/content/repositories/releases" +resolvers += "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/releases" -parallelExecution in Test := true +Test / parallelExecution := true -libraryDependencies += "org.scala-lang.modules" % "scala-xml_2.11" % "1.0.5" +libraryDependencies += "org.scala-lang.modules" %% "scala-xml" % "2.4.0" -initialCommands in console := """import fcd._; import fcd.DerivativeParsers._""" +console / initialCommands := """import fcd._; import fcd.DerivativeParsers._""" // For VM users on windows systems, please uncomment the following line: // target := file("/home/vagrant/target/") From 
574bb2a019e9d2cf08d2795d39ecfa25ae8b02dd Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 23:03:41 +0100 Subject: [PATCH 10/95] Migrate library to Scala 3.7.0 --- .../scala/library/DerivativeParsers.scala | 183 +++++++++++------- artifact/src/main/scala/library/Syntax.scala | 4 +- 2 files changed, 113 insertions(+), 74 deletions(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 47b02fa..9f20f91 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -28,13 +28,14 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def not: Parser[Unit] = new Not(p) // the map family - def mapResults[U](f: (=> Results[R]) => Results[U]): Parser[U] = new MapResults(p, f) + def mapResults[U](f: (=> Results[R]) => Results[U]): Parser[U] = + new MapResults(p, f) def map[U](f: R => U): Parser[U] = p mapResults { ress => ress map f } def withResults[U](res: List[U]): Parser[U] = mapResults(_ => res) // for optimization of biased choice def prefix: Parser[Unit] = { - if (accepts) { + if (accepts) { always } else { eat { el => (p consume el).prefix } @@ -44,7 +45,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => object Fail extends NullaryPrintable("∅") with Parser[Nothing] { override def results = List.empty - override def failed = true + override def failed = true override def accepts = false override def consume: Elem => this.type = in => this @@ -56,7 +57,9 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def and2[U](q: Parser[U]): this.type = this override def map[U](f: Nothing => U): this.type = this override def flatMap[U](g: Nothing => Parser[U]): this.type = this - override def mapResults[U](f: (=> Results[Nothing]) => Results[U]): this.type = this + override def mapResults[U]( + f: (=> Results[Nothing]) => Results[U] + ): this.type = this override 
def done = this override def not: Parser[Unit] = Always @@ -66,12 +69,16 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => object Always extends NullaryPrintable("∞") with Parser[Unit] { override def results = List(()) - override def failed = false + override def failed = false override def accepts = true override def consume = in => Always override def not: Parser[Unit] = fail - override def and[U](q: Parser[U]): Parser[(Unit, U)] = q map { r => ((), r) } - override def and2[U](q: Parser[U]): Parser[(U, Unit)] = q map { r => (r, ()) } + override def and[U](q: Parser[U]): Parser[(Unit, U)] = q map { r => + ((), r) + } + override def and2[U](q: Parser[U]): Parser[(U, Unit)] = q map { r => + (r, ()) + } // this is a valid optimization, however it almost never occurs. override def alt[U >: Unit](q: Parser[U]) = this @@ -79,26 +86,30 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def toString = "always" } - case class Succeed[R](ress: Results[R]) extends NullaryPrintable("ε") with Parser[R] { p => + case class Succeed[R](ress: Results[R]) + extends NullaryPrintable("ε") + with Parser[R] { p => override def results = ress - override def failed = false + override def failed = false override def accepts = true override def consume = (in: Elem) => fail override def toString = s"ε($ress)" override def done: Parser[R] = this - override def mapResults[T](f: (=> Results[R]) => Results[T]): Parser[T] = Succeed(f(ress)) + override def mapResults[T](f: (=> Results[R]) => Results[T]): Parser[T] = + Succeed(f(ress)) override def seq[U](q: Parser[U]): Parser[R ~ U] = q mapResults { ress2 => for (r <- ress; r2 <- ress2) yield (r, r2) } override def seq2[U](q: Parser[U]): Parser[U ~ R] = q mapResults { ress2 => for (r <- ress; r2 <- ress2) yield (r2, r) } - override def flatMap[U](f: R => Parser[U]): Parser[U] = ress.map(f).reduce(_ alt _) + override def flatMap[U](f: R => Parser[U]): Parser[U] = + ress.map(f).reduce(_ alt _) } case class 
Accept(elem: Elem) extends Parser[Elem] { def results = List.empty - def failed = false + def failed = false def accepts = false def consume = (in: Elem) => if (in == elem) { @@ -109,12 +120,15 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => lazy val name = "'" + escape(elem) + "'" def printNode = s"""$id [label="$name", shape=circle]""" - private def escape(c: Elem): String = c.toString.replace("\\", "\\\\").replace("\"", "\\\"") + private def escape(c: Elem): String = + c.toString.replace("\\", "\\\\").replace("\"", "\\\"") } - class AcceptIf(f: Elem => Boolean) extends NullaryPrintable("acceptIf") with Parser[Elem] { + class AcceptIf(f: Elem => Boolean) + extends NullaryPrintable("acceptIf") + with Parser[Elem] { def results = List.empty - def failed = false + def failed = false def accepts = false def consume = (in: Elem) => if (f(in)) { @@ -124,18 +138,22 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => } } - class Not[R](val p: Parser[R]) extends UnaryPrintable("not", p) with Parser[Unit] { + class Not[R](val p: Parser[R]) + extends UnaryPrintable("not", p) + with Parser[Unit] { def results = (if (p.results.isEmpty) List(()) else List.empty) - def failed = false // we never know, this is a conservative approx. + def failed = false // we never know, this is a conservative approx. 
def accepts = !p.accepts def consume: Elem => Parser[Unit] = in => (p consume in).not override def not = p withResults List(()) override def toString = s"not($p)" } - class Alt[R, U >: R](val p: Parser[R], val q: Parser[U]) extends BinaryPrintable("|", p, q) with Parser[U] { + class Alt[R, U >: R](val p: Parser[R], val q: Parser[U]) + extends BinaryPrintable("|", p, q) + with Parser[U] { def results = (p.results ++ q.results).distinct - def failed = p.failed && q.failed + def failed = p.failed && q.failed def accepts = p.accepts || q.accepts def consume = (in: Elem) => (p consume in) alt (q consume in) @@ -144,42 +162,53 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def toString = s"($p | $q)" } - class Seq[R, U](val p: Parser[R], val q: Parser[U]) extends BinaryPrintable("~", p, q) with Parser[R ~ U] { + class Seq[R, U](val p: Parser[R], val q: Parser[U]) + extends BinaryPrintable("~", p, q) + with Parser[R ~ U] { - def results = (for { r <- p.results; u <- q.results } yield (new ~(r, u))).distinct + def results = + (for { r <- p.results; u <- q.results } yield (new ~(r, u))).distinct // q.failed forces q, which might not terminate for grammars with // infinite many nonterminals, like: // def foo(p) = 'a' ~ foo(p << 'a') // so we approximate similar to flatmap. 
- def failed = p.failed // || q.failed + def failed = p.failed // || q.failed def accepts = p.accepts && q.accepts - def consume = (in: Elem) => ((p consume in) seq q) alt (p.done seq (q consume in)) + def consume = (in: Elem) => + ((p consume in) seq q) alt (p.done seq (q consume in)) override def toString = s"($p ~ $q)" // canonicalization rule (1) from PLDI 2016 override def seq[T](r: Parser[T]): Parser[(R ~ U) ~ T] = - (p seq (q seq r)) map { - case (rr ~ (ru ~ rt)) => ((rr, ru), rt) - } + (p seq (q seq r)) map { case (rr ~ (ru ~ rt)) => + ((rr, ru), rt) + } } - class Done[R](val p: Parser[R]) extends UnaryPrintable(s"done", p) with Parser[R] { + class Done[R](val p: Parser[R]) + extends UnaryPrintable(s"done", p) + with Parser[R] { def results = p.results - def failed = p.failed + def failed = p.failed def accepts = p.accepts def consume = (el: Elem) => fail override def done = this override def toString = s"done($p)" } - class MapResults[R, U](val p: Parser[R], f: (=> Results[R]) => Results[U]) extends UnaryPrintable(s"mapResults", p) with Parser[U] { + class MapResults[R, U](val p: Parser[R], f: (=> Results[R]) => Results[U]) + extends UnaryPrintable(s"mapResults", p) + with Parser[U] { // preserve whether p actually has results (f might ignore its argument...) def results = if (p.results.isEmpty) List() else f(p.results).distinct - def failed = p.failed + def failed = p.failed def accepts = p.accepts def consume = (el: Elem) => (p consume el) mapResults f - override def mapResults[T](g: (=> Results[U]) => Results[T]): Parser[T] = p mapResults { res => g(f(res)) } - override def map[T](g: U => T): Parser[T] = p mapResults { res => f(res) map g } + override def mapResults[T](g: (=> Results[U]) => Results[T]): Parser[T] = + p mapResults { res => g(f(res)) } + override def map[T](g: U => T): Parser[T] = p mapResults { res => + f(res) map g + } override def done = p.done mapResults f // we can forget the results here. 
@@ -189,36 +218,42 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => // canonicalization rule (2) from PLDI 2016 // allows for instance rewriting (always.map(f) & p) -> p.map(...f...) override def seq[S](q: Parser[S]): Parser[U ~ S] = - (p seq q).mapResults(rss => rss.unzip match { - case (us, ss) => f(us) zip ss - }) + (p seq q).mapResults(rss => + rss.unzip match { case (us, ss) => f(us) zip ss } + ) override def seq2[S](q: Parser[S]): Parser[S ~ U] = - (p seq2 q).mapResults(rss => rss.unzip match { - case (ss, us) => ss zip f(us) - }) + (p seq2 q).mapResults(rss => + rss.unzip match { case (ss, us) => ss zip f(us) } + ) override def and[S](q: Parser[S]): Parser[(U, S)] = - (p and q).mapResults(rss => rss.unzip match { - case (us, ss) => f(us) zip ss - }) + (p and q).mapResults(rss => + rss.unzip match { case (us, ss) => f(us) zip ss } + ) override def and2[S](q: Parser[S]): Parser[(S, U)] = - (p and2 q).mapResults(rss => rss.unzip match { - case (ss, us) => ss zip f(us) - }) + (p and2 q).mapResults(rss => + rss.unzip match { case (ss, us) => ss zip f(us) } + ) } - class And[R, U](val p: Parser[R], val q: Parser[U]) extends BinaryPrintable("&", p, q) with Parser[(R, U)] { - def results = (for { r <- p.results; u <- q.results } yield ((r, u))).distinct - def failed = p.failed || q.failed + class And[R, U](val p: Parser[R], val q: Parser[U]) + extends BinaryPrintable("&", p, q) + with Parser[(R, U)] { + def results = + (for { r <- p.results; u <- q.results } yield ((r, u))).distinct + def failed = p.failed || q.failed def accepts = p.accepts && q.accepts def consume = (in: Elem) => (p consume in) and (q consume in) override def not = p.not alt q.not override def toString = s"($p & $q)" } - class FlatMap[R, U](val p: Parser[R], f: R => Parser[U]) extends UnaryPrintable("flatMap", p) with Parser[U] { - def results = ((p.results map f) flatMap (_.results)).distinct //res().distinct + class FlatMap[R, U](val p: Parser[R], f: R => Parser[U]) + extends 
UnaryPrintable("flatMap", p) + with Parser[U] { + def results = + ((p.results map f) flatMap (_.results)).distinct // res().distinct def accepts = !results.isEmpty - def failed = p.failed // that's the best we know + def failed = p.failed // that's the best we know def consume: Elem => Parser[U] = in => { val next = (p consume in) flatMap f @@ -228,49 +263,50 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def toString = "flatMap" } - class Nonterminal[+R](_p: => Parser[R]) extends Parser[R] { + class Nonterminal[R](_p: => Parser[R]) extends Parser[R] { lazy val p = _p - def accepts: Boolean = propertiesFix.nullable.value - def failed: Boolean = propertiesFix.empty.value + def accepts: Boolean = propertiesFix.nullable.value + def failed: Boolean = propertiesFix.empty.value def results: Results[R] = resultsFix.results.value // This separation into two fixed points is essential to // prevent excessive recomputation. - protected[this] object propertiesFix extends Attributed { - object nullable extends Attribute[Boolean](false,_ || _,implies) - object empty extends Attribute[Boolean](true,_ && _,follows) + protected object propertiesFix extends Attributed { + object nullable extends Attribute[Boolean](false, _ || _, implies) + object empty extends Attribute[Boolean](true, _ && _, follows) - empty := p.failed - nullable := p.accepts + empty := p.failed + nullable := p.accepts - override protected[this] def updateAttributes() { + override protected def updateAttributes() = { empty.update() nullable.update() } } - protected[this] object resultsFix extends Attributed { - object results extends Attribute[List[R]]( - List.empty, - (nw, ol) => (nw ++ ol).distinct, - (nw, ol) => nw.toSet.subsetOf(ol.toSet)) + protected object resultsFix extends Attributed { + object results + extends Attribute[List[R]]( + List.empty, + (nw, ol) => (nw ++ ol).distinct, + (nw, ol) => nw.toSet.subsetOf(ol.toSet) + ) results := p.results - override protected[this] def 
updateAttributes() { - results.update() - } + override protected def updateAttributes() = results.update() } - private[this] val cache: mutable.ListMap[Elem, Parser[R]] = mutable.ListMap.empty + private val cache: mutable.ListMap[Elem, Parser[R]] = mutable.ListMap.empty // Wrapping in `nonterminal` is cecessary for left-recursive // grammars and for grammars like "DerivativeParsers / preprocessor" // that recursively derive. Optimizing the nonterminal node away causes // divergence on these grammars. Worse, in the latter case // forcing `next` will already cause divergence. override def consume: Elem => Parser[R] = el => - cache.getOrElseUpdate(el, + cache.getOrElseUpdate( + el, if (p.failed) fail else @@ -321,11 +357,14 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def done[T](p: Parser[T]): Parser[T] = p.done - override def nonterminal[R](_p: => Parser[R]): Nonterminal[R] = new Nonterminal(_p) - def nonterminal[R](name: String)(_p: => Parser[R]): Nonterminal[R] = new Nonterminal(_p).named(name) + override def nonterminal[R](_p: => Parser[R]): Nonterminal[R] = + new Nonterminal(_p) + def nonterminal[R](name: String)(_p: => Parser[R]): Nonterminal[R] = + new Nonterminal(_p).named(name) def feed[R](p: Parser[R], in: Elem) = p.consume(in) - def parse[R](p: Parser[R], in: Iterable[Elem]): Results[R] = feedAll(p, in).results + def parse[R](p: Parser[R], in: Iterable[Elem]): Results[R] = + feedAll(p, in).results // for testing override def isSuccess[R](p: Parser[R]): Boolean = p.accepts diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 1a59b9e..4b111b8 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -2,9 +2,9 @@ package fcd import language.implicitConversions -trait Syntax { self: Parsers with DerivedOps => +trait Syntax { self: Parsers & DerivedOps => - implicit class ParserOps[R, P <% Parser[R]](p: P) { + implicit class 
ParserOps[R](p: Parser[R]) { def <<(in: Elem): Parser[R] = self.feed(p, in) def <<<(in: Seq[Elem]): Parser[R] = self.feedAll(p, in) def parse(s: Seq[Elem]) = self.parse(p, s) From e766194a4f742592a6ea8b947a0a4ba01dc3d4e5 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 4 Nov 2025 23:15:16 +0100 Subject: [PATCH 11/95] Remove new keyword --- .../main/scala/library/DerivativeParsers.scala | 18 +++++++++--------- .../src/main/scala/library/Printable.scala | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 9f20f91..2bac4a0 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -17,19 +17,19 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def failed: Boolean def alt[U >: R](q: Parser[U]): Parser[U] = q alt2 p - def alt2[U >: R](q: Parser[U]): Parser[U] = new Alt(q, p) + def alt2[U >: R](q: Parser[U]): Parser[U] = Alt(q, p) def and[U](q: Parser[U]): Parser[(R, U)] = q and2 p - def and2[U](q: Parser[U]): Parser[(U, R)] = new And(q, p) + def and2[U](q: Parser[U]): Parser[(U, R)] = And(q, p) def seq[U](q: Parser[U]): Parser[R ~ U] = q seq2 p def seq2[U](q: Parser[U]): Parser[U ~ R] = new Seq(q, p) - def flatMap[U](f: R => Parser[U]): Parser[U] = new FlatMap(p, f) + def flatMap[U](f: R => Parser[U]): Parser[U] = FlatMap(p, f) def done: Parser[R] = if (accepts) Succeed(p.results) else fail - def not: Parser[Unit] = new Not(p) + def not: Parser[Unit] = Not(p) // the map family def mapResults[U](f: (=> Results[R]) => Results[U]): Parser[U] = - new MapResults(p, f) + MapResults(p, f) def map[U](f: R => U): Parser[U] = p mapResults { ress => ress map f } def withResults[U](res: List[U]): Parser[U] = mapResults(_ => res) @@ -318,7 +318,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => this } var name = "nt" - private val rec 
= new DynamicVariable[Boolean](false) + private val rec = DynamicVariable[Boolean](false) override def toString = if (rec.value) s"nt(${System.identityHashCode(this)})" @@ -340,7 +340,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => val fail: Parser[Nothing] = Fail val always: Parser[Unit] = Always def succeed[R](res: R): Parser[R] = Succeed(List(res)) - def acceptIf(cond: Elem => Boolean): Parser[Elem] = new AcceptIf(cond) + def acceptIf(cond: Elem => Boolean): Parser[Elem] = AcceptIf(cond) // combinators with parser arguments def not[R](p: Parser[R]): Parser[Unit] = p.not @@ -358,9 +358,9 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def done[T](p: Parser[T]): Parser[T] = p.done override def nonterminal[R](_p: => Parser[R]): Nonterminal[R] = - new Nonterminal(_p) + Nonterminal(_p) def nonterminal[R](name: String)(_p: => Parser[R]): Nonterminal[R] = - new Nonterminal(_p).named(name) + Nonterminal(_p).named(name) def feed[R](p: Parser[R], in: Elem) = p.consume(in) def parse[R](p: Parser[R], in: Iterable[Elem]): Results[R] = diff --git a/artifact/src/main/scala/library/Printable.scala b/artifact/src/main/scala/library/Printable.scala index 52651b6..6dfb1e0 100644 --- a/artifact/src/main/scala/library/Printable.scala +++ b/artifact/src/main/scala/library/Printable.scala @@ -26,7 +26,7 @@ trait Printable { |""".stripMargin('|') def printToFile(path: String): Unit = { - val is = new ByteArrayInputStream(printGraph.getBytes("UTF-8")) + val is = ByteArrayInputStream(printGraph.getBytes("UTF-8")) (s"dot -Tpng -o $path" #< is) ! 
} From 3e6382023d98198dcdfaa1ee57b9387ecbc71dc2 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 5 Nov 2025 04:07:27 +0100 Subject: [PATCH 12/95] Remove duplicate&unused code --- .../scala/library/DerivativeParsers.scala | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 2bac4a0..6ed627d 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -16,12 +16,9 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def accepts: Boolean def failed: Boolean - def alt[U >: R](q: Parser[U]): Parser[U] = q alt2 p - def alt2[U >: R](q: Parser[U]): Parser[U] = Alt(q, p) - def and[U](q: Parser[U]): Parser[(R, U)] = q and2 p - def and2[U](q: Parser[U]): Parser[(U, R)] = And(q, p) - def seq[U](q: Parser[U]): Parser[R ~ U] = q seq2 p - def seq2[U](q: Parser[U]): Parser[U ~ R] = new Seq(q, p) + def alt[U >: R](q: Parser[U]): Parser[U] = Alt(p, q) + def and[U](q: Parser[U]): Parser[(R, U)] = And(p, q) + def seq[U](q: Parser[U]): Parser[(R, U)] = new Seq(p, q) def flatMap[U](f: R => Parser[U]): Parser[U] = FlatMap(p, f) def done: Parser[R] = if (accepts) Succeed(p.results) else fail @@ -50,11 +47,8 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def consume: Elem => this.type = in => this override def alt[U >: Nothing](q: Parser[U]): q.type = q - override def alt2[U >: Nothing](q: Parser[U]): q.type = q override def seq[U](q: Parser[U]): this.type = this - override def seq2[U](q: Parser[U]): this.type = this override def and[U](q: Parser[U]): this.type = this - override def and2[U](q: Parser[U]): this.type = this override def map[U](f: Nothing => U): this.type = this override def flatMap[U](g: Nothing => Parser[U]): this.type = this override def mapResults[U]( @@ -76,13 +70,9 @@ trait DerivativeParsers extends 
Parsers { self: DerivedOps => override def and[U](q: Parser[U]): Parser[(Unit, U)] = q map { r => ((), r) } - override def and2[U](q: Parser[U]): Parser[(U, Unit)] = q map { r => - (r, ()) - } // this is a valid optimization, however it almost never occurs. override def alt[U >: Unit](q: Parser[U]) = this - override def alt2[U >: Unit](q: Parser[U]) = this override def toString = "always" } @@ -100,9 +90,6 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def seq[U](q: Parser[U]): Parser[R ~ U] = q mapResults { ress2 => for (r <- ress; r2 <- ress2) yield (r, r2) } - override def seq2[U](q: Parser[U]): Parser[U ~ R] = q mapResults { ress2 => - for (r <- ress; r2 <- ress2) yield (r2, r) - } override def flatMap[U](f: R => Parser[U]): Parser[U] = ress.map(f).reduce(_ alt _) } @@ -221,18 +208,10 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => (p seq q).mapResults(rss => rss.unzip match { case (us, ss) => f(us) zip ss } ) - override def seq2[S](q: Parser[S]): Parser[S ~ U] = - (p seq2 q).mapResults(rss => - rss.unzip match { case (ss, us) => ss zip f(us) } - ) override def and[S](q: Parser[S]): Parser[(U, S)] = (p and q).mapResults(rss => rss.unzip match { case (us, ss) => f(us) zip ss } ) - override def and2[S](q: Parser[S]): Parser[(S, U)] = - (p and2 q).mapResults(rss => - rss.unzip match { case (ss, us) => ss zip f(us) } - ) } class And[R, U](val p: Parser[R], val q: Parser[U]) From f19711e55d26f791d60e03cfcc533fff7df67a58 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 5 Nov 2025 04:24:08 +0100 Subject: [PATCH 13/95] Make infix, remove unnecessary conversion of alias --- artifact/src/main/scala/library/Syntax.scala | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 4b111b8..4fcc2cb 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -9,8 
+9,8 @@ trait Syntax { self: Parsers & DerivedOps => def <<<(in: Seq[Elem]): Parser[R] = self.feedAll(p, in) def parse(s: Seq[Elem]) = self.parse(p, s) - def map[U](f: R => U): Parser[U] = self.map(p, f) - def flatMap[U](f: R => Parser[U]): Parser[U] = self.flatMap(p, f) + infix def map[U](f: R => U): Parser[U] = self.map(p, f) + infix def flatMap[U](f: R => Parser[U]): Parser[U] = self.flatMap(p, f) def ~[U](q: Parser[U]) = seq(p, q) def ~>[U](q: Parser[U]) = seq(p, q) map { case (a, b) => b } @@ -44,9 +44,6 @@ trait Syntax { self: Parsers & DerivedOps => implicit def toParser[R](nt: NT[R]): Parser[R] = nt.parser implicit def toNT[R](parser: => Parser[R]): NT[R] = NT(nonterminal(parser)) - implicit def tupleSeq2[T1, T2, O](f: (T1, T2) => O): (T1 ~ T2) => O = { - case t1 ~ t2 => f(t1, t2) - } implicit def tupleSeq3[T1, T2, T3, O](f: (T1, T2, T3) => O): (T1 ~ T2 ~ T3) => O = { case t1 ~ t2 ~ t3 => f(t1, t2, t3) } From 641094d423a545da12c3d500af9dffa5ea4f5fa0 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 5 Nov 2025 05:08:48 +0100 Subject: [PATCH 14/95] Replace implicits by givens --- .../src/main/scala/library/CharSyntax.scala | 8 +-- .../src/main/scala/library/DerivedOps.scala | 24 ++++---- artifact/src/main/scala/library/Syntax.scala | 55 ++++++++----------- 3 files changed, 37 insertions(+), 50 deletions(-) diff --git a/artifact/src/main/scala/library/CharSyntax.scala b/artifact/src/main/scala/library/CharSyntax.scala index 2f5b6f0..b322220 100644 --- a/artifact/src/main/scala/library/CharSyntax.scala +++ b/artifact/src/main/scala/library/CharSyntax.scala @@ -20,7 +20,7 @@ trait CharSyntax { self: Parsers & DerivedOps & Syntax => val asciiLetter = charRange('a', 'z') | charRange('A', 'Z') - def string(s: String): Parser[String] = (acceptSeq(s) map (_.mkString)) + def string(s: String): Parser[String] = acceptSeq(s) ^^ (_.mkString) sealed trait Stringable[T] { def apply: T => String } @@ -36,10 +36,8 @@ trait CharSyntax { self: Parsers & DerivedOps & 
Syntax => given Conversion[String, Parser[String]] = string given Conversion[List[Char], String] = _.mkString - given [T](using st: Stringable[T]): Conversion[Parser[T], Parser[String]] - with { - def apply(p: Parser[T]) = p map st.apply - } + given [T](using st: Stringable[T]): Conversion[Parser[T], Parser[String]] = + p => p ^^ st.apply given Conversion[Char, Parser[Char]] = accept diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index ebaac62..2318824 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -10,17 +10,17 @@ trait DerivedOps { self: Parsers & Syntax => def acceptSeq(es: Iterable[Elem]): Parser[List[Elem]] = es.foldRight[Parser[List[Elem]]](succeed(Nil)) { (x, pxs) => - accept(x) ~ pxs map mkList + accept(x) ~ pxs ^^ mkList } def some[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) map { case p ~ ps => p :: ps } + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case p ~ ps => p :: ps } some_v } def many[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) map { case p ~ ps => p :: ps } + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case p ~ ps => p :: ps } many_v } @@ -37,16 +37,16 @@ trait DerivedOps { self: Parsers & Syntax => t => s.forall(_ != t) } - def opt[T](p: Parser[T]): Parser[Option[T]] = alt(p map { r => Some(r) }, succeed(None)) + def opt[T](p: Parser[T]): Parser[Option[T]] = alt(p ^^ { r => Some(r) }, succeed(None)) def manyN[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else p ~ manyN(n - 1, p) map { case r ~ rs => r :: rs } + else p ~ manyN(n - 1, p) ^^ { case r ~ rs => r :: rs } } def atMost[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else (p ~ 
atMost(n - 1, p) map { case r ~ rs => r :: rs }) | succeed(Nil) + else (p ~ atMost(n - 1, p) ^^ { case r ~ rs => r :: rs }) | succeed(Nil) } def manySep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { @@ -56,21 +56,21 @@ trait DerivedOps { self: Parsers & Syntax => // same optimization as above for many and some def someSep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(sep ~> some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) map { case p ~ ps => p :: ps } + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case p ~ ps => p :: ps } some_v } - def manyCount(p: Parser[Any]): Parser[Int] = many(p) map { _.size } - def someCount(p: Parser[Any]): Parser[Int] = some(p) map { _.size } + def manyCount(p: Parser[Any]): Parser[Int] = many(p) ^^ { _.size } + def someCount(p: Parser[Any]): Parser[Int] = some(p) ^^ { _.size } // distributive law - chains a list of parsers // --> in Haskell one would use `traverse` def distr[T](ps: List[Parser[T]]): Parser[List[T]] = ps.foldRight(succeed[List[T]](Nil)) { (p, l) => - (p ~ l) map { case a ~ b => a :: b } + (p ~ l) ^^ { case a ~ b => a :: b } } - def join[T](p: Parser[Parser[T]]): Parser[T] = p flatMap done + def join[T](p: Parser[Parser[T]]): Parser[T] = p >> done // A parser that captures the tokens consumed by `p` def consumed[T](p: Parser[T]): Parser[List[Elem]] = @@ -88,7 +88,7 @@ trait DerivedOps { self: Parsers & Syntax => // collects the results of parsers def collect[T](ps: List[Parser[T]]): Parser[List[T]] = ps.foldRight(succeed[List[T]](Nil)) { (p, l) => - done(p) >> { r => l.map(r :: _) } + done(p) >> { r => l ^^ (r :: _) } } def includes[T](p: Parser[T]): Parser[T] = diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 4fcc2cb..a9cd946 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -1,59 +1,48 @@ package fcd -import 
language.implicitConversions - trait Syntax { self: Parsers & DerivedOps => - - implicit class ParserOps[R](p: Parser[R]) { - def <<(in: Elem): Parser[R] = self.feed(p, in) - def <<<(in: Seq[Elem]): Parser[R] = self.feedAll(p, in) - def parse(s: Seq[Elem]) = self.parse(p, s) - - infix def map[U](f: R => U): Parser[U] = self.map(p, f) - infix def flatMap[U](f: R => Parser[U]): Parser[U] = self.flatMap(p, f) - + extension[R] (p: Parser[R]) { + def <<(in: Elem): Parser[R] = feed(p, in) + def <<<(in: Seq[Elem]): Parser[R] = feedAll(p, in) def ~[U](q: Parser[U]) = seq(p, q) - def ~>[U](q: Parser[U]) = seq(p, q) map { case (a, b) => b } - def <~[U](q: Parser[U]) = seq(p, q) map { case (a, b) => a } - + def ~>[U](q: Parser[U]) = map(seq(p, q), { case (a, b) => b }) + def <~[U](q: Parser[U]) = map(seq(p, q), { case (a, b) => a }) def |[U >: R](q: Parser[U]) = alt(p, q) - def &[U](q: Parser[U]) = and(p, q) - def <&[U](q: Parser[U]) = and(p, q) map { _._1 } - def &>[U](q: Parser[U]) = and(p, q) map { _._2 } + def <&[U](q: Parser[U]) = map(and(p, q), _._1) + def &>[U](q: Parser[U]) = map(and(p, q), _._2) // biased Alternative def <|[U >: R](q: Parser[U]) = biasedAlt(p, q) def |>[U >: R](q: Parser[U]) = biasedAlt(q, p) - def ^^[U](f: R => U): Parser[U] = p map f - def ^^^[U](u: => U): Parser[U] = p map { _ => u } - - def >>[U](f: R => Parser[U]): Parser[U] = p flatMap f + def ^^[U](f: R => U): Parser[U] = map(p,f) + def ^^^[U](u: => U): Parser[U] = map(p, _ => u ) + def >>[U](f: R => Parser[U]): Parser[U] = flatMap(p, f) def ? 
= opt(p) def * = many(p) def + = some(p) } - implicit def liftToParsers[R, U](p: Parser[R])(implicit conv: R => U): Parser[U] = - p map { conv } + given liftToParser[R,U](using conv: R => U): Conversion[Parser[R], Parser[U]] = + p => map(p,conv) // tag nonterminals - this allows automatic insertion of nt-markers final case class NT[+R](parser: Parser[R]) - implicit def toParser[R](nt: NT[R]): Parser[R] = nt.parser - implicit def toNT[R](parser: => Parser[R]): NT[R] = NT(nonterminal(parser)) + given [R]: Conversion[NT[R], Parser[R]] = _.parser + given [R]: Conversion[Parser[R], NT[R]] = parser => NT(nonterminal(parser)) - implicit def tupleSeq3[T1, T2, T3, O](f: (T1, T2, T3) => O): (T1 ~ T2 ~ T3) => O = { - case t1 ~ t2 ~ t3 => f(t1, t2, t3) + given tupleSeq3[T1, T2, T3, O]: Conversion[(T1, T2, T3) => O, (T1 ~ T2 ~ T3) => O] with { + def apply(f: (T1, T2, T3) => O) = { case t1 ~ t2 ~ t3 => f(t1, t2, t3) } } - implicit def tupleSeq4[T1, T2, T3, T4, O](f: (T1, T2, T3, T4) => O): (T1 ~ T2 ~ T3 ~ T4) => O = { - case t1 ~ t2 ~ t3 ~ t4 => f(t1, t2, t3, t4) + given tupleSeq4[T1, T2, T3, T4, O]: Conversion[(T1, T2, T3, T4) => O, (T1 ~ T2 ~ T3 ~ T4) => O] with { + def apply(f: (T1, T2, T3, T4) => O) = { case t1 ~ t2 ~ t3 ~ t4 => f(t1, t2, t3, t4) } } - implicit def tupleSeq5[T1, T2, T3, T4, T5, O](f: (T1, T2, T3, T4, T5) => O): (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O = { - case t1 ~ t2 ~ t3 ~ t4 ~ t5 => f(t1, t2, t3, t4, t5) + given tupleSeq5[T1, T2, T3, T4, T5, O]: Conversion[(T1, T2, T3, T4, T5) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O] with { + def apply(f: (T1, T2, T3, T4, T5) => O) = { case t1 ~ t2 ~ t3 ~ t4 ~ t5 => f(t1, t2, t3, t4, t5) } } - implicit def tupleSeq6[T1, T2, T3, T4, T5, T6, O](f: (T1, T2, T3, T4, T5, T6) => O): (T1 ~ T2 ~ T3 ~ T4 ~ T5 ~ T6) => O = { - case t1 ~ t2 ~ t3 ~ t4 ~ t5 ~ t6 => f(t1, t2, t3, t4, t5, t6) + given tupleSeq6[T1, T2, T3, T4, T5, T6, O]: Conversion[(T1, T2, T3, T4, T5, T6) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5 ~ T6) => O] with { + def apply(f: (T1, T2, T3, 
T4, T5, T6) => O) = { case t1 ~ t2 ~ t3 ~ t4 ~ t5 ~ t6 => f(t1, t2, t3, t4, t5, t6) } } } From 60ee954dbe636c576fb134efda0504c12baea762 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 5 Nov 2025 05:18:27 +0100 Subject: [PATCH 15/95] Fight infix warning --- .../src/main/scala/library/DerivativeParsers.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 6ed627d..ce48df5 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -16,19 +16,19 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def accepts: Boolean def failed: Boolean - def alt[U >: R](q: Parser[U]): Parser[U] = Alt(p, q) - def and[U](q: Parser[U]): Parser[(R, U)] = And(p, q) - def seq[U](q: Parser[U]): Parser[(R, U)] = new Seq(p, q) - def flatMap[U](f: R => Parser[U]): Parser[U] = FlatMap(p, f) + infix def alt[U >: R](q: Parser[U]): Parser[U] = Alt(p, q) + infix def and[U](q: Parser[U]): Parser[(R, U)] = And(p, q) + infix def seq[U](q: Parser[U]): Parser[(R, U)] = new Seq(p, q) + infix def flatMap[U](f: R => Parser[U]): Parser[U] = FlatMap(p, f) def done: Parser[R] = if (accepts) Succeed(p.results) else fail def not: Parser[Unit] = Not(p) // the map family - def mapResults[U](f: (=> Results[R]) => Results[U]): Parser[U] = + infix def mapResults[U](f: (=> Results[R]) => Results[U]): Parser[U] = MapResults(p, f) - def map[U](f: R => U): Parser[U] = p mapResults { ress => ress map f } - def withResults[U](res: List[U]): Parser[U] = mapResults(_ => res) + infix def map[U](f: R => U): Parser[U] = p mapResults { ress => ress map f } + infix def withResults[U](res: List[U]): Parser[U] = mapResults(_ => res) // for optimization of biased choice def prefix: Parser[Unit] = { From 1b4eda752ca08f11de84e30f1a705e02ee13aa86 Mon Sep 17 00:00:00 2001 From: Said 
Kadrioski Date: Wed, 5 Nov 2025 23:42:46 +0100 Subject: [PATCH 16/95] Make Section3 work --- .../src/main/scala/examples/paper/Section3.scala | 10 ++-------- artifact/src/main/scala/library/Parsers.scala | 13 ++----------- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Section3.scala b/artifact/src/main/scala/examples/paper/Section3.scala index 2319e73..b8730af 100644 --- a/artifact/src/main/scala/examples/paper/Section3.scala +++ b/artifact/src/main/scala/examples/paper/Section3.scala @@ -31,14 +31,8 @@ package fcd * instantiated to an object `paper` in `Paper.scala`. */ -trait Section3 extends ParserUsage { - - // Require a library implementation that also supports the derived combinators - type Parsers <: RichParsers - - // import all symbols from the library - import parsers._ +trait Section3 { self: RichParsers => /** * Section 3.2 First-Class Derivatives */ @@ -141,7 +135,7 @@ trait Section3 extends ParserUsage { // one can use the following parser as kind of "mock-parser" // // It will accept all words and return the input stream it has processed. 
- val collect = consumed(many(any)) map (_.mkString) + val collect = consumed(many(any)) ^^ (cs => cs.mkString) // for instance, you can try the following in the REPL // diff --git a/artifact/src/main/scala/library/Parsers.scala b/artifact/src/main/scala/library/Parsers.scala index bd7d2f0..05ac850 100644 --- a/artifact/src/main/scala/library/Parsers.scala +++ b/artifact/src/main/scala/library/Parsers.scala @@ -21,8 +21,8 @@ trait Parsers { def succeed[R](res: R): Parser[R] def acceptIf(cond: Elem => Boolean): Parser[Elem] - def map[R, U](p: Parser[R], f: R => U): Parser[U] - def flatMap[R, U](p: Parser[R], f: R => Parser[U]): Parser[U] + infix def map[R, U](p: Parser[R], f: R => U): Parser[U] + infix def flatMap[R, U](p: Parser[R], f: R => Parser[U]): Parser[U] def alt[R, U >: R](p: Parser[R], q: Parser[U]): Parser[U] def and[R, U](p: Parser[R], q: Parser[U]): Parser[(R, U)] def seq[R, U](p: Parser[R], q: Parser[U]): Parser[(R, U)] @@ -47,12 +47,3 @@ trait Parsers { } trait RichParsers extends Parsers, Syntax, DerivedOps, CharSyntax - -// A trait to bake parsers in a nested cake -trait ParserUsage { - // Override _parsers in concrete tests suites with the - // appropriate parser implementation. - type Parsers - def _parsers: Parsers - lazy val parsers: Parsers = _parsers -} From c862098936e2970eb9e84f3e806ba5a7140b3334 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 5 Nov 2025 23:57:29 +0100 Subject: [PATCH 17/95] Make Section 7 work. 
--- artifact/src/main/scala/examples/paper/Section3.scala | 2 +- artifact/src/main/scala/examples/paper/Section7.scala | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Section3.scala b/artifact/src/main/scala/examples/paper/Section3.scala index b8730af..d485061 100644 --- a/artifact/src/main/scala/examples/paper/Section3.scala +++ b/artifact/src/main/scala/examples/paper/Section3.scala @@ -32,7 +32,7 @@ package fcd */ -trait Section3 { self: RichParsers => +trait Section3 extends RichParsers { /** * Section 3.2 First-Class Derivatives */ diff --git a/artifact/src/main/scala/examples/paper/Section7.scala b/artifact/src/main/scala/examples/paper/Section7.scala index 151478a..3d6a3f7 100644 --- a/artifact/src/main/scala/examples/paper/Section7.scala +++ b/artifact/src/main/scala/examples/paper/Section7.scala @@ -17,14 +17,8 @@ package fcd * derivative based parsing as described by Matt Might et al, translated to an * object oriented setting. 
*/ -trait Section7 extends ParserUsage { - - // Require a library implementation that also supports the derived combinators - type Parsers <: RichParsers - - // import all symbols from the library - import parsers._ +trait Section7 extends RichParsers { /** * Section 7.1, introduces the concrete type of a parser as * From 127e5aa9ae150fa720c58e602277de1d75558a0d Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 00:02:59 +0100 Subject: [PATCH 18/95] fight warnings, import implicit conversion --- .../main/scala/examples/paper/Section3.scala | 2 + .../main/scala/examples/paper/Section4.scala | 42 +++++++------------ .../main/scala/examples/paper/Section7.scala | 2 + .../src/main/scala/library/DerivedOps.scala | 2 + 4 files changed, 21 insertions(+), 27 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Section3.scala b/artifact/src/main/scala/examples/paper/Section3.scala index d485061..7a936c4 100644 --- a/artifact/src/main/scala/examples/paper/Section3.scala +++ b/artifact/src/main/scala/examples/paper/Section3.scala @@ -1,5 +1,7 @@ package fcd +import scala.language.implicitConversions + /** * Section 3 - First-class Derivatives: Gaining * Fine Grained Control over the Input Stream diff --git a/artifact/src/main/scala/examples/paper/Section4.scala b/artifact/src/main/scala/examples/paper/Section4.scala index 7fe7c8a..c776809 100644 --- a/artifact/src/main/scala/examples/paper/Section4.scala +++ b/artifact/src/main/scala/examples/paper/Section4.scala @@ -1,5 +1,7 @@ package fcd +import scala.language.implicitConversions + /** * Section 4 - Applications * ========================== @@ -12,30 +14,17 @@ package fcd * Section 4 gives additional applications and use cases where our approach * results in a modular solution. 
*/ -trait Section4 extends ParserUsage { self: Section3 => - - // Require a library implementation that also supports the derived combinators - type Parsers <: RichParsers - - // import all symbols from the library - import parsers._ - +trait Section4 extends Section3 { /** * Section 4.1 - Increased Reuuse through Parser Selection */ object section_4_1 { // very simplified grammar to illustrate parser selection - import section_3_5_improved._ - - lazy val stmt: NT[Any] = - ("while" ~ space ~ "(true):" ~ block - | some('x') ~ '\n' - ) - + lazy val stmt = ("while" ~ space ~ "(true):" ~ block | some('x') ~ '\n') lazy val stmts = many(stmt) - lazy val block: NT[Any] = '\n' ~ indented(stmts) + lazy val block = '\n' ~ indented(stmts) // ### Example: Retroactive selection of the while statement nonterminal // @@ -47,7 +36,6 @@ trait Section4 extends ParserUsage { self: Section3 => // // > stmt parse "xxx\n" lazy val whileStmt = "while" ~> (stmt <<< "while") - lazy val untilStmt = "until" ~> (stmt <<< "while") } @@ -55,14 +43,14 @@ trait Section4 extends ParserUsage { self: Section3 => * Section 4.2 Modular Definitions as Combinators */ object section_4_2 { - def unescChar(c: Char): String = StringContext treatEscapes s"\\$c" + def unescChar(c: Char) = StringContext treatEscapes s"\\$c" // ### Example. 
Preprocessor that unescapes backslash escaped characters // // For instance, try // // unescape(many("\n" | "a")) parse "\\na\\n\\naaa" - def unescape[T](p: Parser[T]): Parser[T] = + def unescape[T](p: Parser[T]) = done(p) | eat { case '\\' => char >> { c => unescape( p <<< unescChar(c) ) @@ -82,7 +70,7 @@ trait Section4 extends ParserUsage { self: Section3 => | eat { c => inCode(text, code << c) } ) - def inText[R, S](text: Parser[R], code: Parser[S]): NT[(R, S)] = + def inText[R, S](text: Parser[R], code: Parser[S]) = ( done(text & code) | marker ~> inCode(text, code) | eat { c => inText(text << c, code) } @@ -142,27 +130,27 @@ trait Section4 extends ParserUsage { self: Section3 => type Layout = List[Int] - def table[T](cell: Parser[T]): NT[List[List[T]]] = + def table[T](cell: Parser[T]) = (head <~ lineEnd) >> { layout => body(layout, cell) } // a parser computing the table layout def head: Parser[Layout] = some('+'~> manyCount('-')) <~ '+' - def body[T](layout: Layout, cell: Parser[T]): Parser[List[List[T]]] = + def body[T](layout: Layout, cell: Parser[T]) = many(rowLine(layout, layout.map(n => cell)) <~ rowSeparator(layout)) // given a layout, creates a parser for row separators - def rowSeparator(layout: Layout): Parser[Any] = + def rowSeparator(layout: Layout) = layout.map { n => ("-" * n) + "+" }.foldLeft("+")(_+_) ~ lineEnd // either read another rowLine or quit cell parsers and collect results - def rowLine[T](layout: Layout, cells: List[Parser[T]]): Parser[List[T]] = + def rowLine[T](layout: Layout, cells: List[Parser[T]]) = ( ('|' ~> distr(delegateCells(layout, cells)) <~ lineEnd) >> { cs => rowLine(layout, cs) } | collect(cells) ) // first feed n tokens to every cell parser, then feed newline and read a pipe - def delegateCells[T](layout: Layout, cells: List[Parser[T]]): List[Parser[Parser[T]]] = + def delegateCells[T](layout: Layout, cells: List[Parser[T]]) = layout.zip(cells).map { case (n, p) => delegateN(n, p).map(_ << '\n') <~ '|' } @@ -184,8 
+172,8 @@ trait Section4 extends ParserUsage { self: Section3 => // |~~~ | // |aaaa| // +----+ - lazy val combined: NT[Any] = inText(asAndTables, spaced(parens)) - lazy val asAndTables: NT[Any] = as | table(combined) + lazy val combined = inText(asAndTables, spaced(parens)) + lazy val asAndTables = as | table(combined) // Again, some more examples of words that are recognized by `combined` can // be found in `DerivativeParsersTests.scala`. diff --git a/artifact/src/main/scala/examples/paper/Section7.scala b/artifact/src/main/scala/examples/paper/Section7.scala index 3d6a3f7..d0d09bd 100644 --- a/artifact/src/main/scala/examples/paper/Section7.scala +++ b/artifact/src/main/scala/examples/paper/Section7.scala @@ -1,5 +1,7 @@ package fcd +import scala.language.implicitConversions + /** * Section 7 - Implementation * ========================== diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index 2318824..fd8ac21 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -1,5 +1,7 @@ package fcd +import scala.language.implicitConversions + trait DerivedOps { self: Parsers & Syntax => val any: Parser[Elem] = acceptIf(_ => true) From 19731529fbbc9e5028f46c5fe1a0977be99ee4c0 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 00:53:43 +0100 Subject: [PATCH 19/95] Revert back to cake-pattern --- artifact/src/main/scala/examples/paper/Section3.scala | 2 +- artifact/src/main/scala/examples/paper/Section7.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Section3.scala b/artifact/src/main/scala/examples/paper/Section3.scala index 7a936c4..a46798a 100644 --- a/artifact/src/main/scala/examples/paper/Section3.scala +++ b/artifact/src/main/scala/examples/paper/Section3.scala @@ -34,7 +34,7 @@ import scala.language.implicitConversions */ -trait Section3 extends RichParsers { 
+trait Section3 { self: RichParsers => /** * Section 3.2 First-Class Derivatives */ diff --git a/artifact/src/main/scala/examples/paper/Section7.scala b/artifact/src/main/scala/examples/paper/Section7.scala index d0d09bd..21c008e 100644 --- a/artifact/src/main/scala/examples/paper/Section7.scala +++ b/artifact/src/main/scala/examples/paper/Section7.scala @@ -20,7 +20,7 @@ import scala.language.implicitConversions * object oriented setting. */ -trait Section7 extends RichParsers { +trait Section7 { self: RichParsers => /** * Section 7.1, introduces the concrete type of a parser as * From 026b5dbc380e2a3a9c3d5e003fbed146d7b37427 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 13:42:45 +0100 Subject: [PATCH 20/95] Simplify lambda. --- artifact/src/main/scala/examples/paper/Section3.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifact/src/main/scala/examples/paper/Section3.scala b/artifact/src/main/scala/examples/paper/Section3.scala index a46798a..287f5c6 100644 --- a/artifact/src/main/scala/examples/paper/Section3.scala +++ b/artifact/src/main/scala/examples/paper/Section3.scala @@ -137,7 +137,7 @@ trait Section3 { self: RichParsers => // one can use the following parser as kind of "mock-parser" // // It will accept all words and return the input stream it has processed. 
- val collect = consumed(many(any)) ^^ (cs => cs.mkString) + val collect = consumed(many(any)) ^^ { _.mkString } // for instance, you can try the following in the REPL // From 7eb6f6a69ecd835eb5d7d86cdba748e2a50d8fe8 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 13:50:53 +0100 Subject: [PATCH 21/95] formatting and start working on section4 --- .../main/scala/examples/paper/Section4.scala | 103 +++++++++--------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Section4.scala b/artifact/src/main/scala/examples/paper/Section4.scala index c776809..87fa9be 100644 --- a/artifact/src/main/scala/examples/paper/Section4.scala +++ b/artifact/src/main/scala/examples/paper/Section4.scala @@ -2,29 +2,31 @@ package fcd import scala.language.implicitConversions -/** - * Section 4 - Applications - * ========================== - * This file contains all code examples from section 5 of our paper: - * - * Brachthäuser, Rendel, Ostermann. - * Parsing with First-Class Derivatives - * To appear in OOPSLA 2016. - * - * Section 4 gives additional applications and use cases where our approach - * results in a modular solution. - */ - -trait Section4 extends Section3 { - /** - * Section 4.1 - Increased Reuuse through Parser Selection - */ +/** Section 4 - Applications + * + * This file contains all code examples from section 5 of our paper. + * + * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives To + * appear in OOPSLA 2016. + * + * Section 4 gives additional applications and use cases where our approach + * results in a modular solution. 
+ */ +trait Section4 { self: Section3 & RichParsers => + + /** Section 4.1 - Increased Reuuse through Parser Selection + */ object section_4_1 { // very simplified grammar to illustrate parser selection - lazy val stmt = ("while" ~ space ~ "(true):" ~ block | some('x') ~ '\n') + import section_3_5_improved._ + + lazy val stmt: NT[Any] = + ("while" ~ space ~ "(true):" ~ block + | some('x') ~ '\n') + lazy val stmts = many(stmt) - lazy val block = '\n' ~ indented(stmts) + lazy val block: NT[Any] = '\n' ~ indented(stmts) // ### Example: Retroactive selection of the while statement nonterminal // @@ -36,25 +38,26 @@ trait Section4 extends Section3 { // // > stmt parse "xxx\n" lazy val whileStmt = "while" ~> (stmt <<< "while") + lazy val untilStmt = "until" ~> (stmt <<< "while") } - /** - * Section 4.2 Modular Definitions as Combinators - */ + /** Section 4.2 Modular Definitions as Combinators + */ object section_4_2 { - def unescChar(c: Char) = StringContext treatEscapes s"\\$c" + def unescChar(c: Char): String = StringContext treatEscapes s"\\$c" // ### Example. 
Preprocessor that unescapes backslash escaped characters // // For instance, try // // unescape(many("\n" | "a")) parse "\\na\\n\\naaa" - def unescape[T](p: Parser[T]) = + def unescape[T](p: Parser[T]): Parser[T] = done(p) | eat { - case '\\' => char >> { c => - unescape( p <<< unescChar(c) ) - } + case '\\' => + char >> { c => + unescape(p <<< unescChar(c)) + } case c => unescape(p << c) } @@ -66,15 +69,13 @@ trait Section4 extends Section3 { // We have two states: Inside the code block and outside the code block def inCode[R, S](text: Parser[R], code: Parser[S]): NT[(R, S)] = - ( marker ~> inText(text, code) - | eat { c => inCode(text, code << c) } - ) + (marker ~> inText(text, code) + | eat { c => inCode(text, code << c) }) - def inText[R, S](text: Parser[R], code: Parser[S]) = - ( done(text & code) - | marker ~> inCode(text, code) - | eat { c => inText(text << c, code) } - ) + def inText[R, S](text: Parser[R], code: Parser[S]): NT[(R, S)] = + (done(text & code) + | marker ~> inCode(text, code) + | eat { c => inText(text << c, code) }) // Simple variant of balanced parenthesis lazy val parens: NT[Any] = '(' ~ parens ~ ')' | succeed(()) @@ -130,29 +131,33 @@ trait Section4 extends Section3 { type Layout = List[Int] - def table[T](cell: Parser[T]) = + def table[T](cell: Parser[T]): NT[List[List[T]]] = (head <~ lineEnd) >> { layout => body(layout, cell) } // a parser computing the table layout - def head: Parser[Layout] = some('+'~> manyCount('-')) <~ '+' + def head: Parser[Layout] = some('+' ~> manyCount('-')) <~ '+' - def body[T](layout: Layout, cell: Parser[T]) = + def body[T](layout: Layout, cell: Parser[T]): Parser[List[List[T]]] = many(rowLine(layout, layout.map(n => cell)) <~ rowSeparator(layout)) // given a layout, creates a parser for row separators - def rowSeparator(layout: Layout) = - layout.map { n => ("-" * n) + "+" }.foldLeft("+")(_+_) ~ lineEnd + def rowSeparator(layout: Layout): Parser[Any] = + layout.map { n => ("-" * n) + "+" }.foldLeft("+")(_ + 
_) ~ lineEnd // either read another rowLine or quit cell parsers and collect results - def rowLine[T](layout: Layout, cells: List[Parser[T]]) = - ( ('|' ~> distr(delegateCells(layout, cells)) <~ lineEnd) >> { cs => rowLine(layout, cs) } - | collect(cells) - ) + def rowLine[T](layout: Layout, cells: List[Parser[T]]): Parser[List[T]] = + (('|' ~> distr(delegateCells(layout, cells)) <~ lineEnd) >> { cs => + rowLine(layout, cs) + } + | collect(cells)) // first feed n tokens to every cell parser, then feed newline and read a pipe - def delegateCells[T](layout: Layout, cells: List[Parser[T]]) = - layout.zip(cells).map { - case (n, p) => delegateN(n, p).map(_ << '\n') <~ '|' + def delegateCells[T]( + layout: Layout, + cells: List[Parser[T]] + ): List[Parser[Parser[T]]] = + layout.zip(cells).map { case (n, p) => + delegateN(n, p).map(_ << '\n') <~ '|' } // We can use the table combinator recursively to parse nested tables. @@ -172,8 +177,8 @@ trait Section4 extends Section3 { // |~~~ | // |aaaa| // +----+ - lazy val combined = inText(asAndTables, spaced(parens)) - lazy val asAndTables = as | table(combined) + lazy val combined: NT[Any] = inText(asAndTables, spaced(parens)) + lazy val asAndTables: NT[Any] = as | table(combined) // Again, some more examples of words that are recognized by `combined` can // be found in `DerivativeParsersTests.scala`. 
From b7db52ae0c4e377ec413d0ccc8587eea1ae3a589 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 14:02:41 +0100 Subject: [PATCH 22/95] Make Section4 work --- artifact/src/main/scala/examples/paper/Section4.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Section4.scala b/artifact/src/main/scala/examples/paper/Section4.scala index 87fa9be..e4b9bef 100644 --- a/artifact/src/main/scala/examples/paper/Section4.scala +++ b/artifact/src/main/scala/examples/paper/Section4.scala @@ -142,7 +142,9 @@ trait Section4 { self: Section3 & RichParsers => // given a layout, creates a parser for row separators def rowSeparator(layout: Layout): Parser[Any] = - layout.map { n => ("-" * n) + "+" }.foldLeft("+")(_ + _) ~ lineEnd + layout + .map { n => List.fill(n)('-').mkString + "+" } + .foldLeft("+")(_ + _) ~ lineEnd // either read another rowLine or quit cell parsers and collect results def rowLine[T](layout: Layout, cells: List[Parser[T]]): Parser[List[T]] = @@ -157,7 +159,7 @@ trait Section4 { self: Section3 & RichParsers => cells: List[Parser[T]] ): List[Parser[Parser[T]]] = layout.zip(cells).map { case (n, p) => - delegateN(n, p).map(_ << '\n') <~ '|' + map(delegateN(n, p), (_ << '\n')) <~ '|' } // We can use the table combinator recursively to parse nested tables. 
From d990c3fe3db9dd480b43d57f5f234024e42179ab Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 14:25:40 +0100 Subject: [PATCH 23/95] Migrate PythonParsers --- .../main/scala/examples/PythonParsers.scala | 493 +++++++++--------- 1 file changed, 260 insertions(+), 233 deletions(-) diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index 46762b2..fed9024 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -2,104 +2,105 @@ package fcd import scala.language.implicitConversions -/** - * Additional Case Study: Python Parser - * ==================================== - * This file contains an additional python parser implementation to support - * the claims in our paper: - * - * Brachthäuser, Rendel, Ostermann. - * Parsing with First-Class Derivatives - * Submitted to OOPSLA 2016. - * - * The parser is implemented on top of a very simple lexer. The lexer is - * completely indentation unaware and for instance should lex: - * - * while␣(True):\n - * ␣␣a␣*=␣a\n - * - * as - * - * KW("while"), WS, Punct("("), KW("True"), Punct(")"), Punct(":"), NL, - * WS, WS, Id("a"), Punct("*="), WS, Id("a"), NL - * - * Multiline strings should be lexed as instance of Str, with `value` including - * all of the spaces and newlines that appear in the multiline string. - * - * Python programs are then parsed with the parser `preprocess(file_input)`, - * where `preprocess` in turn is a parser combinator composed of the following - * three separately defined "stream preprocessing" parser combinators: - * - * 1. stripComments Removes all comment lexemes from the stream - * 2. explicitJoin Implements explicit line joining by dropping all - * NL tokens that are preceded by a Punct("\\") - * 3. implicitJoin Implements implicit line joining by dropping all - * NL tokens that occur inside pairs of parenthesis. 
- * - * Interestingly, `implicitJoin` itself is defined from components in the - * following way: - * - * 1. The Dyck language of balanced parenthesis is defined (`dyck`) - * 2. The input to `dyck` is transformed to filter out all non-parenthesis - * tokens (`extDyck`) - * 3. implicitJoin now delegates *all* tokens while it awaits an opening - * parenthesis. After seeing such opening parenthesis it filters out - * NL when delegating until `extDyck` is successful and thus all pairs of - * parens are closed. - * - * Indentation senstivity itself is handled in nonterminal `suite` the way it is - * described in the paper. - * - * The python grammar itself is a straightforward translation of: - * https://docs.python.org/3.5/reference/grammar.html - */ -trait PythonLexemes { self: Parsers with DerivedOps with Syntax => - - trait Lexeme - case class Str(value: String) extends Lexeme - case class Num(value: String) extends Lexeme - case class KW(name: String) extends Lexeme - case class Id(name: String) extends Lexeme - // Punctuation - case class Punct(sym: String) extends Lexeme - case object NL extends Lexeme - case object WS extends Lexeme // whitespace - case class Comment(content: String) extends Lexeme - case object EOS extends Lexeme +/** Additional Case Study: Python Parser \==================================== + * This file contains an additional python parser implementation to support the + * claims in our paper: + * + * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives + * Submitted to OOPSLA 2016. + * + * The parser is implemented on top of a very simple lexer. 
The lexer is + * completely indentation unaware and for instance should lex: + * + * while␣(True):\n ␣␣a␣*=␣a\n + * + * as + * + * KW("while"), WS, Punct("("), KW("True"), Punct(")"), Punct(":"), NL, WS, WS, + * Id("a"), Punct("*="), WS, Id("a"), NL + * + * Multiline strings should be lexed as instance of Str, with `value` including + * all of the spaces and newlines that appear in the multiline string. + * + * Python programs are then parsed with the parser `preprocess(file_input)`, + * where `preprocess` in turn is a parser combinator composed of the following + * three separately defined "stream preprocessing" parser combinators: + * + * 1. stripComments Removes all comment lexemes from the stream 2. + * explicitJoin Implements explicit line joining by dropping all NL tokens + * that are preceded by a Punct("\\") 3. implicitJoin Implements implicit + * line joining by dropping all NL tokens that occur inside pairs of + * parenthesis. + * + * Interestingly, `implicitJoin` itself is defined from components in the + * following way: + * + * 1. The Dyck language of balanced parenthesis is defined (`dyck`) 2. The + * input to `dyck` is transformed to filter out all non-parenthesis tokens + * (`extDyck`) 3. implicitJoin now delegates *all* tokens while it awaits + * an opening parenthesis. After seeing such opening parenthesis it + * filters out NL when delegating until `extDyck` is successful and thus + * all pairs of parens are closed. + * + * Indentation senstivity itself is handled in nonterminal `suite` the way it + * is described in the paper. 
+ * + * The python grammar itself is a straightforward translation of: + * https://docs.python.org/3.5/reference/grammar.html + */ +trait PythonLexemes { self: Parsers & DerivedOps & Syntax => + enum Lexeme { + case Str(value: String) + case Num(value: String) + case KW(name: String) + case Id(name: String) + case Punct(sym: String) + case Comment(content: String) + case NL + case WS + case EOS + } + + import Lexeme._ type Elem = Lexeme - implicit def lex(lex: Elem): Parser[Elem] = accept(lex) - implicit def kw(kw: Symbol): Parser[Elem] = accept(KW(kw.name)) - implicit def punct(p: String): Parser[Elem] = accept(Punct(p)) + given lex: Conversion[Elem, Parser[Elem]] = accept(_) + given kw: Conversion[Symbol, Parser[Elem]] = { kw => accept(KW(kw.name)) } + given punct: Conversion[String, Parser[Elem]] = { p => accept(Punct(p)) } - lazy val string: Parser[Str] = any flatMap { + lazy val string: Parser[Str] = any >> { case s: Str => succeed(s) - case _ => fail + case _ => fail } - lazy val number: Parser[Num] = any flatMap { + lazy val number: Parser[Num] = any >> { case n: Num => succeed(n) - case _ => fail + case _ => fail } - lazy val id: Parser[Id] = any flatMap { + lazy val id: Parser[Id] = any >> { case id: Id => succeed(id) - case _ => fail + case _ => fail } - lazy val comment: Parser[Comment] = any flatMap { + lazy val comment: Parser[Comment] = any >> { case c: Comment => succeed(c) - case _ => fail + case _ => fail } def isComment: Lexeme => Boolean = _.isInstanceOf[Comment] def isNL: Lexeme => Boolean = _ == NL } -trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Syntax with DerivedOps => +trait PythonParsers extends PythonLexemes, PythonAst { + self: Parsers & Syntax & DerivedOps => // general toolbox def no(els: Elem*): Parser[Elem] = acceptIf(el => !(els contains el)) - def no(els: Iterable[Elem]): Parser[Elem] = no(els.toSeq : _*) - def switch[T](p: Elem => Boolean, thn: Elem => Parser[T], els: Elem => Parser[T]): Parser[T] = 
+ def no(els: Iterable[Elem]): Parser[Elem] = no(els.toSeq*) + def switch[T]( + p: Elem => Boolean, + thn: Elem => Parser[T], + els: Elem => Parser[T] + ): Parser[T] = eat { c => if (p(c)) thn(c) else els(c) } // Simply preprocesses the input stream and strips out comments @@ -109,10 +110,13 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy stripped } + import Lexeme._ + val pairs = Map[Elem, Elem]( Punct("(") -> Punct(")"), Punct("[") -> Punct("]"), - Punct("{") -> Punct("}")) + Punct("{") -> Punct("}") + ) val (opening, closing) = (pairs.keys, pairs.values) @@ -132,9 +136,8 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy // over more than one physical line without using backslashes. // [...] Implicitly continued lines can carry comments. def implicitJoin[T]: Parser[T] => Parser[T] = repeat[T] { p => - ( extDyck &> filter(_ != NL)(delegate(p)) - | noneOf(opening ++ closing) &> delegate(p) - ) + (extDyck &> filter(_ != NL)(delegate(p)) + | noneOf(opening ++ closing) &> delegate(p)) } // Strips out newlines if they are preceeded by a backslash punctuation @@ -149,19 +152,19 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy // and the following end-of-line character. 
def explicitJoin[T]: Parser[T] => Parser[T] = p => { lazy val join: NT[T] = - done(p) | switch(_ == Punct("\\"), - bs => switch(_ == NL, - _ => join, - c => explicitJoin(p << bs << c)), - c => explicitJoin(p << c)) + done(p) | switch( + _ == Punct("\\"), + bs => switch(_ == NL, _ => join, c => explicitJoin(p << bs << c)), + c => explicitJoin(p << c) + ) join } - val line = many(no(NL)) ~ NL + val line = many(no(NL)) ~ NL val emptyLine = many(WS) ~ NL - def indentBy[T](indentation: Parser[Any]): Parser[T] => Parser[T] = repeat[T] { p => + def indentBy[T](indentation: Parser[Any]) = repeat[T] { p => // here we use (locally) biased choice to prevent ambiguities - biasedAlt ( + biasedAlt( // pass empty lines as NL to p emptyLine ^^ { _ => p << NL }, // first consume `n` spaces, then delegate to p @@ -169,32 +172,34 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy ) } - def indented[T](p: Parser[T]): Parser[T] = - consumed(some(WS)) >> { i => not(prefix(WS)) &> indentBy(acceptSeq(i))(p) <<< i } - - def preprocess[T] = stripComments[T] compose explicitJoin[T] compose implicitJoin[T] + def indented[T](p: Parser[T]) = + consumed(some(WS)) >> { i => + not(prefix(WS)) &> indentBy(acceptSeq(i))(p) <<< i + } + def preprocess[T] = + stripComments[T] compose explicitJoin[T] compose implicitJoin[T] - def binOp[T, S](p: Parser[T], op: Parser[S], f: (T, S, T) => T): Parser[T] = { - lazy val ps: Parser[T] = nonterminal((p ␣ op ␣ ps) ^^ { case l ~ op ~ r => f(l, op, r) } | p) + def binOp[T, S](p: Parser[T], op: Parser[S], f: (T, S, T) => T) = { + lazy val ps: Parser[T] = nonterminal((p ␣ op ␣ ps) ^^ { case l ~ op ~ r => + f(l, op, r) + } | p) ps } // --- Space Helpers --- - lazy val whitespace = WS - lazy val linebreak = NL - lazy val space = whitespace | linebreak + val whitespace = WS + val linebreak = NL + val space = alt(whitespace, linebreak) lazy val spaces = many(whitespace) - implicit class SpaceHelpers[T, P <% Parser[T]](p: P) { - def 
␣[U](q: => Parser[U]): Parser[T ~ U] = - p ~ (spaces ~> q) - def <␣[U](q: => Parser[U]): Parser[T] = - p <~ (spaces ~ q) - def ␣>[U](q: => Parser[U]): Parser[U] = - p ~> (spaces ~> q) + extension [T](p: Parser[T]) { + def ␣[U](q: => Parser[U]): Parser[T ~ U] = p ~ (spaces ~> q) + def <␣[U](q: => Parser[U]): Parser[T] = p <~ (spaces ~ q) + def ␣>[U](q: => Parser[U]): Parser[U] = p ~> (spaces ~> q) } + def listOf[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = someSep(p, spaces ~ sep ~ spaces) <~ opt(spaces ~ sep) @@ -205,31 +210,33 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy // --- Python Grammar --- // see: https://docs.python.org/3.5/reference/grammar.html - lazy val file_input: NT[Program] = emptyLine.* ~> many(stmt <~ emptyLine.*) <~ EOS ^^ Program + lazy val file_input: NT[Program] = + emptyLine.* ~> many(stmt <~ emptyLine.*) <~ EOS ^^ this.Program.apply - lazy val decorator: Parser[Decorator] = - "@" ~> dotted_name ~ ("(" ~> optArgs <~ ")" | succeed(Nil)) <~ NL ^^ Decorator + lazy val decorator: Parser[Decorator] = + "@" ~> dotted_name ~ ("(" ~> optArgs <~ ")" | succeed( + Nil + )) <~ NL ^^ this.Decorator.apply lazy val decorators: Parser[List[Decorator]] = some(decorator) - lazy val decorated: Parser[Decorated] = - decorators ~ (classdef | funcdef | async_funcdef) ^^ Decorated - + lazy val decorated: Parser[Decorated] = + decorators ~ (classdef | funcdef | async_funcdef) ^^ this.Decorated.apply // --- Functions --- - lazy val async_funcdef: Parser[FuncDef] = 'async ␣> funcdef + lazy val async_funcdef: Parser[FuncDef] = "async" ␣> funcdef lazy val funcdef: Parser[FuncDef] = - 'def ␣> (id ␣ parameters ~ spacedOpt("->" ␣> test)) ␣ (":" ␣> suite) ^^ FuncDef + "def" ␣> (id ␣ parameters ~ spacedOpt( + "->" ␣> test + )) ␣ (":" ␣> suite) ^^ this.FuncDef.apply lazy val parameters: Parser[Any] = "(" ~> spacedOpt(typedargslist) <␣ ")" - // ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef] def fpdef(p: 
Parser[Any]): Parser[Any] = - ( "*" ~ spacedOpt(p) - ~ spacedOpt("," ␣> testdefs(p)) - ~ spacedOpt("," ␣> ("**" ␣ p)) - | "**" ␣ p - ) - def testdefs(p: Parser[Any]): Parser[Any] = someSep(p ~ spacedOpt("=" ␣> test), ",") + ("*" ~ spacedOpt(p) + ~ spacedOpt("," ␣> testdefs(p)) + ~ spacedOpt("," ␣> ("**" ␣ p)) + | "**" ␣ p) + def testdefs(p: Parser[Any]) = someSep(p ~ spacedOpt("=" ␣> test), ",") lazy val typedargslist: Parser[Any] = testdefs(tfpdef) ~ spacedOpt("," ␣> fpdef(tfpdef)) | fpdef(tfpdef) @@ -241,165 +248,185 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy lazy val vfpdef: Parser[Any] = id // --- Statements --- - lazy val stmt: NT[Any] = simple_stmt | compound_stmt - lazy val simple_stmt: Parser[Any] = listOf(small_stmt, ";") <␣ NL ^^ Simple - lazy val small_stmt: Parser[Any] = - ( expr_stmt | del_stmt - | pass_stmt | flow_stmt | import_stmt - | global_stmt | nonlocal_stmt | assert_stmt - ) + lazy val stmt: NT[Any] = simple_stmt | compound_stmt + lazy val simple_stmt: Parser[Any] = + listOf(small_stmt, ";") <␣ NL ^^ this.Simple.apply + lazy val small_stmt: Parser[Any] = + (expr_stmt | del_stmt + | pass_stmt | flow_stmt | import_stmt + | global_stmt | nonlocal_stmt | assert_stmt) lazy val expr_stmt: Parser[Any] = - ( testlist_star_expr - | testlist_star_expr ␣ augassign ␣ ( yield_expr | testlist ) - | testlist_star_expr ~ some(spaces ~> "=" ␣> ( yield_expr | testlist_star_expr )) - ) ^^ ExprStmt + (testlist_star_expr + | testlist_star_expr ␣ augassign ␣ (yield_expr | testlist) + | testlist_star_expr ~ some( + spaces ~> "=" ␣> (yield_expr | testlist_star_expr) + )) ^^ this.ExprStmt.apply lazy val testlist_star_expr: Parser[Any] = listOf(test | star_expr, ",") - lazy val augassign: Parser[Any] = ( "+=" | "-=" | "*=" | "@=" | "/=" | "%=" - | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" - | "//=" - ) - lazy val del_stmt: Parser[Stmt] = 'del ␣> exprlist ^^ Del - lazy val pass_stmt: Parser[Stmt] = 'pass ^^^ Pass - lazy val 
flow_stmt: Parser[Stmt] = break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt - lazy val break_stmt: Parser[Stmt] = 'break ^^^ Break - lazy val continue_stmt: Parser[Stmt] = 'continue ^^^ Continue - lazy val return_stmt: Parser[Stmt] = 'return ~> spacedOpt(testlist) ^^ Return - lazy val yield_stmt: Parser[Stmt] = yield_expr ^^ ExprStmt - lazy val raise_stmt: Parser[Stmt] = 'raise ~> spacedOpt(test ~ spacedOpt('from ␣ test)) ^^ Raise - lazy val import_stmt: Parser[Any] = import_name | import_from - lazy val import_name: Parser[Any] = 'import ␣> dotted_as_names ^^ { n => Import(n) } + lazy val augassign: Parser[Any] = ("+=" | "-=" | "*=" | "@=" | "/=" | "%=" + | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" + | "//=") + lazy val del_stmt: Parser[Stmt] = "del" ␣> exprlist ^^ this.Del.apply + lazy val pass_stmt: Parser[Stmt] = "pass" ^^^ Pass + lazy val flow_stmt: Parser[Stmt] = + break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt + lazy val break_stmt: Parser[Stmt] = "break" ^^^ Break + lazy val continue_stmt: Parser[Stmt] = "continue" ^^^ Continue + lazy val return_stmt: Parser[Stmt] = + "return" ~> spacedOpt(testlist) ^^ this.Return.apply + lazy val yield_stmt: Parser[Stmt] = yield_expr ^^ this.ExprStmt.apply + lazy val raise_stmt: Parser[Stmt] = + "raise" ~> spacedOpt(test ~ spacedOpt("from" ␣ test)) ^^ this.Raise.apply + lazy val import_stmt: Parser[Any] = import_name | import_from + lazy val import_name: Parser[Any] = "import" ␣> dotted_as_names ^^ { n => + Import(n) + } // # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS lazy val import_from: Parser[Any] = - ('from ~> (spacedMany("." | "...") ~ dotted_name | some("." | "...")) ␣ - 'import ␣> ("*" | "(" ␣> import_as_names <␣ ")" | import_as_names)) ^^ { - case (from, names) => Import(names, Some(from)) + ("from" ~> (spacedMany("." | "...") ~ dotted_name | some( + "." | "..." 
+ )) ␣ + "import" + ␣> ("*" | "(" ␣> import_as_names <␣ ")" | import_as_names)) ^^ { + case (from, names) => Import(names, Some(from)) } - lazy val import_as_name: Parser[Any] = id ~ spacedOpt('as ␣ id) - lazy val dotted_as_name: Parser[Any] = dotted_name ~ spacedOpt('as ␣ id) + lazy val import_as_name: Parser[Any] = id ~ spacedOpt("as" ␣ id) + lazy val dotted_as_name: Parser[Any] = + dotted_name ~ spacedOpt("as" ␣ id) lazy val import_as_names: Parser[Any] = listOf(test | import_as_name, ",") lazy val dotted_as_names: Parser[Any] = someSep(dotted_as_name, ",") - lazy val dotted_name: Parser[Any] = someSep(id, ".") - - lazy val global_stmt: Parser[Any] = 'global ␣> someSep(id, ",") ^^ Global - lazy val nonlocal_stmt: Parser[Any] = 'nonlocal ␣> someSep(id, ",") ^^ Nonlocal - lazy val assert_stmt: Parser[Any] = 'assert ␣> someSep(test, ",") ^^ Assert + lazy val dotted_name: Parser[Any] = someSep(id, ".") + lazy val global_stmt: Parser[Any] = + "global" ␣> someSep(id, ",") ^^ this.Global.apply + lazy val nonlocal_stmt: Parser[Any] = + "nonlocal" ␣> someSep(id, ",") ^^ this.Nonlocal.apply + lazy val assert_stmt: Parser[Any] = + "assert" ␣> someSep(test, ",") ^^ this.Assert.apply lazy val compound_stmt: Parser[Any] = if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt - lazy val async_stmt: Parser[Any] = 'async ␣> (funcdef | with_stmt | for_stmt) - lazy val if_stmt: Parser[Any] = - 'if ␣> test ␣ (":" ␣> suite ~ - spacedMany('elif ␣> test ␣ (":" ␣> suite)) ~ - spacedOpt(('else ␣ ":") ␣> suite)) + lazy val async_stmt: Parser[Any] = + "async" ␣> (funcdef | with_stmt | for_stmt) + lazy val if_stmt: Parser[Any] = + "if" ␣> test ␣ (":" ␣> suite ~ + spacedMany("elif" ␣> test ␣ (":" ␣> suite)) ~ + spacedOpt(("else" ␣ ":") ␣> suite)) lazy val while_stmt: Parser[Any] = - 'while ␣> test ␣ (":" ␣> suite ~ spacedOpt(('else ␣ ":") ␣> suite)) + "while" ␣> test ␣ (":" ␣> suite ~ spacedOpt( + ("else" ␣ ":") ␣> suite + )) lazy val for_stmt: 
Parser[Any] = - 'for ␣> exprlist ␣ ('in ␣> testlist ␣ (":" ␣> suite ~ spacedOpt(('else ␣> ":") ␣> suite))) ^^ { - case (exprs ~ (tests ~ (body ~ default))) => For(exprs, tests, body, default) + "for" ␣> exprlist ␣ ("in" ␣> testlist ␣ (":" ␣> suite ~ spacedOpt( + ("else" ␣> ":") ␣> suite + ))) ^^ { case (exprs ~ (tests ~ (body ~ default))) => + For(exprs, tests, body, default) } lazy val try_stmt: Parser[Any] = - ('try ␣ ":") ␣> suite ␣ (some(except_clause ␣ (":" ␣> suite)) ~ - spacedOpt(('else ␣ ":") ␣> suite) ~ - spacedOpt(('finally ␣ ":") ␣> suite) - | (('finally ␣ ":") ␣> suite) - ) - lazy val with_stmt: Parser[Any] = 'with ␣> someSep(with_item, ",") ␣ (":" ␣> suite) - lazy val with_item: Parser[Any] = test ~ spacedOpt('as ␣> expr) + ("try" ␣ ":") ␣> suite ␣ (some(except_clause ␣ (":" ␣> suite)) ~ + spacedOpt(("else" ␣ ":") ␣> suite) ~ + spacedOpt(("finally" ␣ ":") ␣> suite) + | (("finally" ␣ ":") ␣> suite)) + lazy val with_stmt: Parser[Any] = + "with" ␣> someSep(with_item, ",") ␣ (":" ␣> suite) + lazy val with_item: Parser[Any] = test ~ spacedOpt("as" ␣> expr) // # NB compile.c makes sure that the default except clause is last - lazy val except_clause: Parser[Any] = 'except ~> spacedOpt(test ␣ opt('as ␣> id)) - + lazy val except_clause: Parser[Any] = + "except" ~> spacedOpt(test ␣ opt("as" ␣> id)) // INDENTATION // changed to also allow empty lines - lazy val suite: Parser[Any] = simple_stmt | NL ~> indented(some(many(emptyLine) ~> stmt)) + lazy val suite: Parser[Any] = + simple_stmt | NL ~> indented(some(many(emptyLine) ~> stmt)) // --- Expressions --- - lazy val test: NT[Any] = ( or_test ~ spacedOpt('if ␣> or_test ␣ ('else ␣> test)) - | lambdef - ) - lazy val test_nocond: NT[Any] = or_test | lambdef_nocond - lazy val lambdef: NT[Any] = 'lambda ~> spacedOpt(varargslist) ␣ (":" ␣> test) - lazy val lambdef_nocond: NT[Any] = 'lambda ~> spacedOpt(varargslist) ␣ (":" ␣> test_nocond) - lazy val or_test: NT[Any] = someSep(and_test, 'or) - lazy val and_test: NT[Any] = 
someSep(not_test, 'and) - lazy val not_test: NT[Any] = 'not ␣> not_test | comparison - lazy val comparison: NT[Any] = someSep(expr, comp_op) + lazy val test: NT[Any] = + (or_test ~ spacedOpt("if" ␣> or_test ␣ ("else" ␣> test)) + | lambdef) + lazy val test_nocond: NT[Any] = or_test | lambdef_nocond + lazy val lambdef: NT[Any] = "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test) + lazy val lambdef_nocond: NT[Any] = + "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test_nocond) + lazy val or_test: NT[Any] = someSep(and_test, "or") + lazy val and_test: NT[Any] = someSep(not_test, "and") + lazy val not_test: NT[Any] = "not" ␣> not_test | comparison + lazy val comparison: NT[Any] = someSep(expr, comp_op) // # <> isn't actually a valid comparison operator in Python. It's here for the // # sake of a __future__ import described in PEP 401 (which really works :-) - lazy val comp_op: Parser[Any] = ( "<" | ">" | "==" | ">=" | "<=" | "<>" | "!=" - |'in | 'not ␣ 'in | 'is | 'is ␣ 'not - ) - - lazy val expr: NT[Any] = binOp(xor_expr, "|", BinOp) - lazy val xor_expr: NT[Any] = binOp(and_expr, "^", BinOp) - lazy val and_expr: NT[Any] = binOp(shift_expr, "&", BinOp) - lazy val shift_expr: NT[Any] = binOp(arith_expr, "<<" | ">>", BinOp) - lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", BinOp) - lazy val term: NT[Any] = binOp(factor, "*" | "@" | "/" | "%" | "//", BinOp) - lazy val factor: NT[Any] = ("+" | "-" | "~") ␣ factor | power - lazy val power: NT[Any] = atom_expr | atom_expr ␣ "**" ␣ factor - - lazy val atom_expr: Parser[Any] = opt('await ~ spaces) ~> atom ~ spacedMany(trailer) - lazy val atom: Parser[Any] = ( "(" ␣> ( yield_expr | testlist_comp) <␣ ")" - | "[" ~> spacedOpt(testlist_comp) <␣ "]" - | "{" ~> spacedOpt(dictorsetmaker) <␣ "}" - | id | number | some(string) | "..." 
- | 'None | 'True | 'False - ) - + lazy val comp_op: Parser[Any] = ("<" | ">" | "==" | ">=" | "<=" | "<>" | "!=" + | "in" | "not" ␣ "in" | "is" | "is" ␣ "not") + + lazy val expr: NT[Any] = binOp(xor_expr, "|", this.BinOp.apply) + lazy val xor_expr: NT[Any] = binOp(and_expr, "^", this.BinOp.apply) + lazy val and_expr: NT[Any] = binOp(shift_expr, "&", this.BinOp.apply) + lazy val shift_expr: NT[Any] = + binOp(arith_expr, "<<" | ">>", this.BinOp.apply) + lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", this.BinOp.apply) + lazy val term: NT[Any] = + binOp(factor, "*" | "@" | "/" | "%" | "//", this.BinOp.apply) + lazy val factor: NT[Any] = ("+" | "-" | "~") ␣ factor | power + lazy val power: NT[Any] = atom_expr | atom_expr ␣ "**" ␣ factor + + lazy val atom_expr: Parser[Any] = + opt("await" ~ spaces) ~> atom ~ spacedMany(trailer) + lazy val atom: Parser[Any] = ("(" ␣> (yield_expr | testlist_comp) <␣ ")" + | "[" ~> spacedOpt(testlist_comp) <␣ "]" + | "{" ~> spacedOpt(dictorsetmaker) <␣ "}" + | id | number | some(string) | "..." + | "None" | "True" | "False") lazy val star_expr: Parser[Any] = "*" ␣ expr - lazy val yield_expr: Parser[Any] = 'yield ~ spacedOpt('from ␣ test | testlist) + lazy val yield_expr: Parser[Any] = + "yield" ~ spacedOpt("from" ␣ test | testlist) - lazy val testlist_comp: Parser[Any] = ( listOf(test | star_expr, ",") - | (test | star_expr) ␣ comp_for - ) + lazy val testlist_comp: Parser[Any] = (listOf(test | star_expr, ",") + | (test | star_expr) ␣ comp_for) - lazy val trailer: Parser[Any] = ( "(" ␣> optArgs <␣ ")" - | "[" ␣> subscriptlist <␣ "]" - | "." ␣> id - ) + lazy val trailer: Parser[Any] = ("(" ␣> optArgs <␣ ")" + | "[" ␣> subscriptlist <␣ "]" + | "." 
␣> id) lazy val subscriptlist: Parser[Any] = listOf(subscript, ",") - lazy val subscript: Parser[Any] = test | spacedOpt(test) ~ ":" ~ spacedOpt(test) ~ spacedOpt(":" ~> spacedOpt(test)) + lazy val subscript: Parser[Any] = + test | spacedOpt(test) ~ ":" ~ spacedOpt(test) ~ spacedOpt( + ":" ~> spacedOpt(test) + ) lazy val exprlist: Parser[List[Any]] = listOf(expr | star_expr, ",") lazy val testlist: Parser[Any] = listOf(test, ",") lazy val dictorsetmaker: Parser[Any] = - ( ( listOf(test ␣ (":" ␣> test) | "**" ␣> expr, ",") - | (test ␣ (":" ␣> test) | "**" ␣> expr) ␣ comp_for - ) - | ( listOf(test | star_expr, ",") - | (test | star_expr) ␣ comp_for - ) - ) - + ((listOf(test ␣ (":" ␣> test) | "**" ␣> expr, ",") + | (test ␣ (":" ␣> test) | "**" ␣> expr) ␣ comp_for) + | (listOf(test | star_expr, ",") + | (test | star_expr) ␣ comp_for)) lazy val classdef: Parser[Any] = - 'class ␣> (id ~ spacedOpt("(" ␣> optArgs <␣ ")" )) ␣ (":" ␣> suite) + "class" ␣> (id ~ spacedOpt("(" ␣> optArgs <␣ ")")) ␣ (":" ␣> suite) lazy val arglist: Parser[List[Any]] = listOf(argument, ",") lazy val optArgs: Parser[List[Any]] = arglist | succeed(Nil) lazy val argument: Parser[Any] = - ( test ~ spacedOpt(comp_for) - | test ␣ "=" ␣ test - | "**" ␣ test - | "*" ␣ test - ) + (test ~ spacedOpt(comp_for) + | test ␣ "=" ␣ test + | "**" ␣ test + | "*" ␣ test) - lazy val comp_iter: NT[Any] = comp_for | comp_if - lazy val comp_for: NT[Any] = 'for ␣> exprlist ␣ ('in ␣> or_test ~ spacedOpt(comp_iter)) - lazy val comp_if: Parser[Any] = 'if ␣> test_nocond ~ spacedOpt(comp_iter) + lazy val comp_iter: NT[Any] = comp_for | comp_if + lazy val comp_for = + "for" ␣> exprlist ␣ ("in" ␣> or_test ~ spacedOpt(comp_iter)) + lazy val comp_if = "if" ␣> test_nocond ~ spacedOpt(comp_iter) } -object PythonParsers extends PythonParsers with DerivedOps with DerivativeParsers with Syntax { - override def accept(t: Elem): Parser[Elem] = acceptIf(_ == t) +object PythonParsers + extends PythonParsers, + DerivedOps, + 
DerivativeParsers, + Syntax { + override def accept(t: Elem) = acceptIf(_ == t) } From 6fecca5d34419fcbc43cf19dacd5cac13b366602 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 14:40:02 +0100 Subject: [PATCH 24/95] Migrate Paper.scala --- .../src/main/scala/examples/paper/Paper.scala | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Paper.scala b/artifact/src/main/scala/examples/paper/Paper.scala index 6f4d40a..c8cca60 100644 --- a/artifact/src/main/scala/examples/paper/Paper.scala +++ b/artifact/src/main/scala/examples/paper/Paper.scala @@ -1,16 +1,13 @@ package fcd -/** - * This object instantiates the examples from section 3, 4 and 7 and makes them - * available in the REPL via: - * - * > import paper._ - */ -object paper extends Section3 with Section4 with Section7 { - - // Use the derivative based parsers for examples in the paper - type Parsers = DerivativeParsers.type - def _parsers: DerivativeParsers.type = DerivativeParsers - override lazy val parsers: DerivativeParsers.type = _parsers - -} +/** This object instantiates the examples from section 3, 4 and 7 and makes them + * available in the REPL via: + * + * > import paper._ + */ +object paper + extends RichParsers + with DerivativeParsers + with Section3 + with Section4 + with Section7 From 3450c4523e598366fdf98f66461b4e03aaf975d5 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 14:53:19 +0100 Subject: [PATCH 25/95] Remove deprecated use of mutable ListMap --- artifact/src/main/scala/library/DerivativeParsers.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index ce48df5..d4165de 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -277,7 +277,7 @@ trait DerivativeParsers 
extends Parsers { self: DerivedOps => override protected def updateAttributes() = results.update() } - private val cache: mutable.ListMap[Elem, Parser[R]] = mutable.ListMap.empty + private val cache: mutable.HashMap[Elem, Parser[R]] = mutable.HashMap.empty // Wrapping in `nonterminal` is cecessary for left-recursive // grammars and for grammars like "DerivativeParsers / preprocessor" // that recursively derive. Optimizing the nonterminal node away causes From a5df7060fcc167cd8cf464fa473c1875bf8adbde Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 15:07:37 +0100 Subject: [PATCH 26/95] Start migrating LeftrecTests --- artifact/src/test/scala/LeftrecTests.scala | 295 ++++++++++----------- 1 file changed, 144 insertions(+), 151 deletions(-) diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index 6314a55..6fac774 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -2,7 +2,7 @@ package fcd package test import org.scalatest._ -trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => +trait LeftrecTests extends CustomMatchers { self: FunSpec & Matchers => import parsers._ @@ -10,30 +10,30 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => describe("p = p | .") { lazy val p: NT[Any] = p | any - p shouldParse "a" + p `shouldParse` "a" } describe("p = p ~ . | .") { lazy val p: NT[_] = p ~ any | any - p shouldParse "a" + p `shouldParse` "a" } describe("p = . | p ~ .") { lazy val p: NT[_] = any | p ~ any - p shouldParse "a" + p `shouldParse` "a" } describe("p = (. | .) >> { (. | p) ^^ id }") { lazy val p: NT[Any] = (p | any) flatMap { _ => (any | p) map identity } - p.shouldParse("aa") - p.shouldParse("aaaaa") + p `shouldParse` "aa" + p `shouldParse` "aaaaa" } describe("p = (. 
| p) >> { a }") { - lazy val p: NT[Any] = (any | p) flatMap { _ => 'a' } - p.shouldParse("aa") - p.shouldParse("aaa") - p.shouldParse("aaaaaa") + lazy val p: NT[Any] = (any | p) flatMap { _ => 'a' } + p `shouldParse` "aa" + p `shouldParse` "aaa" + p `shouldParse` "aaaaaa" } } @@ -41,12 +41,12 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => describe("p = . ~ p") { lazy val p: NT[Any] = any ~ p - p shouldNotParse "a" + p `shouldNotParse` "a" } describe("p = p ~ .") { lazy val p: NT[Any] = p ~ any - p shouldNotParse "a" + p `shouldNotParse` "a" } } @@ -55,17 +55,17 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => describe("A = A ~ a | empty") { lazy val A: NT[_] = A ~ 'a' | succeed(42) - A shouldParse "" - A shouldParse "a" - A shouldParse "aa" + A `shouldParse` "" + A `shouldParse` "a" + A `shouldParse` "aa" } describe("A = empty | A ~ a ") { lazy val A: NT[_] = succeed(42) | A ~ 'a' - A shouldParse "" - A shouldParse "a" - A shouldParse "aa" + A `shouldParse` "" + A `shouldParse` "a" + A `shouldParse` "aa" } // Simple example of indirect leftrecursion from @@ -75,15 +75,15 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val A: NT[Any] = B ~ '-' ~ num | num lazy val B: NT[Any] = succeed(()) ~ A - // A shouldParse "1" - // A shouldParse "12" - // A shouldParse "12-32" - // A shouldParse "12-32-45" + // A `shouldParse` "1" + // A `shouldParse` "12" + // A `shouldParse` "12-32" + // A `shouldParse` "12-32-45" - B shouldParse "1" - B shouldParse "12" - B shouldParse "12-32" - B shouldParse "12-32-45" + B `shouldParse` "1" + B `shouldParse` "12" + B `shouldParse` "12-32" + B `shouldParse` "12-32-45" } describe("two levels indirect leftrecursion") { @@ -92,16 +92,16 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val B: NT[Any] = succeed(()) ~ C ~ '+' ~ num lazy val C: NT[Any] = succeed(()) ~ A - A shouldParse "1" - A shouldParse "12" - C 
shouldParse "2" - C shouldParse "22" - B shouldParse "12+32" - A shouldParse "12+32-42" - A shouldParse "12+12-32+45-44" - A shouldNotParse "" - A shouldNotParse "12+13+14" - A shouldNotParse "12+13+14-14-56" + A `shouldParse` "1" + A `shouldParse` "12" + C `shouldParse` "2" + C `shouldParse` "22" + B `shouldParse` "12+32" + A `shouldParse` "12+32-42" + A `shouldParse` "12+12-32+45-44" + A `shouldNotParse` "" + A `shouldNotParse` "12+13+14" + A `shouldNotParse` "12+13+14-14-56" } // From "Packrat parsers can support left-recursion" @@ -109,32 +109,32 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succeed(()) lazy val ones: NT[Any] = ones ~ '1' | '1' - start shouldParse "" - start shouldParse "1" - start shouldParse "12" - start shouldParse "11112" - start shouldParse "111111" - start shouldParse "1111112" + start `shouldParse` "" + start `shouldParse` "1" + start `shouldParse` "12" + start `shouldParse` "11112" + start `shouldParse` "111111" + start `shouldParse` "1111112" // Actually computing the result triggers a stackoverflow - // start shouldParse ("1" * 200) + // start `shouldParse` ("1" * 200) } describe("A = A ~ b | c") { lazy val A: NT[_] = A ~ 'b' | 'c' - A shouldParse "c" - A shouldParse "cb" - A shouldParse "cbb" - A shouldParse "cbbbbbbbbbbbbb" - A shouldNotParse "cbbbbbbbbbbbbbc" + A `shouldParse` "c" + A `shouldParse` "cb" + A `shouldParse` "cbb" + A `shouldParse` "cbbbbbbbbbbbbb" + A `shouldNotParse` "cbbbbbbbbbbbbbc" } describe("A = empty ~ A ~ b | empty") { lazy val A: NT[Any] = succeed("done") ~ A ~ 'b' | succeed("done") - A shouldParse "" - A shouldParse "b" - A shouldParse "bb" + A `shouldParse` "" + A `shouldParse` "b" + A `shouldParse` "bb" } // should parse at most as many 'd's as it parses 'b's. 
@@ -142,24 +142,24 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' lazy val B: NT[_] = charParser('d') | succeed("done") - A shouldParse "c" - A shouldParse "cb" - A shouldParse "dcb" - A shouldParse "cbb" - A shouldParse "ddcbb" - A shouldNotParse "dddcb" - A shouldParse "dddddcbbbbbbbbbbbbb" + A `shouldParse` "c" + A `shouldParse` "cb" + A `shouldParse` "dcb" + A `shouldParse` "cbb" + A `shouldParse` "ddcbb" + A `shouldNotParse` "dddcb" + A `shouldParse` "dddddcbbbbbbbbbbbbb" } describe("many(some(a))") { lazy val p = many(some('a')) - p shouldParse "" - p shouldParse("a") - p shouldParse("aaa") - p shouldParse("aaaaaaaaaa") - p shouldNotParse("b") - p shouldNotParse("aaab") + p `shouldParse` "" + p `shouldParse` "a" + p `shouldParse` "aaa" + p `shouldParse` "aaaaaaaaaa" + p `shouldNotParse` "b" + p `shouldNotParse` "aaab" } describe("del(ones)") { @@ -176,57 +176,52 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val rr: NT[String] = "1" ~> rr | "1" lazy val ll: NT[String] = ll <~ "1" | "1" - ll shouldParse ("1" * 40) - rr shouldParse ("1" * 41) + ll `shouldParse` ("1" * 40) + rr `shouldParse` ("1" * 41) } // Grammar from Tillmann Rendel's GLL library describe("very ambiguous") { lazy val A: NT[Char] = A ~> A | A ~> A ~> A | 'a' - A shouldNotParse "" - A shouldParse "a" - A shouldParse "aa" - A shouldParse "aaa" - A shouldParse ("a" * 100) + A `shouldNotParse` "" + A `shouldParse` "a" + A `shouldParse` "aa" + A `shouldParse` "aaa" + A `shouldParse` ("a" * 100) lazy val A2: Parser[Any] = some(some('a')) - A2 shouldParse ("a" * 1000) + A2 `shouldParse` ("a" * 1000) } describe("mixed mutual recursion") { lazy val expression: NT[Any] = - ( literal ~ '+' - | condExpr - ) + (literal ~ '+' + | condExpr) lazy val condExpr: NT[Any] = - ( condExpr ~ '?' - | eqExpr - ) + (condExpr ~ '?' 
+ | eqExpr) lazy val eqExpr: NT[Any] = - ( eqExpr ~ '*' - | literal - ) + (eqExpr ~ '*' + | literal) lazy val literal: NT[Any] = - ( many('a') - | '[' ~ arrayEl - ) + (many('a') + | '[' ~ arrayEl) lazy val arrayEl: NT[Any] = - ( expression - | succeed ("undefined") - ) - - expression shouldParse "" - expression shouldParse "a" - expression shouldParse "aaaaa" - expression shouldParse "[" - expression shouldParse "[a" - expression shouldParse "[aaaaa" - expression shouldParse "[[[[a" + (expression + | succeed("undefined")) + + expression `shouldParse` "" + expression `shouldParse` "a" + expression `shouldParse` "aaaaa" + expression `shouldParse` "[" + expression `shouldParse` "[a" + expression `shouldParse` "[aaaaa" + expression `shouldParse` "[[[[a" } describe("terms") { @@ -236,22 +231,20 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => case class Num(n: Int) extends Term lazy val term: NT[Term] = - ( term ~ "+" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | term ~ "-" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | fact - ) + (term ~ "+" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } + | term ~ "-" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } + | fact) lazy val fact: NT[Term] = - ( fact ~ "*" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | fact ~ "/" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | num - ) + (fact ~ "*" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } + | fact ~ "/" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } + | num) lazy val num: Parser[Num] = some(digit) ^^ (ns => Num(ns.mkString.toInt)) - num shouldParse "12345" - term shouldParse "12+31" - term shouldParse "12*8+31*45" + num `shouldParse` "12345" + term `shouldParse` "12+31" + term `shouldParse` "12*8+31*45" } // Grammar and testcases from Tillmann Rendel's GLL library. 
@@ -260,44 +253,44 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val S: NT[Any] = many(az | ' ' | ':' | ':' ~ P | '(' ~ S ~ ')') lazy val P: NT[Any] = charParser('(') | ')' - S shouldParse "" - S shouldNotParse ":((" - S shouldParse "i am sick today (:()" - S shouldParse "(:)" - S shouldParse "hacker cup: started :):)" - S shouldNotParse ")(" - S shouldNotParse "(((a)):()a(()(((:))a((:)():(((()()a)))(:a(::)(a)))(a)((a::():(a)():)a(a(a(:aa(:()(a(((((()))))))))" - S shouldParse "():)((()():(:())))::aa((((:(((:)))::a:(:))()a)):(a):::((()a((a(aa(():))(():())((::a)a)):)()" - S shouldParse ":(a):(:)aa)a(:()::():))a:aaa:)(:)((()()))a()(((()(:)))(:(aa:()())())a((a)a:(:()))(a((():)))" - S shouldParse ":a:)(:))()(()()a)aaa::a()()a:()()a::)((()(a(a))))try implementing sleep sort if you are stuck:(:)a)" - S shouldNotParse "(a())(::)(a))():(((a(()(:))a(:)))(:(:(:((():)(a))(:))(a)():(:(()aa):)(a((())a)a((a):)()(:(" - S shouldParse "(::a((a)a:()):):a)aa:)a(:::))(a())aa(a():))(:)a)((():)(:a:)a))):a(a)((:()(()())a))()a((()a))" - S shouldParse "()(((a)((aa)))a)a()(a)(aa:a)()(((:())aa)):()():():a:(a)(a())a:)::a:(aa:):()((a:)())aa)a(a:)" - S shouldParse ":)()((a)):(():a:a:)(:a)):)(()(:)::::(a(::a())(a):(:((((:(aa(()))a)(((((((((()a()a):)))((:)))))))))" - S shouldParse "a(a)::(((::)))())((a)(:((:a())):((::(:()(a)))i am trapped in a test case generator :(:(a(:::))" - S shouldParse "((:):::(()()):)(()()():())aaa)(:(a:)a:((())a(((a(:())aa():a:)((()):)(()(:)(a())a:()a)a():(" - S shouldNotParse "(:a))" - S shouldParse "::((:))(((:)(aaa)(a())()(a:)(:)(:)()):)a())aa)())(():a):()::):)a()())a()):):(:a)a):()(a)(a)" - S shouldParse "()a(:)(a:a):(())):a()():((a(:):a()()::)(a:)(()a((a:)(a)a(a:a:)(a)a(a:(()()()::a()a()(()a:())))" - S shouldParse "()((:a(a()()a))())((:a(:a)(()a((((a((a(()(:aa()()()))):)(():):)(:(a))():(())(():()):):(()a))" - S shouldParse "(((((((((())))))))))" - S shouldParse "(((((((((((((((((((())))))))))))))))))))" - S shouldParse 
"((((((((((:))))))))))" - S shouldParse "((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" - S shouldNotParse "(((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" - S shouldParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" - S shouldParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:))))))))))" - S shouldParse "((((((((((((:))))))))))((((((((((:())))))))))))" - S shouldNotParse "(((((((((()))))))))))" - S shouldNotParse "(((((((((((((((((((()))))))))))))))))))))" - S shouldParse "((((((((((:)))))))))))" - S shouldParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:)))))))))))))))))))))))))))))))))))))))))))))))))))" - S shouldParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))" - S shouldParse "((((((((((((:))))))))))((((((((((:)))))))))))))" - S shouldNotParse "((((((((((:))))))))))))" - S shouldNotParse "((((((((((((:))))))))))((((((((((:)))))))))))))))" - S shouldNotParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))))))))" - S shouldNotParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))))))" + S `shouldParse` "" + S `shouldNotParse` ":((" + S `shouldParse` "i am sick today (:()" + S `shouldParse` "(:)" + S `shouldParse` "hacker cup: started :):)" + S `shouldNotParse` ")(" + S `shouldNotParse` 
"(((a)):()a(()(((:))a((:)():(((()()a)))(:a(::)(a)))(a)((a::():(a)():)a(a(a(:aa(:()(a(((((()))))))))" + S `shouldParse` "():)((()():(:())))::aa((((:(((:)))::a:(:))()a)):(a):::((()a((a(aa(():))(():())((::a)a)):)()" + S `shouldParse` ":(a):(:)aa)a(:()::():))a:aaa:)(:)((()()))a()(((()(:)))(:(aa:()())())a((a)a:(:()))(a((():)))" + S `shouldParse` ":a:)(:))()(()()a)aaa::a()()a:()()a::)((()(a(a))))try implementing sleep sort if you are stuck:(:)a)" + S `shouldNotParse` "(a())(::)(a))():(((a(()(:))a(:)))(:(:(:((():)(a))(:))(a)():(:(()aa):)(a((())a)a((a):)()(:(" + S `shouldParse` "(::a((a)a:()):):a)aa:)a(:::))(a())aa(a():))(:)a)((():)(:a:)a))):a(a)((:()(()())a))()a((()a))" + S `shouldParse` "()(((a)((aa)))a)a()(a)(aa:a)()(((:())aa)):()():():a:(a)(a())a:)::a:(aa:):()((a:)())aa)a(a:)" + S `shouldParse` ":)()((a)):(():a:a:)(:a)):)(()(:)::::(a(::a())(a):(:((((:(aa(()))a)(((((((((()a()a):)))((:)))))))))" + S `shouldParse` "a(a)::(((::)))())((a)(:((:a())):((::(:()(a)))i am trapped in a test case generator :(:(a(:::))" + S `shouldParse` "((:):::(()()):)(()()():())aaa)(:(a:)a:((())a(((a(:())aa():a:)((()):)(()(:)(a())a:()a)a():(" + S `shouldNotParse` "(:a))" + S `shouldParse` "::((:))(((:)(aaa)(a())()(a:)(:)(:)()):)a())aa)())(():a):()::):)a()())a()):):(:a)a):()(a)(a)" + S `shouldParse` "()a(:)(a:a):(())):a()():((a(:):a()()::)(a:)(()a((a:)(a)a(a:a:)(a)a(a:(()()()::a()a()(()a:())))" + S `shouldParse` "()((:a(a()()a))())((:a(:a)(()a((((a((a(()(:aa()()()))):)(():):)(:(a))():(())(():()):):(()a))" + S `shouldParse` "(((((((((())))))))))" + S `shouldParse` "(((((((((((((((((((())))))))))))))))))))" + S `shouldParse` "((((((((((:))))))))))" + S `shouldParse` "((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" + S `shouldNotParse` "(((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" + S `shouldParse` "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) 
((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" + S `shouldParse` "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:))))))))))" + S `shouldParse` "((((((((((((:))))))))))((((((((((:())))))))))))" + S `shouldNotParse` "(((((((((()))))))))))" + S `shouldNotParse` "(((((((((((((((((((()))))))))))))))))))))" + S `shouldParse` "((((((((((:)))))))))))" + S `shouldParse` "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:)))))))))))))))))))))))))))))))))))))))))))))))))))" + S `shouldParse` "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))" + S `shouldParse` "((((((((((((:))))))))))((((((((((:)))))))))))))" + S `shouldNotParse` "((((((((((:))))))))))))" + S `shouldNotParse` "((((((((((((:))))))))))((((((((((:)))))))))))))))" + S `shouldNotParse` "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))))))))" + S `shouldNotParse` "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))))))" } // This is grammar Γ₁ from Scott and Johnstone (2010, Sec. 5). 
@@ -307,13 +300,13 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val B: NT[Any] = succeed(()) | 'a' lazy val C: NT[Any] = charParser('b') | B ~ C ~ 'b' | 'b' ~ 'b' - S shouldNotParse "" - S shouldNotParse "aba" - S shouldParse "d" - S shouldParse "ba" - S shouldParse "bba" - S shouldParse "abba" - S shouldParse "aabbba" + S `shouldNotParse` "" + S `shouldNotParse` "aba" + S `shouldParse` "d" + S `shouldParse` "ba" + S `shouldParse` "bba" + S `shouldParse` "abba" + S `shouldParse` "aabbba" } } } From 482889abf3b864bddee248d2b0c56d3713245db2 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 15:52:50 +0100 Subject: [PATCH 27/95] Start migrating tests --- .../src/test/scala/BasicCombinatorsTest.scala | 66 +- artifact/src/test/scala/CustomMatchers.scala | 25 +- .../test/scala/DerivativeParsersTests.scala | 294 ++--- artifact/src/test/scala/LeftrecTests.scala | 21 +- artifact/src/test/scala/NegationTests.scala | 66 +- .../src/test/scala/PythonParserTests.scala | 1119 ++++++++++++++--- 6 files changed, 1164 insertions(+), 427 deletions(-) diff --git a/artifact/src/test/scala/BasicCombinatorsTest.scala b/artifact/src/test/scala/BasicCombinatorsTest.scala index b6706f7..0426a0f 100644 --- a/artifact/src/test/scala/BasicCombinatorsTest.scala +++ b/artifact/src/test/scala/BasicCombinatorsTest.scala @@ -1,70 +1,72 @@ package fcd package test -import org.scalatest._ +import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.should.Matchers -trait BasicCombinatorTests extends CustomMatchers { self: FunSpec with Matchers => +trait BasicCombinatorTests extends CustomMatchers { self: AnyFunSpec & Matchers => import parsers._ describe("parser \"abc\"") { val p = 'a' ~ 'b' ~ 'c' - p shouldParse "abc" - p shouldNotParse "abcd" + p `shouldParse` "abc" + p `shouldNotParse` "abcd" } describe("parser \"ab | ac\"") { val p = ('a' ~ 'b') | ('a' ~ 'c') - p shouldParse "ab" - p 
shouldParse "ac" - p shouldNotParse "bc" - p shouldNotParse "a" - p shouldNotParse "abc" + p `shouldParse` "ab" + p `shouldParse` "ac" + p `shouldNotParse` "bc" + p `shouldNotParse` "a" + p `shouldNotParse` "abc" } describe("parser \"baaa | ba\"") { val p: Parser[_] = ('b' ~ 'a' ~ 'a' ~ 'a') | 'b' ~ 'a' - p shouldParse "baaa" - p shouldParse "ba" - ((p ~ 'c' ~ 'o') | (p ~ 'c')) shouldParse "bac" - ((p ~ 'c' ~ 'o') | (p ~ 'c')) shouldParse "baco" + p `shouldParse` "baaa" + p `shouldParse` "ba" + ((p ~ 'c' ~ 'o') | (p ~ 'c')) `shouldParse` "bac" + ((p ~ 'c' ~ 'o') | (p ~ 'c')) `shouldParse` "baco" } describe("parser \"(baaa | ba) aa\"") { val p: Parser[_] = ("baaa" | "ba") ~ "aa" - p shouldParse "baaaaa" - p shouldParse "baaa" + p `shouldParse` "baaaaa" + p `shouldParse` "baaa" } describe("parser \"succeed(a) b\"") { val p = succeed('a') ~ 'b' - p shouldParse "b" - p shouldNotParse "" + p `shouldParse` "b" + p `shouldNotParse` "" } describe("parser \"succeed(a) succeed(b)\"") { val p = succeed('a') ~ succeed('b') - p shouldParse "" + p `shouldParse` "" } describe("parser \"succeed(a) | succeed(b)\"") { val p = succeed('a') | succeed('b') - p shouldParse "" + p `shouldParse` "" } describe("parser \"(a a a | a a)+") { val p: Parser[_] = 'a' ~ 'a' ~ 'a' | 'a' ~ 'a' - describe("some(_)") { some(p) shouldParse "aaaa" } - describe("_ ~ 'b'") { (p ~ 'b') shouldParse "aaab" } + describe("some(_)") { some(p) `shouldParse` "aaaa" } + describe("_ ~ 'b'") { (p ~ 'b') `shouldParse` "aaab" } describe("some(_) ~ 'b'") { - (some(p) ~ 'b') shouldParse "aab" - (some(p) ~ 'b') shouldParse "aaab" - (some(p) ~ 'b') shouldParse "aaaaab" + (some(p) ~ 'b') `shouldParse` "aab" + (some(p) ~ 'b') `shouldParse` "aaab" + (some(p) ~ 'b') `shouldParse` "aaaaab" } describe("some(_ ~ 'a') ~ 'b'") { - (some(p ~ 'a') ~ 'b') shouldParse "aaaab" - (some(p ~ 'a') ~ 'b') shouldParse "aaab" + (some(p ~ 'a') ~ 'b') `shouldParse` "aaaab" + (some(p ~ 'a') ~ 'b') `shouldParse` "aaab" } } @@ -73,12 +75,12 @@ 
trait BasicCombinatorTests extends CustomMatchers { self: FunSpec with Matchers val largeInput = "a" * 100 - p shouldParse "a" - p shouldParse "aaaaaa" - p shouldParse largeInput - p shouldNotParse "" - p shouldNotParse ("b" + largeInput) - p shouldNotParse (largeInput + "b") + p `shouldParse` "a" + p `shouldParse` "aaaaaa" + p `shouldParse` largeInput + p `shouldNotParse` "" + p `shouldNotParse` "b" + largeInput + p `shouldNotParse` largeInput + "b" } } diff --git a/artifact/src/test/scala/CustomMatchers.scala b/artifact/src/test/scala/CustomMatchers.scala index d5b0f79..3c20470 100644 --- a/artifact/src/test/scala/CustomMatchers.scala +++ b/artifact/src/test/scala/CustomMatchers.scala @@ -1,10 +1,11 @@ package fcd package test -import org.scalatest._ -import org.scalatest.matchers._ +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.matchers.BeMatcher -trait CustomMatchers { self: FunSpec with Matchers => +trait CustomMatchers { self: AnyFunSpec & Matchers => // Due to initialization problems we have to use this pattern // of def and lazy val. 
@@ -16,19 +17,19 @@ trait CustomMatchers { self: FunSpec with Matchers => lazy val parsers = _parsers import parsers.{ Results, isSuccess, Parser, accepts, Elem } - implicit class ParserTests[T, P <% Parser[T]](p: => P) { - def shouldParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should parse "$s" """, tags:_*) { - accepts(p, s) shouldBe true + implicit class ParserTests[T](p: => Parser[T]) { + def shouldParse(s: Iterable[Elem], tags: Tag*) = + it (s"""should parse "$s" """, tags*) { + accepts(p, s) `shouldBe` true } - def shouldNotParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should not parse "$s" """, tags:_*) { - accepts(p, s) shouldBe false + def shouldNotParse(s: Iterable[Elem], tags: Tag*) = + it (s"""should not parse "$s" """, tags*) { + accepts(p, s) `shouldBe` false } } - class SuccessMatcher extends BeMatcher[Parser[_]] { - def apply(left: Parser[_]) = + class SuccessMatcher extends BeMatcher[Parser[?]] { + def apply(left: Parser[?]) = MatchResult( isSuccess(left), left.toString + " was not successful", diff --git a/artifact/src/test/scala/DerivativeParsersTests.scala b/artifact/src/test/scala/DerivativeParsersTests.scala index c1fc23d..04d6055 100644 --- a/artifact/src/test/scala/DerivativeParsersTests.scala +++ b/artifact/src/test/scala/DerivativeParsersTests.scala @@ -1,11 +1,12 @@ package fcd package test -import org.scalatest._ import scala.language.higherKinds import language.implicitConversions +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.should.Matchers -class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers +class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatchers with BasicCombinatorTests with NegationTests with LeftrecTests @@ -18,7 +19,7 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers // it is necessary to rename some combinators since names are already // bound by scala test. 
- import parsers.{ fail => err, noneOf => nonOf, oneOf => one, not => neg } + import parsers.{ fail => err, noneOf => nonOf, oneOf => one, not => neg, succeed => succ } // This test illustrates how to write graph representations of the // parsers to a file. (To execute it replace `ignore` by `describe` and @@ -26,22 +27,22 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers describe("printing graph representations of parsers") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ A A.printToFile("test.png") } describe("Examples in section 3") { import section_3_2._ - number shouldParse "42" + number `shouldParse` "42" } describe("Indentation with feed") { import section_3_4_improved._ val xs = many(some('x') ~ '\n') - indented(xs) shouldParse " xxx\n xxxx\n" - indented(xs) shouldParse " xxxxxxxxxx\n xxxxxxxxxx\n" + indented(xs) `shouldParse` " xxx\n xxxx\n" + indented(xs) `shouldParse` " xxxxxxxxxx\n xxxxxxxxxx\n" lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block @@ -50,16 +51,16 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) - stmt shouldParse "while (true):\n xxxxx\n xxxxx\n" - stmt shouldParse "while (true):\n while (true):\n xxxxx\n xxxx\n" + stmt `shouldParse` "while (true):\n xxxxx\n xxxxx\n" + stmt `shouldParse` "while (true):\n while (true):\n xxxxx\n xxxx\n" } describe("Indentation with delegation") { import section_3_5_improved._ val xs = many(some('x') ~ '\n') - indented(xs) shouldParse " xxx\n xxxx\n" - indented(xs) shouldParse " xxxxxxxxxx\n xxxxxxxxxx\n" + indented(xs) `shouldParse` " xxx\n xxxx\n" + indented(xs) `shouldParse` " xxxxxxxxxx\n xxxxxxxxxx\n" lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block @@ -68,8 +69,8 @@ class DerivativeParsersTests extends FunSpec with Matchers with 
CustomMatchers lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) - stmt shouldParse "while (true):\n xxxxx\n xxxxx\n" - stmt shouldParse "while (true):\n while (true):\n xxxxx\n xxxx\n" + stmt `shouldParse` "while (true):\n xxxxx\n xxxxx\n" + stmt `shouldParse` "while (true):\n while (true):\n xxxxx\n xxxx\n" } describe("Simplified tables for paper") { @@ -77,12 +78,12 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers lazy val xs = many(some('x') ~ '\n') - table(xs) shouldParse """+---+ + table(xs) `shouldParse` """+---+ ^|xxx| ^+---+ ^""".stripMargin('^') - table(xs) shouldParse """+---+--------+------------+ + table(xs) `shouldParse` """+---+--------+------------+ ^|xxx|xxxxxxxx|xxxxxxxxxxxx| ^|xxx|xxxxxxxx|xxxxxxxxxxxx| ^|xxx|xxxxxxxx|xxxxxxxxxxxx| @@ -120,12 +121,12 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers lazy val xs = many(some('x') ~ '\n') - table(xs) shouldParse """+---+ + table(xs) `shouldParse` """+---+ ^|xxx| ^+---+ ^""".stripMargin('^') - table(xs) shouldParse """+---+--------+------------+ + table(xs) `shouldParse` """+---+--------+------------+ ^|xxx|xxxxxxxx|xxxxxxxxxxxx| ^|xxx|xxxxxxxx|xxxxxxxxxxxx| ^|xxx|xxxxxxxx|xxxxxxxxxxxx| @@ -135,7 +136,7 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers ^+---+--------+------------+ ^""".stripMargin('^') - table(xs) shouldNotParse """+---+--------+------------+ + table(xs) `shouldNotParse` """+---+--------+------------+ ^|xxx|xxxxxxxx|xxxxxxxxxxxx| ^|xxx|xxxxxxxx|xxxxxxxxxxxx| ^|xxx|xxxxxxxx|xxxxxxxxxxxx| @@ -148,7 +149,7 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers lazy val nestedTables: NT[Any] = table(xs | nestedTables) - nestedTables shouldParse """+---+--------+------------+ + nestedTables `shouldParse` """+---+--------+------------+ ^|xxx|+-+----+|xxxxxxxxxxxx| ^|xxx||x|xxxx||xxxxxxxxxxxx| ^|xxx|+-+----+|xxxxxxxxxxxx| @@ -158,7 +159,7 @@ 
class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers ^+---+--------+------------+ ^""".stripMargin('^') - nestedTables shouldNotParse """+---+--------+------------+ + nestedTables `shouldNotParse` """+---+--------+------------+ ^|xxx|+-+----+|xxxxxxxxxxxx| ^|xxx||x|oxxx||xxxxxxxxxxxx| ^|xxx|+-+----+|xxxxxxxxxxxx| @@ -175,32 +176,32 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers } describe("flatMap uses fixed point computation") { - lazy val fm: NT[Int] = succeed(1) | fm.flatMap { n => if (n < 5) succeed(n + 1) else err } + lazy val fm: NT[Int] = succ(1) | fm.flatMap { n => if (n < 5) succeed(n + 1) else err } - fm.results.toSet shouldBe Set(1,2,3,4,5) + fm.results.toSet `shouldBe` Set(1,2,3,4,5) } describe("Stream preprocessing") { - lazy val ones: NT[Any] = succeed(()) | '1' ~ ones - lazy val zeros: NT[Any] = succeed(()) | '0' ~ zeros + lazy val ones: NT[Any] = succ(()) | '1' ~ ones + lazy val zeros: NT[Any] = succ(()) | '0' ~ zeros lazy val oneszeros: Parser[Any] = '1' ~ '1' ~ '0' ~ '0' def bin(p: Parser[Any]): NT[Any] = done(p) | (('a' ~> bin(p << '1')) | ('b' ~> bin(p << '0'))) - ones shouldParse "1111" + ones `shouldParse` "1111" bin(ones).accepts - bin(ones) shouldParse "aaaaa" - bin(ones) shouldNotParse "aaaaab" - bin(zeros) shouldParse "bbbbb" - bin(zeros) shouldNotParse "bbbbba" - bin(oneszeros) shouldParse "aabb" - bin(oneszeros) shouldNotParse "aabbb" - - bin(ones) shouldNotParse ("b" * 50) + bin(ones) `shouldParse` "aaaaa" + bin(ones) `shouldNotParse` "aaaaab" + bin(zeros) `shouldParse` "bbbbb" + bin(zeros) `shouldNotParse` "bbbbba" + bin(oneszeros) `shouldParse` "aabb" + bin(oneszeros) `shouldNotParse` "aabbb" + + bin(ones) `shouldNotParse` ("b" * 50) } @@ -242,12 +243,12 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers def IMAP[T](body: Parser[T]): Parser[T] = header >> feedNTimes(body) - IMAP(many('a')) shouldParse "{ 1 }a" - IMAP(many('a')) shouldNotParse 
"{ 1 }" - IMAP(many('a')) shouldNotParse "{ 1 }aa" - IMAP(many('a')) shouldParse "{ 7 }aaaaaaa" - IMAP(many('a')) shouldNotParse "{ 7 }aaaaaaaa" - IMAP(many('a')) shouldNotParse "{ 7 }" + IMAP(many('a')) `shouldParse` "{ 1 }a" + IMAP(many('a')) `shouldNotParse` "{ 1 }" + IMAP(many('a')) `shouldNotParse` "{ 1 }aa" + IMAP(many('a')) `shouldParse` "{ 7 }aaaaaaa" + IMAP(many('a')) `shouldNotParse` "{ 7 }aaaaaaaa" + IMAP(many('a')) `shouldNotParse` "{ 7 }" } @@ -262,11 +263,11 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers val p = 'a' ~ 'a' ~ 'a' val q = 'b' ~ 'b' ~ 'b' - interleave(p, q) shouldParse "ababab" - interleave(p, q) shouldNotParse "abababab" - interleave(p, q) shouldNotParse "abab" - interleave(p, q) shouldNotParse "ab" - interleave(p, q) shouldNotParse "" + interleave(p, q) `shouldParse` "ababab" + interleave(p, q) `shouldNotParse` "abababab" + interleave(p, q) `shouldNotParse` "abab" + interleave(p, q) `shouldNotParse` "ab" + interleave(p, q) `shouldNotParse` "" } // Usecase. 
Indentation that also skips empty lines @@ -287,11 +288,11 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers val xs = many(some('x') ~ '\n') - indent(xs) shouldParse "" - indent(xs) shouldParse " xx\n" - indent(xs) shouldParse " xxxxx\n" - indent(xs) shouldParse " xxxxx\n xxxxxxx\n" - indent(xs) shouldParse """ xxxxx + indent(xs) `shouldParse` "" + indent(xs) `shouldParse` " xx\n" + indent(xs) `shouldParse` " xxxxx\n" + indent(xs) `shouldParse` " xxxxx\n xxxxxxx\n" + indent(xs) `shouldParse` """ xxxxx | xxxxxxx | xxxxxxxx | xxxxxxxxx @@ -310,62 +311,62 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers | xxxxxxxxxxxxxx |""".stripMargin('|') - indent(indent(xs)) shouldParse " xx\n" - indent(indent(xs)) shouldParse " xxxxx\n" - indent(indent(xs)) shouldParse " xxxxx\n xxxxxxx\n" + indent(indent(xs)) `shouldParse` " xx\n" + indent(indent(xs)) `shouldParse` " xxxxx\n" + indent(indent(xs)) `shouldParse` " xxxxx\n xxxxxxx\n" - indent(indent(xs)) shouldParse " xxxxx\n\n xxxxxxx\n" - indent(indent(xs)) shouldParse " xxxxx\n \n xxxxxxx\n" - indent(indent(xs)) shouldParse " xxxxx\n \n\n \n xxxxxxx\n" - indent(indent(xs)) shouldNotParse " xxxxx\n \n\n \n xxxxxxx\n" - indent(indent(xs)) shouldNotParse " xxxxx\n \n\n \n xxxxxxx\n" + indent(indent(xs)) `shouldParse` " xxxxx\n\n xxxxxxx\n" + indent(indent(xs)) `shouldParse` " xxxxx\n \n xxxxxxx\n" + indent(indent(xs)) `shouldParse` " xxxxx\n \n\n \n xxxxxxx\n" + indent(indent(xs)) `shouldNotParse` " xxxxx\n \n\n \n xxxxxxx\n" + indent(indent(xs)) `shouldNotParse` " xxxxx\n \n\n \n xxxxxxx\n" } describe("Parens parser") { import section_4_2.parens - parens shouldParse "" - parens shouldParse "()" - parens shouldParse "(())" - parens shouldNotParse "(()" + parens `shouldParse` "" + parens `shouldParse` "()" + parens `shouldParse` "(())" + parens `shouldNotParse` "(()" } describe("Retroactively, allow spaces in arbitrary positions") { import section_4_2.{ spaced, parens 
} val sp = spaced(parens) - sp shouldParse "((()))" - sp shouldParse "((( )))" - sp shouldParse "( (( )))" - sp shouldParse "( (( ))) " - sp shouldParse "( (\n (\n )) ) " - sp shouldNotParse "( ( ( )) " + sp `shouldParse` "((()))" + sp `shouldParse` "((( )))" + sp `shouldParse` "( (( )))" + sp `shouldParse` "( (( ))) " + sp `shouldParse` "( (\n (\n )) ) " + sp `shouldNotParse` "( ( ( )) " } describe("Allowing parens in code blocks") { import section_4_2._ - as shouldParse "aaa\n" - as shouldParse "\n" - as shouldParse "aa\naa\n" + as `shouldParse` "aaa\n" + as `shouldParse` "\n" + as `shouldParse` "aa\naa\n" - both shouldParse "a\n" - both shouldParse """aaa + both `shouldParse` "a\n" + both `shouldParse` """aaa |~~~ |() |~~~ |aaaaa |""".stripMargin('|') - both shouldParse "a \n\n~~~ \n()\n~~~\naaa\n" + both `shouldParse` "a \n\n~~~ \n()\n~~~\naaa\n" - both shouldNotParse """aaa + both `shouldNotParse` """aaa |~~~ |( |~~~ |aaaaa |""".stripMargin('|') - both shouldParse """aaa + both `shouldParse` """aaa |~~~ |((()) |~~~ @@ -383,27 +384,27 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers import section_4_2._ - unescape(many('\n')) shouldParse """\n\n\n""" - unescape(many("\n" | "a")) shouldParse """\na\n\n""" - unescape(many("\n" | "a")) shouldParse """\na\n\naaa""" + unescape(many('\n')) `shouldParse` """\n\n\n""" + unescape(many("\n" | "a")) `shouldParse` """\na\n\n""" + unescape(many("\n" | "a")) `shouldParse` """\na\n\naaa""" } describe("Combined examples") { import section_4_2._ - combined shouldParse """aaa + combined `shouldParse` """aaa ^""".stripMargin('^') - combined shouldParse """+----+ + combined `shouldParse` """+----+ ^|aaaa| ^+----+ ^""".stripMargin('^') - combined shouldParse """+----+ + combined `shouldParse` """+----+ ^|aa | ^+----+ ^""".stripMargin('^') - combined shouldParse """+----+ + combined `shouldParse` """+----+ ^|aaaa| ^|~~~ | ^|(())| @@ -412,13 +413,13 @@ class DerivativeParsersTests extends FunSpec with 
Matchers with CustomMatchers ^+----+ ^""".stripMargin('^') - combined shouldParse """+----+ + combined `shouldParse` """+----+ ^|aa | ^|aaaa| ^+----+ ^""".stripMargin('^') - combined shouldParse """+----+ + combined `shouldParse` """+----+ ^|aa | ^|~~~ | ^|(())| @@ -432,40 +433,40 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers describe("Biased choice") { val p = biasedAlt("foo", some(letter)) ~ "bar" - p shouldParse "foobar" - p shouldNotParse "foozbar" - p shouldParse "barbar" + p `shouldParse` "foobar" + p `shouldNotParse` "foozbar" + p `shouldParse` "barbar" // this test shows, that we can only implement a locally biased choice val q = biasedAlt("foo", "f") ~ "oo" // should actually *not* parse "foo", but does: - q shouldParse "foo" + q `shouldParse` "foo" } describe("Greedy repitition") { it ("should return only the result of the longest match") { - greedySome(some('a')) parse "" shouldBe List() - greedyMany(some('a')) parse "" shouldBe List(List()) - greedySome(some('a')) parse "a" shouldBe List(List(List('a'))) - greedySome(some('a')) parse "aaa" shouldBe List(List(List('a', 'a', 'a'))) + greedySome(some('a')) parse "" `shouldBe` List() + greedyMany(some('a')) parse "" `shouldBe` List(List()) + greedySome(some('a')) parse "a" `shouldBe` List(List(List('a'))) + greedySome(some('a')) parse "aaa" `shouldBe` List(List(List('a', 'a', 'a'))) } - it ("should also return longest match if other parser succeeded first") { + it ("should also return longest match if other parser succed first") { lazy val p = some("ab") | some("a") | some("b") - greedySome(p) parse "ab" shouldBe List(List(List("ab"))) - greedySome(p) parse "abab" shouldBe List(List(List("ab", "ab"))) - greedySome(p) parse "abbab" shouldBe List(List(List("ab"), List("b"), List("ab"))) - greedySome(p) parse "abbaab" shouldBe List(List(List("ab"), List("b"), List("a", "a"), List("b"))) - greedySome(p) parse "aaaab" shouldBe List(List(List("a", "a", "a", "a"), List("b"))) + 
greedySome(p) parse "ab" `shouldBe` List(List(List("ab"))) + greedySome(p) parse "abab" `shouldBe` List(List(List("ab", "ab"))) + greedySome(p) parse "abbab" `shouldBe` List(List(List("ab"), List("b"), List("ab"))) + greedySome(p) parse "abbaab" `shouldBe` List(List(List("ab"), List("b"), List("a", "a"), List("b"))) + greedySome(p) parse "aaaab" `shouldBe` List(List(List("a", "a", "a", "a"), List("b"))) lazy val q = "ab" | "a" | "b" - greedySome(q) parse "ab" shouldBe List(List("ab")) - greedySome(q) parse "abab" shouldBe List(List("ab", "ab")) - greedySome(q) parse "abbab" shouldBe List(List("ab", "b", "ab")) - greedySome(q) parse "abbaab" shouldBe List(List("ab", "b", "a", "ab")) - greedySome(q) parse "aaaab" shouldBe List(List("a", "a", "a", "ab")) + greedySome(q) parse "ab" `shouldBe` List(List("ab")) + greedySome(q) parse "abab" `shouldBe` List(List("ab", "ab")) + greedySome(q) parse "abbab" `shouldBe` List(List("ab", "b", "ab")) + greedySome(q) parse "abbaab" `shouldBe` List(List("ab", "b", "a", "ab")) + greedySome(q) parse "aaaab" `shouldBe` List(List("a", "a", "a", "ab")) } // This shows that our implementation is only locally greedy @@ -488,15 +489,15 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers val r: Parser[Any] = ("oo" | "b") val ex: Parser[Any] = biasedAlt(p, q) ~ r - // ex shouldNotParse "foo" //-> fails + // ex `shouldNotParse` "foo" //-> fails // If the right-hand-side `r` is locally known the parser can be // rewritten to: val rewrite = p ~ r | (neg(p ~ always) &> (q ~ r)) - rewrite shouldNotParse "foo" - rewrite shouldParse "foooo" - rewrite shouldParse "fb" + rewrite `shouldNotParse` "foo" + rewrite `shouldParse` "foooo" + rewrite `shouldParse` "fb" } // Since "lexing" is performed after indentation checking, but indentation @@ -551,10 +552,10 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers val comment: Parser[String] = consumed('#' ~ many(nonOf("\n")) ~ '\n') val 
multilineString: Parser[String] = consumed("'''" ~ neg(always ~ prefix("'''")) ~ "'''") - singleString shouldParse "\"hello world\"" - singleString shouldNotParse "\"hello\nworld\"" - singleString shouldParse "\"hello'''world\"" - multilineString shouldParse "'''Hello \" \n\" world'''" + singleString `shouldParse` "\"hello world\"" + singleString `shouldNotParse` "\"hello\nworld\"" + singleString `shouldParse` "\"hello'''world\"" + multilineString `shouldParse` "'''Hello \" \n\" world'''" // for testing val collect = consumed(always) ^^ { x => x.mkString } @@ -566,7 +567,7 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers (p parse "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n") should be (List("hello '''foo\"bar''' test\n foo \" bar'''foo \"\n")) } // here we can already observe performance problems (about 400ms): - p shouldParse "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n some content that is not a program, but could be one \n. # ''' some comment \nIt contains newlines \n, \"and some Strings\". Even Multiline strings with '''newlines\n'''." + p `shouldParse` "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n some content that is not a program, but could be one \n. # ''' some comment \nIt contains newlines \n, \"and some Strings\". Even Multiline strings with '''newlines\n'''." 
lazy val noText: Parser[Any] = comment | singleString | multilineString @@ -590,21 +591,21 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers (one(opening) >> { paren => always ~ pairs(paren) }) &> transform[Any](noText | nonOf(opening) & nonOf(closing) , err, skip)(dyck) - parens shouldParse "()" - parens shouldParse "(())" - parens shouldParse "(()()())" - parens shouldParse "(()[]())" - parens shouldParse "(()[()[]]())" - parens shouldNotParse "(()[()[]())" - parens shouldNotParse "a (()) a" - parens shouldNotParse "(()" - parens shouldParse "( hello world ())" - parens shouldParse "( [# foo \"()) \n ()]{\" [ \" hello } world ())" - parens shouldNotParse "( [# foo \"()) \n ()]{\" [ \" hello world ())" - parens shouldNotParse "( [# foo \"()) \n ()]\" [ \" hello } world ())" - parens shouldNotParse "( [# foo \"()) \n )]{\" [ \" hello } world ())" - parens shouldParse "( hello \" ) \"world ())" - parens shouldNotParse "( hello \" ) \"" + parens `shouldParse` "()" + parens `shouldParse` "(())" + parens `shouldParse` "(()()())" + parens `shouldParse` "(()[]())" + parens `shouldParse` "(()[()[]]())" + parens `shouldNotParse` "(()[()[]())" + parens `shouldNotParse` "a (()) a" + parens `shouldNotParse` "(()" + parens `shouldParse` "( hello world ())" + parens `shouldParse` "( [# foo \"()) \n ()]{\" [ \" hello } world ())" + parens `shouldNotParse` "( [# foo \"()) \n ()]{\" [ \" hello world ())" + parens `shouldNotParse` "( [# foo \"()) \n ()]\" [ \" hello } world ())" + parens `shouldNotParse` "( [# foo \"()) \n )]{\" [ \" hello } world ())" + parens `shouldParse` "( hello \" ) \"world ())" + parens `shouldNotParse` "( hello \" ) \"" lazy val escapedNL = '\\' ~ '\n' @@ -629,8 +630,8 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers List("'''some \n multiline \n'''\n( # comment (\n ) hello\ntest and \\\n escaped\n") ) } - joiningIndent(collect) shouldParse " '''some \n multiline \n'''\n ( # comment (\n )\n" - 
joiningIndent(collect) shouldNotParse " '''some \n multiline \n''\n ( # comment (\n )\n" + joiningIndent(collect) `shouldParse` " '''some \n multiline \n'''\n ( # comment (\n )\n" + joiningIndent(collect) `shouldNotParse` " '''some \n multiline \n''\n ( # comment (\n )\n" val WS: Parser[Any] = ' ' @@ -648,17 +649,17 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers lazy val stmts: NT[Any] = someSep(stmt, spaces) lazy val suite: NT[Any] = lineEnd ~> joiningIndent(stmts) - stmt shouldParse "def foo():\n '''hello\n '''\n" - stmt shouldNotParse "def foo():\n \"'''hello\n '''\"\n" - stmt shouldParse "def foo():\n '''hello\n ''' # some comment \n" - stmt shouldNotParse "def foo():\n # '''hello\n ''' some comment \n" - stmt shouldParse "def foo():\n []\n" - stmt shouldParse "def foo():\n [foo, bar]\n" - stmt shouldParse "def foo():\n [foo, \nbar]\n" - stmt shouldNotParse "def foo():\n \"[foo, \nbar]\"\n" - stmt shouldParse "def foo():\n \"[foo, bar]\"\n" - stmt shouldParse "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" - stmt shouldParse "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" + stmt `shouldParse` "def foo():\n '''hello\n '''\n" + stmt `shouldNotParse` "def foo():\n \"'''hello\n '''\"\n" + stmt `shouldParse` "def foo():\n '''hello\n ''' # some comment \n" + stmt `shouldNotParse` "def foo():\n # '''hello\n ''' some comment \n" + stmt `shouldParse` "def foo():\n []\n" + stmt `shouldParse` "def foo():\n [foo, bar]\n" + stmt `shouldParse` "def foo():\n [foo, \nbar]\n" + stmt `shouldNotParse` "def foo():\n \"[foo, \nbar]\"\n" + stmt `shouldParse` "def foo():\n \"[foo, bar]\"\n" + stmt `shouldParse` "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" + stmt `shouldParse` "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" } describe("Regression: `not` should preserve invariant `p.results.isEmpty != p.accepts`") { @@ -668,13 +669,12 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers val 
p_c = p <<< "c" it ("should preserve the invariant when performing optimization rewrites") { - p_a.accepts shouldBe false - p_a.accepts shouldBe (!p_a.results.isEmpty) - p_b.accepts shouldBe false - p_b.accepts shouldBe (!p_b.results.isEmpty) - p_c.accepts shouldBe true - p_c.accepts shouldBe (!p_c.results.isEmpty) + p_a.accepts `shouldBe` false + p_a.accepts `shouldBe` (!p_a.results.isEmpty) + p_b.accepts `shouldBe` false + p_b.accepts `shouldBe` (!p_b.results.isEmpty) + p_c.accepts `shouldBe` true + p_c.accepts `shouldBe` (!p_c.results.isEmpty) } } - } diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index 6fac774..05c4995 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -1,25 +1,28 @@ package fcd package test -import org.scalatest._ -trait LeftrecTests extends CustomMatchers { self: FunSpec & Matchers => +import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.should.Matchers + +trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => import parsers._ describe("lazyness of alt") { describe("p = p | .") { - lazy val p: NT[Any] = p | any + lazy val p = p | any p `shouldParse` "a" } describe("p = p ~ . | .") { - lazy val p: NT[_] = p ~ any | any + lazy val p: NT[?] = p ~ any | any p `shouldParse` "a" } describe("p = . | p ~ .") { - lazy val p: NT[_] = any | p ~ any + lazy val p: NT[?] = any | p ~ any p `shouldParse` "a" } @@ -53,7 +56,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec & Matchers => describe("left recursion") { describe("A = A ~ a | empty") { - lazy val A: NT[_] = A ~ 'a' | succeed(42) + lazy val A: NT[?] = A ~ 'a' | succeed(42) A `shouldParse` "" A `shouldParse` "a" @@ -61,7 +64,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec & Matchers => } describe("A = empty | A ~ a ") { - lazy val A: NT[_] = succeed(42) | A ~ 'a' + lazy val A: NT[?] 
= succeed(42) | A ~ 'a' A `shouldParse` "" A `shouldParse` "a" @@ -121,7 +124,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec & Matchers => } describe("A = A ~ b | c") { - lazy val A: NT[_] = A ~ 'b' | 'c' + lazy val A: NT[?] = A ~ 'b' | 'c' A `shouldParse` "c" A `shouldParse` "cb" @@ -140,7 +143,7 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec & Matchers => // should parse at most as many 'd's as it parses 'b's. describe("A = B ~ A ~ b | c\n B = d | empty") { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' - lazy val B: NT[_] = charParser('d') | succeed("done") + lazy val B: NT[?] = charParser('d') | succeed("done") A `shouldParse` "c" A `shouldParse` "cb" diff --git a/artifact/src/test/scala/NegationTests.scala b/artifact/src/test/scala/NegationTests.scala index 0cf39e0..6b29912 100644 --- a/artifact/src/test/scala/NegationTests.scala +++ b/artifact/src/test/scala/NegationTests.scala @@ -1,59 +1,61 @@ package fcd package test -import org.scalatest._ +import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.should.Matchers -trait NegationTests extends CustomMatchers { self: FunSpec with Matchers => +trait NegationTests extends CustomMatchers { self: AnyFunSpec & Matchers => import parsers._ import parsers.{ not => neg } describe("parser \"not(aa)\"") { val p = neg("aa") - p shouldParse "a" - p shouldNotParse "aa" - p shouldParse "aac" - p shouldParse "abc" + p `shouldParse` "a" + p `shouldNotParse` "aa" + p `shouldParse` "aac" + p `shouldParse` "abc" } describe("parser \"not(aa) & lower*\"") { val p = neg("aa") & many(lower) - p shouldParse "a" - p shouldParse "bc" - p shouldParse "ab" - p shouldNotParse "aa" - p shouldParse "abc" - p shouldParse "aac" - p shouldParse "aacdd" + p `shouldParse` "a" + p `shouldParse` "bc" + p `shouldParse` "ab" + p `shouldNotParse` "aa" + p `shouldParse` "abc" + p `shouldParse` "aac" + p `shouldParse` "aacdd" } describe("parser \"not(aa ~ .*) & 
lower*\"") { val p = neg("aa" ~ many(any)) & many(lower) - p shouldParse "a" - p shouldParse "bc" - p shouldParse "ab" - p shouldNotParse "aa" - p shouldParse "abc" - p shouldNotParse "aac" - p shouldNotParse "aacadasdasdasd" + p `shouldParse` "a" + p `shouldParse` "bc" + p `shouldParse` "ab" + p `shouldNotParse` "aa" + p `shouldParse` "abc" + p `shouldNotParse` "aac" + p `shouldNotParse` "aacadasdasdasd" } describe("parser \"not(.* ~ abc ~ .*)\"") { val p = neg(many(any) ~ "abc" ~ many(any)) - p shouldParse "" - p shouldParse "xx" - p shouldParse "xxabxx" - p shouldNotParse "xxabcxxx" - p shouldNotParse "xxabc" - p shouldNotParse "abcxxx" + p `shouldParse` "" + p `shouldParse` "xx" + p `shouldParse` "xxabxx" + p `shouldNotParse` "xxabcxxx" + p `shouldNotParse` "xxabc" + p `shouldNotParse` "abcxxx" } describe("parser \"not((baaa | ba) ~ aa ~ .*) & lower*\"") { - val p: Parser[_] = neg(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) - p shouldNotParse "baaa" - p shouldNotParse "baaaxx" - p shouldParse "" - p shouldParse "baba" - p shouldParse "baacxx" + val p = neg(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) + p `shouldNotParse` "baaa" + p `shouldNotParse` "baaaxx" + p `shouldParse` "" + p `shouldParse` "baba" + p `shouldParse` "baacxx" } } diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index fe7cdb6..1409679 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -1,286 +1,1015 @@ package fcd package test -import org.scalatest._ import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.should.Matchers -class PythonParserTests extends FunSpec with Matchers { +class PythonParserTests extends AnyFunSpec with Matchers { val parsers = PythonParsers import parsers._ - describe ("indented python parser (lexeme based)") { - indented(many(many(Id("A")) <~ NL)) shouldParseWith ( - 
List(WS, WS, Id("A"), Id("A"), NL, - WS, WS, Id("A"), NL), - List(List(Id("A"), Id("A")), List(Id("A")))) + describe("indented python parser (lexeme based)") { + indented(many(many(Id("A")) <~ NL)) `shouldParseWith` (List( + WS, + WS, + Id("A"), + Id("A"), + NL, + WS, + WS, + Id("A"), + NL + ), + List(List(Id("A"), Id("A")), List(Id("A")))) } - describe ("implicit line joining") { + describe("implicit line joining") { implicit def keyword(kw: Symbol): Lexeme = KW(kw.name) implicit def punctuation(p: String): Lexeme = Punct(p) - val p = many(WS | id | "(" | ")" | "[" | "]") - val a = Id("A") - val BS = Punct("\\") - + val p = many(WS | id | "(" | ")" | "[" | "]") + val a = Id("A") + val BS = Punct("\\") - dyck shouldParse List[Lexeme]("(", "(", ")", ")") - dyck shouldNotParse List[Lexeme]("(", "(", ")") - extDyck shouldParse List[Lexeme]("(", a, "(", a, NL, a, ")", a, ")") - extDyck shouldNotParse List[Lexeme](a, "(", a, "(", a, NL, a, ")", a, ")", a) + dyck `shouldParse` List[Lexeme]("(", "(", ")", ")") + dyck `shouldNotParse` List[Lexeme]("(", "(", ")") + extDyck `shouldParse` List[Lexeme]("(", a, "(", a, NL, a, ")", a, ")") + extDyck `shouldNotParse` List[Lexeme]( + a, + "(", + a, + "(", + a, + NL, + a, + ")", + a, + ")", + a + ) - implicitJoin(p) shouldParse List[Lexeme](a, a, a, a, a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, a, NL, a, a) - implicitJoin(p) shouldParse List[Lexeme](a, a, "(", a, NL, a, ")", a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, "(", a, NL, a, a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, "(", a, "(", NL, a, ")", a) - implicitJoin(p) shouldParse List[Lexeme](a, a, "(", a, "(", NL, a, ")", ")", a) - implicitJoin(p) shouldParse List[Lexeme](a, a, "(", a, "[", NL, a, "]", ")", a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, "(", a, "[", NL, a, ")", "]", a) + implicitJoin(p) `shouldParse` List[Lexeme](a, a, a, a, a) + implicitJoin(p) `shouldNotParse` List[Lexeme](a, a, a, NL, a, a) + implicitJoin(p) 
`shouldParse` List[Lexeme](a, a, "(", a, NL, a, ")", a) + implicitJoin(p) `shouldNotParse` List[Lexeme](a, a, "(", a, NL, a, a) + implicitJoin(p) `shouldNotParse` List[Lexeme]( + a, + a, + "(", + a, + "(", + NL, + a, + ")", + a + ) + implicitJoin(p) `shouldParse` List[Lexeme]( + a, + a, + "(", + a, + "(", + NL, + a, + ")", + ")", + a + ) + implicitJoin(p) `shouldParse` List[Lexeme]( + a, + a, + "(", + a, + "[", + NL, + a, + "]", + ")", + a + ) + implicitJoin(p) `shouldNotParse` List[Lexeme]( + a, + a, + "(", + a, + "[", + NL, + a, + ")", + "]", + a + ) - explicitJoin(p) shouldParse List[Lexeme](a, a, a, BS, NL, a, a) - explicitJoin(p) shouldParse List[Lexeme](a, a, a, BS, NL, a, a, BS, NL, a, a) + explicitJoin(p) `shouldParse` List[Lexeme](a, a, a, BS, NL, a, a) + explicitJoin(p) `shouldParse` List[Lexeme]( + a, + a, + a, + BS, + NL, + a, + a, + BS, + NL, + a, + a + ) val input = List[Lexeme]( - a, NL, - Comment("Hey!!"), a, BS, NL, - a, a, "(", a, "[", a, BS, NL, - a, NL, - a, "]", ")", a) + a, + NL, + Comment("Hey!!"), + a, + BS, + NL, + a, + a, + "(", + a, + "[", + a, + BS, + NL, + a, + NL, + a, + "]", + ")", + a + ) val inputWithoutComments = List[Lexeme]( - a, NL, - a, BS, NL, - a, a, "(", a, "[", a, BS, NL, - a, NL, - a, "]", ")", a) - - val inputWithoutExplicit = List[Lexeme]( - a, NL, a, - a, a, "(", a, "[", a, - a, NL, - a, "]", ")", a) - - val inputResult = List[Lexeme]( - a, NL, + NL, + a, + BS, + NL, + a, a, - a, a, "(", a, "[", a, + "(", a, - a, "]", ")", a) + "[", + a, + BS, + NL, + a, + NL, + a, + "]", + ")", + a + ) + + val inputWithoutExplicit = + List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, NL, a, "]", ")", a) + + val inputResult = + List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, a, "]", ")", a) val collect = consumed(many(any)) - stripComments(collect) shouldParseWith (input, inputWithoutComments) - explicitJoin(collect) shouldParseWith (inputWithoutComments, inputWithoutExplicit) - implicitJoin(collect) shouldParseWith (inputWithoutExplicit, 
inputResult) + stripComments(collect) `shouldParseWith` (input, inputWithoutComments) + explicitJoin( + collect + ) `shouldParseWith` (inputWithoutComments, inputWithoutExplicit) + implicitJoin(collect) `shouldParseWith` (inputWithoutExplicit, inputResult) - preprocess(file_input) shouldParse List[Lexeme]( - a, ";", a, "=", 'yield, 'from, a, "=", a, ";", NL, + preprocess(file_input) `shouldParse` List[Lexeme]( + a, + ";", + a, + "=", + "yield", + 'from, + a, + "=", + a, + ";", + NL, NL, - a, ";", a, NL, - EOS) + a, + ";", + a, + NL, + EOS + ) - preprocess(file_input) shouldParse List[Lexeme]( - a, "=", a, ">>", a, "*", a, NL, - EOS) + preprocess(file_input) `shouldParse` List[Lexeme]( + a, + "=", + a, + ">>", + a, + "*", + a, + NL, + EOS + ) val sampleProg = List[Lexeme]( - 'def, WS, Id("fun"), "(", WS, a, WS, ")", ":", NL, - WS, WS, a, "+=", WS, a, NL, - WS, WS, a, "*=", a, NL, - EOS) + "def", + WS, + Id("fun"), + "(", + WS, + a, + WS, + ")", + ":", + NL, + WS, + WS, + a, + "+=", + WS, + a, + NL, + WS, + WS, + a, + "*=", + a, + NL, + EOS + ) - (stripComments(collect) parse sampleProg) shouldBe List(sampleProg) - (explicitJoin(collect) parse sampleProg) shouldBe List(sampleProg) - (implicitJoin(collect) parse sampleProg) shouldBe List(sampleProg) + (stripComments(collect) parse sampleProg) `shouldBe` List(sampleProg) + (explicitJoin(collect) parse sampleProg) `shouldBe` List(sampleProg) + (implicitJoin(collect) parse sampleProg) `shouldBe` List(sampleProg) - preprocess(file_input) shouldParse sampleProg + preprocess(file_input) `shouldParse` sampleProg val sampleProg2 = List[Lexeme]( - 'def, WS, Id("fun"), "(", NL, - WS, a, WS, - NL, ")", ":", NL, - WS, WS, a, "+=", Comment("Test"), BS, NL, - WS, a, NL, - WS, WS, a, "*=", a, NL, - EOS) + "def", + WS, + Id("fun"), + "(", + NL, + WS, + a, + WS, + NL, + ")", + ":", + NL, + WS, + WS, + a, + "+=", + Comment("Test"), + BS, + NL, + WS, + a, + NL, + WS, + WS, + a, + "*=", + a, + NL, + EOS + ) - (preprocess(collect) parse 
sampleProg2) shouldBe List(sampleProg) + (preprocess(collect) parse sampleProg2) `shouldBe` List(sampleProg) - preprocess(file_input) shouldParse sampleProg2 + preprocess(file_input) `shouldParse` sampleProg2 // https://en.wikibooks.org/wiki/Python_Programming/Decorators val traceProg = List[Lexeme]( - Comment("define the Trace class that will be "), NL, - Comment("invoked using decorators"), NL, - 'class, WS, Id("Trace"), "(", Id("object"), ")", ":", NL, - WS, WS, WS, WS, 'def, WS, Id("__init__"), "(", Id("self"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, - WS, WS, WS, WS, NL, - WS, WS, WS, WS, WS, WS, 'def, WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'return, WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, - EOS + Comment("define the Trace class that will be "), + NL, + Comment("invoked using decorators"), + NL, + "class", + WS, + Id("Trace"), + "(", + Id("object"), + ")", + ":", + NL, + WS, + WS, + WS, + WS, + "def", + WS, + Id("__init__"), + "(", + Id("self"), + ")", + ":", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("self"), + ".", + Id("f"), + WS, + "=", + WS, + Id("f"), + NL, + WS, + WS, + WS, + WS, + NL, + WS, + WS, + WS, + WS, + WS, + WS, + "def", + WS, + Id("__call__"), + "(", + 
Id("self"), + WS, + ",", + "*", + Id("args"), + ",", + WS, + "**", + Id("kwargs"), + ")", + ":", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("print"), + "(", + Str("entering function "), + WS, + "+", + WS, + Id("self"), + ".", + Id("f"), + ".", + Id("__name__"), + ")", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("i"), + "=", + Num("0"), + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + "for", + WS, + Id("arg"), + WS, + "in", + WS, + Id("args"), + ":", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("print"), + "(", + Str("arg {0}: {1}"), + ".", + Id("format"), + "(", + Id("i"), + ",", + Id("arg"), + ")", + ")", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("i"), + "=", + Id("i"), + "+", + Num("1"), + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + "return", + WS, + Id("self"), + ".", + Id("f"), + "(", + "*", + Id("args"), + ",", + WS, + "**", + Id("kwargs"), + ")", + NL, + EOS ) - argument shouldParse List[Lexeme]("*", Id("kwargs")) - argument shouldParse List[Lexeme]("**", Id("kwargs")) - arglist shouldParse List[Lexeme]("**", Id("kwargs2")) - arglist shouldParse List[Lexeme](Id("kwargs"), ",", WS, Id("kwargs")) - arglist shouldParse List[Lexeme]("*", Id("kwargs"), ",", "*", Id("kwargs")) - arglist shouldParse List[Lexeme]("**", Id("kwargs"), ",", "**", Id("kwargs")) - arglist shouldParse List[Lexeme]("*", Id("kwargs"), ",", WS, "*", Id("kwargs")) - arglist shouldParse List[Lexeme]("**", Id("kwargs"), ",", WS, "**", Id("kwargs")) + argument `shouldParse` List[Lexeme]("*", Id("kwargs")) + argument `shouldParse` List[Lexeme]("**", Id("kwargs")) + arglist `shouldParse` List[Lexeme]("**", Id("kwargs2")) + arglist `shouldParse` List[Lexeme](Id("kwargs"), ",", WS, Id("kwargs")) + arglist `shouldParse` List[Lexeme]( + "*", + Id("kwargs"), + ",", + "*", + Id("kwargs") + ) + arglist `shouldParse` 
List[Lexeme]( + "**", + Id("kwargs"), + ",", + "**", + Id("kwargs") + ) + arglist `shouldParse` List[Lexeme]( + "*", + Id("kwargs"), + ",", + WS, + "*", + Id("kwargs") + ) + arglist `shouldParse` List[Lexeme]( + "**", + Id("kwargs"), + ",", + WS, + "**", + Id("kwargs") + ) - arglist shouldParse List[Lexeme]("(", Id("args"), ",", WS, Id("kwargs"), ")") - arglist shouldParse List[Lexeme]("(", "*", Id("args"), ",", WS, Id("kwargs"), ")") - arglist shouldParse List[Lexeme]("(", "*", Id("args"), ",", WS, "*", Id("kwargs"), ")") - test shouldParse List[Lexeme](Id("f"), "(", Id("args"), ",", WS, Id("kwargs"), ")") - test shouldParse List[Lexeme](Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")") + arglist `shouldParse` List[Lexeme]( + "(", + Id("args"), + ",", + WS, + Id("kwargs"), + ")" + ) + arglist `shouldParse` List[Lexeme]( + "(", + "*", + Id("args"), + ",", + WS, + Id("kwargs"), + ")" + ) + arglist `shouldParse` List[Lexeme]( + "(", + "*", + Id("args"), + ",", + WS, + "*", + Id("kwargs"), + ")" + ) + test `shouldParse` List[Lexeme]( + Id("f"), + "(", + Id("args"), + ",", + WS, + Id("kwargs"), + ")" + ) + test `shouldParse` List[Lexeme]( + Id("f"), + "(", + "*", + Id("args"), + ",", + WS, + "**", + Id("kwargs"), + ")" + ) - test shouldParse List[Lexeme](Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")") + test `shouldParse` List[Lexeme]( + Id("print"), + "(", + Str("entering function "), + WS, + "+", + WS, + Id("self"), + ".", + Id("f"), + ".", + Id("__name__"), + ")" + ) // TODO is already ambiguous - // (stmt parse List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size shouldBe 1 + // (stmt parse List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size `shouldBe` 1 - // preprocess(file_input) shouldParse traceProg + // preprocess(file_input) `shouldParse` traceProg // (stmt parse List[Lexeme]( - // 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - // WS, 
WS, Id("print"), NL)).size shouldBe 1 - - stmt shouldNotParse List[Lexeme]( - 'def, WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - WS, WS, WS, WS, Id("print"), NL, - // this line is indented too far - WS, WS, WS, WS, WS, WS, Id("print"), NL) + // "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + // WS, WS, Id("print"), NL)).size `shouldBe` 1 + + stmt `shouldNotParse` List[Lexeme]( + "def", + WS, + Id("__call__"), + "(", + Id("self"), + WS, + ",", + "*", + Id("args"), + ",", + WS, + "**", + Id("kwargs"), + ")", + ":", + NL, + WS, + WS, + "for", + WS, + Id("arg"), + WS, + "in", + WS, + Id("args"), + ":", + NL, + WS, + WS, + WS, + WS, + Id("print"), + NL, + // this line is indented too far + WS, + WS, + WS, + WS, + WS, + WS, + Id("print"), + NL + ) // with empty lines val traceProg2 = List[Lexeme]( - Comment("define the Trace class that will be "), NL, - Comment("invoked using decorators"), NL, - 'class, WS, Id("Trace"), "(", Id("object"), ")", ":", NL, - WS, WS, WS, WS, 'def, WS, Id("__init__"), "(", Id("self"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, - NL, - WS, WS, WS, WS, 'def, WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, - WS, WS, NL, - NL, - NL, - NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'return, 
WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, - EOS + Comment("define the Trace class that will be "), + NL, + Comment("invoked using decorators"), + NL, + "class", + WS, + Id("Trace"), + "(", + Id("object"), + ")", + ":", + NL, + WS, + WS, + WS, + WS, + "def", + WS, + Id("__init__"), + "(", + Id("self"), + ")", + ":", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("self"), + ".", + Id("f"), + WS, + "=", + WS, + Id("f"), + NL, + NL, + WS, + WS, + WS, + WS, + "def", + WS, + Id("__call__"), + "(", + Id("self"), + WS, + ",", + "*", + Id("args"), + ",", + WS, + "**", + Id("kwargs"), + ")", + ":", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("print"), + "(", + Str("entering function "), + WS, + "+", + WS, + Id("self"), + ".", + Id("f"), + ".", + Id("__name__"), + ")", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("i"), + "=", + Num("0"), + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + "for", + WS, + Id("arg"), + WS, + "in", + WS, + Id("args"), + ":", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("print"), + "(", + Str("arg {0}: {1}"), + ".", + Id("format"), + "(", + Id("i"), + ",", + Id("arg"), + ")", + ")", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + Id("i"), + "=", + Id("i"), + "+", + Num("1"), + NL, + WS, + WS, + NL, + NL, + NL, + NL, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + WS, + "return", + WS, + Id("self"), + ".", + Id("f"), + "(", + "*", + Id("args"), + ",", + WS, + "**", + Id("kwargs"), + ")", + NL, + EOS ) - preprocess(file_input) shouldParse traceProg2 - (preprocess(file_input) parse traceProg2).size shouldBe 1 + preprocess(file_input) `shouldParse` traceProg2 + (preprocess(file_input) parse traceProg2).size `shouldBe` 1 // suite should parse this: - val dummyin = List[Lexeme](NL, - WS, 'def, WS, Id("f"), "(", ")", ":", NL, - WS, WS, 'def, WS, Id("f"), "(", ")", ":", NL, - WS, WS, WS, Id("print"), NL, - WS, WS, WS, 
Id("print"), NL, - WS, WS, WS, Id("i"), NL) + val dummyin = List[Lexeme]( + NL, + WS, + "def", + WS, + Id("f"), + "(", + ")", + ":", + NL, + WS, + WS, + "def", + WS, + Id("f"), + "(", + ")", + ":", + NL, + WS, + WS, + WS, + Id("print"), + NL, + WS, + WS, + WS, + Id("print"), + NL, + WS, + WS, + WS, + Id("i"), + NL + ) - //println((suite parse dummyin) mkString "\n\n") + // println((suite parse dummyin) mkString "\n\n") - stmt shouldNotParse List[Lexeme](WS, WS, WS, Id("i"), NL) - atom shouldNotParse List[Lexeme](WS, WS, WS, Id("i")) + stmt `shouldNotParse` List[Lexeme](WS, WS, WS, Id("i"), NL) + atom `shouldNotParse` List[Lexeme](WS, WS, WS, Id("i")) // This is the skeleton of the python parsers (and it is unambiguous) - lazy val aStmt: NT[Any] = aSimpleStmt | 'def ~> aBlock + lazy val aStmt: NT[Any] = aSimpleStmt | "def" ~> aBlock lazy val aSimpleStmt = a <~ NL - lazy val aBlock = aSimpleStmt | NL ~> indented(some(many(emptyLine) ~> aStmt)) + lazy val aBlock = + aSimpleStmt | NL ~> indented(some(many(emptyLine) ~> aStmt)) lazy val aInput: NT[Any] = NL.* ~> many(aStmt <~ NL.*) <~ EOS val dummyin2 = List[Lexeme]( - 'def, NL, - WS, a, NL, - WS, a, NL, - WS, 'def, NL, - WS, WS, a, NL, - WS, WS, a, NL, - WS, WS, a, NL, - NL, - 'def, NL, - WS, a, NL, - WS, a, NL, - WS, 'def, NL, - WS, WS,WS,WS,WS,WS, a, NL, - WS, WS,WS,WS,WS,WS, a, NL, - WS, WS,WS,WS,WS,WS, a, NL, - EOS) - - aInput shouldParse List[Lexeme]( - 'def, NL, - WS, WS, a, NL, - WS, WS, a, NL, + "def", + NL, + WS, + a, + NL, + WS, + a, + NL, + WS, + "def", + NL, + WS, + WS, + a, + NL, + WS, + WS, + a, + NL, + WS, + WS, + a, + NL, + NL, + "def", + NL, + WS, + a, + NL, + WS, + a, + NL, + WS, + "def", + NL, + WS, + WS, + WS, + WS, + WS, + WS, + a, + NL, + WS, + WS, + WS, + WS, + WS, + WS, + a, + NL, + WS, + WS, + WS, + WS, + WS, + WS, + a, + NL, EOS ) - aInput shouldNotParse List[Lexeme]( - 'def, NL, - WS, WS, a, NL, - WS, a, NL, + aInput `shouldParse` List[Lexeme]( + "def", + NL, + WS, + WS, + a, + NL, + WS, + 
WS, + a, + NL, + EOS + ) + + aInput `shouldNotParse` List[Lexeme]( + "def", + NL, + WS, + WS, + a, + NL, + WS, + a, + NL, EOS ) - aInput shouldParse List[Lexeme]( - 'def, NL, - WS, WS, a, NL, + aInput `shouldParse` List[Lexeme]( + "def", + NL, + WS, + WS, + a, + NL, + NL, + WS, + WS, + a, NL, - WS, WS, a, NL, EOS ) - aInput shouldNotParse List[Lexeme]( - 'def, NL, - WS, WS, a, NL, + aInput `shouldNotParse` List[Lexeme]( + "def", + NL, + WS, + WS, + a, + NL, + NL, + WS, + a, NL, - WS, a, NL, EOS ) - indentBy(WS ~ WS)(collect) shouldParseWith ( - List[Lexeme](WS, WS, a, NL), - List[Lexeme](a, NL)) + indentBy(WS ~ WS)(collect) `shouldParseWith` (List[Lexeme](WS, WS, a, NL), + List[Lexeme](a, NL)) - indentBy(WS ~ WS)(collect) shouldParseWith ( - List[Lexeme](WS, WS, NL, NL, WS, WS, a, NL), - List[Lexeme](NL, NL, a, NL)) + indentBy(WS ~ WS)(collect) `shouldParseWith` (List[Lexeme]( + WS, + WS, + NL, + NL, + WS, + WS, + a, + NL + ), + List[Lexeme](NL, NL, a, NL)) - (aInput parse dummyin2).size shouldBe 1 + (aInput `parse` dummyin2).size `shouldBe` 1 } // Helpers to allow writing more concise tests. 
- private implicit class ParserTests[T, P <% Parser[T]](p: => P) { - def shouldParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should parse "$s" """, tags:_*) { - accepts(p, s) shouldBe true + private implicit class ParserTests[T](p: => Parser[T]) { + def shouldParse(s: Iterable[Elem], tags: Tag*) = + it(s"""should parse "$s" """, tags*) { + accepts(p, s) `shouldBe` true } - def shouldNotParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should not parse "$s" """, tags:_*) { - accepts(p, s) shouldBe false + def shouldNotParse(s: Iterable[Elem], tags: Tag*) = + it(s"""should not parse "$s" """, tags*) { + accepts(p, s) `shouldBe` false } // for unambiguous parses - def shouldParseWith[ES <% Iterable[Elem]](s: ES, result: T) = - it (s"""should parse "$s" with correct result""") { - parse(p, s) shouldBe List(result) + def shouldParseWith(s: Iterable[Elem], result: T) = + it(s"""should parse "$s" with correct result""") { + parse(p, s) `shouldBe` List(result) } } } From 3d5b9947bb61444609448d8a52f94693354584b5 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 18:01:04 +0100 Subject: [PATCH 28/95] Introduce given. Use backticks. 
Fix overloaded method --- .../src/test/scala/BasicCombinatorsTest.scala | 25 +- artifact/src/test/scala/CustomMatchers.scala | 15 +- .../test/scala/DerivativeParsersTests.scala | 272 ++++++++++-------- artifact/src/test/scala/LeftrecTests.scala | 39 ++- artifact/src/test/scala/NegationTests.scala | 3 +- .../src/test/scala/PythonParserTests.scala | 32 +-- 6 files changed, 223 insertions(+), 163 deletions(-) diff --git a/artifact/src/test/scala/BasicCombinatorsTest.scala b/artifact/src/test/scala/BasicCombinatorsTest.scala index 0426a0f..79c3a42 100644 --- a/artifact/src/test/scala/BasicCombinatorsTest.scala +++ b/artifact/src/test/scala/BasicCombinatorsTest.scala @@ -5,9 +5,10 @@ import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers -trait BasicCombinatorTests extends CustomMatchers { self: AnyFunSpec & Matchers => +trait BasicCombinatorTests extends CustomMatchers { + self: AnyFunSpec & Matchers => - import parsers._ + import parsers.{succeed as succ, *} describe("parser \"abc\"") { val p = 'a' ~ 'b' ~ 'c' @@ -26,7 +27,7 @@ trait BasicCombinatorTests extends CustomMatchers { self: AnyFunSpec & Matchers } describe("parser \"baaa | ba\"") { - val p: Parser[_] = ('b' ~ 'a' ~ 'a' ~ 'a') | 'b' ~ 'a' + val p = ('b' ~ 'a' ~ 'a' ~ 'a') | 'b' ~ 'a' p `shouldParse` "baaa" p `shouldParse` "ba" ((p ~ 'c' ~ 'o') | (p ~ 'c')) `shouldParse` "bac" @@ -34,29 +35,29 @@ trait BasicCombinatorTests extends CustomMatchers { self: AnyFunSpec & Matchers } describe("parser \"(baaa | ba) aa\"") { - val p: Parser[_] = ("baaa" | "ba") ~ "aa" + val p = ("baaa" | "ba") ~ "aa" p `shouldParse` "baaaaa" p `shouldParse` "baaa" } - describe("parser \"succeed(a) b\"") { - val p = succeed('a') ~ 'b' + describe("parser \"succ(a) b\"") { + val p = succ('a') ~ 'b' p `shouldParse` "b" p `shouldNotParse` "" } - describe("parser \"succeed(a) succeed(b)\"") { - val p = succeed('a') ~ succeed('b') + describe("parser \"succ(a) 
succ(b)\"") { + val p = succ('a') ~ succ('b') p `shouldParse` "" } - describe("parser \"succeed(a) | succeed(b)\"") { - val p = succeed('a') | succeed('b') + describe("parser \"succ(a) | succ(b)\"") { + val p = succ('a') | succ('b') p `shouldParse` "" } describe("parser \"(a a a | a a)+") { - val p: Parser[_] = 'a' ~ 'a' ~ 'a' | 'a' ~ 'a' + val p = 'a' ~ 'a' ~ 'a' | 'a' ~ 'a' describe("some(_)") { some(p) `shouldParse` "aaaa" } describe("_ ~ 'b'") { (p ~ 'b') `shouldParse` "aaab" } describe("some(_) ~ 'b'") { @@ -73,7 +74,7 @@ trait BasicCombinatorTests extends CustomMatchers { self: AnyFunSpec & Matchers describe("parser \"'a'+\"") { val p = some('a') - val largeInput = "a" * 100 + val largeInput = List.fill(100)('a').mkString p `shouldParse` "a" p `shouldParse` "aaaaaa" diff --git a/artifact/src/test/scala/CustomMatchers.scala b/artifact/src/test/scala/CustomMatchers.scala index 3c20470..8bd921a 100644 --- a/artifact/src/test/scala/CustomMatchers.scala +++ b/artifact/src/test/scala/CustomMatchers.scala @@ -15,17 +15,22 @@ trait CustomMatchers { self: AnyFunSpec & Matchers => type Parsers = RichParsers def _parsers: RichParsers lazy val parsers = _parsers - import parsers.{ Results, isSuccess, Parser, accepts, Elem } + import parsers.{Results, isSuccess, Parser, accepts, Elem} - implicit class ParserTests[T](p: => Parser[T]) { + extension [T](p: => Parser[T]) { def shouldParse(s: Iterable[Elem], tags: Tag*) = - it (s"""should parse "$s" """, tags*) { + it(s"""should parse "$s" """, tags*) { accepts(p, s) `shouldBe` true } def shouldNotParse(s: Iterable[Elem], tags: Tag*) = - it (s"""should not parse "$s" """, tags*) { + it(s"""should not parse "$s" """, tags*) { accepts(p, s) `shouldBe` false } + // for unambiguous parses + def shouldParseWith(s: Iterable[Elem], result: T) = + it(s"""should parse "$s" with correct result""") { + parse(p, s) `shouldBe` List(result) + } } class SuccessMatcher extends BeMatcher[Parser[?]] { @@ -37,5 +42,5 @@ trait CustomMatchers { 
self: AnyFunSpec & Matchers => ) } lazy val successful = new SuccessMatcher - lazy val failure = not (successful) + lazy val failure = not(successful) } diff --git a/artifact/src/test/scala/DerivativeParsersTests.scala b/artifact/src/test/scala/DerivativeParsersTests.scala index 04d6055..fa09b35 100644 --- a/artifact/src/test/scala/DerivativeParsersTests.scala +++ b/artifact/src/test/scala/DerivativeParsersTests.scala @@ -6,20 +6,30 @@ import language.implicitConversions import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers -class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatchers +class DerivativeParsersTests + extends AnyFunSpec + with Matchers + with CustomMatchers with BasicCombinatorTests with NegationTests with LeftrecTests - with Section3 with Section4 with Section7 { + with Section3 + with Section4 + with Section7 { def _parsers: DerivativeParsers.type = DerivativeParsers override lazy val parsers: DerivativeParsers.type = _parsers - import parsers._ - // it is necessary to rename some combinators since names are already // bound by scala test. - import parsers.{ fail => err, noneOf => nonOf, oneOf => one, not => neg, succeed => succ } + import parsers.{ + fail as err, + noneOf as nonOf, + oneOf as one, + not as neg, + succeed as succ, + * + } // This test illustrates how to write graph representations of the // parsers to a file. 
(To execute it replace `ignore` by `describe` and @@ -46,8 +56,7 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block - | some('x') ~ '\n' - ) + | some('x') ~ '\n') lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) @@ -64,8 +73,7 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block - | some('x') ~ '\n' - ) + | some('x') ~ '\n') lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) @@ -99,24 +107,27 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher type Layout = List[Int] // A parser computing the table layout - lazy val head: Parser[Layout] = some('+'~> manyCount('-')) <~ '+' <~ '\n' - - - def table[T](content: Parser[T]): Parser[List[List[T]]] = head >> { layout => - // After knowing the layout the row-separators are fixed - val rowSeparator = layout.map { n => ("-" * n) + "+" }.foldLeft("+")(_+_) ~ '\n' - val initCells = layout.map { _ => content } - - // one line of a cell, given a fixed width. - def cell: Int => Parser[T] => Parser[Parser[T]] = width => p => - (delegateN(width, p) <~ '|') ^^ { p => p << '\n' } - - // repeatAll is like repeat, but with a list of parsers as the state. - val row = repeatAll[T] { ps => - '|' ~> distr(zipWith(layout map cell, ps)) <~ '\n' - } - - some(row(initCells) <~ rowSeparator) + lazy val head: Parser[Layout] = some('+' ~> manyCount('-')) <~ '+' <~ '\n' + + def table[T](content: Parser[T]): Parser[List[List[T]]] = head >> { + layout => + // After knowing the layout the row-separators are fixed + val rowSeparator = + layout + .map { n => List.fill(n)('-').mkString + "+" } + .foldLeft("+")(_ + _) ~ '\n' + val initCells = layout.map { _ => content } + + // one line of a cell, given a fixed width. 
+ def cell: Int => Parser[T] => Parser[Parser[T]] = + width => p => (delegateN(width, p) <~ '|') ^^ { p => p << '\n' } + + // repeatAll is like repeat, but with a list of parsers as the state. + val row = repeatAll[T] { ps => + '|' ~> distr(zipWith(layout map cell, ps)) <~ '\n' + } + + some(row(initCells) <~ rowSeparator) } lazy val xs = many(some('x') ~ '\n') @@ -146,7 +157,6 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher ^+---+--------+------------+ ^""".stripMargin('^') - lazy val nestedTables: NT[Any] = table(xs | nestedTables) nestedTables `shouldParse` """+---+--------+------------+ @@ -169,19 +179,19 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher ^+---+--------+------------+ ^""".stripMargin('^') - // helper that should be in the stdlib - def zipWith[A,B](l1: List[A => B], l2: List[A]): List[B] = + def zipWith[A, B](l1: List[A => B], l2: List[A]): List[B] = (l1 zip l2).map { case (f, x) => f(x) } } describe("flatMap uses fixed point computation") { - lazy val fm: NT[Int] = succ(1) | fm.flatMap { n => if (n < 5) succeed(n + 1) else err } + lazy val fm: NT[Int] = succ(1) | fm.flatMap { n => + if (n < 5) succeed(n + 1) else err + } - fm.results.toSet `shouldBe` Set(1,2,3,4,5) + fm.results.toSet `shouldBe` Set(1, 2, 3, 4, 5) } - describe("Stream preprocessing") { lazy val ones: NT[Any] = succ(()) | '1' ~ ones lazy val zeros: NT[Any] = succ(()) | '0' ~ zeros @@ -201,15 +211,14 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher bin(oneszeros) `shouldParse` "aabb" bin(oneszeros) `shouldNotParse` "aabbb" - bin(ones) `shouldNotParse` ("b" * 50) + bin(ones) `shouldNotParse` ("b" `repeat` 50) } - describe("Results of ambiguous parses") { lazy val A: NT[Any] = (A <~ '+') ~ A | digit - def shouldParseWith(str: String)(expected: Set[Any]) { - (A <<< str).results.toSet should be (expected) + def shouldParseWith(str: String)(expected: Set[Any]) = { + (A <<< 
str).results.toSet should be(expected) } shouldParseWith("3") { Set('3') } @@ -217,7 +226,6 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher shouldParseWith("3+2+1") { Set(('3', ('2', '1')), (('3', '2'), '1')) } } - // Usecase // ------- // Standard example from data dependent parsing papers (like "One parser to rule them all", @@ -228,7 +236,9 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher // input stream. Benefit of our approach: Body parser never sees more than N characters. describe("IMAP") { - val number = consumed(charRange('1', '9') ~ many(digit) | '0').map { _.mkString.toInt } + val number = consumed(charRange('1', '9') ~ many(digit) | '0').map { + _.mkString.toInt + } val header: Parser[Int] = ('{' ~ space) ~> number <~ (space ~ '}') @@ -251,13 +261,11 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher IMAP(many('a')) `shouldNotParse` "{ 7 }" } - - // Usecase. interleaving parsers def interleave[T, S](p: Parser[T], q: Parser[S]): Parser[(T, S)] = - (done(p) & done(q)) | eat { c => - interleave(q, (p << c)) map { case (s, t) => (t, s) } - } + (done(p) & done(q)) | eat { c => + interleave(q, (p << c)) map { case (s, t) => (t, s) } + } describe("interleaving two parsers") { val p = 'a' ~ 'a' ~ 'a' @@ -278,12 +286,15 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher else { readLine(p << c) } } - done(p) | // do not indent and p can accept - (space ~ space) ~> readLine(p) | // indent by 2 and read one line, then recurse - (many(space) ~ newline) >> { _ => indent(p) } // skip lines with whitespace only, then recurse + done(p) | // do not indent and p can accept + (space ~ space) ~> readLine( + p + ) | // indent by 2 and read one line, then recurse + (many(space) ~ newline) >> { _ => + indent(p) + } // skip lines with whitespace only, then recurse } - describe("indenting parsers") { val xs = many(some('x') ~ '\n') @@ -331,7 
+342,7 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher } describe("Retroactively, allow spaces in arbitrary positions") { - import section_4_2.{ spaced, parens } + import section_4_2.{spaced, parens} val sp = spaced(parens) sp `shouldParse` "((()))" @@ -378,8 +389,6 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher |""".stripMargin('|') } - - describe("Unescape") { import section_4_2._ @@ -446,31 +455,39 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher describe("Greedy repitition") { - it ("should return only the result of the longest match") { - greedySome(some('a')) parse "" `shouldBe` List() - greedyMany(some('a')) parse "" `shouldBe` List(List()) - greedySome(some('a')) parse "a" `shouldBe` List(List(List('a'))) - greedySome(some('a')) parse "aaa" `shouldBe` List(List(List('a', 'a', 'a'))) + it("should return only the result of the longest match") { + greedySome(some('a')) `parse` "" `shouldBe` List() + greedyMany(some('a')) `parse` "" `shouldBe` List(List()) + greedySome(some('a')) `parse` "a" `shouldBe` List(List(List('a'))) + greedySome(some('a')) `parse` "aaa" `shouldBe` List( + List(List('a', 'a', 'a')) + ) } - it ("should also return longest match if other parser succed first") { + it("should also return longest match if other parser succed first") { lazy val p = some("ab") | some("a") | some("b") - greedySome(p) parse "ab" `shouldBe` List(List(List("ab"))) - greedySome(p) parse "abab" `shouldBe` List(List(List("ab", "ab"))) - greedySome(p) parse "abbab" `shouldBe` List(List(List("ab"), List("b"), List("ab"))) - greedySome(p) parse "abbaab" `shouldBe` List(List(List("ab"), List("b"), List("a", "a"), List("b"))) - greedySome(p) parse "aaaab" `shouldBe` List(List(List("a", "a", "a", "a"), List("b"))) + greedySome(p) `parse` "ab" `shouldBe` List(List(List("ab"))) + greedySome(p) `parse` "abab" `shouldBe` List(List(List("ab", "ab"))) + greedySome(p) 
`parse` "abbab" `shouldBe` List( + List(List("ab"), List("b"), List("ab")) + ) + greedySome(p) `parse` "abbaab" `shouldBe` List( + List(List("ab"), List("b"), List("a", "a"), List("b")) + ) + greedySome(p) `parse` "aaaab" `shouldBe` List( + List(List("a", "a", "a", "a"), List("b")) + ) lazy val q = "ab" | "a" | "b" - greedySome(q) parse "ab" `shouldBe` List(List("ab")) - greedySome(q) parse "abab" `shouldBe` List(List("ab", "ab")) - greedySome(q) parse "abbab" `shouldBe` List(List("ab", "b", "ab")) - greedySome(q) parse "abbaab" `shouldBe` List(List("ab", "b", "a", "ab")) - greedySome(q) parse "aaaab" `shouldBe` List(List("a", "a", "a", "ab")) + greedySome(q) `parse` "ab" `shouldBe` List(List("ab")) + greedySome(q) `parse` "abab" `shouldBe` List(List("ab", "ab")) + greedySome(q) `parse` "abbab" `shouldBe` List(List("ab", "b", "ab")) + greedySome(q) `parse` "abbaab" `shouldBe` List(List("ab", "b", "a", "ab")) + greedySome(q) `parse` "aaaab" `shouldBe` List(List("a", "a", "a", "ab")) } // This shows that our implementation is only locally greedy - println(greedySome("ab" | "a") ~ "b" parse "abab") + println(greedySome("ab" | "a") ~ "b" `parse` "abab") } describe("how to locally rewrite biased choice") { @@ -522,35 +539,44 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher // regions inside skip will not be treated by f. // `region` and `skip` should not have an intersection. - def transform[T](region: Parser[Any], skip: Parser[Any], f: Parser[Parser[T]] => Parser[Parser[T]]): Parser[T] => Parser[T] = { + def transform[T]( + region: Parser[Any], + skip: Parser[Any], + f: Parser[Parser[T]] => Parser[Parser[T]] + ): Parser[T] => Parser[T] = { // to prevent accessive re-parsing we introduce some caching on this // parser combinator here. 
val cache = mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => cache.getOrElseUpdate(p, { - - lazy val dp = delegate(p) - nonterminal ( - done(p) | biasedAlt( - ( skip &> dp - | region &> f(dp) - ) >> rec, - (any &> dp) >> rec)) - }) + def rec: Parser[T] => Parser[T] = p => + cache.getOrElseUpdate( + p, { + + lazy val dp = delegate(p) + nonterminal( + done(p) | biasedAlt( + (skip &> dp + | region &> f(dp)) >> rec, + (any &> dp) >> rec + ) + ) + } + ) rec } // parsers as input transformers def filterNewlines[T] = filter[T](_ != '\n') - def mask[T] = mapInPartial[T] { case '\n' => '↩' } - def toSpace[T] = mapInPartial[T] { case '\n' => ' ' } - def unmask[T] = mapInPartial[T] { case '↩' => '\n' } + def mask[T] = mapInPartial[T] { case '\n' => '↩' } + def toSpace[T] = mapInPartial[T] { case '\n' => ' ' } + def unmask[T] = mapInPartial[T] { case '↩' => '\n' } // some lexers val singleString: Parser[String] = consumed('"' ~ many(nonOf("\"\n")) ~ '"') - val comment: Parser[String] = consumed('#' ~ many(nonOf("\n")) ~ '\n') - val multilineString: Parser[String] = consumed("'''" ~ neg(always ~ prefix("'''")) ~ "'''") + val comment: Parser[String] = consumed('#' ~ many(nonOf("\n")) ~ '\n') + val multilineString: Parser[String] = + consumed("'''" ~ neg(always ~ prefix("'''")) ~ "'''") singleString `shouldParse` "\"hello world\"" singleString `shouldNotParse` "\"hello\nworld\"" @@ -561,15 +587,20 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher val collect = consumed(always) ^^ { x => x.mkString } // for now just filter newlines - val p = transform[String](multilineString, singleString | comment, filterNewlines)(collect) + val p = transform[String]( + multilineString, + singleString | comment, + filterNewlines + )(collect) it("should only filter newlines in multiline strings") { - (p parse "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n") should be (List("hello '''foo\"bar''' test\n foo \" bar'''foo 
\"\n")) + (p `parse` "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n") `should` be( + List("hello '''foo\"bar''' test\n foo \" bar'''foo \"\n") + ) } // here we can already observe performance problems (about 400ms): p `shouldParse` "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n some content that is not a program, but could be one \n. # ''' some comment \nIt contains newlines \n, \"and some Strings\". Even Multiline strings with '''newlines\n'''." - lazy val noText: Parser[Any] = comment | singleString | multilineString // While the `transform` combinator can be readily used to implement @@ -580,16 +611,19 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher val pairs = Map[Elem, Elem]('(' -> ')', '[' -> ']', '{' -> '}') val (opening, closing) = (pairs.keys.toList, pairs.values.toList) - - lazy val dyck: NT[Any] = one(opening) >> { paren => many(dyck) ~ pairs(paren) } - //'(' ~> many(dyck) <~ ')' + lazy val dyck: NT[Any] = one(opening) >> { paren => + many(dyck) ~ pairs(paren) + } + // '(' ~> many(dyck) <~ ')' // within comments and strings filter out everything val parens = // we need to intersect with the outermost parenthesis to prevent // parsing something like "aaa()aaa" (one(opening) >> { paren => always ~ pairs(paren) }) &> - transform[Any](noText | nonOf(opening) & nonOf(closing) , err, skip)(dyck) + transform[Any](noText | nonOf(opening) & nonOf(closing), err, skip)( + dyck + ) parens `shouldParse` "()" parens `shouldParse` "(())" @@ -618,21 +652,29 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher // reusing some definition of `indented` import section_3_5_improved._ - def joiningIndent[T]: Parser[T] => Parser[T] = p => - ilj(elj(mlj(indented(unmask(p))))) - + def joiningIndent[T]: Parser[T] => Parser[T] = + p => ilj(elj(mlj(indented(unmask(p))))) it("should mask perform line joining before checking indentation") { - (joiningIndent(collect) parse " foo'''a \n a'''\n bar\n ( \n 
)\n") should be ( + (joiningIndent( + collect + ) `parse` " foo'''a \n a'''\n bar\n ( \n )\n") `should` be( List("foo'''a \n a'''\nbar\n( \n )\n") ) - (joiningIndent(collect) parse " '''some \n multiline \n'''\n ( # comment (\n ) hello\n test and \\\n escaped\n") should be ( - List("'''some \n multiline \n'''\n( # comment (\n ) hello\ntest and \\\n escaped\n") + (joiningIndent( + collect + ) `parse` " '''some \n multiline \n'''\n ( # comment (\n ) hello\n test and \\\n escaped\n") `should` be( + List( + "'''some \n multiline \n'''\n( # comment (\n ) hello\ntest and \\\n escaped\n" + ) ) } - joiningIndent(collect) `shouldParse` " '''some \n multiline \n'''\n ( # comment (\n )\n" - joiningIndent(collect) `shouldNotParse` " '''some \n multiline \n''\n ( # comment (\n )\n" - + joiningIndent( + collect + ) `shouldParse` " '''some \n multiline \n'''\n ( # comment (\n )\n" + joiningIndent( + collect + ) `shouldNotParse` " '''some \n multiline \n''\n ( # comment (\n )\n" val WS: Parser[Any] = ' ' val spacesNoNl = some(WS) @@ -644,31 +686,37 @@ class DerivativeParsersTests extends AnyFunSpec with Matchers with CustomMatcher // Python Parser Skeleton - lazy val expr: NT[Any] = id | singleString | multilineString | "(" ~> spaces ~> expr <~ spaces <~ ")" | "[" ~> spaces ~> opt(someSep(expr, spaces ~ "," ~ spaces) ~ spaces) <~ "]" - lazy val stmt: NT[Any] = expr <~ lineEnd | "def" ~> spacesNoNl ~> id ~ ("():" ~> suite) + lazy val expr: NT[Any] = + id | singleString | multilineString | "(" ~> spaces ~> expr <~ spaces <~ ")" | "[" ~> spaces ~> opt( + someSep(expr, spaces ~ "," ~ spaces) ~ spaces + ) <~ "]" + lazy val stmt: NT[Any] = + expr <~ lineEnd | "def" ~> spacesNoNl ~> id ~ ("():" ~> suite) lazy val stmts: NT[Any] = someSep(stmt, spaces) lazy val suite: NT[Any] = lineEnd ~> joiningIndent(stmts) - stmt `shouldParse` "def foo():\n '''hello\n '''\n" + stmt `shouldParse` "def foo():\n '''hello\n '''\n" stmt `shouldNotParse` "def foo():\n \"'''hello\n '''\"\n" - stmt 
`shouldParse` "def foo():\n '''hello\n ''' # some comment \n" + stmt `shouldParse` "def foo():\n '''hello\n ''' # some comment \n" stmt `shouldNotParse` "def foo():\n # '''hello\n ''' some comment \n" - stmt `shouldParse` "def foo():\n []\n" - stmt `shouldParse` "def foo():\n [foo, bar]\n" - stmt `shouldParse` "def foo():\n [foo, \nbar]\n" + stmt `shouldParse` "def foo():\n []\n" + stmt `shouldParse` "def foo():\n [foo, bar]\n" + stmt `shouldParse` "def foo():\n [foo, \nbar]\n" stmt `shouldNotParse` "def foo():\n \"[foo, \nbar]\"\n" - stmt `shouldParse` "def foo():\n \"[foo, bar]\"\n" - stmt `shouldParse` "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" - stmt `shouldParse` "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" + stmt `shouldParse` "def foo():\n \"[foo, bar]\"\n" + stmt `shouldParse` "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" + stmt `shouldParse` "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" } - describe("Regression: `not` should preserve invariant `p.results.isEmpty != p.accepts`") { + describe( + "Regression: `not` should preserve invariant `p.results.isEmpty != p.accepts`" + ) { val p = neg("a" | "b") val p_a = p <<< "a" val p_b = p <<< "b" val p_c = p <<< "c" - it ("should preserve the invariant when performing optimization rewrites") { + it("should preserve the invariant when performing optimization rewrites") { p_a.accepts `shouldBe` false p_a.accepts `shouldBe` (!p_a.results.isEmpty) p_b.accepts `shouldBe` false diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index 05c4995..46d6ed7 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -7,7 +7,16 @@ import org.scalatest.matchers.should.Matchers trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => - import parsers._ + // it is necessary to rename some combinators since names are already + // bound by scala test. 
+ import parsers.{ + fail as err, + noneOf as nonOf, + oneOf as one, + not as neg, + succeed as succ, + * + } describe("lazyness of alt") { @@ -56,7 +65,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => describe("left recursion") { describe("A = A ~ a | empty") { - lazy val A: NT[?] = A ~ 'a' | succeed(42) + lazy val A: NT[?] = A ~ 'a' | succ(42) A `shouldParse` "" A `shouldParse` "a" @@ -64,7 +73,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => } describe("A = empty | A ~ a ") { - lazy val A: NT[?] = succeed(42) | A ~ 'a' + lazy val A: NT[?] = succ(42) | A ~ 'a' A `shouldParse` "" A `shouldParse` "a" @@ -76,7 +85,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => describe("one level indirect leftrecursion") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ A // A `shouldParse` "1" // A `shouldParse` "12" @@ -92,8 +101,8 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => describe("two levels indirect leftrecursion") { lazy val num: Parser[Any] = some(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ C ~ '+' ~ num - lazy val C: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ C ~ '+' ~ num + lazy val C: NT[Any] = succ(()) ~ A A `shouldParse` "1" A `shouldParse` "12" @@ -109,7 +118,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => // From "Packrat parsers can support left-recursion" describe("super linear parse time") { - lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succeed(()) + lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succ(()) lazy val ones: NT[Any] = ones ~ '1' | '1' start `shouldParse` "" @@ -134,7 +143,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => } describe("A = empty ~ A ~ b | empty") { - lazy val A: NT[Any] 
= succeed("done") ~ A ~ 'b' | succeed("done") + lazy val A: NT[Any] = succ("done") ~ A ~ 'b' | succeed("done") A `shouldParse` "" A `shouldParse` "b" A `shouldParse` "bb" @@ -143,7 +152,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => // should parse at most as many 'd's as it parses 'b's. describe("A = B ~ A ~ b | c\n B = d | empty") { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' - lazy val B: NT[?] = charParser('d') | succeed("done") + lazy val B: NT[?] = charParser('d') | succ("done") A `shouldParse` "c" A `shouldParse` "cb" @@ -179,8 +188,8 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => lazy val rr: NT[String] = "1" ~> rr | "1" lazy val ll: NT[String] = ll <~ "1" | "1" - ll `shouldParse` ("1" * 40) - rr `shouldParse` ("1" * 41) + ll `shouldParse` ("1" `repeat` 40) + rr `shouldParse` ("1" `repeat` 41) } // Grammar from Tillmann Rendel's GLL library @@ -190,10 +199,10 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => A `shouldParse` "a" A `shouldParse` "aa" A `shouldParse` "aaa" - A `shouldParse` ("a" * 100) + A `shouldParse` ("a" `repeat` 100) lazy val A2: Parser[Any] = some(some('a')) - A2 `shouldParse` ("a" * 1000) + A2 `shouldParse` ("a" `repeat` 1000) } describe("mixed mutual recursion") { @@ -216,7 +225,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => lazy val arrayEl: NT[Any] = (expression - | succeed("undefined")) + | succ("undefined")) expression `shouldParse` "" expression `shouldParse` "a" @@ -300,7 +309,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => // taken from Tillmann Rendel's GLL library describe("grammar with hidden left recursion") { lazy val S: NT[Any] = C ~ 'a' | 'd' - lazy val B: NT[Any] = succeed(()) | 'a' + lazy val B: NT[Any] = succ(()) | 'a' lazy val C: NT[Any] = charParser('b') | B ~ C ~ 'b' | 'b' ~ 'b' S `shouldNotParse` "" diff --git a/artifact/src/test/scala/NegationTests.scala 
b/artifact/src/test/scala/NegationTests.scala index 6b29912..4ab0399 100644 --- a/artifact/src/test/scala/NegationTests.scala +++ b/artifact/src/test/scala/NegationTests.scala @@ -7,8 +7,7 @@ import org.scalatest.matchers.should.Matchers trait NegationTests extends CustomMatchers { self: AnyFunSpec & Matchers => - import parsers._ - import parsers.{ not => neg } + import parsers.{ not => neg, * } describe("parser \"not(aa)\"") { val p = neg("aa") diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index 1409679..bedf734 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -6,9 +6,8 @@ import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers class PythonParserTests extends AnyFunSpec with Matchers { - - val parsers = PythonParsers - import parsers._ + import PythonParsers._ + import Lexeme._ describe("indented python parser (lexeme based)") { indented(many(many(Id("A")) <~ NL)) `shouldParseWith` (List( @@ -27,8 +26,8 @@ class PythonParserTests extends AnyFunSpec with Matchers { describe("implicit line joining") { - implicit def keyword(kw: Symbol): Lexeme = KW(kw.name) - implicit def punctuation(p: String): Lexeme = Punct(p) + given keyword: Conversion[Symbol, Lexeme] = kw => KW(kw.name) + given punctuation: Conversion[String, Lexeme] = p => Punct(p) val p = many(WS | id | "(" | ")" | "[" | "]") val a = Id("A") @@ -183,7 +182,7 @@ class PythonParserTests extends AnyFunSpec with Matchers { a, "=", "yield", - 'from, + "from", a, "=", a, @@ -236,9 +235,9 @@ class PythonParserTests extends AnyFunSpec with Matchers { EOS ) - (stripComments(collect) parse sampleProg) `shouldBe` List(sampleProg) - (explicitJoin(collect) parse sampleProg) `shouldBe` List(sampleProg) - (implicitJoin(collect) parse sampleProg) `shouldBe` List(sampleProg) + (stripComments(collect) `parse` sampleProg) `shouldBe` List(sampleProg) + 
(explicitJoin(collect) `parse` sampleProg) `shouldBe` List(sampleProg) + (implicitJoin(collect) `parse` sampleProg) `shouldBe` List(sampleProg) preprocess(file_input) `shouldParse` sampleProg @@ -274,7 +273,7 @@ class PythonParserTests extends AnyFunSpec with Matchers { EOS ) - (preprocess(collect) parse sampleProg2) `shouldBe` List(sampleProg) + (preprocess(collect) `parse` sampleProg2) `shouldBe` List(sampleProg) preprocess(file_input) `shouldParse` sampleProg2 @@ -572,11 +571,11 @@ class PythonParserTests extends AnyFunSpec with Matchers { ) // TODO is already ambiguous - // (stmt parse List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size `shouldBe` 1 + // (stmt `parse` List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size `shouldBe` 1 // preprocess(file_input) `shouldParse` traceProg - // (stmt parse List[Lexeme]( + // (stmt `parse` List[Lexeme]( // "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, // WS, WS, Id("print"), NL)).size `shouldBe` 1 @@ -808,9 +807,9 @@ class PythonParserTests extends AnyFunSpec with Matchers { ) preprocess(file_input) `shouldParse` traceProg2 - (preprocess(file_input) parse traceProg2).size `shouldBe` 1 + (preprocess(file_input) `parse` traceProg2).size `shouldBe` 1 - // suite should parse this: + // suite should `parse` this: val dummyin = List[Lexeme]( NL, WS, @@ -847,7 +846,7 @@ class PythonParserTests extends AnyFunSpec with Matchers { NL ) - // println((suite parse dummyin) mkString "\n\n") + // println((suite `parse` dummyin) mkString "\n\n") stmt `shouldNotParse` List[Lexeme](WS, WS, WS, Id("i"), NL) atom `shouldNotParse` List[Lexeme](WS, WS, WS, Id("i")) @@ -996,8 +995,7 @@ class PythonParserTests extends AnyFunSpec with Matchers { (aInput `parse` dummyin2).size `shouldBe` 1 } - // Helpers to allow writing more concise tests. 
- private implicit class ParserTests[T](p: => Parser[T]) { + extension [T](p: => Parser[T]) { def shouldParse(s: Iterable[Elem], tags: Tag*) = it(s"""should parse "$s" """, tags*) { accepts(p, s) `shouldBe` true From c05e450565e5685e215131af48484c58111d040f Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 19:09:07 +0100 Subject: [PATCH 29/95] Remove import statements in favor of cake pattern --- .../src/test/scala/BasicCombinatorsTest.scala | 16 ++-- artifact/src/test/scala/CustomMatchers.scala | 21 +--- .../test/scala/DerivativeParsersTests.scala | 96 +++++++++---------- artifact/src/test/scala/NegationTests.scala | 15 ++- .../src/test/scala/PythonParserTests.scala | 13 +-- 5 files changed, 73 insertions(+), 88 deletions(-) diff --git a/artifact/src/test/scala/BasicCombinatorsTest.scala b/artifact/src/test/scala/BasicCombinatorsTest.scala index 79c3a42..6bd8b83 100644 --- a/artifact/src/test/scala/BasicCombinatorsTest.scala +++ b/artifact/src/test/scala/BasicCombinatorsTest.scala @@ -6,9 +6,7 @@ import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers trait BasicCombinatorTests extends CustomMatchers { - self: AnyFunSpec & Matchers => - - import parsers.{succeed as succ, *} + self: AnyFunSpec & Matchers & RichParsers => describe("parser \"abc\"") { val p = 'a' ~ 'b' ~ 'c' @@ -40,19 +38,19 @@ trait BasicCombinatorTests extends CustomMatchers { p `shouldParse` "baaa" } - describe("parser \"succ(a) b\"") { - val p = succ('a') ~ 'b' + describe("parser \"succeed(a) b\"") { + val p = succeed('a') ~ 'b' p `shouldParse` "b" p `shouldNotParse` "" } - describe("parser \"succ(a) succ(b)\"") { - val p = succ('a') ~ succ('b') + describe("parser \"succeed(a) succeed(b)\"") { + val p = succeed('a') ~ succeed('b') p `shouldParse` "" } - describe("parser \"succ(a) | succ(b)\"") { - val p = succ('a') | succ('b') + describe("parser \"succeed(a) | succeed(b)\"") { + val p = succeed('a') | succeed('b') p `shouldParse` "" } diff --git 
a/artifact/src/test/scala/CustomMatchers.scala b/artifact/src/test/scala/CustomMatchers.scala index 8bd921a..375bdd5 100644 --- a/artifact/src/test/scala/CustomMatchers.scala +++ b/artifact/src/test/scala/CustomMatchers.scala @@ -4,19 +4,10 @@ package test import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers import org.scalatest.matchers.BeMatcher +import org.scalatest.matchers.MatchResult +import org.scalatest.Tag -trait CustomMatchers { self: AnyFunSpec & Matchers => - - // Due to initialization problems we have to use this pattern - // of def and lazy val. - // - // Override _parsers in concrete tests suites with the - // appropriate parser implementation. - type Parsers = RichParsers - def _parsers: RichParsers - lazy val parsers = _parsers - import parsers.{Results, isSuccess, Parser, accepts, Elem} - +trait CustomMatchers { self: AnyFunSpec & Matchers & RichParsers => extension [T](p: => Parser[T]) { def shouldParse(s: Iterable[Elem], tags: Tag*) = it(s"""should parse "$s" """, tags*) { @@ -33,14 +24,12 @@ trait CustomMatchers { self: AnyFunSpec & Matchers => } } - class SuccessMatcher extends BeMatcher[Parser[?]] { - def apply(left: Parser[?]) = + class SuccessMatcher[T] extends BeMatcher[Parser[T]] { + def apply(left: Parser[T]) = MatchResult( isSuccess(left), left.toString + " was not successful", left.toString + " was successful" ) } - lazy val successful = new SuccessMatcher - lazy val failure = not(successful) } diff --git a/artifact/src/test/scala/DerivativeParsersTests.scala b/artifact/src/test/scala/DerivativeParsersTests.scala index fa09b35..33e35a2 100644 --- a/artifact/src/test/scala/DerivativeParsersTests.scala +++ b/artifact/src/test/scala/DerivativeParsersTests.scala @@ -8,6 +8,8 @@ import org.scalatest.matchers.should.Matchers class DerivativeParsersTests extends AnyFunSpec + with RichParsers + with DerivativeParsers with Matchers with CustomMatchers with BasicCombinatorTests @@ -17,27 +19,13 @@ class 
DerivativeParsersTests with Section4 with Section7 { - def _parsers: DerivativeParsers.type = DerivativeParsers - override lazy val parsers: DerivativeParsers.type = _parsers - - // it is necessary to rename some combinators since names are already - // bound by scala test. - import parsers.{ - fail as err, - noneOf as nonOf, - oneOf as one, - not as neg, - succeed as succ, - * - } - // This test illustrates how to write graph representations of the // parsers to a file. (To execute it replace `ignore` by `describe` and // run the tests. describe("printing graph representations of parsers") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succ(()) ~ A + lazy val B: NT[Any] = succeed(()) ~ A A.printToFile("test.png") } @@ -185,16 +173,16 @@ class DerivativeParsersTests } describe("flatMap uses fixed point computation") { - lazy val fm: NT[Int] = succ(1) | fm.flatMap { n => - if (n < 5) succeed(n + 1) else err + lazy val fm: NT[Int] = succeed(1) | fm.flatMap { n => + if (n < 5) succeed(n + 1) else fail } fm.results.toSet `shouldBe` Set(1, 2, 3, 4, 5) } describe("Stream preprocessing") { - lazy val ones: NT[Any] = succ(()) | '1' ~ ones - lazy val zeros: NT[Any] = succ(()) | '0' ~ zeros + lazy val ones: NT[Any] = succeed(()) | '1' ~ ones + lazy val zeros: NT[Any] = succeed(()) | '0' ~ zeros lazy val oneszeros: Parser[Any] = '1' ~ '1' ~ '0' ~ '0' @@ -456,38 +444,38 @@ class DerivativeParsersTests describe("Greedy repitition") { it("should return only the result of the longest match") { - greedySome(some('a')) `parse` "" `shouldBe` List() - greedyMany(some('a')) `parse` "" `shouldBe` List(List()) - greedySome(some('a')) `parse` "a" `shouldBe` List(List(List('a'))) - greedySome(some('a')) `parse` "aaa" `shouldBe` List( + parse(greedySome(some('a')), "") `shouldBe` List() + parse(greedyMany(some('a')), "") `shouldBe` List(List()) + parse(greedySome(some('a')), "a") `shouldBe` List(List(List('a'))) + 
parse(greedySome(some('a')), "aaa") `shouldBe` List( List(List('a', 'a', 'a')) ) } it("should also return longest match if other parser succed first") { lazy val p = some("ab") | some("a") | some("b") - greedySome(p) `parse` "ab" `shouldBe` List(List(List("ab"))) - greedySome(p) `parse` "abab" `shouldBe` List(List(List("ab", "ab"))) - greedySome(p) `parse` "abbab" `shouldBe` List( + parse(greedySome(p), "ab") `shouldBe` List(List(List("ab"))) + parse(greedySome(p), "abab") `shouldBe` List(List(List("ab", "ab"))) + parse(greedySome(p), "abbab") `shouldBe` List( List(List("ab"), List("b"), List("ab")) ) - greedySome(p) `parse` "abbaab" `shouldBe` List( + parse(greedySome(p), "abbaab") `shouldBe` List( List(List("ab"), List("b"), List("a", "a"), List("b")) ) - greedySome(p) `parse` "aaaab" `shouldBe` List( + parse(greedySome(p), "aaaab") `shouldBe` List( List(List("a", "a", "a", "a"), List("b")) ) lazy val q = "ab" | "a" | "b" - greedySome(q) `parse` "ab" `shouldBe` List(List("ab")) - greedySome(q) `parse` "abab" `shouldBe` List(List("ab", "ab")) - greedySome(q) `parse` "abbab" `shouldBe` List(List("ab", "b", "ab")) - greedySome(q) `parse` "abbaab" `shouldBe` List(List("ab", "b", "a", "ab")) - greedySome(q) `parse` "aaaab" `shouldBe` List(List("a", "a", "a", "ab")) + parse(greedySome(q), "ab") `shouldBe` List(List("ab")) + parse(greedySome(q), "abab") `shouldBe` List(List("ab", "ab")) + parse(greedySome(q), "abbab") `shouldBe` List(List("ab", "b", "ab")) + parse(greedySome(q), "abbaab") `shouldBe` List(List("ab", "b", "a", "ab")) + parse(greedySome(q), "aaaab") `shouldBe` List(List("a", "a", "a", "ab")) } // This shows that our implementation is only locally greedy - println(greedySome("ab" | "a") ~ "b" `parse` "abab") + println(parse(greedySome("ab" | "a") ~ "b", "abab")) } describe("how to locally rewrite biased choice") { @@ -511,7 +499,7 @@ class DerivativeParsersTests // If the right-hand-side `r` is locally known the parser can be // rewritten to: - val rewrite 
= p ~ r | (neg(p ~ always) &> (q ~ r)) + val rewrite = p ~ r | (not(p ~ always) &> (q ~ r)) rewrite `shouldNotParse` "foo" rewrite `shouldParse` "foooo" rewrite `shouldParse` "fb" @@ -573,10 +561,11 @@ class DerivativeParsersTests def unmask[T] = mapInPartial[T] { case '↩' => '\n' } // some lexers - val singleString: Parser[String] = consumed('"' ~ many(nonOf("\"\n")) ~ '"') - val comment: Parser[String] = consumed('#' ~ many(nonOf("\n")) ~ '\n') + val singleString: Parser[String] = + consumed('"' ~ many(noneOf("\"\n")) ~ '"') + val comment: Parser[String] = consumed('#' ~ many(noneOf("\n")) ~ '\n') val multilineString: Parser[String] = - consumed("'''" ~ neg(always ~ prefix("'''")) ~ "'''") + consumed("'''" ~ not(always ~ prefix("'''")) ~ "'''") singleString `shouldParse` "\"hello world\"" singleString `shouldNotParse` "\"hello\nworld\"" @@ -594,7 +583,10 @@ class DerivativeParsersTests )(collect) it("should only filter newlines in multiline strings") { - (p `parse` "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n") `should` be( + parse( + p, + "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n" + ) `should` be( List("hello '''foo\"bar''' test\n foo \" bar'''foo \"\n") ) } @@ -611,7 +603,7 @@ class DerivativeParsersTests val pairs = Map[Elem, Elem]('(' -> ')', '[' -> ']', '{' -> '}') val (opening, closing) = (pairs.keys.toList, pairs.values.toList) - lazy val dyck: NT[Any] = one(opening) >> { paren => + lazy val dyck: NT[Any] = oneOf(opening) >> { paren => many(dyck) ~ pairs(paren) } // '(' ~> many(dyck) <~ ')' @@ -620,8 +612,8 @@ class DerivativeParsersTests val parens = // we need to intersect with the outermost parenthesis to prevent // parsing something like "aaa()aaa" - (one(opening) >> { paren => always ~ pairs(paren) }) &> - transform[Any](noText | nonOf(opening) & nonOf(closing), err, skip)( + (oneOf(opening) >> { paren => always ~ pairs(paren) }) &> + transform[Any](noText | noneOf(opening) & noneOf(closing), fail, skip)( dyck ) @@ -656,14 +648,20 
@@ class DerivativeParsersTests p => ilj(elj(mlj(indented(unmask(p))))) it("should mask perform line joining before checking indentation") { - (joiningIndent( - collect - ) `parse` " foo'''a \n a'''\n bar\n ( \n )\n") `should` be( + parse( + joiningIndent( + collect + ), + " foo'''a \n a'''\n bar\n ( \n )\n" + ) `should` be( List("foo'''a \n a'''\nbar\n( \n )\n") ) - (joiningIndent( - collect - ) `parse` " '''some \n multiline \n'''\n ( # comment (\n ) hello\n test and \\\n escaped\n") `should` be( + parse( + joiningIndent( + collect + ), + " '''some \n multiline \n'''\n ( # comment (\n ) hello\n test and \\\n escaped\n" + ) `should` be( List( "'''some \n multiline \n'''\n( # comment (\n ) hello\ntest and \\\n escaped\n" ) @@ -711,7 +709,7 @@ class DerivativeParsersTests describe( "Regression: `not` should preserve invariant `p.results.isEmpty != p.accepts`" ) { - val p = neg("a" | "b") + val p = not("a" | "b") val p_a = p <<< "a" val p_b = p <<< "b" val p_c = p <<< "c" diff --git a/artifact/src/test/scala/NegationTests.scala b/artifact/src/test/scala/NegationTests.scala index 4ab0399..848f0be 100644 --- a/artifact/src/test/scala/NegationTests.scala +++ b/artifact/src/test/scala/NegationTests.scala @@ -5,12 +5,11 @@ import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers -trait NegationTests extends CustomMatchers { self: AnyFunSpec & Matchers => - - import parsers.{ not => neg, * } +trait NegationTests extends CustomMatchers { + self: AnyFunSpec & Matchers & RichParsers => describe("parser \"not(aa)\"") { - val p = neg("aa") + val p = not("aa") p `shouldParse` "a" p `shouldNotParse` "aa" p `shouldParse` "aac" @@ -18,7 +17,7 @@ trait NegationTests extends CustomMatchers { self: AnyFunSpec & Matchers => } describe("parser \"not(aa) & lower*\"") { - val p = neg("aa") & many(lower) + val p = not("aa") & many(lower) p `shouldParse` "a" p `shouldParse` "bc" p `shouldParse` "ab" @@ -29,7 +28,7 @@ 
trait NegationTests extends CustomMatchers { self: AnyFunSpec & Matchers => } describe("parser \"not(aa ~ .*) & lower*\"") { - val p = neg("aa" ~ many(any)) & many(lower) + val p = not("aa" ~ many(any)) & many(lower) p `shouldParse` "a" p `shouldParse` "bc" p `shouldParse` "ab" @@ -40,7 +39,7 @@ trait NegationTests extends CustomMatchers { self: AnyFunSpec & Matchers => } describe("parser \"not(.* ~ abc ~ .*)\"") { - val p = neg(many(any) ~ "abc" ~ many(any)) + val p = not(many(any) ~ "abc" ~ many(any)) p `shouldParse` "" p `shouldParse` "xx" p `shouldParse` "xxabxx" @@ -50,7 +49,7 @@ trait NegationTests extends CustomMatchers { self: AnyFunSpec & Matchers => } describe("parser \"not((baaa | ba) ~ aa ~ .*) & lower*\"") { - val p = neg(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) + val p = not(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) p `shouldNotParse` "baaa" p `shouldNotParse` "baaaxx" p `shouldParse` "" diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index bedf734..f43c421 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -4,6 +4,7 @@ package test import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers +import org.scalatest.Tag class PythonParserTests extends AnyFunSpec with Matchers { import PythonParsers._ @@ -235,9 +236,9 @@ class PythonParserTests extends AnyFunSpec with Matchers { EOS ) - (stripComments(collect) `parse` sampleProg) `shouldBe` List(sampleProg) - (explicitJoin(collect) `parse` sampleProg) `shouldBe` List(sampleProg) - (implicitJoin(collect) `parse` sampleProg) `shouldBe` List(sampleProg) + parse(stripComments(collect), sampleProg) `shouldBe` List(sampleProg) + parse(explicitJoin(collect), sampleProg) `shouldBe` List(sampleProg) + parse(implicitJoin(collect), sampleProg) `shouldBe` List(sampleProg) preprocess(file_input) 
`shouldParse` sampleProg @@ -273,7 +274,7 @@ class PythonParserTests extends AnyFunSpec with Matchers { EOS ) - (preprocess(collect) `parse` sampleProg2) `shouldBe` List(sampleProg) + parse(preprocess(collect), sampleProg2) `shouldBe` List(sampleProg) preprocess(file_input) `shouldParse` sampleProg2 @@ -807,7 +808,7 @@ class PythonParserTests extends AnyFunSpec with Matchers { ) preprocess(file_input) `shouldParse` traceProg2 - (preprocess(file_input) `parse` traceProg2).size `shouldBe` 1 + parse(preprocess(file_input), traceProg2).size `shouldBe` 1 // suite should `parse` this: val dummyin = List[Lexeme]( @@ -992,7 +993,7 @@ class PythonParserTests extends AnyFunSpec with Matchers { ), List[Lexeme](NL, NL, a, NL)) - (aInput `parse` dummyin2).size `shouldBe` 1 + parse(aInput, dummyin2).size `shouldBe` 1 } extension [T](p: => Parser[T]) { From 7416699dd7316b79c3741b6c5c50e15c4e5058ea Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 19:18:22 +0100 Subject: [PATCH 30/95] All but charParser resolved --- artifact/src/test/scala/LeftrecTests.scala | 46 +++++++++------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index 46d6ed7..76b0c22 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -5,44 +5,34 @@ import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers -trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => - - // it is necessary to rename some combinators since names are already - // bound by scala test. 
- import parsers.{ - fail as err, - noneOf as nonOf, - oneOf as one, - not as neg, - succeed as succ, - * - } +trait LeftrecTests extends CustomMatchers { + self: AnyFunSpec & Matchers & RichParsers => describe("lazyness of alt") { describe("p = p | .") { - lazy val p = p | any + lazy val p: NT[Any] = p | any p `shouldParse` "a" } describe("p = p ~ . | .") { - lazy val p: NT[?] = p ~ any | any + lazy val p: NT[Any] = p ~ any | any p `shouldParse` "a" } describe("p = . | p ~ .") { - lazy val p: NT[?] = any | p ~ any + lazy val p: NT[Any] = any | p ~ any p `shouldParse` "a" } describe("p = (. | .) >> { (. | p) ^^ id }") { - lazy val p: NT[Any] = (p | any) flatMap { _ => (any | p) map identity } + lazy val p: NT[Any] = (p | any) >> { _ => (any | p) ^^ identity } p `shouldParse` "aa" p `shouldParse` "aaaaa" } describe("p = (. | p) >> { a }") { - lazy val p: NT[Any] = (any | p) flatMap { _ => 'a' } + lazy val p: NT[Any] = (any | p) >> { _ => 'a' } p `shouldParse` "aa" p `shouldParse` "aaa" p `shouldParse` "aaaaaa" @@ -65,7 +55,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => describe("left recursion") { describe("A = A ~ a | empty") { - lazy val A: NT[?] = A ~ 'a' | succ(42) + lazy val A: NT[Any] = A ~ 'a' | succeed(42) A `shouldParse` "" A `shouldParse` "a" @@ -73,7 +63,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => } describe("A = empty | A ~ a ") { - lazy val A: NT[?] 
= succ(42) | A ~ 'a' + lazy val A: NT[Any] = succeed(42) | A ~ 'a' A `shouldParse` "" A `shouldParse` "a" @@ -85,7 +75,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => describe("one level indirect leftrecursion") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succ(()) ~ A + lazy val B: NT[Any] = succeed(()) ~ A // A `shouldParse` "1" // A `shouldParse` "12" @@ -101,8 +91,8 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => describe("two levels indirect leftrecursion") { lazy val num: Parser[Any] = some(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succ(()) ~ C ~ '+' ~ num - lazy val C: NT[Any] = succ(()) ~ A + lazy val B: NT[Any] = succeed(()) ~ C ~ '+' ~ num + lazy val C: NT[Any] = succeed(()) ~ A A `shouldParse` "1" A `shouldParse` "12" @@ -118,7 +108,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => // From "Packrat parsers can support left-recursion" describe("super linear parse time") { - lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succ(()) + lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succeed(()) lazy val ones: NT[Any] = ones ~ '1' | '1' start `shouldParse` "" @@ -133,7 +123,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => } describe("A = A ~ b | c") { - lazy val A: NT[?] = A ~ 'b' | 'c' + lazy val A: NT[Any] = A ~ 'b' | 'c' A `shouldParse` "c" A `shouldParse` "cb" @@ -143,7 +133,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => } describe("A = empty ~ A ~ b | empty") { - lazy val A: NT[Any] = succ("done") ~ A ~ 'b' | succeed("done") + lazy val A: NT[Any] = succeed("done") ~ A ~ 'b' | succeed("done") A `shouldParse` "" A `shouldParse` "b" A `shouldParse` "bb" @@ -152,7 +142,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => // should parse at most as many 'd's as it parses 'b's. 
describe("A = B ~ A ~ b | c\n B = d | empty") { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' - lazy val B: NT[?] = charParser('d') | succ("done") + lazy val B: NT[?] = charParser('d') | succeed("done") A `shouldParse` "c" A `shouldParse` "cb" @@ -225,7 +215,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => lazy val arrayEl: NT[Any] = (expression - | succ("undefined")) + | succeed("undefined")) expression `shouldParse` "" expression `shouldParse` "a" @@ -309,7 +299,7 @@ trait LeftrecTests extends CustomMatchers { self: AnyFunSpec & Matchers => // taken from Tillmann Rendel's GLL library describe("grammar with hidden left recursion") { lazy val S: NT[Any] = C ~ 'a' | 'd' - lazy val B: NT[Any] = succ(()) | 'a' + lazy val B: NT[Any] = succeed(()) | 'a' lazy val C: NT[Any] = charParser('b') | B ~ C ~ 'b' | 'b' ~ 'b' S `shouldNotParse` "" From dcc3d13c6cb3e90d5bbb24afd544e803cfce0ee0 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 20:22:55 +0100 Subject: [PATCH 31/95] Resolve charParser conversion --- artifact/src/test/scala/LeftrecTests.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index 76b0c22..9693ccc 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -142,7 +142,7 @@ trait LeftrecTests extends CustomMatchers { // should parse at most as many 'd's as it parses 'b's. describe("A = B ~ A ~ b | c\n B = d | empty") { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' - lazy val B: NT[?] = charParser('d') | succeed("done") + lazy val B: NT[?] 
= 'd' | succeed("done") A `shouldParse` "c" A `shouldParse` "cb" @@ -253,7 +253,8 @@ trait LeftrecTests extends CustomMatchers { describe("balanced smileys") { lazy val az: NT[Any] = acceptIf(c => c >= 'a' && c <= 'z') lazy val S: NT[Any] = many(az | ' ' | ':' | ':' ~ P | '(' ~ S ~ ')') - lazy val P: NT[Any] = charParser('(') | ')' + val tmp: Parser[Char] = '(' + lazy val P: NT[Any] = tmp | ')' S `shouldParse` "" S `shouldNotParse` ":((" @@ -300,7 +301,7 @@ trait LeftrecTests extends CustomMatchers { describe("grammar with hidden left recursion") { lazy val S: NT[Any] = C ~ 'a' | 'd' lazy val B: NT[Any] = succeed(()) | 'a' - lazy val C: NT[Any] = charParser('b') | B ~ C ~ 'b' | 'b' ~ 'b' + lazy val C: NT[Any] = 'b' | B ~ C ~ 'b' | 'b' ~ 'b' S `shouldNotParse` "" S `shouldNotParse` "aba" From 27765162648541473fce74bde7dca33e6d6d16ff Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 20:32:34 +0100 Subject: [PATCH 32/95] Use enum --- artifact/src/test/scala/LeftrecTests.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index 9693ccc..99b3084 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -227,10 +227,12 @@ trait LeftrecTests extends CustomMatchers { } describe("terms") { + enum Term { + case BinOp(lhs: Term, op: String, rhs: Term) + case Num(n: Int) + } - trait Term - case class BinOp(lhs: Term, op: String, rhs: Term) extends Term - case class Num(n: Int) extends Term + import Term._ lazy val term: NT[Term] = (term ~ "+" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } From 3c97ee59989547867f1ab9aecee72366fb645483 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 6 Nov 2025 20:43:43 +0100 Subject: [PATCH 33/95] fix comment --- artifact/src/main/scala/examples/PythonParsers.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index fed9024..a098edc 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -2,9 +2,10 @@ package fcd import scala.language.implicitConversions -/** Additional Case Study: Python Parser \==================================== +/** Additional Case Study: Python Parser + * * This file contains an additional python parser implementation to support the - * claims in our paper: + * claims in our paper. * * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives * Submitted to OOPSLA 2016. From 6f3c88e18a6d585250d24a4991f531980ccf560b Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Fri, 7 Nov 2025 11:47:58 +0100 Subject: [PATCH 34/95] formatting --- .../src/main/scala/examples/paper/Paper.scala | 1 + .../main/scala/examples/paper/Section7.scala | 81 +++++++++---------- 2 files changed, 38 insertions(+), 44 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Paper.scala b/artifact/src/main/scala/examples/paper/Paper.scala index c8cca60..e4bacee 100644 --- a/artifact/src/main/scala/examples/paper/Paper.scala +++ b/artifact/src/main/scala/examples/paper/Paper.scala @@ -5,6 +5,7 @@ package fcd * * > import paper._ */ + object paper extends RichParsers with DerivativeParsers diff --git a/artifact/src/main/scala/examples/paper/Section7.scala b/artifact/src/main/scala/examples/paper/Section7.scala index 21c008e..25ac506 100644 --- a/artifact/src/main/scala/examples/paper/Section7.scala +++ b/artifact/src/main/scala/examples/paper/Section7.scala @@ -2,50 +2,45 @@ package fcd import scala.language.implicitConversions -/** - * Section 7 - Implementation - * ========================== - * This file contains all code examples from section 7 of our paper: - * - * Brachthäuser, Rendel, Ostermann. - * Parsing with First-Class Derivatives - * To appear in OOPSLA 2016. 
- * - * Section 7 introduces the implementation of our parser combinator library. In - * addition to repeating the few examples from the paper in this file we explain - * the relation between the implementation in the paper and in the artifact. - * - * As described in the paper, the core of the implementation builds on - * derivative based parsing as described by Matt Might et al, translated to an - * object oriented setting. - */ +/** Section 7 – Implementation This file contains all code examples from section + * 7 of our paper. + * + * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives To + * appear in OOPSLA 2016. + * + * Section 7 introduces the implementation of our parser combinator library. In + * addition to repeating the few examples from the paper in this file we + * explain the relation between the implementation in the paper and in the + * artifact. + * + * As described in the paper, the core of the implementation builds on + * derivative based parsing as described by Matt Might et al, translated to an + * object oriented setting. + */ trait Section7 { self: RichParsers => - /** - * Section 7.1, introduces the concrete type of a parser as - * - * trait P[+R] { - * def results: Res[R] - * def derive: Elem => P[R] - * } - * - * The corresponding concrete type of this artifact can be found in - * `DerivativeParsers.scala` (corresponding to Figure 10) which contains the - * implementation of the interface defined in `Parsers.scala` - * (corresponding to Figure 1a.). - * - * Please note the following important differences: - * - `derive` is called `consume` in this artifact. - * - the trait `Parser[+R]` has default implementations for the various - * combinators. This corresponds to the later developments in Section 7.4 - * "Compaction by Dynamic Dispatch". 
- * - Instead of anonymous subclasses (such as `def fail[R] = new P[R] {...}`) - * the various combinators are implemented by named classes / objects - * (that is, `object Fail extends P[Nothing] { ... }`). - * - We added a special primitive parser `always` which is bisimilar to - * `many(any)` and thus dual (in some sense) to `fail`. Having it as a - * primitive gives rise to some optimizations. - */ + + /** Section 7.1, introduces the concrete type of a parser as + * + * trait P[+R] { def results: Res[R] def derive: Elem => P[R] } + * + * The corresponding concrete type of this artifact can be found in + * `DerivativeParsers.scala` (corresponding to Figure 10) which contains the + * implementation of the interface defined in `Parsers.scala` (corresponding + * to Figure 1a.). + * + * Please note the following important differences: + * - `derive` is called `consume` in this artifact. + * - the trait `Parser[+R]` has default implementations for the various + * combinators. This corresponds to the later developments in Section 7.4 + * "Compaction by Dynamic Dispatch". + * - Instead of anonymous subclasses (such as `def fail[R] = new P[R] + * {...}`) the various combinators are implemented by named classes / + * objects (that is, `object Fail extends P[Nothing] { ... }`). + * - We added a special primitive parser `always` which is bisimilar to + * `many(any)` and thus dual (in some sense) to `fail`. Having it as a + * primitive gives rise to some optimizations. + */ object section_7 { // ### Example. Derivative of some(a) @@ -55,7 +50,6 @@ trait Section7 { self: RichParsers => // > (as << 'a').printToFile("as_derive_a.png") val as = some('a') - // ### Example. Derivative with compaction // // You can observe the result of derivation an compaction by comparing @@ -81,5 +75,4 @@ trait Section7 { self: RichParsers => // grammars. Thus, it might be instructive to also inspect the tests in // `test/scala/LeftrecTests.scala`. 
} - } From 95ee30337da90dccd8bae405048fc74316e8f9e3 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Fri, 7 Nov 2025 12:49:08 +0100 Subject: [PATCH 35/95] Remove infix def --- artifact/src/main/scala/library/Parsers.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/artifact/src/main/scala/library/Parsers.scala b/artifact/src/main/scala/library/Parsers.scala index 05ac850..13dc42f 100644 --- a/artifact/src/main/scala/library/Parsers.scala +++ b/artifact/src/main/scala/library/Parsers.scala @@ -21,8 +21,8 @@ trait Parsers { def succeed[R](res: R): Parser[R] def acceptIf(cond: Elem => Boolean): Parser[Elem] - infix def map[R, U](p: Parser[R], f: R => U): Parser[U] - infix def flatMap[R, U](p: Parser[R], f: R => Parser[U]): Parser[U] + def map[R, U](p: Parser[R], f: R => U): Parser[U] + def flatMap[R, U](p: Parser[R], f: R => Parser[U]): Parser[U] def alt[R, U >: R](p: Parser[R], q: Parser[U]): Parser[U] def and[R, U](p: Parser[R], q: Parser[U]): Parser[(R, U)] def seq[R, U](p: Parser[R], q: Parser[U]): Parser[(R, U)] From 409b07fbfc6602958dfdab42573897b9959ef036 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Fri, 7 Nov 2025 12:51:59 +0100 Subject: [PATCH 36/95] Use trait parameter to avoid conflicting definitions --- .../src/test/scala/BasicCombinatorsTest.scala | 13 +++--- artifact/src/test/scala/CustomMatchers.scala | 9 ++-- .../test/scala/DerivativeParsersTests.scala | 46 ++++++++++--------- artifact/src/test/scala/LeftrecTests.scala | 27 +++++------ artifact/src/test/scala/NegationTests.scala | 17 +++---- 5 files changed, 60 insertions(+), 52 deletions(-) diff --git a/artifact/src/test/scala/BasicCombinatorsTest.scala b/artifact/src/test/scala/BasicCombinatorsTest.scala index 6bd8b83..8b3458c 100644 --- a/artifact/src/test/scala/BasicCombinatorsTest.scala +++ b/artifact/src/test/scala/BasicCombinatorsTest.scala @@ -3,10 +3,11 @@ package test import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec -import 
org.scalatest.matchers.should.Matchers -trait BasicCombinatorTests extends CustomMatchers { - self: AnyFunSpec & Matchers & RichParsers => +trait BasicCombinatorTests[P <: RichParsers] { + self: AnyFunSpec & CustomMatchers[P] => + + import parsers.{ succeed as succ, *} describe("parser \"abc\"") { val p = 'a' ~ 'b' ~ 'c' @@ -39,18 +40,18 @@ trait BasicCombinatorTests extends CustomMatchers { } describe("parser \"succeed(a) b\"") { - val p = succeed('a') ~ 'b' + val p = succ('a') ~ 'b' p `shouldParse` "b" p `shouldNotParse` "" } describe("parser \"succeed(a) succeed(b)\"") { - val p = succeed('a') ~ succeed('b') + val p = succ('a') ~ succ('b') p `shouldParse` "" } describe("parser \"succeed(a) | succeed(b)\"") { - val p = succeed('a') | succeed('b') + val p = succ('a') | succ('b') p `shouldParse` "" } diff --git a/artifact/src/test/scala/CustomMatchers.scala b/artifact/src/test/scala/CustomMatchers.scala index 375bdd5..754c079 100644 --- a/artifact/src/test/scala/CustomMatchers.scala +++ b/artifact/src/test/scala/CustomMatchers.scala @@ -3,11 +3,14 @@ package test import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers -import org.scalatest.matchers.BeMatcher -import org.scalatest.matchers.MatchResult +import org.scalatest.matchers.{BeMatcher, MatchResult} import org.scalatest.Tag -trait CustomMatchers { self: AnyFunSpec & Matchers & RichParsers => +trait CustomMatchers[P <: Parsers](val parsers: P) extends Matchers { + self: AnyFunSpec => + + import parsers._ + extension [T](p: => Parser[T]) { def shouldParse(s: Iterable[Elem], tags: Tag*) = it(s"""should parse "$s" """, tags*) { diff --git a/artifact/src/test/scala/DerivativeParsersTests.scala b/artifact/src/test/scala/DerivativeParsersTests.scala index 33e35a2..12899fd 100644 --- a/artifact/src/test/scala/DerivativeParsersTests.scala +++ b/artifact/src/test/scala/DerivativeParsersTests.scala @@ -4,20 +4,22 @@ package test import scala.language.higherKinds import 
language.implicitConversions import org.scalatest.funspec.AnyFunSpec -import org.scalatest.matchers.should.Matchers class DerivativeParsersTests extends AnyFunSpec - with RichParsers - with DerivativeParsers - with Matchers - with CustomMatchers + with CustomMatchers(paper) with BasicCombinatorTests with NegationTests - with LeftrecTests - with Section3 - with Section4 - with Section7 { + with LeftrecTests { + + import parsers.{ + succeed as succ, + not as neg, + fail as err, + noneOf as nonOf, + oneOf as onOf, + * + } // This test illustrates how to write graph representations of the // parsers to a file. (To execute it replace `ignore` by `describe` and @@ -25,7 +27,7 @@ class DerivativeParsersTests describe("printing graph representations of parsers") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ A A.printToFile("test.png") } @@ -173,16 +175,16 @@ class DerivativeParsersTests } describe("flatMap uses fixed point computation") { - lazy val fm: NT[Int] = succeed(1) | fm.flatMap { n => - if (n < 5) succeed(n + 1) else fail + lazy val fm: NT[Int] = succ(1) | fm.flatMap { n => + if (n < 5) succ(n + 1) else err } fm.results.toSet `shouldBe` Set(1, 2, 3, 4, 5) } describe("Stream preprocessing") { - lazy val ones: NT[Any] = succeed(()) | '1' ~ ones - lazy val zeros: NT[Any] = succeed(()) | '0' ~ zeros + lazy val ones: NT[Any] = succ(()) | '1' ~ ones + lazy val zeros: NT[Any] = succ(()) | '0' ~ zeros lazy val oneszeros: Parser[Any] = '1' ~ '1' ~ '0' ~ '0' @@ -499,7 +501,7 @@ class DerivativeParsersTests // If the right-hand-side `r` is locally known the parser can be // rewritten to: - val rewrite = p ~ r | (not(p ~ always) &> (q ~ r)) + val rewrite = p ~ r | (neg(p ~ always) &> (q ~ r)) rewrite `shouldNotParse` "foo" rewrite `shouldParse` "foooo" rewrite `shouldParse` "fb" @@ -562,10 +564,10 @@ class DerivativeParsersTests // some lexers val singleString: 
Parser[String] = - consumed('"' ~ many(noneOf("\"\n")) ~ '"') - val comment: Parser[String] = consumed('#' ~ many(noneOf("\n")) ~ '\n') + consumed('"' ~ many(nonOf("\"\n")) ~ '"') + val comment: Parser[String] = consumed('#' ~ many(nonOf("\n")) ~ '\n') val multilineString: Parser[String] = - consumed("'''" ~ not(always ~ prefix("'''")) ~ "'''") + consumed("'''" ~ neg(always ~ prefix("'''")) ~ "'''") singleString `shouldParse` "\"hello world\"" singleString `shouldNotParse` "\"hello\nworld\"" @@ -603,7 +605,7 @@ class DerivativeParsersTests val pairs = Map[Elem, Elem]('(' -> ')', '[' -> ']', '{' -> '}') val (opening, closing) = (pairs.keys.toList, pairs.values.toList) - lazy val dyck: NT[Any] = oneOf(opening) >> { paren => + lazy val dyck: NT[Any] = onOf(opening) >> { paren => many(dyck) ~ pairs(paren) } // '(' ~> many(dyck) <~ ')' @@ -612,8 +614,8 @@ class DerivativeParsersTests val parens = // we need to intersect with the outermost parenthesis to prevent // parsing something like "aaa()aaa" - (oneOf(opening) >> { paren => always ~ pairs(paren) }) &> - transform[Any](noText | noneOf(opening) & noneOf(closing), fail, skip)( + (onOf(opening) >> { paren => always ~ pairs(paren) }) &> + transform[Any](noText | nonOf(opening) & nonOf(closing), err, skip)( dyck ) @@ -709,7 +711,7 @@ class DerivativeParsersTests describe( "Regression: `not` should preserve invariant `p.results.isEmpty != p.accepts`" ) { - val p = not("a" | "b") + val p = neg("a" | "b") val p_a = p <<< "a" val p_b = p <<< "b" val p_c = p <<< "c" diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index 99b3084..8725873 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -3,10 +3,11 @@ package test import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec -import org.scalatest.matchers.should.Matchers -trait LeftrecTests extends CustomMatchers { - self: AnyFunSpec & Matchers & 
RichParsers => +trait LeftrecTests[P <: RichParsers] { + self: AnyFunSpec & CustomMatchers[P] => + + import parsers.{succeed as succ, *} describe("lazyness of alt") { @@ -55,7 +56,7 @@ trait LeftrecTests extends CustomMatchers { describe("left recursion") { describe("A = A ~ a | empty") { - lazy val A: NT[Any] = A ~ 'a' | succeed(42) + lazy val A: NT[Any] = A ~ 'a' | succ(42) A `shouldParse` "" A `shouldParse` "a" @@ -63,7 +64,7 @@ trait LeftrecTests extends CustomMatchers { } describe("A = empty | A ~ a ") { - lazy val A: NT[Any] = succeed(42) | A ~ 'a' + lazy val A: NT[Any] = succ(42) | A ~ 'a' A `shouldParse` "" A `shouldParse` "a" @@ -75,7 +76,7 @@ trait LeftrecTests extends CustomMatchers { describe("one level indirect leftrecursion") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ A // A `shouldParse` "1" // A `shouldParse` "12" @@ -91,8 +92,8 @@ trait LeftrecTests extends CustomMatchers { describe("two levels indirect leftrecursion") { lazy val num: Parser[Any] = some(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ C ~ '+' ~ num - lazy val C: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ C ~ '+' ~ num + lazy val C: NT[Any] = succ(()) ~ A A `shouldParse` "1" A `shouldParse` "12" @@ -108,7 +109,7 @@ trait LeftrecTests extends CustomMatchers { // From "Packrat parsers can support left-recursion" describe("super linear parse time") { - lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succeed(()) + lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succ(()) lazy val ones: NT[Any] = ones ~ '1' | '1' start `shouldParse` "" @@ -133,7 +134,7 @@ trait LeftrecTests extends CustomMatchers { } describe("A = empty ~ A ~ b | empty") { - lazy val A: NT[Any] = succeed("done") ~ A ~ 'b' | succeed("done") + lazy val A: NT[Any] = succ("done") ~ A ~ 'b' | succ("done") A `shouldParse` "" A `shouldParse` "b" A 
`shouldParse` "bb" @@ -142,7 +143,7 @@ trait LeftrecTests extends CustomMatchers { // should parse at most as many 'd's as it parses 'b's. describe("A = B ~ A ~ b | c\n B = d | empty") { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' - lazy val B: NT[?] = 'd' | succeed("done") + lazy val B: NT[?] = 'd' | succ("done") A `shouldParse` "c" A `shouldParse` "cb" @@ -215,7 +216,7 @@ trait LeftrecTests extends CustomMatchers { lazy val arrayEl: NT[Any] = (expression - | succeed("undefined")) + | succ("undefined")) expression `shouldParse` "" expression `shouldParse` "a" @@ -302,7 +303,7 @@ trait LeftrecTests extends CustomMatchers { // taken from Tillmann Rendel's GLL library describe("grammar with hidden left recursion") { lazy val S: NT[Any] = C ~ 'a' | 'd' - lazy val B: NT[Any] = succeed(()) | 'a' + lazy val B: NT[Any] = succ(()) | 'a' lazy val C: NT[Any] = 'b' | B ~ C ~ 'b' | 'b' ~ 'b' S `shouldNotParse` "" diff --git a/artifact/src/test/scala/NegationTests.scala b/artifact/src/test/scala/NegationTests.scala index 848f0be..15d77f8 100644 --- a/artifact/src/test/scala/NegationTests.scala +++ b/artifact/src/test/scala/NegationTests.scala @@ -3,13 +3,14 @@ package test import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec -import org.scalatest.matchers.should.Matchers -trait NegationTests extends CustomMatchers { - self: AnyFunSpec & Matchers & RichParsers => +trait NegationTests[P <: RichParsers] { + self: AnyFunSpec & CustomMatchers[P] => + + import parsers.{ not as neg, * } describe("parser \"not(aa)\"") { - val p = not("aa") + val p = neg("aa") p `shouldParse` "a" p `shouldNotParse` "aa" p `shouldParse` "aac" @@ -17,7 +18,7 @@ trait NegationTests extends CustomMatchers { } describe("parser \"not(aa) & lower*\"") { - val p = not("aa") & many(lower) + val p = neg("aa") & many(lower) p `shouldParse` "a" p `shouldParse` "bc" p `shouldParse` "ab" @@ -28,7 +29,7 @@ trait NegationTests extends CustomMatchers { } describe("parser \"not(aa ~ .*) & 
lower*\"") { - val p = not("aa" ~ many(any)) & many(lower) + val p = neg("aa" ~ many(any)) & many(lower) p `shouldParse` "a" p `shouldParse` "bc" p `shouldParse` "ab" @@ -39,7 +40,7 @@ trait NegationTests extends CustomMatchers { } describe("parser \"not(.* ~ abc ~ .*)\"") { - val p = not(many(any) ~ "abc" ~ many(any)) + val p = neg(many(any) ~ "abc" ~ many(any)) p `shouldParse` "" p `shouldParse` "xx" p `shouldParse` "xxabxx" @@ -49,7 +50,7 @@ trait NegationTests extends CustomMatchers { } describe("parser \"not((baaa | ba) ~ aa ~ .*) & lower*\"") { - val p = not(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) + val p = neg(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) p `shouldNotParse` "baaa" p `shouldNotParse` "baaaxx" p `shouldParse` "" From fea56ab5b81c273e5026e0b6a2ba7660d9b197ce Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Fri, 7 Nov 2025 13:12:26 +0100 Subject: [PATCH 37/95] Fix PythonParserTests --- .../src/test/scala/PythonParserTests.scala | 26 +++++-------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index f43c421..287cf55 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -3,11 +3,13 @@ package test import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec -import org.scalatest.matchers.should.Matchers -import org.scalatest.Tag -class PythonParserTests extends AnyFunSpec with Matchers { - import PythonParsers._ +class PythonParserTests + extends AnyFunSpec + with CustomMatchers[PythonParsers.type](PythonParsers) { + + import parsers._ + import parsers.given import Lexeme._ describe("indented python parser (lexeme based)") { @@ -995,20 +997,4 @@ class PythonParserTests extends AnyFunSpec with Matchers { parse(aInput, dummyin2).size `shouldBe` 1 } - - extension [T](p: => Parser[T]) { - def shouldParse(s: Iterable[Elem], tags: Tag*) 
= - it(s"""should parse "$s" """, tags*) { - accepts(p, s) `shouldBe` true - } - def shouldNotParse(s: Iterable[Elem], tags: Tag*) = - it(s"""should not parse "$s" """, tags*) { - accepts(p, s) `shouldBe` false - } - // for unambiguous parses - def shouldParseWith(s: Iterable[Elem], result: T) = - it(s"""should parse "$s" with correct result""") { - parse(p, s) `shouldBe` List(result) - } - } } From 5d18e4b75492fea303272dcb2140f453ab401b2b Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Fri, 7 Nov 2025 13:22:09 +0100 Subject: [PATCH 38/95] Now all compiles, but we have stackoverflow --- artifact/src/test/scala/BasicCombinatorsTest.scala | 4 ++-- artifact/src/test/scala/CustomMatchers.scala | 2 +- artifact/src/test/scala/LeftrecTests.scala | 7 +++---- artifact/src/test/scala/NegationTests.scala | 4 ++-- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/artifact/src/test/scala/BasicCombinatorsTest.scala b/artifact/src/test/scala/BasicCombinatorsTest.scala index 8b3458c..9daa516 100644 --- a/artifact/src/test/scala/BasicCombinatorsTest.scala +++ b/artifact/src/test/scala/BasicCombinatorsTest.scala @@ -4,8 +4,8 @@ package test import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec -trait BasicCombinatorTests[P <: RichParsers] { - self: AnyFunSpec & CustomMatchers[P] => +trait BasicCombinatorTests { + self: AnyFunSpec & CustomMatchers[RichParsers] => import parsers.{ succeed as succ, *} diff --git a/artifact/src/test/scala/CustomMatchers.scala b/artifact/src/test/scala/CustomMatchers.scala index 754c079..b709069 100644 --- a/artifact/src/test/scala/CustomMatchers.scala +++ b/artifact/src/test/scala/CustomMatchers.scala @@ -6,7 +6,7 @@ import org.scalatest.matchers.should.Matchers import org.scalatest.matchers.{BeMatcher, MatchResult} import org.scalatest.Tag -trait CustomMatchers[P <: Parsers](val parsers: P) extends Matchers { +trait CustomMatchers[+P <: Parsers](val parsers: P) extends Matchers { self: AnyFunSpec => import 
parsers._ diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index 8725873..b65d0e7 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -4,8 +4,8 @@ package test import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec -trait LeftrecTests[P <: RichParsers] { - self: AnyFunSpec & CustomMatchers[P] => +trait LeftrecTests { + self: AnyFunSpec & CustomMatchers[RichParsers] => import parsers.{succeed as succ, *} @@ -256,8 +256,7 @@ trait LeftrecTests[P <: RichParsers] { describe("balanced smileys") { lazy val az: NT[Any] = acceptIf(c => c >= 'a' && c <= 'z') lazy val S: NT[Any] = many(az | ' ' | ':' | ':' ~ P | '(' ~ S ~ ')') - val tmp: Parser[Char] = '(' - lazy val P: NT[Any] = tmp | ')' + lazy val P: NT[Any] = alt('(', ')') S `shouldParse` "" S `shouldNotParse` ":((" diff --git a/artifact/src/test/scala/NegationTests.scala b/artifact/src/test/scala/NegationTests.scala index 15d77f8..9c26e32 100644 --- a/artifact/src/test/scala/NegationTests.scala +++ b/artifact/src/test/scala/NegationTests.scala @@ -4,8 +4,8 @@ package test import scala.language.implicitConversions import org.scalatest.funspec.AnyFunSpec -trait NegationTests[P <: RichParsers] { - self: AnyFunSpec & CustomMatchers[P] => +trait NegationTests { + self: AnyFunSpec & CustomMatchers[RichParsers] => import parsers.{ not as neg, * } From fd67d2d034894481afc48235711b82f1c5aca207 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Sat, 8 Nov 2025 09:29:52 +0100 Subject: [PATCH 39/95] Update gitignore --- .gitignore | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 30aadb5..04aebfb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ # Scala *.class *.log -.scalafmt.conf # Mac .DS_Store @@ -10,12 +9,16 @@ .cache .history .lib -.bloop dist target lib_managed src_managed project -.bsp -.metals .scala-build + +# Tooling 
+.project +.metals +.bsp +.bloop +.scalafmt.conf From 3ffd98a929987a5f56ad6eb342897503b52ec852 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Sat, 8 Nov 2025 09:38:05 +0100 Subject: [PATCH 40/95] Formatting --- artifact/src/main/scala/library/Printable.scala | 7 ++++--- artifact/src/test/scala/BasicCombinatorsTest.scala | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/artifact/src/main/scala/library/Printable.scala b/artifact/src/main/scala/library/Printable.scala index 6dfb1e0..abd8654 100644 --- a/artifact/src/main/scala/library/Printable.scala +++ b/artifact/src/main/scala/library/Printable.scala @@ -18,7 +18,6 @@ trait Printable { results{results.toSet.mkString(", ")} - private lazy val printGraph: String = s"""strict digraph G { | ${printNode} @@ -39,7 +38,8 @@ abstract class NullaryPrintable(val name: String) extends Printable { def printNode = s"""$id [label="$name", shape=circle]""" } -abstract class UnaryPrintable(val name: String, _p: => Printable) extends Printable { +abstract class UnaryPrintable(val name: String, _p: => Printable) + extends Printable { private lazy val p = _p def printNode = s""" ${id} [shape=none, fontsize=8, fontname=mono, label=<$table>]; @@ -47,7 +47,8 @@ abstract class UnaryPrintable(val name: String, _p: => Printable) extends Printa |${p.printNode}""".stripMargin('|') } -abstract class BinaryPrintable(val name: String, p: Printable, q: Printable) extends Printable { +abstract class BinaryPrintable(val name: String, p: Printable, q: Printable) + extends Printable { def printNode = s""" ${id} [shape=none, fontsize=8, fontname=mono, label=<$table>]; | ${id}:sw -> ${p.id} diff --git a/artifact/src/test/scala/BasicCombinatorsTest.scala b/artifact/src/test/scala/BasicCombinatorsTest.scala index 9daa516..416813a 100644 --- a/artifact/src/test/scala/BasicCombinatorsTest.scala +++ b/artifact/src/test/scala/BasicCombinatorsTest.scala @@ -7,7 +7,7 @@ import org.scalatest.funspec.AnyFunSpec trait BasicCombinatorTests 
{ self: AnyFunSpec & CustomMatchers[RichParsers] => - import parsers.{ succeed as succ, *} + import parsers.{succeed as succ, *} describe("parser \"abc\"") { val p = 'a' ~ 'b' ~ 'c' From a455b3cf6a1c67fe75e6c224fbc790596712198c Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Sat, 8 Nov 2025 15:21:34 +0100 Subject: [PATCH 41/95] Be explicit about imports --- artifact/src/test/scala/CustomMatchers.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifact/src/test/scala/CustomMatchers.scala b/artifact/src/test/scala/CustomMatchers.scala index b709069..8deb129 100644 --- a/artifact/src/test/scala/CustomMatchers.scala +++ b/artifact/src/test/scala/CustomMatchers.scala @@ -9,7 +9,7 @@ import org.scalatest.Tag trait CustomMatchers[+P <: Parsers](val parsers: P) extends Matchers { self: AnyFunSpec => - import parsers._ + import parsers.{Elem, Parser, accepts, isSuccess, parse} extension [T](p: => Parser[T]) { def shouldParse(s: Iterable[Elem], tags: Tag*) = From f9bd79aeaaa9a0ebcb061ca54f4e60fb8daadd5b Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Sat, 8 Nov 2025 18:08:29 +0100 Subject: [PATCH 42/95] Format DerivedOps --- .../src/main/scala/library/DerivedOps.scala | 66 +++++++++++-------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index fd8ac21..7e9f8b2 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -31,15 +31,16 @@ trait DerivedOps { self: Parsers & Syntax => // def always[T](t: T): Parser[T] = // many(any) map { _ => t } - def oneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { - t => s.exists(_ == t) + def oneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { t => + s.exists(_ == t) } - def noneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { - t => s.forall(_ != t) + def noneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { t => + s.forall(_ != t) } 
- def opt[T](p: Parser[T]): Parser[Option[T]] = alt(p ^^ { r => Some(r) }, succeed(None)) + def opt[T](p: Parser[T]): Parser[Option[T]] = + alt(p ^^ { r => Some(r) }, succeed(None)) def manyN[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) @@ -100,14 +101,19 @@ trait DerivedOps { self: Parsers & Syntax => // described by the function `f`. def repeat[T](f: Parser[T] => Parser[Parser[T]]): Parser[T] => Parser[T] = { val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => cache.getOrElseUpdate(p, { - done(p) | nonterminal(f(p) >> rec) - }) + def rec: Parser[T] => Parser[T] = p => + cache.getOrElseUpdate( + p, { + done(p) | nonterminal(f(p) >> rec) + } + ) rec } // repeat is just an instance of repeatAll - def repeatAll[T](f: List[Parser[T]] => Parser[List[Parser[T]]]): List[Parser[T]] => Parser[List[T]] = ps => + def repeatAll[T]( + f: List[Parser[T]] => Parser[List[Parser[T]]] + ): List[Parser[T]] => Parser[List[T]] = ps => collect(ps) | f(ps) >> repeatAll(f) private def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } @@ -126,13 +132,11 @@ trait DerivedOps { self: Parsers & Syntax => def lookahead[T](p: Parser[Any], q: Parser[T]): Parser[T] = not(prefix(p)) &> q - //consumed(p) >> { in => q <<< in } - + // consumed(p) >> { in => q <<< in } // some extension point for optimization def prefix: Parser[Any] => Parser[Unit] = p => p ~> always - // per-element action performed on p def rep[T](f: Elem => Parser[T] => Parser[T]) = repeat[T] { p => any ^^ { f(_)(p) } } @@ -149,26 +153,31 @@ trait DerivedOps { self: Parsers & Syntax => def mapInPartial[T](f: PartialFunction[Elem, Elem]): Parser[T] => Parser[T] = mapIn(f orElse { case x => x }) - def inRegion[T](region: Parser[Any], f: Parser[Parser[T]] => Parser[Parser[T]]): Parser[T] => Parser[T] = { + def inRegion[T]( + region: Parser[Any], + f: Parser[Parser[T]] => Parser[Parser[T]] + ): Parser[T] => Parser[T] = { - // to 
prevent accessive re-parsing we introduce some caching on this - // parser combinator here. - val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] + // to prevent accessive re-parsing we introduce some caching on this + // parser combinator here. + val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => cache.getOrElseUpdate(p, { - - lazy val dp = delegate(p) - nonterminal ( - done(p) | biasedAlt( - region &> f(dp) >> rec, - (any &> dp) >> rec)) - }) - rec - } + def rec: Parser[T] => Parser[T] = p => + cache.getOrElseUpdate( + p, { + lazy val dp = delegate(p) + nonterminal( + done(p) | biasedAlt(region &> f(dp) >> rec, (any &> dp) >> rec) + ) + } + ) + rec + } // Greedy repetition - def greedyMany[T](p: Parser[T]): Parser[List[T]] = greedySome(p) | succeed(Nil) + def greedyMany[T](p: Parser[T]): Parser[List[T]] = + greedySome(p) | succeed(Nil) // Instead of a class use a closure: def greedySome[T]: Parser[T] => Parser[List[T]] = { p => @@ -178,8 +187,7 @@ trait DerivedOps { self: Parsers & Syntax => def forceRead(curr: Parser[T]): Parser[List[T]] = withNext(curr, succeed(Nil)) | eat { el => - biasedAlt( forceRead(curr << el), - withNext(curr, greedySome(p) << el)) + biasedAlt(forceRead(curr << el), withNext(curr, greedySome(p) << el)) } forceRead(p) From 1f0b2f6f0b88375b556717fb4aa240dc67 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Sat, 8 Nov 2025 18:10:36 +0100 Subject: [PATCH 43/95] Use explicit tuple declaration to avoid warnings --- artifact/src/main/scala/library/DerivedOps.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index 7e9f8b2..e926b6e 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -17,12 +17,12 @@ trait DerivedOps { self: Parsers & Syntax => 
def some[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case p ~ ps => p :: ps } + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => p :: ps } some_v } def many[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case p ~ ps => p :: ps } + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => p :: ps } many_v } @@ -44,12 +44,12 @@ trait DerivedOps { self: Parsers & Syntax => def manyN[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else p ~ manyN(n - 1, p) ^^ { case r ~ rs => r :: rs } + else p ~ manyN(n - 1, p) ^^ { case (r, rs) => r :: rs } } def atMost[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else (p ~ atMost(n - 1, p) ^^ { case r ~ rs => r :: rs }) | succeed(Nil) + else (p ~ atMost(n - 1, p) ^^ { case (r, rs) => r :: rs }) | succeed(Nil) } def manySep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { @@ -59,7 +59,7 @@ trait DerivedOps { self: Parsers & Syntax => // same optimization as above for many and some def someSep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(sep ~> some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case p ~ ps => p :: ps } + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => p :: ps } some_v } @@ -70,7 +70,7 @@ trait DerivedOps { self: Parsers & Syntax => // --> in Haskell one would use `traverse` def distr[T](ps: List[Parser[T]]): Parser[List[T]] = ps.foldRight(succeed[List[T]](Nil)) { (p, l) => - (p ~ l) ^^ { case a ~ b => a :: b } + (p ~ l) ^^ { case (a, b) => a :: b } } def join[T](p: Parser[Parser[T]]): Parser[T] = p >> done @@ -116,7 +116,7 @@ trait DerivedOps { self: Parsers & Syntax => ): List[Parser[T]] => Parser[List[T]] = ps => 
collect(ps) | f(ps) >> repeatAll(f) - private def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } + private def mkList[T] = (_: ~[T, List[T]]) match { case (x, xs) => x :: xs } val succeedForever: NT[Unit] = succeed(()) | (any ~> succeedForever) @@ -183,7 +183,7 @@ trait DerivedOps { self: Parsers & Syntax => def greedySome[T]: Parser[T] => Parser[List[T]] = { p => def withNext(p: Parser[T], ps: Parser[List[T]]): Parser[List[T]] = - done(p) ~ ps ^^ { case t ~ ts => t :: ts } + done(p) ~ ps ^^ { case (t, ts) => t :: ts } def forceRead(curr: Parser[T]): Parser[List[T]] = withNext(curr, succeed(Nil)) | eat { el => From 9c7283cf4c0c1cf1c72bd71c1807ed41ba00ca2e Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Sun, 9 Nov 2025 13:19:02 +0100 Subject: [PATCH 44/95] Eliminate warning and format --- artifact/src/main/scala/library/DerivativeParsers.scala | 4 +--- artifact/src/main/scala/library/DerivedOps.scala | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index d4165de..ee604db 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -167,9 +167,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => // canonicalization rule (1) from PLDI 2016 override def seq[T](r: Parser[T]): Parser[(R ~ U) ~ T] = - (p seq (q seq r)) map { case (rr ~ (ru ~ rt)) => - ((rr, ru), rt) - } + (p seq (q seq r)) map { case (rr, (ru, rt)) => ((rr, ru), rt) } } class Done[R](val p: Parser[R]) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index e926b6e..dd530c3 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -76,8 +76,7 @@ trait DerivedOps { self: Parsers & Syntax => def join[T](p: Parser[Parser[T]]): Parser[T] = p >> done 
// A parser that captures the tokens consumed by `p` - def consumed[T](p: Parser[T]): Parser[List[Elem]] = - many(any) <& p + def consumed[T](p: Parser[T]): Parser[List[Elem]] = many(any) <& p def eat[R](f: Elem => Parser[R]): Parser[R] = any >> f From bae5297041ed2cdc9892bff72efcde55032b86df Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 10 Nov 2025 23:39:21 +0100 Subject: [PATCH 45/95] Avoid warnings --- artifact/src/main/scala/examples/PythonParsers.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index a098edc..617dc2e 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -182,7 +182,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { stripComments[T] compose explicitJoin[T] compose implicitJoin[T] def binOp[T, S](p: Parser[T], op: Parser[S], f: (T, S, T) => T) = { - lazy val ps: Parser[T] = nonterminal((p ␣ op ␣ ps) ^^ { case l ~ op ~ r => + lazy val ps: Parser[T] = nonterminal((p ␣ op ␣ ps) ^^ { case ((l, op), r) => f(l, op, r) } | p) ps @@ -324,7 +324,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { lazy val for_stmt: Parser[Any] = "for" ␣> exprlist ␣ ("in" ␣> testlist ␣ (":" ␣> suite ~ spacedOpt( ("else" ␣> ":") ␣> suite - ))) ^^ { case (exprs ~ (tests ~ (body ~ default))) => + ))) ^^ { case (exprs, (tests, (body, default))) => For(exprs, tests, body, default) } lazy val try_stmt: Parser[Any] = From 0f6be28529a825bcf8badefb9bd2ea42de3519bf Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 10 Nov 2025 23:46:16 +0100 Subject: [PATCH 46/95] Simplify list creation --- .../src/main/scala/library/DerivativeParsers.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 
ee604db..118010c 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -41,7 +41,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => } object Fail extends NullaryPrintable("∅") with Parser[Nothing] { - override def results = List.empty + override def results = List() override def failed = true override def accepts = false override def consume: Elem => this.type = in => this @@ -95,7 +95,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => } case class Accept(elem: Elem) extends Parser[Elem] { - def results = List.empty + def results = List() def failed = false def accepts = false def consume = (in: Elem) => @@ -114,7 +114,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => class AcceptIf(f: Elem => Boolean) extends NullaryPrintable("acceptIf") with Parser[Elem] { - def results = List.empty + def results = List() def failed = false def accepts = false def consume = (in: Elem) => @@ -128,7 +128,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => class Not[R](val p: Parser[R]) extends UnaryPrintable("not", p) with Parser[Unit] { - def results = (if (p.results.isEmpty) List(()) else List.empty) + def results = (if (p.results.isEmpty) List(()) else List()) def failed = false // we never know, this is a conservative approx. 
def accepts = !p.accepts def consume: Elem => Parser[Unit] = in => (p consume in).not @@ -265,7 +265,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => protected object resultsFix extends Attributed { object results extends Attribute[List[R]]( - List.empty, + List(), (nw, ol) => (nw ++ ol).distinct, (nw, ol) => nw.toSet.subsetOf(ol.toSet) ) @@ -275,7 +275,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override protected def updateAttributes() = results.update() } - private val cache: mutable.HashMap[Elem, Parser[R]] = mutable.HashMap.empty + private val cache: mutable.HashMap[Elem, Parser[R]] = mutable.HashMap() // Wrapping in `nonterminal` is cecessary for left-recursive // grammars and for grammars like "DerivativeParsers / preprocessor" // that recursively derive. Optimizing the nonterminal node away causes From f177e940bca71b69cc43e8260d0bfe051601c57a Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 11 Nov 2025 00:09:51 +0100 Subject: [PATCH 47/95] Simplify --- .../scala/library/DerivativeParsers.scala | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 118010c..c69ca2b 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -32,11 +32,8 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => // for optimization of biased choice def prefix: Parser[Unit] = { - if (accepts) { - always - } else { - eat { el => (p consume el).prefix } - } + if (accepts) always + else eat { el => (p consume el).prefix } } } @@ -44,19 +41,19 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def results = List() override def failed = true override def accepts = false - override def consume: Elem => this.type = in => this + override def consume = _ => this - override def alt[U >: 
Nothing](q: Parser[U]): q.type = q - override def seq[U](q: Parser[U]): this.type = this - override def and[U](q: Parser[U]): this.type = this - override def map[U](f: Nothing => U): this.type = this - override def flatMap[U](g: Nothing => Parser[U]): this.type = this + override def alt[U >: Nothing](q: Parser[U]) = q + override def seq[U](q: Parser[U]) = this + override def and[U](q: Parser[U]) = this + override def map[U](f: Nothing => U) = this + override def flatMap[U](g: Nothing => Parser[U]) = this override def mapResults[U]( f: (=> Results[Nothing]) => Results[U] - ): this.type = this + ) = this override def done = this - override def not: Parser[Unit] = Always + override def not = Always override def prefix = this override def toString: String = "∅" } @@ -65,11 +62,9 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def results = List(()) override def failed = false override def accepts = true - override def consume = in => Always - override def not: Parser[Unit] = fail - override def and[U](q: Parser[U]): Parser[(Unit, U)] = q map { r => - ((), r) - } + override def consume = _ => this + override def not = Fail + override def and[U](q: Parser[U]) = q map { ((), _) } // this is a valid optimization, however it almost never occurs. 
override def alt[U >: Unit](q: Parser[U]) = this From 329d75ae4e9d236c88a209964c0d585574aaf3ad Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 11 Nov 2025 00:27:39 +0100 Subject: [PATCH 48/95] Formatting --- artifact/src/main/scala/library/Syntax.scala | 47 +++++++++++++------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index a9cd946..5163451 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -1,23 +1,23 @@ package fcd trait Syntax { self: Parsers & DerivedOps => - extension[R] (p: Parser[R]) { + extension [R](p: Parser[R]) { def <<(in: Elem): Parser[R] = feed(p, in) def <<<(in: Seq[Elem]): Parser[R] = feedAll(p, in) def ~[U](q: Parser[U]) = seq(p, q) - def ~>[U](q: Parser[U]) = map(seq(p, q), { case (a, b) => b }) - def <~[U](q: Parser[U]) = map(seq(p, q), { case (a, b) => a }) + def <~[U](q: Parser[U]) = map(seq(p, q), _._1) + def ~>[U](q: Parser[U]) = map(seq(p, q), _._2) def |[U >: R](q: Parser[U]) = alt(p, q) def &[U](q: Parser[U]) = and(p, q) - def <&[U](q: Parser[U]) = map(and(p, q), _._1) - def &>[U](q: Parser[U]) = map(and(p, q), _._2) + def <&[U](q: Parser[U]) = map(and(p, q), _._1) + def &>[U](q: Parser[U]) = map(and(p, q), _._2) // biased Alternative def <|[U >: R](q: Parser[U]) = biasedAlt(p, q) def |>[U >: R](q: Parser[U]) = biasedAlt(q, p) - def ^^[U](f: R => U): Parser[U] = map(p,f) - def ^^^[U](u: => U): Parser[U] = map(p, _ => u ) + def ^^[U](f: R => U): Parser[U] = map(p, f) + def ^^^[U](u: => U): Parser[U] = map(p, _ => u) def >>[U](f: R => Parser[U]): Parser[U] = flatMap(p, f) def ? 
= opt(p) @@ -25,24 +25,39 @@ trait Syntax { self: Parsers & DerivedOps => def + = some(p) } - given liftToParser[R,U](using conv: R => U): Conversion[Parser[R], Parser[U]] = - p => map(p,conv) + given liftToParser[R, U](using + conv: R => U + ): Conversion[Parser[R], Parser[U]] = + p => map(p, conv) // tag nonterminals - this allows automatic insertion of nt-markers final case class NT[+R](parser: Parser[R]) given [R]: Conversion[NT[R], Parser[R]] = _.parser given [R]: Conversion[Parser[R], NT[R]] = parser => NT(nonterminal(parser)) - given tupleSeq3[T1, T2, T3, O]: Conversion[(T1, T2, T3) => O, (T1 ~ T2 ~ T3) => O] with { + given tupleSeq3[T1, T2, T3, O] + : Conversion[(T1, T2, T3) => O, (T1 ~ T2 ~ T3) => O] with { def apply(f: (T1, T2, T3) => O) = { case t1 ~ t2 ~ t3 => f(t1, t2, t3) } } - given tupleSeq4[T1, T2, T3, T4, O]: Conversion[(T1, T2, T3, T4) => O, (T1 ~ T2 ~ T3 ~ T4) => O] with { - def apply(f: (T1, T2, T3, T4) => O) = { case t1 ~ t2 ~ t3 ~ t4 => f(t1, t2, t3, t4) } + given tupleSeq4[T1, T2, T3, T4, O] + : Conversion[(T1, T2, T3, T4) => O, (T1 ~ T2 ~ T3 ~ T4) => O] with { + def apply(f: (T1, T2, T3, T4) => O) = { case t1 ~ t2 ~ t3 ~ t4 => + f(t1, t2, t3, t4) + } } - given tupleSeq5[T1, T2, T3, T4, T5, O]: Conversion[(T1, T2, T3, T4, T5) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O] with { - def apply(f: (T1, T2, T3, T4, T5) => O) = { case t1 ~ t2 ~ t3 ~ t4 ~ t5 => f(t1, t2, t3, t4, t5) } + given tupleSeq5[T1, T2, T3, T4, T5, O] + : Conversion[(T1, T2, T3, T4, T5) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O] + with { + def apply(f: (T1, T2, T3, T4, T5) => O) = { case t1 ~ t2 ~ t3 ~ t4 ~ t5 => + f(t1, t2, t3, t4, t5) + } } - given tupleSeq6[T1, T2, T3, T4, T5, T6, O]: Conversion[(T1, T2, T3, T4, T5, T6) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5 ~ T6) => O] with { - def apply(f: (T1, T2, T3, T4, T5, T6) => O) = { case t1 ~ t2 ~ t3 ~ t4 ~ t5 ~ t6 => f(t1, t2, t3, t4, t5, t6) } + given tupleSeq6[T1, T2, T3, T4, T5, T6, O]: Conversion[ + (T1, T2, T3, T4, T5, T6) => O, + (T1 ~ T2 ~ T3 ~ 
T4 ~ T5 ~ T6) => O + ] with { + def apply(f: (T1, T2, T3, T4, T5, T6) => O) = { + case t1 ~ t2 ~ t3 ~ t4 ~ t5 ~ t6 => f(t1, t2, t3, t4, t5, t6) + } } } From 58b9dc1f891171288c03999502746ef648be939c Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 11 Nov 2025 00:31:08 +0100 Subject: [PATCH 49/95] Avoid warning --- artifact/src/main/scala/library/Syntax.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 5163451..698b9fa 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -37,19 +37,19 @@ trait Syntax { self: Parsers & DerivedOps => given tupleSeq3[T1, T2, T3, O] : Conversion[(T1, T2, T3) => O, (T1 ~ T2 ~ T3) => O] with { - def apply(f: (T1, T2, T3) => O) = { case t1 ~ t2 ~ t3 => f(t1, t2, t3) } + def apply(f: (T1, T2, T3) => O) = { case ((t1, t2), t3) => f(t1, t2, t3) } } given tupleSeq4[T1, T2, T3, T4, O] : Conversion[(T1, T2, T3, T4) => O, (T1 ~ T2 ~ T3 ~ T4) => O] with { - def apply(f: (T1, T2, T3, T4) => O) = { case t1 ~ t2 ~ t3 ~ t4 => + def apply(f: (T1, T2, T3, T4) => O) = { case (((t1, t2), t3), t4) => f(t1, t2, t3, t4) } } given tupleSeq5[T1, T2, T3, T4, T5, O] : Conversion[(T1, T2, T3, T4, T5) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O] with { - def apply(f: (T1, T2, T3, T4, T5) => O) = { case t1 ~ t2 ~ t3 ~ t4 ~ t5 => - f(t1, t2, t3, t4, t5) + def apply(f: (T1, T2, T3, T4, T5) => O) = { + case ((((t1, t2), t3), t4), t5) => f(t1, t2, t3, t4, t5) } } given tupleSeq6[T1, T2, T3, T4, T5, T6, O]: Conversion[ @@ -57,7 +57,7 @@ trait Syntax { self: Parsers & DerivedOps => (T1 ~ T2 ~ T3 ~ T4 ~ T5 ~ T6) => O ] with { def apply(f: (T1, T2, T3, T4, T5, T6) => O) = { - case t1 ~ t2 ~ t3 ~ t4 ~ t5 ~ t6 => f(t1, t2, t3, t4, t5, t6) + case (((((t1, t2), t3), t4), t5), t6) => f(t1, t2, t3, t4, t5, t6) } } } From 0977200b5af7c8ab9f56db56b5e01c4b72ea3680 Mon Sep 17 00:00:00 2001 From: 
Said Kadrioski Date: Tue, 11 Nov 2025 00:32:56 +0100 Subject: [PATCH 50/95] Remove implicit conversion --- artifact/src/main/scala/library/DerivedOps.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index dd530c3..179dbe4 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -1,7 +1,5 @@ package fcd -import scala.language.implicitConversions - trait DerivedOps { self: Parsers & Syntax => val any: Parser[Elem] = acceptIf(_ => true) From 96d9180f000003296bf768d2d2065b957c458c8a Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 13 Nov 2025 13:17:50 +0100 Subject: [PATCH 51/95] Simplify --- artifact/src/main/scala/library/DerivativeParsers.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index c69ca2b..9177148 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -148,8 +148,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => extends BinaryPrintable("~", p, q) with Parser[R ~ U] { - def results = - (for { r <- p.results; u <- q.results } yield (new ~(r, u))).distinct + def results = (for { r <- p.results; u <- q.results } yield (r, u)).distinct // q.failed forces q, which might not terminate for grammars with // infinite many nonterminals, like: // def foo(p) = 'a' ~ foo(p << 'a') @@ -321,7 +320,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def alt[R, U >: R](p: Parser[R], q: Parser[U]) = p alt q def seq[R, U](p: Parser[R], q: Parser[U]) = p seq q - def and[R, U](p: Parser[R], q: Parser[U]): Parser[(R, U)] = p and q + def and[R, U](p: Parser[R], q: Parser[U]) = p and q def feed[R](in: Elem, p: => Parser[R]) = p consume in From 
1e556096bf1fbf5238ff9452181a8a4b31badf92 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 18:16:15 +0100 Subject: [PATCH 52/95] DerivedOps, add implicit conversions --- artifact/src/main/scala/library/DerivedOps.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index 179dbe4..dd530c3 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -1,5 +1,7 @@ package fcd +import scala.language.implicitConversions + trait DerivedOps { self: Parsers & Syntax => val any: Parser[Elem] = acceptIf(_ => true) From 1e925c37d531b526c11fcbb630065e8c051f0633 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 18:22:31 +0100 Subject: [PATCH 53/95] Use processEscapes instead of treatEscapes --- artifact/src/main/scala/examples/paper/Section4.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifact/src/main/scala/examples/paper/Section4.scala b/artifact/src/main/scala/examples/paper/Section4.scala index e4b9bef..1c36371 100644 --- a/artifact/src/main/scala/examples/paper/Section4.scala +++ b/artifact/src/main/scala/examples/paper/Section4.scala @@ -45,7 +45,7 @@ trait Section4 { self: Section3 & RichParsers => /** Section 4.2 Modular Definitions as Combinators */ object section_4_2 { - def unescChar(c: Char): String = StringContext treatEscapes s"\\$c" + def unescChar(c: Char): String = StringContext processEscapes s"\\$c" // ### Example.
Preprocessor that unescapes backslash escaped characters // From c52578380643ed068c85d0c269672d531636a1b2 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 18:41:41 +0100 Subject: [PATCH 54/95] Format Parsers --- artifact/src/main/scala/library/Parsers.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/artifact/src/main/scala/library/Parsers.scala b/artifact/src/main/scala/library/Parsers.scala index 13dc42f..d5fb305 100644 --- a/artifact/src/main/scala/library/Parsers.scala +++ b/artifact/src/main/scala/library/Parsers.scala @@ -40,7 +40,9 @@ trait Parsers { // For testing def isSuccess[R](p: Parser[R]): Boolean = !isFailure(p) def isFailure[R](p: Parser[R]): Boolean = !isSuccess(p) - def accepts[R](p: Parser[R], s: Iterable[Elem]): Boolean = isSuccess(feedAll(p, s)) + def accepts[R](p: Parser[R], s: Iterable[Elem]): Boolean = isSuccess( + feedAll(p, s) + ) // As optimization def always: Parser[Unit] From 27814207a63384c4825fdfc89a1264bf2751458d Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 18:43:57 +0100 Subject: [PATCH 55/95] Remove unneeded type hints --- artifact/src/main/scala/library/Syntax.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 698b9fa..5eb3364 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -2,8 +2,8 @@ package fcd trait Syntax { self: Parsers & DerivedOps => extension [R](p: Parser[R]) { - def <<(in: Elem): Parser[R] = feed(p, in) - def <<<(in: Seq[Elem]): Parser[R] = feedAll(p, in) + def <<(in: Elem) = feed(p, in) + def <<<(in: Seq[Elem]) = feedAll(p, in) def ~[U](q: Parser[U]) = seq(p, q) def <~[U](q: Parser[U]) = map(seq(p, q), _._1) def ~>[U](q: Parser[U]) = map(seq(p, q), _._2) @@ -16,9 +16,9 @@ trait Syntax { self: Parsers & DerivedOps => def <|[U >: R](q: Parser[U]) = biasedAlt(p, q) 
def |>[U >: R](q: Parser[U]) = biasedAlt(q, p) - def ^^[U](f: R => U): Parser[U] = map(p, f) - def ^^^[U](u: => U): Parser[U] = map(p, _ => u) - def >>[U](f: R => Parser[U]): Parser[U] = flatMap(p, f) + def ^^[U](f: R => U) = map(p, f) + def ^^^[U](u: => U) = map(p, _ => u) + def >>[U](f: R => Parser[U]) = flatMap(p, f) def ? = opt(p) def * = many(p) From f1bc993767fbb439b6c183126bf2917c4d707521 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 19:00:02 +0100 Subject: [PATCH 56/95] Prettify Section3 --- .../main/scala/examples/paper/Section3.scala | 113 ++++++++---------- 1 file changed, 49 insertions(+), 64 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Section3.scala b/artifact/src/main/scala/examples/paper/Section3.scala index 287f5c6..ecc70a2 100644 --- a/artifact/src/main/scala/examples/paper/Section3.scala +++ b/artifact/src/main/scala/examples/paper/Section3.scala @@ -2,42 +2,35 @@ package fcd import scala.language.implicitConversions -/** - * Section 3 - First-class Derivatives: Gaining - * Fine Grained Control over the Input Stream - * =========================================== - * This file contains all code examples from section 3 of our paper: - * - * Brachthäuser, Rendel, Ostermann. - * Parsing with First-Class Derivatives - * To appear in OOPSLA 2016. - * - * The examples are grouped by subsections. For every subsection with - * examples we introduced a corresponding Scala object below. - * - * You can experiment with the examples of this file in the REPL by: - * - * > console - * scala> import paper.section_3_2._ - * scala> number.parse("42") - * res0: Results[Int] = List(42) - * - * You can reach the Scala console by entering 'console' at the - * sbt prompt. - * - * Additional note: All examples are parametrized by the parser combinator - * library to allow experimenting with different implementations. This should - * also support future research and alternate implementations. 
- * - * All the traits containing paper examples are eventually combined and - * instantiated to an object `paper` in `Paper.scala`. - */ - +/** Section 3 – Gaining Fine Grained Control over the Input Stream + * + * This file contains all code examples from section 3 of our paper. + * + * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives To + * appear in OOPSLA 2016. + * + * The examples are grouped by subsections. For every subsection with examples + * we introduced a corresponding Scala object below. + * + * You can experiment with the examples of this file in the REPL by: + * + * > console scala> import paper.section_3_2._ scala> number.parse("42") res0: + * Results[Int] = List(42) + * + * You can reach the Scala console by entering 'console' at the sbt prompt. + * + * Additional note: All examples are parametrized by the parser combinator + * library to allow experimenting with different implementations. This should + * also support future research and alternate implementations. + * + * All the traits containing paper examples are eventually combined and + * instantiated to an object `paper` in `Paper.scala`. + */ trait Section3 { self: RichParsers => - /** - * Section 3.2 First-Class Derivatives - */ + + /** Section 3.2 First-Class Derivatives + */ object section_3_2 { // ### Example of Subsection 3.2: First-Class Derivatives (<<) @@ -85,7 +78,6 @@ trait Section3 { self: RichParsers => // This requires that you have graphviz installed on your computer. 
val q: Parser[List[Char]] = many('a') - // ### Example of Subsection 3.2: Combinator "nt" // // Difference: The combinator `nt` in the paper is called `nonterminal` in @@ -103,12 +95,13 @@ trait Section3 { self: RichParsers => // // The implicit conversions that wrap the production into `nonterminal` // calls are defined in the file Syntax.scala - val digit: Parser[Int] = acceptIf(_.isDigit) ^^ { s => Integer.valueOf(s.toString) } + val digit: Parser[Int] = acceptIf(_.isDigit) ^^ { s => + Integer.valueOf(s.toString) + } val number: Parser[Int] = - nonterminal( number ~ digit ^^ { case (n, d) => (n * 10) + d } - | digit - ) + nonterminal(number ~ digit ^^ { case (n, d) => (n * 10) + d } + | digit) // To get an overview of the available parser combinator refer to: // @@ -119,19 +112,17 @@ trait Section3 { self: RichParsers => } - /** - * Section 3.4 Implementation using First-Class Derivatives - */ + /** Section 3.4 Implementation using First-Class Derivatives + */ object section_3_4 { // Figure 4a. Definition of the combinator indented(p) in terms of <<. - def indented[T](p: Parser[T]): Parser[T] = + def indented[T](p: Parser[T]) = done(p) | (space ~ space) ~> readLine(p) def readLine[T](p: Parser[T]): Parser[T] = - ( no('\n') >> { c => readLine(p << c) } - | accept('\n') >> { c => indented(p << c) } - ) + (no('\n') >> { c => readLine(p << c) } + | accept('\n') >> { c => indented(p << c) }) // To inspect the virtual input stream of some parser `p` in `indented(p)` // one can use the following parser as kind of "mock-parser" @@ -154,27 +145,24 @@ trait Section3 { self: RichParsers => // please note the use of combinator `manyN(n, space)` which recognizes // n-many spaces. 
- def indentBy[T](n: Int): Parser[T] => Parser[T] = p => + def indentBy[T](n: Int)(p: Parser[T]) = done(p) | manyN(n, space) ~> readLine(n)(p) // Only change: pass the level of indentation as parameter around def readLine[T](n: Int)(p: Parser[T]): Parser[T] = - ( no('\n') >> { c => readLine(n)(p << c) } - | accept('\n') >> { c => indentBy(n)(p << c) } - ) + (no('\n') >> { c => readLine(n)(p << c) } + | accept('\n') >> { c => indentBy(n)(p << c) }) // Here we first read some spaces (at least one) and then invoke // `indentBy`. - def indented[T](p: Parser[T]): Parser[T] = consumed(some(space)) >> { case s => + def indented[T](p: Parser[T]) = consumed(some(space)) >> { s => // this simulates lookahead for greedy matching no(' ') >> { c => indentBy(s.size)(p) <<< s << c } } } - - /** - * Derived Combinators - */ + /** Derived Combinators + */ object section_3_5 { // Section 3.5 introduces `delegate` and `repeat`. The implementation of @@ -194,7 +182,6 @@ trait Section3 { self: RichParsers => def injectA[T](p: Parser[T]): Parser[T] = ((any ~ any) &> delegate(p)) >> { p2 => 'a' ~> p2 } - // Not in the paper: Example for usage of combinator `repeat`. // every two tokens recognize an intermediate token 'a'. // @@ -205,13 +192,13 @@ trait Section3 { self: RichParsers => // Please note, that since we repeatedly delimit with `any ~ any` the // resulting parser can only recognize words in { (xxa)* | x ∈ Σ } def injectAs[T] = repeat[T] { p => - ((any ~ any) &> delegate(p)) <~ 'a' + ((any ~ any) &> delegate(p)) <~ 'a' } // Figure 5b. Definition of the combinator `indented(p)` in terms of `delegate`. lazy val line = many(no('\n')) <~ '\n' def indented[T]: Parser[T] => Parser[T] = repeat[T] { p => - (space ~ space) ~> (line &> delegate(p)) + (space ~ space) ~> (line &> delegate(p)) } // To experiment with this implementation of indented you can selectively @@ -222,11 +209,9 @@ trait Section3 { self: RichParsers => // involving the indentation combinator. 
} - - /** - * Symmetrical to section_3_4 and section_3_4_improved we can define flexible - * indentation using delegate and repeat. - */ + /** Symmetrical to section_3_4 and section_3_4_improved we can define flexible + * indentation using delegate and repeat. + */ object section_3_5_improved { lazy val line = many(no('\n')) <~ '\n' @@ -234,7 +219,7 @@ trait Section3 { self: RichParsers => manyN(n, space) ~> (line &> delegate(p)) } - def indented[T](p: Parser[T]): Parser[T] = consumed(some(space)) >> { case s => + def indented[T](p: Parser[T]) = consumed(some(space)) >> { s => no(' ') >> { c => indentBy(s.size)(p) <<< s << c } } } From 18b61a5dcde8fb5efe6e7b4024d1b76ccb0a7373 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 19:05:58 +0100 Subject: [PATCH 57/95] Prettify Section4. Remove dangerous 'Any's --- .../main/scala/examples/paper/Section4.scala | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/artifact/src/main/scala/examples/paper/Section4.scala b/artifact/src/main/scala/examples/paper/Section4.scala index 1c36371..f985053 100644 --- a/artifact/src/main/scala/examples/paper/Section4.scala +++ b/artifact/src/main/scala/examples/paper/Section4.scala @@ -2,9 +2,9 @@ package fcd import scala.language.implicitConversions -/** Section 4 - Applications +/** Section 4 – Applications * - * This file contains all code examples from section 5 of our paper. + * This file contains all code examples from section 4 of our paper. * * Brachthäuser, Rendel, Ostermann. Parsing with First-Class Derivatives To * appear in OOPSLA 2016. @@ -21,12 +21,12 @@ trait Section4 { self: Section3 & RichParsers => // very simplified grammar to illustrate parser selection import section_3_5_improved._ - lazy val stmt: NT[Any] = + lazy val stmt: NT[?] = ("while" ~ space ~ "(true):" ~ block | some('x') ~ '\n') lazy val stmts = many(stmt) - lazy val block: NT[Any] = '\n' ~ indented(stmts) + lazy val block: NT[?] 
= '\n' ~ indented(stmts) // ### Example: Retroactive selection of the while statement nonterminal // @@ -45,7 +45,7 @@ trait Section4 { self: Section3 & RichParsers => /** Section 4.2 Modular Definitions as Combinators */ object section_4_2 { - def unescChar(c: Char): String = StringContext processEscapes s"\\$c" + def unescChar(c: Char) = StringContext processEscapes s"\\$c" // ### Example. Preprocessor that unescapes backslash escaped characters // @@ -65,7 +65,7 @@ trait Section4 { self: Section3 & RichParsers => // ### Example Figure 6a. Combinators for interleaved parsing of fenced code // blocks. - val marker: Parser[Any] = lineEnd ~ "~~~" ~ lineEnd + val marker = lineEnd ~ "~~~" ~ lineEnd // We have two states: Inside the code block and outside the code block def inCode[R, S](text: Parser[R], code: Parser[S]): NT[(R, S)] = @@ -78,7 +78,7 @@ trait Section4 { self: Section3 & RichParsers => | eat { c => inText(text << c, code) }) // Simple variant of balanced parenthesis - lazy val parens: NT[Any] = '(' ~ parens ~ ')' | succeed(()) + lazy val parens: NT[?] 
= '(' ~ parens ~ ')' | succeed(()) // Blocks of "a"s, such as: // @@ -87,7 +87,7 @@ trait Section4 { self: Section3 & RichParsers => // // aaaaa // aaaaa - val as: Parser[Any] = some(many('a') <~ lineEnd) + val as = some(many('a') <~ lineEnd) // Now we can retroactively combine the two parsers `parens` and `as` by // The resulting parser can parse for instance words like @@ -137,11 +137,11 @@ trait Section4 { self: Section3 & RichParsers => // a parser computing the table layout def head: Parser[Layout] = some('+' ~> manyCount('-')) <~ '+' - def body[T](layout: Layout, cell: Parser[T]): Parser[List[List[T]]] = + def body[T](layout: Layout, cell: Parser[T]) = many(rowLine(layout, layout.map(n => cell)) <~ rowSeparator(layout)) // given a layout, creates a parser for row separators - def rowSeparator(layout: Layout): Parser[Any] = + def rowSeparator(layout: Layout) = layout .map { n => List.fill(n)('-').mkString + "+" } .foldLeft("+")(_ + _) ~ lineEnd @@ -157,7 +157,7 @@ trait Section4 { self: Section3 & RichParsers => def delegateCells[T]( layout: Layout, cells: List[Parser[T]] - ): List[Parser[Parser[T]]] = + ) = layout.zip(cells).map { case (n, p) => map(delegateN(n, p), (_ << '\n')) <~ '|' } @@ -179,8 +179,8 @@ trait Section4 { self: Section3 & RichParsers => // |~~~ | // |aaaa| // +----+ - lazy val combined: NT[Any] = inText(asAndTables, spaced(parens)) - lazy val asAndTables: NT[Any] = as | table(combined) + lazy val combined: NT[?] = inText(asAndTables, spaced(parens)) + lazy val asAndTables: NT[?] = as | table(combined) // Again, some more examples of words that are recognized by `combined` can // be found in `DerivativeParsersTests.scala`. 
From 13a9aaae7b6c249d540c33cb6af9f46927f71bd9 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 19:54:18 +0100 Subject: [PATCH 58/95] Simplify PythonParsers --- .../main/scala/examples/PythonParsers.scala | 181 ++++++++---------- 1 file changed, 84 insertions(+), 97 deletions(-) diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index 617dc2e..ae6d8c2 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -70,19 +70,19 @@ trait PythonLexemes { self: Parsers & DerivedOps & Syntax => given kw: Conversion[Symbol, Parser[Elem]] = { kw => accept(KW(kw.name)) } given punct: Conversion[String, Parser[Elem]] = { p => accept(Punct(p)) } - lazy val string: Parser[Str] = any >> { + lazy val string = any >> { case s: Str => succeed(s) case _ => fail } - lazy val number: Parser[Num] = any >> { + lazy val number = any >> { case n: Num => succeed(n) case _ => fail } - lazy val id: Parser[Id] = any >> { + lazy val id = any >> { case id: Id => succeed(id) case _ => fail } - lazy val comment: Parser[Comment] = any >> { + lazy val comment = any >> { case c: Comment => succeed(c) case _ => fail } @@ -101,7 +101,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { p: Elem => Boolean, thn: Elem => Parser[T], els: Elem => Parser[T] - ): Parser[T] = + ) = eat { c => if (p(c)) thn(c) else els(c) } // Simply preprocesses the input stream and strips out comments @@ -128,7 +128,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { lazy val dyck: Parser[Any] = enclosed(many(dyck)) // the repetition of enclosed is unfortunate - lazy val extDyck: Parser[Any] = enclosed(always) &> + lazy val extDyck = enclosed(always) &> filter((opening ++ closing).toSeq contains _)(dyck) // From the python reference manual: @@ -196,12 +196,12 @@ trait PythonParsers extends PythonLexemes, PythonAst { lazy val spaces = many(whitespace) 
extension [T](p: Parser[T]) { - def ␣[U](q: => Parser[U]): Parser[T ~ U] = p ~ (spaces ~> q) - def <␣[U](q: => Parser[U]): Parser[T] = p <~ (spaces ~ q) - def ␣>[U](q: => Parser[U]): Parser[U] = p ~> (spaces ~> q) + def ␣[U](q: => Parser[U]) = p ~ (spaces ~> q) + def <␣[U](q: => Parser[U]) = p <~ (spaces ~ q) + def ␣>[U](q: => Parser[U]) = p ~> (spaces ~> q) } - def listOf[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = + def listOf[T](p: Parser[T], sep: Parser[Any]) = someSep(p, spaces ~ sep ~ spaces) <~ opt(spaces ~ sep) def optList[T](p: Parser[List[T]]) = p | succeed(Nil) @@ -229,7 +229,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { "->" ␣> test )) ␣ (":" ␣> suite) ^^ this.FuncDef.apply - lazy val parameters: Parser[Any] = "(" ~> spacedOpt(typedargslist) <␣ ")" + lazy val parameters = "(" ~> spacedOpt(typedargslist) <␣ ")" // ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef] def fpdef(p: Parser[Any]): Parser[Any] = @@ -239,54 +239,53 @@ trait PythonParsers extends PythonLexemes, PythonAst { | "**" ␣ p) def testdefs(p: Parser[Any]) = someSep(p ~ spacedOpt("=" ␣> test), ",") - lazy val typedargslist: Parser[Any] = + lazy val typedargslist = testdefs(tfpdef) ~ spacedOpt("," ␣> fpdef(tfpdef)) | fpdef(tfpdef) - lazy val varargslist: Parser[Any] = + lazy val varargslist = testdefs(vfpdef) ~ spacedOpt("," ␣> fpdef(vfpdef)) | fpdef(vfpdef) - lazy val tfpdef: Parser[Any] = id ~ spacedOpt(":" ␣> test) - lazy val vfpdef: Parser[Any] = id + lazy val tfpdef = id ~ spacedOpt(":" ␣> test) + lazy val vfpdef = id // --- Statements --- - lazy val stmt: NT[Any] = simple_stmt | compound_stmt - lazy val simple_stmt: Parser[Any] = + lazy val stmt: NT[?] 
= simple_stmt | compound_stmt + lazy val simple_stmt = listOf(small_stmt, ";") <␣ NL ^^ this.Simple.apply - lazy val small_stmt: Parser[Any] = + lazy val small_stmt = (expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) - lazy val expr_stmt: Parser[Any] = + lazy val expr_stmt = (testlist_star_expr | testlist_star_expr ␣ augassign ␣ (yield_expr | testlist) | testlist_star_expr ~ some( spaces ~> "=" ␣> (yield_expr | testlist_star_expr) )) ^^ this.ExprStmt.apply - lazy val testlist_star_expr: Parser[Any] = listOf(test | star_expr, ",") + lazy val testlist_star_expr = listOf(test | star_expr, ",") - lazy val augassign: Parser[Any] = ("+=" | "-=" | "*=" | "@=" | "/=" | "%=" + lazy val augassign = ("+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=") - lazy val del_stmt: Parser[Stmt] = "del" ␣> exprlist ^^ this.Del.apply - lazy val pass_stmt: Parser[Stmt] = "pass" ^^^ Pass - lazy val flow_stmt: Parser[Stmt] = + lazy val del_stmt = "del" ␣> exprlist ^^ this.Del.apply + lazy val pass_stmt = "pass" ^^^ Pass + lazy val flow_stmt = break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt - lazy val break_stmt: Parser[Stmt] = "break" ^^^ Break - lazy val continue_stmt: Parser[Stmt] = "continue" ^^^ Continue - lazy val return_stmt: Parser[Stmt] = - "return" ~> spacedOpt(testlist) ^^ this.Return.apply - lazy val yield_stmt: Parser[Stmt] = yield_expr ^^ this.ExprStmt.apply - lazy val raise_stmt: Parser[Stmt] = + lazy val break_stmt = "break" ^^^ Break + lazy val continue_stmt = "continue" ^^^ Continue + lazy val return_stmt = "return" ~> spacedOpt(testlist) ^^ this.Return.apply + lazy val yield_stmt = yield_expr ^^ this.ExprStmt.apply + lazy val raise_stmt = "raise" ~> spacedOpt(test ~ spacedOpt("from" ␣ test)) ^^ this.Raise.apply - lazy val import_stmt: Parser[Any] = import_name | import_from - lazy val import_name: Parser[Any] = "import" ␣> dotted_as_names ^^ { n => + lazy val 
import_stmt = import_name | import_from + lazy val import_name = "import" ␣> dotted_as_names ^^ { n => Import(n) } // # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS - lazy val import_from: Parser[Any] = + lazy val import_from = ("from" ~> (spacedMany("." | "...") ~ dotted_name | some( "." | "..." )) ␣ @@ -295,129 +294,117 @@ trait PythonParsers extends PythonLexemes, PythonAst { case (from, names) => Import(names, Some(from)) } - lazy val import_as_name: Parser[Any] = id ~ spacedOpt("as" ␣ id) - lazy val dotted_as_name: Parser[Any] = - dotted_name ~ spacedOpt("as" ␣ id) - lazy val import_as_names: Parser[Any] = listOf(test | import_as_name, ",") - lazy val dotted_as_names: Parser[Any] = someSep(dotted_as_name, ",") - lazy val dotted_name: Parser[Any] = someSep(id, ".") - - lazy val global_stmt: Parser[Any] = - "global" ␣> someSep(id, ",") ^^ this.Global.apply - lazy val nonlocal_stmt: Parser[Any] = - "nonlocal" ␣> someSep(id, ",") ^^ this.Nonlocal.apply - lazy val assert_stmt: Parser[Any] = - "assert" ␣> someSep(test, ",") ^^ this.Assert.apply - - lazy val compound_stmt: Parser[Any] = + lazy val import_as_name = id ~ spacedOpt("as" ␣ id) + lazy val dotted_as_name = dotted_name ~ spacedOpt("as" ␣ id) + lazy val import_as_names = listOf(test | import_as_name, ",") + lazy val dotted_as_names = someSep(dotted_as_name, ",") + lazy val dotted_name = someSep(id, ".") + + lazy val global_stmt = "global" ␣> someSep(id, ",") ^^ this.Global.apply + lazy val nonlocal_stmt = "nonlocal" ␣> someSep(id, ",") ^^ this.Nonlocal.apply + lazy val assert_stmt = "assert" ␣> someSep(test, ",") ^^ this.Assert.apply + + lazy val compound_stmt = if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt - lazy val async_stmt: Parser[Any] = + lazy val async_stmt = "async" ␣> (funcdef | with_stmt | for_stmt) - lazy val if_stmt: Parser[Any] = + lazy val if_stmt = "if" ␣> test ␣ (":" ␣> suite ~ spacedMany("elif" ␣> 
test ␣ (":" ␣> suite)) ~ spacedOpt(("else" ␣ ":") ␣> suite)) - lazy val while_stmt: Parser[Any] = - "while" ␣> test ␣ (":" ␣> suite ~ spacedOpt( - ("else" ␣ ":") ␣> suite - )) - lazy val for_stmt: Parser[Any] = + lazy val while_stmt = "while" ␣> test ␣ (":" ␣> suite ~ spacedOpt( + ("else" ␣ ":") ␣> suite + )) + lazy val for_stmt = "for" ␣> exprlist ␣ ("in" ␣> testlist ␣ (":" ␣> suite ~ spacedOpt( ("else" ␣> ":") ␣> suite ))) ^^ { case (exprs, (tests, (body, default))) => For(exprs, tests, body, default) } - lazy val try_stmt: Parser[Any] = + lazy val try_stmt = ("try" ␣ ":") ␣> suite ␣ (some(except_clause ␣ (":" ␣> suite)) ~ spacedOpt(("else" ␣ ":") ␣> suite) ~ spacedOpt(("finally" ␣ ":") ␣> suite) | (("finally" ␣ ":") ␣> suite)) - lazy val with_stmt: Parser[Any] = - "with" ␣> someSep(with_item, ",") ␣ (":" ␣> suite) - lazy val with_item: Parser[Any] = test ~ spacedOpt("as" ␣> expr) + lazy val with_stmt = "with" ␣> someSep(with_item, ",") ␣ (":" ␣> suite) + lazy val with_item = test ~ spacedOpt("as" ␣> expr) // # NB compile.c makes sure that the default except clause is last - lazy val except_clause: Parser[Any] = - "except" ~> spacedOpt(test ␣ opt("as" ␣> id)) + lazy val except_clause = "except" ~> spacedOpt(test ␣ opt("as" ␣> id)) // INDENTATION // changed to also allow empty lines - lazy val suite: Parser[Any] = - simple_stmt | NL ~> indented(some(many(emptyLine) ~> stmt)) + lazy val suite = simple_stmt | NL ~> indented(some(many(emptyLine) ~> stmt)) // --- Expressions --- lazy val test: NT[Any] = (or_test ~ spacedOpt("if" ␣> or_test ␣ ("else" ␣> test)) | lambdef) lazy val test_nocond: NT[Any] = or_test | lambdef_nocond - lazy val lambdef: NT[Any] = "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test) - lazy val lambdef_nocond: NT[Any] = + lazy val lambdef: NT[?] = "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test) + lazy val lambdef_nocond: NT[?] 
= "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test_nocond) lazy val or_test: NT[Any] = someSep(and_test, "or") - lazy val and_test: NT[Any] = someSep(not_test, "and") + lazy val and_test: NT[?] = someSep(not_test, "and") lazy val not_test: NT[Any] = "not" ␣> not_test | comparison - lazy val comparison: NT[Any] = someSep(expr, comp_op) + lazy val comparison: NT[?] = someSep(expr, comp_op) // # <> isn't actually a valid comparison operator in Python. It's here for the // # sake of a __future__ import described in PEP 401 (which really works :-) - lazy val comp_op: Parser[Any] = ("<" | ">" | "==" | ">=" | "<=" | "<>" | "!=" + lazy val comp_op = ("<" | ">" | "==" | ">=" | "<=" | "<>" | "!=" | "in" | "not" ␣ "in" | "is" | "is" ␣ "not") lazy val expr: NT[Any] = binOp(xor_expr, "|", this.BinOp.apply) - lazy val xor_expr: NT[Any] = binOp(and_expr, "^", this.BinOp.apply) - lazy val and_expr: NT[Any] = binOp(shift_expr, "&", this.BinOp.apply) - lazy val shift_expr: NT[Any] = - binOp(arith_expr, "<<" | ">>", this.BinOp.apply) - lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", this.BinOp.apply) - lazy val term: NT[Any] = + lazy val xor_expr: NT[?] = binOp(and_expr, "^", this.BinOp.apply) + lazy val and_expr: NT[?] = binOp(shift_expr, "&", this.BinOp.apply) + lazy val shift_expr: NT[?] = binOp(arith_expr, "<<" | ">>", this.BinOp.apply) + lazy val arith_expr: NT[?] = binOp(term, "+" | "-", this.BinOp.apply) + lazy val term: NT[?] = binOp(factor, "*" | "@" | "/" | "%" | "//", this.BinOp.apply) lazy val factor: NT[Any] = ("+" | "-" | "~") ␣ factor | power - lazy val power: NT[Any] = atom_expr | atom_expr ␣ "**" ␣ factor - - lazy val atom_expr: Parser[Any] = + lazy val power: NT[?] 
= atom_expr | atom_expr ␣ "**" ␣ factor + lazy val atom_expr = opt("await" ~ spaces) ~> atom ~ spacedMany(trailer) - lazy val atom: Parser[Any] = ("(" ␣> (yield_expr | testlist_comp) <␣ ")" + lazy val atom = ("(" ␣> (yield_expr | testlist_comp) <␣ ")" | "[" ~> spacedOpt(testlist_comp) <␣ "]" | "{" ~> spacedOpt(dictorsetmaker) <␣ "}" | id | number | some(string) | "..." | "None" | "True" | "False") - lazy val star_expr: Parser[Any] = "*" ␣ expr - lazy val yield_expr: Parser[Any] = - "yield" ~ spacedOpt("from" ␣ test | testlist) + lazy val star_expr = "*" ␣ expr + lazy val yield_expr = "yield" ~ spacedOpt("from" ␣ test | testlist) - lazy val testlist_comp: Parser[Any] = (listOf(test | star_expr, ",") + lazy val testlist_comp = (listOf(test | star_expr, ",") | (test | star_expr) ␣ comp_for) - lazy val trailer: Parser[Any] = ("(" ␣> optArgs <␣ ")" + lazy val trailer = ("(" ␣> optArgs <␣ ")" | "[" ␣> subscriptlist <␣ "]" | "." ␣> id) - lazy val subscriptlist: Parser[Any] = listOf(subscript, ",") - lazy val subscript: Parser[Any] = + lazy val subscriptlist = listOf(subscript, ",") + lazy val subscript = test | spacedOpt(test) ~ ":" ~ spacedOpt(test) ~ spacedOpt( ":" ~> spacedOpt(test) ) - lazy val exprlist: Parser[List[Any]] = listOf(expr | star_expr, ",") - lazy val testlist: Parser[Any] = listOf(test, ",") + lazy val exprlist = listOf(expr | star_expr, ",") + lazy val testlist = listOf(test, ",") - lazy val dictorsetmaker: Parser[Any] = + lazy val dictorsetmaker = ((listOf(test ␣ (":" ␣> test) | "**" ␣> expr, ",") | (test ␣ (":" ␣> test) | "**" ␣> expr) ␣ comp_for) | (listOf(test | star_expr, ",") | (test | star_expr) ␣ comp_for)) - lazy val classdef: Parser[Any] = + lazy val classdef = "class" ␣> (id ~ spacedOpt("(" ␣> optArgs <␣ ")")) ␣ (":" ␣> suite) - lazy val arglist: Parser[List[Any]] = listOf(argument, ",") - lazy val optArgs: Parser[List[Any]] = arglist | succeed(Nil) + lazy val arglist = listOf(argument, ",") + lazy val optArgs = arglist | succeed(Nil) - lazy 
val argument: Parser[Any] = - (test ~ spacedOpt(comp_for) - | test ␣ "=" ␣ test - | "**" ␣ test - | "*" ␣ test) + lazy val argument = (test ~ spacedOpt(comp_for) + | test ␣ "=" ␣ test + | "**" ␣ test + | "*" ␣ test) - lazy val comp_iter: NT[Any] = comp_for | comp_if + lazy val comp_iter: NT[?] = comp_for | comp_if lazy val comp_for = "for" ␣> exprlist ␣ ("in" ␣> or_test ~ spacedOpt(comp_iter)) lazy val comp_if = "if" ␣> test_nocond ~ spacedOpt(comp_iter) From 6ee3454e6d26152ee4be08e789ed4921163ec735 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 20:59:27 +0100 Subject: [PATCH 59/95] Replace wildcards with 'Any' --- .../main/scala/examples/PythonParsers.scala | 24 +++++++++---------- .../main/scala/examples/paper/Section4.scala | 10 ++++---- artifact/src/test/scala/LeftrecTests.scala | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index ae6d8c2..6718f49 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -249,7 +249,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { lazy val vfpdef = id // --- Statements --- - lazy val stmt: NT[?] = simple_stmt | compound_stmt + lazy val stmt: NT[Any] = simple_stmt | compound_stmt lazy val simple_stmt = listOf(small_stmt, ";") <␣ NL ^^ this.Simple.apply lazy val small_stmt = @@ -341,27 +341,27 @@ trait PythonParsers extends PythonLexemes, PythonAst { (or_test ~ spacedOpt("if" ␣> or_test ␣ ("else" ␣> test)) | lambdef) lazy val test_nocond: NT[Any] = or_test | lambdef_nocond - lazy val lambdef: NT[?] = "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test) - lazy val lambdef_nocond: NT[?] 
= + lazy val lambdef: NT[Any] = "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test) + lazy val lambdef_nocond: NT[Any] = "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test_nocond) lazy val or_test: NT[Any] = someSep(and_test, "or") - lazy val and_test: NT[?] = someSep(not_test, "and") + lazy val and_test: NT[Any] = someSep(not_test, "and") lazy val not_test: NT[Any] = "not" ␣> not_test | comparison - lazy val comparison: NT[?] = someSep(expr, comp_op) + lazy val comparison: NT[Any] = someSep(expr, comp_op) // # <> isn't actually a valid comparison operator in Python. It's here for the // # sake of a __future__ import described in PEP 401 (which really works :-) lazy val comp_op = ("<" | ">" | "==" | ">=" | "<=" | "<>" | "!=" | "in" | "not" ␣ "in" | "is" | "is" ␣ "not") lazy val expr: NT[Any] = binOp(xor_expr, "|", this.BinOp.apply) - lazy val xor_expr: NT[?] = binOp(and_expr, "^", this.BinOp.apply) - lazy val and_expr: NT[?] = binOp(shift_expr, "&", this.BinOp.apply) - lazy val shift_expr: NT[?] = binOp(arith_expr, "<<" | ">>", this.BinOp.apply) - lazy val arith_expr: NT[?] = binOp(term, "+" | "-", this.BinOp.apply) - lazy val term: NT[?] = + lazy val xor_expr: NT[Any] = binOp(and_expr, "^", this.BinOp.apply) + lazy val and_expr: NT[Any] = binOp(shift_expr, "&", this.BinOp.apply) + lazy val shift_expr: NT[Any] = binOp(arith_expr, "<<" | ">>", this.BinOp.apply) + lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", this.BinOp.apply) + lazy val term: NT[Any] = binOp(factor, "*" | "@" | "/" | "%" | "//", this.BinOp.apply) lazy val factor: NT[Any] = ("+" | "-" | "~") ␣ factor | power - lazy val power: NT[?] = atom_expr | atom_expr ␣ "**" ␣ factor + lazy val power: NT[Any] = atom_expr | atom_expr ␣ "**" ␣ factor lazy val atom_expr = opt("await" ~ spaces) ~> atom ~ spacedMany(trailer) lazy val atom = ("(" ␣> (yield_expr | testlist_comp) <␣ ")" @@ -404,7 +404,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { | "**" ␣ test | "*" ␣ test) - lazy val comp_iter: NT[?] 
= comp_for | comp_if + lazy val comp_iter: NT[Any] = comp_for | comp_if lazy val comp_for = "for" ␣> exprlist ␣ ("in" ␣> or_test ~ spacedOpt(comp_iter)) lazy val comp_if = "if" ␣> test_nocond ~ spacedOpt(comp_iter) diff --git a/artifact/src/main/scala/examples/paper/Section4.scala b/artifact/src/main/scala/examples/paper/Section4.scala index f985053..873ea31 100644 --- a/artifact/src/main/scala/examples/paper/Section4.scala +++ b/artifact/src/main/scala/examples/paper/Section4.scala @@ -21,12 +21,12 @@ trait Section4 { self: Section3 & RichParsers => // very simplified grammar to illustrate parser selection import section_3_5_improved._ - lazy val stmt: NT[?] = + lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block | some('x') ~ '\n') lazy val stmts = many(stmt) - lazy val block: NT[?] = '\n' ~ indented(stmts) + lazy val block: NT[Any] = '\n' ~ indented(stmts) // ### Example: Retroactive selection of the while statement nonterminal // @@ -78,7 +78,7 @@ trait Section4 { self: Section3 & RichParsers => | eat { c => inText(text << c, code) }) // Simple variant of balanced parenthesis - lazy val parens: NT[?] = '(' ~ parens ~ ')' | succeed(()) + lazy val parens: NT[Any] = '(' ~ parens ~ ')' | succeed(()) // Blocks of "a"s, such as: // @@ -179,8 +179,8 @@ trait Section4 { self: Section3 & RichParsers => // |~~~ | // |aaaa| // +----+ - lazy val combined: NT[?] = inText(asAndTables, spaced(parens)) - lazy val asAndTables: NT[?] = as | table(combined) + lazy val combined: NT[Any] = inText(asAndTables, spaced(parens)) + lazy val asAndTables: NT[Any] = as | table(combined) // Again, some more examples of words that are recognized by `combined` can // be found in `DerivativeParsersTests.scala`. 
diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index b65d0e7..e794ab3 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -143,7 +143,7 @@ trait LeftrecTests { // should parse at most as many 'd's as it parses 'b's. describe("A = B ~ A ~ b | c\n B = d | empty") { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' - lazy val B: NT[?] = 'd' | succ("done") + lazy val B: NT[Any] = 'd' | succ("done") A `shouldParse` "c" A `shouldParse` "cb" From c2b46a3a62f93d0a7d66de0912b6171372af37c8 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 21:01:02 +0100 Subject: [PATCH 60/95] Format Leftrec tests --- artifact/src/test/scala/LeftrecTests.scala | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index e794ab3..7c3a152 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -143,7 +143,7 @@ trait LeftrecTests { // should parse at most as many 'd's as it parses 'b's. describe("A = B ~ A ~ b | c\n B = d | empty") { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' - lazy val B: NT[Any] = 'd' | succ("done") + lazy val B: NT[Any] = 'd' | succ("done") A `shouldParse` "c" A `shouldParse` "cb" @@ -202,21 +202,13 @@ trait LeftrecTests { (literal ~ '+' | condExpr) - lazy val condExpr: NT[Any] = - (condExpr ~ '?' - | eqExpr) + lazy val condExpr: NT[Any] = condExpr ~ '?' 
| eqExpr - lazy val eqExpr: NT[Any] = - (eqExpr ~ '*' - | literal) + lazy val eqExpr: NT[Any] = eqExpr ~ '*' | literal - lazy val literal: NT[Any] = - (many('a') - | '[' ~ arrayEl) + lazy val literal: NT[Any] = many('a') | '[' ~ arrayEl - lazy val arrayEl: NT[Any] = - (expression - | succ("undefined")) + lazy val arrayEl: NT[Any] = expression | succ("undefined") expression `shouldParse` "" expression `shouldParse` "a" From 074a17b81f36d9080b4bde9fab147bffc0d0ac96 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 21:09:00 +0100 Subject: [PATCH 61/95] Fix greedySome: Needs NT --- .../src/main/scala/library/DerivedOps.scala | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index dd530c3..b341c90 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -17,12 +17,16 @@ trait DerivedOps { self: Parsers & Syntax => def some[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => p :: ps } + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => + p :: ps + } some_v } def many[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => p :: ps } + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => + p :: ps + } many_v } @@ -59,7 +63,9 @@ trait DerivedOps { self: Parsers & Syntax => // same optimization as above for many and some def someSep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(sep ~> some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => p :: ps } + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) 
=> + p :: ps + } some_v } @@ -175,11 +181,10 @@ trait DerivedOps { self: Parsers & Syntax => } // Greedy repetition - def greedyMany[T](p: Parser[T]): Parser[List[T]] = - greedySome(p) | succeed(Nil) + def greedyMany[T](p: Parser[T]) = greedySome(p) | succeed(Nil) // Instead of a class use a closure: - def greedySome[T]: Parser[T] => Parser[List[T]] = { p => + def greedySome[T]: Parser[T] => NT[List[T]] = { p => def withNext(p: Parser[T], ps: Parser[List[T]]): Parser[List[T]] = done(p) ~ ps ^^ { case (t, ts) => t :: ts } From 44faf495e2d31f5cef08eba2d51c25241807f9a7 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 21:15:43 +0100 Subject: [PATCH 62/95] Rename lambda variable --- artifact/src/main/scala/library/Syntax.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 5eb3364..dc91ccd 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -33,7 +33,7 @@ trait Syntax { self: Parsers & DerivedOps => // tag nonterminals - this allows automatic insertion of nt-markers final case class NT[+R](parser: Parser[R]) given [R]: Conversion[NT[R], Parser[R]] = _.parser - given [R]: Conversion[Parser[R], NT[R]] = parser => NT(nonterminal(parser)) + given [R]: Conversion[Parser[R], NT[R]] = p => NT(nonterminal(p)) given tupleSeq3[T1, T2, T3, O] : Conversion[(T1, T2, T3) => O, (T1 ~ T2 ~ T3) => O] with { From 598398ddc906fa63dcb786f73edc39b31c1586aa Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 21:17:00 +0100 Subject: [PATCH 63/95] Add space --- artifact/src/main/scala/library/DerivedOps.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index b341c90..d368fa9 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ 
b/artifact/src/main/scala/library/DerivedOps.scala @@ -22,6 +22,7 @@ trait DerivedOps { self: Parsers & Syntax => } some_v } + def many[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => From 5a041a0ded97ba64d730dad3930296adaedca3cb Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 21:18:58 +0100 Subject: [PATCH 64/95] Remove type annotation for nested function --- artifact/src/main/scala/library/DerivedOps.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index d368fa9..c19d974 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -187,7 +187,7 @@ trait DerivedOps { self: Parsers & Syntax => // Instead of a class use a closure: def greedySome[T]: Parser[T] => NT[List[T]] = { p => - def withNext(p: Parser[T], ps: Parser[List[T]]): Parser[List[T]] = + def withNext(p: Parser[T], ps: Parser[List[T]]) = done(p) ~ ps ^^ { case (t, ts) => t :: ts } def forceRead(curr: Parser[T]): Parser[List[T]] = From 9fd35a8723d59d14a6fd9721e041ddbf2258605f Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 21:26:08 +0100 Subject: [PATCH 65/95] Simplify lambda --- artifact/src/main/scala/library/Syntax.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index dc91ccd..654ceb0 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -27,8 +27,7 @@ trait Syntax { self: Parsers & DerivedOps => given liftToParser[R, U](using conv: R => U - ): Conversion[Parser[R], Parser[U]] = - p => map(p, conv) + ): Conversion[Parser[R], Parser[U]] = map(_, conv) // tag nonterminals - this allows automatic 
insertion of nt-markers final case class NT[+R](parser: Parser[R]) From 9737777d39960d6a3c048714365072c7b44e9daa Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 21:49:37 +0100 Subject: [PATCH 66/95] Add spaces for readabilty --- artifact/src/main/scala/library/Syntax.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 654ceb0..3becb62 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -38,12 +38,14 @@ trait Syntax { self: Parsers & DerivedOps => : Conversion[(T1, T2, T3) => O, (T1 ~ T2 ~ T3) => O] with { def apply(f: (T1, T2, T3) => O) = { case ((t1, t2), t3) => f(t1, t2, t3) } } + given tupleSeq4[T1, T2, T3, T4, O] : Conversion[(T1, T2, T3, T4) => O, (T1 ~ T2 ~ T3 ~ T4) => O] with { def apply(f: (T1, T2, T3, T4) => O) = { case (((t1, t2), t3), t4) => f(t1, t2, t3, t4) } } + given tupleSeq5[T1, T2, T3, T4, T5, O] : Conversion[(T1, T2, T3, T4, T5) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O] with { @@ -51,6 +53,7 @@ trait Syntax { self: Parsers & DerivedOps => case ((((t1, t2), t3), t4), t5) => f(t1, t2, t3, t4, t5) } } + given tupleSeq6[T1, T2, T3, T4, T5, T6, O]: Conversion[ (T1, T2, T3, T4, T5, T6) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5 ~ T6) => O From dce10181b01b50d2f028f83d5409f4afaf6dc7f2 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 22:05:48 +0100 Subject: [PATCH 67/95] Fix null pointer exception by using lazy val --- .../src/main/scala/library/DerivedOps.scala | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index c19d974..80c413c 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -1,7 +1,5 @@ package fcd -import scala.language.implicitConversions - trait DerivedOps 
{ self: Parsers & Syntax => val any: Parser[Elem] = acceptIf(_ => true) @@ -16,19 +14,19 @@ trait DerivedOps { self: Parsers & Syntax => } def some[T](p: Parser[T]): Parser[List[T]] = { - lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => + lazy val many_v: NT[List[T]] = NT(alt(some_v, succeed(Nil))) + lazy val some_v: Parser[List[T]] = seq(p, many_v.parser) ^^ { case (p, ps) => p :: ps } some_v } def many[T](p: Parser[T]): Parser[List[T]] = { - lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => + lazy val many_v: NT[List[T]] = NT(alt(some_v, succeed(Nil))) + lazy val some_v: Parser[List[T]] = seq(p, many_v.parser) ^^ { case (p, ps) => p :: ps } - many_v + many_v.parser } // val always: Parser[Unit] = many(any) map { _ => () } @@ -63,8 +61,8 @@ trait DerivedOps { self: Parsers & Syntax => // same optimization as above for many and some def someSep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { - lazy val many_v: NT[List[T]] = alt(sep ~> some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => + lazy val many_v: NT[List[T]] = NT(alt(sep ~> some_v, succeed(Nil))) + lazy val some_v: Parser[List[T]] = seq(p, many_v.parser) ^^ { case (p, ps) => p :: ps } some_v @@ -124,7 +122,7 @@ trait DerivedOps { self: Parsers & Syntax => private def mkList[T] = (_: ~[T, List[T]]) match { case (x, xs) => x :: xs } - val succeedForever: NT[Unit] = succeed(()) | (any ~> succeedForever) + lazy val succeedForever: NT[Unit] = NT(succeed(()) | (any ~> succeedForever.parser)) def rightDerivative[R](p: Parser[R], elem: Elem): Parser[R] = done(p << elem) | eat { c => rightDerivative(p << c, elem) } @@ -182,7 +180,7 @@ trait DerivedOps { self: Parsers & Syntax => } // Greedy repetition - def greedyMany[T](p: Parser[T]) = greedySome(p) | succeed(Nil) + def greedyMany[T](p: Parser[T]) = 
greedySome(p).parser | succeed(Nil) // Instead of a class use a closure: def greedySome[T]: Parser[T] => NT[List[T]] = { p => @@ -192,9 +190,9 @@ trait DerivedOps { self: Parsers & Syntax => def forceRead(curr: Parser[T]): Parser[List[T]] = withNext(curr, succeed(Nil)) | eat { el => - biasedAlt(forceRead(curr << el), withNext(curr, greedySome(p) << el)) + biasedAlt(forceRead(curr << el), withNext(curr, greedySome(p).parser << el)) } - forceRead(p) + NT(forceRead(p)) } } From 0c3ca1477992f97f24261c0e522a8c8eba9cb315 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 22:07:18 +0100 Subject: [PATCH 68/95] Reintroduce implicit conversion --- .../src/main/scala/library/DerivedOps.scala | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index 80c413c..46991ca 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -1,5 +1,7 @@ package fcd +import scala.language.implicitConversions + trait DerivedOps { self: Parsers & Syntax => val any: Parser[Elem] = acceptIf(_ => true) @@ -14,19 +16,19 @@ trait DerivedOps { self: Parsers & Syntax => } def some[T](p: Parser[T]): Parser[List[T]] = { - lazy val many_v: NT[List[T]] = NT(alt(some_v, succeed(Nil))) - lazy val some_v: Parser[List[T]] = seq(p, many_v.parser) ^^ { case (p, ps) => + lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => p :: ps } some_v } def many[T](p: Parser[T]): Parser[List[T]] = { - lazy val many_v: NT[List[T]] = NT(alt(some_v, succeed(Nil))) - lazy val some_v: Parser[List[T]] = seq(p, many_v.parser) ^^ { case (p, ps) => + lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => p :: ps } - many_v.parser + many_v } // val always: Parser[Unit] = 
many(any) map { _ => () } @@ -61,8 +63,8 @@ trait DerivedOps { self: Parsers & Syntax => // same optimization as above for many and some def someSep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { - lazy val many_v: NT[List[T]] = NT(alt(sep ~> some_v, succeed(Nil))) - lazy val some_v: Parser[List[T]] = seq(p, many_v.parser) ^^ { case (p, ps) => + lazy val many_v: NT[List[T]] = alt(sep ~> some_v, succeed(Nil)) + lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => p :: ps } some_v @@ -122,7 +124,7 @@ trait DerivedOps { self: Parsers & Syntax => private def mkList[T] = (_: ~[T, List[T]]) match { case (x, xs) => x :: xs } - lazy val succeedForever: NT[Unit] = NT(succeed(()) | (any ~> succeedForever.parser)) + lazy val succeedForever: NT[Unit] = succeed(()) | (any ~> succeedForever) def rightDerivative[R](p: Parser[R], elem: Elem): Parser[R] = done(p << elem) | eat { c => rightDerivative(p << c, elem) } @@ -180,7 +182,7 @@ trait DerivedOps { self: Parsers & Syntax => } // Greedy repetition - def greedyMany[T](p: Parser[T]) = greedySome(p).parser | succeed(Nil) + def greedyMany[T](p: Parser[T]) = greedySome(p) | succeed(Nil) // Instead of a class use a closure: def greedySome[T]: Parser[T] => NT[List[T]] = { p => @@ -190,9 +192,9 @@ trait DerivedOps { self: Parsers & Syntax => def forceRead(curr: Parser[T]): Parser[List[T]] = withNext(curr, succeed(Nil)) | eat { el => - biasedAlt(forceRead(curr << el), withNext(curr, greedySome(p).parser << el)) + biasedAlt(forceRead(curr << el), withNext(curr, greedySome(p) << el)) } - NT(forceRead(p)) + forceRead(p) } } From 76e283fb95aa11360bbc4f8226fed9ad800e3788 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 1 Dec 2025 22:33:35 +0100 Subject: [PATCH 69/95] Simplify lambda --- artifact/src/main/scala/library/DerivativeParsers.scala | 2 +- artifact/src/main/scala/library/DerivedOps.scala | 2 +- artifact/src/test/scala/PythonParserTests.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) 
diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 9177148..f580ccb 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -343,7 +343,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => // optimization: Once p accepts, p as a prefix will always accept. // often used to implement biased choice: (not(prefix(p)) &> q - override def prefix: Parser[Any] => Parser[Unit] = p => p.prefix + override def prefix: Parser[Any] => Parser[Unit] = _.prefix } object DerivativeParsers extends RichParsers with DerivativeParsers { diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index 46991ca..ebc9150 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -141,7 +141,7 @@ trait DerivedOps { self: Parsers & Syntax => // consumed(p) >> { in => q <<< in } // some extension point for optimization - def prefix: Parser[Any] => Parser[Unit] = p => p ~> always + def prefix: Parser[Any] => Parser[Unit] = _ ~> always // per-element action performed on p def rep[T](f: Elem => Parser[T] => Parser[T]) = diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index 287cf55..04c1c92 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -30,7 +30,7 @@ class PythonParserTests describe("implicit line joining") { given keyword: Conversion[Symbol, Lexeme] = kw => KW(kw.name) - given punctuation: Conversion[String, Lexeme] = p => Punct(p) + given punctuation: Conversion[String, Lexeme] = Punct(_) val p = many(WS | id | "(" | ")" | "[" | "]") val a = Id("A") From 6d4b1d92bb109636b3a0c70fa5e420f5f4e3796a Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 2 Dec 2025 19:24:43 +0100 
Subject: [PATCH 70/95] Use implicit definition to be able to use cbn for NT conversion --- artifact/src/main/scala/library/Syntax.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 3becb62..9ff53a8 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -32,7 +32,9 @@ trait Syntax { self: Parsers & DerivedOps => // tag nonterminals - this allows automatic insertion of nt-markers final case class NT[+R](parser: Parser[R]) given [R]: Conversion[NT[R], Parser[R]] = _.parser - given [R]: Conversion[Parser[R], NT[R]] = p => NT(nonterminal(p)) + + import scala.language.implicitConversions + implicit def toNT[R](parser: => Parser[R]): NT[R] = NT(nonterminal(parser)) given tupleSeq3[T1, T2, T3, O] : Conversion[(T1, T2, T3) => O, (T1 ~ T2 ~ T3) => O] with { From e2349d6acbc900bdf9dbc1e36afa601972e50cba Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 2 Dec 2025 20:57:42 +0100 Subject: [PATCH 71/95] Ignore test output 'test.png' --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 04aebfb..f634167 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,6 @@ project .bsp .bloop .scalafmt.conf + +# Testing +test.png From 5c0fc35111911e58ea69e604bc6105d8e5b5baf2 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 2 Dec 2025 21:55:01 +0100 Subject: [PATCH 72/95] Remove with keywords --- artifact/src/main/scala/library/CharSyntax.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/artifact/src/main/scala/library/CharSyntax.scala b/artifact/src/main/scala/library/CharSyntax.scala index b322220..356cc33 100644 --- a/artifact/src/main/scala/library/CharSyntax.scala +++ b/artifact/src/main/scala/library/CharSyntax.scala @@ -24,10 +24,10 @@ trait CharSyntax { self: Parsers & DerivedOps & Syntax => sealed trait Stringable[T] { 
def apply: T => String } - given Stringable[Char] with { def apply = _.toString } - given Stringable[List[Char]] with { def apply = _.mkString } - given Stringable[String] with { def apply = identity } - given stringList: Stringable[List[String]] with { def apply = _.mkString } + given Stringable[Char] { def apply = _.toString } + given Stringable[List[Char]] { def apply = _.mkString } + given Stringable[String] { def apply = identity } + given stringList: Stringable[List[String]] { def apply = _.mkString } given [T, U](using st: Stringable[T], su: Stringable[U]): Stringable[(T, U)] with { def apply = { case (l, r) => st.apply(l) ++ su.apply(r) } From 1c0881c117925114af1f960f7ac311a91aaf6af2 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 2 Dec 2025 22:33:45 +0100 Subject: [PATCH 73/95] Use round parentheses --- artifact/src/main/scala/library/CharSyntax.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/artifact/src/main/scala/library/CharSyntax.scala b/artifact/src/main/scala/library/CharSyntax.scala index 356cc33..d42dfc2 100644 --- a/artifact/src/main/scala/library/CharSyntax.scala +++ b/artifact/src/main/scala/library/CharSyntax.scala @@ -16,7 +16,7 @@ trait CharSyntax { self: Parsers & DerivedOps & Syntax => val spaces = many(space) val newline = acceptIf(_ == '\n') - def charRange(from: Char, to: Char) = acceptIf { c => c >= from && c <= to } + def charRange(from: Char, to: Char) = acceptIf(c => c >= from && c <= to) val asciiLetter = charRange('a', 'z') | charRange('A', 'Z') @@ -36,8 +36,7 @@ trait CharSyntax { self: Parsers & DerivedOps & Syntax => given Conversion[String, Parser[String]] = string given Conversion[List[Char], String] = _.mkString - given [T](using st: Stringable[T]): Conversion[Parser[T], Parser[String]] = - p => p ^^ st.apply + given [T](using st: Stringable[T]): Conversion[Parser[T], Parser[String]] = _ ^^ st.apply given Conversion[Char, Parser[Char]] = accept From 
0ba4b0a2c8abc54d48c592d3dfd511e75e0ad0d0 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 2 Dec 2025 22:46:41 +0100 Subject: [PATCH 74/95] Simplify some expressions --- .../src/main/scala/examples/PythonParsers.scala | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index 6718f49..753452c 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -101,11 +101,10 @@ trait PythonParsers extends PythonLexemes, PythonAst { p: Elem => Boolean, thn: Elem => Parser[T], els: Elem => Parser[T] - ) = - eat { c => if (p(c)) thn(c) else els(c) } + ) = eat { c => if (p(c)) thn(c) else els(c) } // Simply preprocesses the input stream and strips out comments - def stripComments[T]: Parser[T] => Parser[T] = { p => + def stripComments[T](p: Parser[T]): Parser[T] = { lazy val stripped: Parser[T] = done(p) | switch(isComment, _ => stripped, c => stripComments(p << c)) stripped @@ -121,8 +120,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { val (opening, closing) = (pairs.keys, pairs.values) - def enclosed[T]: (=> Parser[T]) => Parser[T] = - p => oneOf(opening) >> { o => p <~ pairs(o) } + def enclosed[T](p: => Parser[T]) = oneOf(opening) >> { o => p <~ pairs(o) } // non empty Dyck language on these pairs lazy val dyck: Parser[Any] = enclosed(many(dyck)) @@ -280,9 +278,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { lazy val raise_stmt = "raise" ~> spacedOpt(test ~ spacedOpt("from" ␣ test)) ^^ this.Raise.apply lazy val import_stmt = import_name | import_from - lazy val import_name = "import" ␣> dotted_as_names ^^ { n => - Import(n) - } + lazy val import_name = "import" ␣> dotted_as_names ^^ { Import(_) } // # note below: the ('.' | '...') is necessary because '...' 
is tokenized as ELLIPSIS lazy val import_from = @@ -356,7 +352,8 @@ trait PythonParsers extends PythonLexemes, PythonAst { lazy val expr: NT[Any] = binOp(xor_expr, "|", this.BinOp.apply) lazy val xor_expr: NT[Any] = binOp(and_expr, "^", this.BinOp.apply) lazy val and_expr: NT[Any] = binOp(shift_expr, "&", this.BinOp.apply) - lazy val shift_expr: NT[Any] = binOp(arith_expr, "<<" | ">>", this.BinOp.apply) + lazy val shift_expr: NT[Any] = + binOp(arith_expr, "<<" | ">>", this.BinOp.apply) lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", this.BinOp.apply) lazy val term: NT[Any] = binOp(factor, "*" | "@" | "/" | "%" | "//", this.BinOp.apply) From 7221b535df97c0f2a6bec07a5a15de48820da328 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 3 Dec 2025 05:07:40 +0100 Subject: [PATCH 75/95] Use enum to for the many case classes --- .../src/main/scala/examples/PythonAst.scala | 37 +++++++++-------- .../main/scala/examples/PythonParsers.scala | 40 ++++++++++--------- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/artifact/src/main/scala/examples/PythonAst.scala b/artifact/src/main/scala/examples/PythonAst.scala index c8779d9..7bcb6c9 100644 --- a/artifact/src/main/scala/examples/PythonAst.scala +++ b/artifact/src/main/scala/examples/PythonAst.scala @@ -9,25 +9,24 @@ trait PythonAst { case class Decorated(decorators: Seq[Decorator], el: Any) extends Tree trait Def extends Tree - case class FuncDef(name: Any, params: Any, retAnnot: Option[Any], body: Any) extends Def - - trait Stmt extends Tree - case class Simple(small: Seq[Any]) extends Stmt - - case class Del(exprs: Seq[Any]) extends Stmt - case object Pass extends Stmt - case object Break extends Stmt - case object Continue extends Stmt - case class Return(expr: Option[Any]) extends Stmt - case class Raise(expr: Option[Any]) extends Stmt - case class ExprStmt(expr: Any) extends Stmt - case class Import(names: Any, from: Option[Any] = None) extends Stmt - - case class Global(ids: Seq[Any]) extends 
Stmt - case class Nonlocal(ids: Seq[Any]) extends Stmt - case class Assert(tests: Seq[Any]) extends Stmt - - case class For(exprs: Seq[Any], in: Any, body: Any, default: Any) extends Stmt + case class FuncDef(name: Any, params: Any, retAnnot: Option[Any], body: Any) + extends Def + + enum Stmt extends Tree { + case Simple(small: Seq[Any]) + case Del(exprs: Seq[Any]) + case Pass + case Break + case Continue + case Return(expr: Option[Any]) + case Raise(expr: Option[Any]) + case ExprStmt(expr: Any) + case Import(names: Any, from: Option[Any] = None) + case Global(ids: Seq[Any]) + case Nonlocal(ids: Seq[Any]) + case Assert(tests: Seq[Any]) + case For(exprs: Seq[Any], in: Any, body: Any, default: Any) + } trait Expr extends Tree case class BinOp(l: Any, op: Any, r: Any) extends Expr diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index 753452c..9b7f86f 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -94,6 +94,8 @@ trait PythonLexemes { self: Parsers & DerivedOps & Syntax => trait PythonParsers extends PythonLexemes, PythonAst { self: Parsers & Syntax & DerivedOps => + import Stmt._ + // general toolbox def no(els: Elem*): Parser[Elem] = acceptIf(el => !(els contains el)) def no(els: Iterable[Elem]): Parser[Elem] = no(els.toSeq*) @@ -210,22 +212,22 @@ trait PythonParsers extends PythonLexemes, PythonAst { // --- Python Grammar --- // see: https://docs.python.org/3.5/reference/grammar.html lazy val file_input: NT[Program] = - emptyLine.* ~> many(stmt <~ emptyLine.*) <~ EOS ^^ this.Program.apply + emptyLine.* ~> many(stmt <~ emptyLine.*) <~ EOS ^^ Program.apply lazy val decorator: Parser[Decorator] = "@" ~> dotted_name ~ ("(" ~> optArgs <~ ")" | succeed( Nil - )) <~ NL ^^ this.Decorator.apply + )) <~ NL ^^ Decorator.apply lazy val decorators: Parser[List[Decorator]] = some(decorator) lazy val decorated: Parser[Decorated] 
= - decorators ~ (classdef | funcdef | async_funcdef) ^^ this.Decorated.apply + decorators ~ (classdef | funcdef | async_funcdef) ^^ Decorated.apply // --- Functions --- lazy val async_funcdef: Parser[FuncDef] = "async" ␣> funcdef lazy val funcdef: Parser[FuncDef] = "def" ␣> (id ␣ parameters ~ spacedOpt( "->" ␣> test - )) ␣ (":" ␣> suite) ^^ this.FuncDef.apply + )) ␣ (":" ␣> suite) ^^ FuncDef.apply lazy val parameters = "(" ~> spacedOpt(typedargslist) <␣ ")" @@ -249,7 +251,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { // --- Statements --- lazy val stmt: NT[Any] = simple_stmt | compound_stmt lazy val simple_stmt = - listOf(small_stmt, ";") <␣ NL ^^ this.Simple.apply + listOf(small_stmt, ";") <␣ NL ^^ Simple.apply lazy val small_stmt = (expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt @@ -260,23 +262,23 @@ trait PythonParsers extends PythonLexemes, PythonAst { | testlist_star_expr ␣ augassign ␣ (yield_expr | testlist) | testlist_star_expr ~ some( spaces ~> "=" ␣> (yield_expr | testlist_star_expr) - )) ^^ this.ExprStmt.apply + )) ^^ ExprStmt.apply lazy val testlist_star_expr = listOf(test | star_expr, ",") lazy val augassign = ("+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=") - lazy val del_stmt = "del" ␣> exprlist ^^ this.Del.apply + lazy val del_stmt = "del" ␣> exprlist ^^ Del.apply lazy val pass_stmt = "pass" ^^^ Pass lazy val flow_stmt = break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt lazy val break_stmt = "break" ^^^ Break lazy val continue_stmt = "continue" ^^^ Continue - lazy val return_stmt = "return" ~> spacedOpt(testlist) ^^ this.Return.apply - lazy val yield_stmt = yield_expr ^^ this.ExprStmt.apply + lazy val return_stmt = "return" ~> spacedOpt(testlist) ^^ Return.apply + lazy val yield_stmt = yield_expr ^^ ExprStmt.apply lazy val raise_stmt = - "raise" ~> spacedOpt(test ~ spacedOpt("from" ␣ test)) ^^ this.Raise.apply + "raise" ~> spacedOpt(test ~ spacedOpt("from" ␣ 
test)) ^^ Raise.apply lazy val import_stmt = import_name | import_from lazy val import_name = "import" ␣> dotted_as_names ^^ { Import(_) } @@ -296,9 +298,9 @@ trait PythonParsers extends PythonLexemes, PythonAst { lazy val dotted_as_names = someSep(dotted_as_name, ",") lazy val dotted_name = someSep(id, ".") - lazy val global_stmt = "global" ␣> someSep(id, ",") ^^ this.Global.apply - lazy val nonlocal_stmt = "nonlocal" ␣> someSep(id, ",") ^^ this.Nonlocal.apply - lazy val assert_stmt = "assert" ␣> someSep(test, ",") ^^ this.Assert.apply + lazy val global_stmt = "global" ␣> someSep(id, ",") ^^ Global.apply + lazy val nonlocal_stmt = "nonlocal" ␣> someSep(id, ",") ^^ Nonlocal.apply + lazy val assert_stmt = "assert" ␣> someSep(test, ",") ^^ Assert.apply lazy val compound_stmt = if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt @@ -349,14 +351,14 @@ trait PythonParsers extends PythonLexemes, PythonAst { lazy val comp_op = ("<" | ">" | "==" | ">=" | "<=" | "<>" | "!=" | "in" | "not" ␣ "in" | "is" | "is" ␣ "not") - lazy val expr: NT[Any] = binOp(xor_expr, "|", this.BinOp.apply) - lazy val xor_expr: NT[Any] = binOp(and_expr, "^", this.BinOp.apply) - lazy val and_expr: NT[Any] = binOp(shift_expr, "&", this.BinOp.apply) + lazy val expr: NT[Any] = binOp(xor_expr, "|", BinOp.apply) + lazy val xor_expr: NT[Any] = binOp(and_expr, "^", BinOp.apply) + lazy val and_expr: NT[Any] = binOp(shift_expr, "&", BinOp.apply) lazy val shift_expr: NT[Any] = - binOp(arith_expr, "<<" | ">>", this.BinOp.apply) - lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", this.BinOp.apply) + binOp(arith_expr, "<<" | ">>", BinOp.apply) + lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", BinOp.apply) lazy val term: NT[Any] = - binOp(factor, "*" | "@" | "/" | "%" | "//", this.BinOp.apply) + binOp(factor, "*" | "@" | "/" | "%" | "//", BinOp.apply) lazy val factor: NT[Any] = ("+" | "-" | "~") ␣ factor | power lazy val power: NT[Any] = atom_expr | 
atom_expr ␣ "**" ␣ factor lazy val atom_expr = From e38bcaa22506e338fafd2767b1f27780ecdc79cb Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 3 Dec 2025 05:35:44 +0100 Subject: [PATCH 76/95] Encapsulate and simplify --- artifact/src/main/scala/library/Attributed.scala | 6 +++--- artifact/src/main/scala/library/DerivativeParsers.scala | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/artifact/src/main/scala/library/Attributed.scala b/artifact/src/main/scala/library/Attributed.scala index 96c0381..6c66055 100644 --- a/artifact/src/main/scala/library/Attributed.scala +++ b/artifact/src/main/scala/library/Attributed.scala @@ -74,7 +74,7 @@ trait Attributed { (2) It's been manually set (this.fixed); or (3) It needs to be computed (generation < FixedPoint.generation). */ - if (fixed || stabilized || (generation == FixedPoint.generation)) + if (fixed || stabilized || generation == FixedPoint.generation) return currentValue fix() @@ -84,8 +84,8 @@ trait Attributed { } // Subsumption tests for attributes: - protected def implies(a: Boolean, b: Boolean) = (!a) || b - protected def follows(a: Boolean, b: Boolean) = (!b) || a + protected def implies(a: Boolean, b: Boolean) = !a || b + protected def follows(a: Boolean, b: Boolean) = !b || a protected def updateAttributes(): Unit private def fix() = { diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index f580ccb..23696da 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -243,7 +243,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => // This separation into two fixed points is essential to // prevent excessive recomputation. 
- protected object propertiesFix extends Attributed { + private object propertiesFix extends Attributed { object nullable extends Attribute[Boolean](false, _ || _, implies) object empty extends Attribute[Boolean](true, _ && _, follows) @@ -256,7 +256,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => } } - protected object resultsFix extends Attributed { + private object resultsFix extends Attributed { object results extends Attribute[List[R]]( List(), From dc0cb685679fe8a5950616a26b48ba69d990bbf9 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 3 Dec 2025 10:12:06 +0100 Subject: [PATCH 77/95] Add scalafmt file for all to see --- .gitignore | 1 - .scalafmt.conf | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 .scalafmt.conf diff --git a/.gitignore b/.gitignore index f634167..6a06f28 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,6 @@ project .metals .bsp .bloop -.scalafmt.conf # Testing test.png diff --git a/.scalafmt.conf b/.scalafmt.conf new file mode 100644 index 0000000..8134e97 --- /dev/null +++ b/.scalafmt.conf @@ -0,0 +1,2 @@ +version = "3.7.15" +runner.dialect = scala3 \ No newline at end of file From ef039aec497f7e6e39c1f44fffa95a7604e7f126 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 3 Dec 2025 23:25:59 +0100 Subject: [PATCH 78/95] Update Vagrantfile --- Vagrantfile | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 293a4da..b4805af 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -15,22 +15,21 @@ Vagrant.configure(2) do |config| config.vm.provision "shell", inline: <<-SHELL - # Refresh sources + # Refresh system sudo apt-get update -y + sudo apt-get upgrade -y # Graphviz for printing parsers to graphs sudo apt-get install -y graphviz - # Java - sudo apt-get install -y openjdk-7-jdk - # Sbt - sudo mkdir -p /home/vagrant/bin - pushd /home/vagrant/bin/ - sudo wget 
https://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/0.13.8/sbt-launch.jar - sudo cp /home/vagrant/configs/sbt.sh /home/vagrant/bin/sbt - sudo chmod u+x /home/vagrant/bin/sbt - sudo chmod +x /home/vagrant/bin/sbt + sudo apt-get install -y apt-transport-https curl gnupg + echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list + echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee /etc/apt/sources.list.d/sbt_old.list + curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo -H gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/scalasbt-release.gpg --import + sudo chmod 644 /etc/apt/trusted.gpg.d/scalasbt-release.gpg + sudo apt-get update -y + sudo apt-get install -y sbt popd SHELL From c466f16224ab2a9a98b3f4d29a8e45951c371056 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 3 Dec 2025 23:49:36 +0100 Subject: [PATCH 79/95] Format NegationTests --- artifact/src/test/scala/NegationTests.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifact/src/test/scala/NegationTests.scala b/artifact/src/test/scala/NegationTests.scala index 9c26e32..ba9e496 100644 --- a/artifact/src/test/scala/NegationTests.scala +++ b/artifact/src/test/scala/NegationTests.scala @@ -7,7 +7,7 @@ import org.scalatest.funspec.AnyFunSpec trait NegationTests { self: AnyFunSpec & CustomMatchers[RichParsers] => - import parsers.{ not as neg, * } + import parsers.{not as neg, *} describe("parser \"not(aa)\"") { val p = neg("aa") From 22f0216eff8e62c9cefcf3f6444d3c3bd4fb8b61 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 3 Dec 2025 23:50:27 +0100 Subject: [PATCH 80/95] Avoid warning by replacing '~' with ',' --- artifact/src/test/scala/LeftrecTests.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/artifact/src/test/scala/LeftrecTests.scala 
b/artifact/src/test/scala/LeftrecTests.scala index 7c3a152..b0f3f8a 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -228,13 +228,13 @@ trait LeftrecTests { import Term._ lazy val term: NT[Term] = - (term ~ "+" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | term ~ "-" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } + (term ~ "+" ~ fact ^^ { case ((l, op), r) => BinOp(l, op, r) } + | term ~ "-" ~ fact ^^ { case ((l, op), r) => BinOp(l, op, r) } | fact) lazy val fact: NT[Term] = - (fact ~ "*" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | fact ~ "/" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } + (fact ~ "*" ~ num ^^ { case ((l, op), r) => BinOp(l, op, r) } + | fact ~ "/" ~ num ^^ { case ((l, op), r) => BinOp(l, op, r) } | num) lazy val num: Parser[Num] = some(digit) ^^ (ns => Num(ns.mkString.toInt)) From 8b065267e1b735b10d10216e7869b54da97fcbf4 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 3 Dec 2025 23:52:42 +0100 Subject: [PATCH 81/95] Update formatting settings --- .scalafmt.conf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.scalafmt.conf b/.scalafmt.conf index 8134e97..0577f2a 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -1,2 +1,5 @@ version = "3.7.15" -runner.dialect = scala3 \ No newline at end of file +runner.dialect = scala3 + +assumeStandardLibraryStripMargin = true +align.stripMargin = true \ No newline at end of file From 3da8beac87bf6b5344429d27ea8ce725d1cac32d Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 4 Dec 2025 00:07:06 +0100 Subject: [PATCH 82/95] Fix formatting DerivativeParsers, multiline strings --- .../test/scala/DerivativeParsersTests.scala | 277 +++++++++--------- 1 file changed, 142 insertions(+), 135 deletions(-) diff --git a/artifact/src/test/scala/DerivativeParsersTests.scala b/artifact/src/test/scala/DerivativeParsersTests.scala index 12899fd..1ecc2dc 100644 --- 
a/artifact/src/test/scala/DerivativeParsersTests.scala +++ b/artifact/src/test/scala/DerivativeParsersTests.scala @@ -76,20 +76,18 @@ class DerivativeParsersTests lazy val xs = many(some('x') ~ '\n') - table(xs) `shouldParse` """+---+ - ^|xxx| - ^+---+ - ^""".stripMargin('^') - - table(xs) `shouldParse` """+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') + table(xs) `shouldParse` "+---+\n|xxx|\n+---+\n" + + table(xs) `shouldParse` + """+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') } describe("Table parser with delegation") { @@ -122,52 +120,57 @@ class DerivativeParsersTests lazy val xs = many(some('x') ~ '\n') - table(xs) `shouldParse` """+---+ - ^|xxx| - ^+---+ - ^""".stripMargin('^') - - table(xs) `shouldParse` """+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - - table(xs) `shouldNotParse` """+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---x--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') + table(xs) `shouldParse` + """+---+ + ^|xxx| + ^+---+ + ^""".stripMargin('^') + + table(xs) `shouldParse` + """+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + 
^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') + + table(xs) `shouldNotParse` + """+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---x--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') lazy val nestedTables: NT[Any] = table(xs | nestedTables) - nestedTables `shouldParse` """+---+--------+------------+ - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^|xxx||x|xxxx||xxxxxxxxxxxx| - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - - nestedTables `shouldNotParse` """+---+--------+------------+ - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^|xxx||x|oxxx||xxxxxxxxxxxx| - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') + nestedTables `shouldParse` + """+---+--------+------------+ + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^|xxx||x|xxxx||xxxxxxxxxxxx| + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') + + nestedTables `shouldNotParse` + """+---+--------+------------+ + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^|xxx||x|oxxx||xxxxxxxxxxxx| + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') // helper that should be in the stdlib def zipWith[A, B](l1: List[A => B], l2: List[A]): List[B] = @@ -293,24 +296,25 @@ class DerivativeParsersTests indent(xs) `shouldParse` " xx\n" indent(xs) `shouldParse` " xxxxx\n" indent(xs) `shouldParse` " xxxxx\n xxxxxxx\n" - 
indent(xs) `shouldParse` """ xxxxx - | xxxxxxx - | xxxxxxxx - | xxxxxxxxx - | xxxxxxxxxx - | xxxxxxxxxxx - | xxxxxxxxxx - | xxxxxxxxx - | xxxxxxxxxx - | xxxxxxxxxxx - | xxxxxxxxxxxx - | xxxxxxxxxxxxx - | xxxxxxxxxxxxxx - | xxxxxxxxxxxxxxx - | xxxxxxxxxxxxxxxx - | xxxxxxxxxxxxxxx - | xxxxxxxxxxxxxx - |""".stripMargin('|') + indent(xs) `shouldParse` + """ xxxxx + | xxxxxxx + | xxxxxxxx + | xxxxxxxxx + | xxxxxxxxxx + | xxxxxxxxxxx + | xxxxxxxxxx + | xxxxxxxxx + | xxxxxxxxxx + | xxxxxxxxxxx + | xxxxxxxxxxxx + | xxxxxxxxxxxxx + | xxxxxxxxxxxxxx + | xxxxxxxxxxxxxxx + | xxxxxxxxxxxxxxxx + | xxxxxxxxxxxxxxx + | xxxxxxxxxxxxxx + |""".stripMargin('|') indent(indent(xs)) `shouldParse` " xx\n" indent(indent(xs)) `shouldParse` " xxxxx\n" @@ -351,32 +355,35 @@ class DerivativeParsersTests as `shouldParse` "aa\naa\n" both `shouldParse` "a\n" - both `shouldParse` """aaa - |~~~ - |() - |~~~ - |aaaaa - |""".stripMargin('|') + both `shouldParse` + """aaa + |~~~ + |() + |~~~ + |aaaaa + |""".stripMargin('|') both `shouldParse` "a \n\n~~~ \n()\n~~~\naaa\n" - both `shouldNotParse` """aaa - |~~~ - |( - |~~~ - |aaaaa - |""".stripMargin('|') - - both `shouldParse` """aaa - |~~~ - |((()) - |~~~ - |aaaaa - | - |~~~ - |) - |~~~ - |""".stripMargin('|') + both `shouldNotParse` + """aaa + |~~~ + |( + |~~~ + |aaaaa + |""".stripMargin('|') + + both `shouldParse` + """aaa + |~~~ + |((()) + |~~~ + |aaaaa + | + |~~~ + |) + |~~~ + |""".stripMargin('|') } describe("Unescape") { @@ -390,42 +397,34 @@ class DerivativeParsersTests describe("Combined examples") { import section_4_2._ - combined `shouldParse` """aaa - ^""".stripMargin('^') - - combined `shouldParse` """+----+ - ^|aaaa| - ^+----+ - ^""".stripMargin('^') - - combined `shouldParse` """+----+ - ^|aa | - ^+----+ - ^""".stripMargin('^') - - combined `shouldParse` """+----+ - ^|aaaa| - ^|~~~ | - ^|(())| - ^|~~~ | - ^|aaaa| - ^+----+ - ^""".stripMargin('^') - - combined `shouldParse` """+----+ - ^|aa | - ^|aaaa| - ^+----+ - ^""".stripMargin('^') - - 
combined `shouldParse` """+----+ - ^|aa | - ^|~~~ | - ^|(())| - ^|~~~ | - ^|aaaa| - ^+----+ - ^""".stripMargin('^') + combined `shouldParse` + """aaa + ^""".stripMargin('^') + + combined `shouldParse` "+----+\n|aaaa|\n+----+\n" + combined `shouldParse` "+----+\n|aa |\n+----+\n" + + combined `shouldParse` + """+----+ + ^|aaaa| + ^|~~~ | + ^|(())| + ^|~~~ | + ^|aaaa| + ^+----+ + ^""".stripMargin('^') + + combined `shouldParse` "+----+\n|aa |\n|aaaa|\n+----+\n" + + combined `shouldParse` + """+----+ + ^|aa | + ^|~~~ | + ^|(())| + ^|~~~ | + ^|aaaa| + ^+----+ + ^""".stripMargin('^') } @@ -593,7 +592,15 @@ class DerivativeParsersTests ) } // here we can already observe performance problems (about 400ms): - p `shouldParse` "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n some content that is not a program, but could be one \n. # ''' some comment \nIt contains newlines \n, \"and some Strings\". Even Multiline strings with '''newlines\n'''." + p `shouldParse` + """hello '''foo + |"bar''' test + | foo " bar'''foo " + | some content that is not a program, but could be one + |. # ''' some comment + |It contains newlines + |, "and some Strings". 
Even Multiline strings with '''newlines + |'''.""".stripMargin lazy val noText: Parser[Any] = comment | singleString | multilineString From 7a79411183540cb8d4a46a71ef29cacd4e753723 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 4 Dec 2025 00:30:09 +0100 Subject: [PATCH 83/95] Fix most of the broken formatting --- .../src/test/scala/PythonParserTests.scala | 282 +++--------------- 1 file changed, 47 insertions(+), 235 deletions(-) diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index 04c1c92..d46f060 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -13,18 +13,10 @@ class PythonParserTests import Lexeme._ describe("indented python parser (lexeme based)") { - indented(many(many(Id("A")) <~ NL)) `shouldParseWith` (List( - WS, - WS, - Id("A"), - Id("A"), - NL, - WS, - WS, - Id("A"), - NL - ), - List(List(Id("A"), Id("A")), List(Id("A")))) + indented(many(many(Id("A")) <~ NL)) `shouldParseWith` ( + List(WS, WS, Id("A"), Id("A"), NL, WS, WS, Id("A"), NL), + List(List(Id("A"), Id("A")), List(Id("A"))) + ) } describe("implicit line joining") { @@ -38,87 +30,20 @@ class PythonParserTests dyck `shouldParse` List[Lexeme]("(", "(", ")", ")") dyck `shouldNotParse` List[Lexeme]("(", "(", ")") - extDyck `shouldParse` List[Lexeme]("(", a, "(", a, NL, a, ")", a, ")") - extDyck `shouldNotParse` List[Lexeme]( - a, - "(", - a, - "(", - a, - NL, - a, - ")", - a, - ")", - a - ) + extDyck `shouldParse` List("(", a, "(", a, NL, a, ")", a, ")") + extDyck `shouldNotParse` List(a, "(", a, "(", a, NL, a, ")", a, ")", a) - implicitJoin(p) `shouldParse` List[Lexeme](a, a, a, a, a) - implicitJoin(p) `shouldNotParse` List[Lexeme](a, a, a, NL, a, a) - implicitJoin(p) `shouldParse` List[Lexeme](a, a, "(", a, NL, a, ")", a) - implicitJoin(p) `shouldNotParse` List[Lexeme](a, a, "(", a, NL, a, a) - implicitJoin(p) `shouldNotParse` List[Lexeme]( - a, - a, - "(", - 
a, - "(", - NL, - a, - ")", - a - ) - implicitJoin(p) `shouldParse` List[Lexeme]( - a, - a, - "(", - a, - "(", - NL, - a, - ")", - ")", - a - ) - implicitJoin(p) `shouldParse` List[Lexeme]( - a, - a, - "(", - a, - "[", - NL, - a, - "]", - ")", - a - ) - implicitJoin(p) `shouldNotParse` List[Lexeme]( - a, - a, - "(", - a, - "[", - NL, - a, - ")", - "]", - a - ) + implicitJoin(p) `shouldParse` List(a, a, a, a, a) + implicitJoin(p) `shouldNotParse` List(a, a, a, NL, a, a) + implicitJoin(p) `shouldParse` List(a, a, "(", a, NL, a, ")", a) + implicitJoin(p) `shouldNotParse` List(a, a, "(", a, NL, a, a) + implicitJoin(p) `shouldNotParse` List(a, a, "(", a, "(", NL, a, ")", a) + implicitJoin(p) `shouldParse` List(a, a, "(", a, "(", NL, a, ")", ")", a) + implicitJoin(p) `shouldParse` List(a, a, "(", a, "[", NL, a, "]", ")", a) + implicitJoin(p) `shouldNotParse` List(a, a, "(", a, "[", NL, a, ")", "]", a) - explicitJoin(p) `shouldParse` List[Lexeme](a, a, a, BS, NL, a, a) - explicitJoin(p) `shouldParse` List[Lexeme]( - a, - a, - a, - BS, - NL, - a, - a, - BS, - NL, - a, - a - ) + explicitJoin(p) `shouldParse` List(a, a, a, BS, NL, a, a) + explicitJoin(p) `shouldParse` List(a, a, a, BS, NL, a, a, BS, NL, a, a) val input = List[Lexeme]( a, @@ -168,15 +93,13 @@ class PythonParserTests val inputWithoutExplicit = List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, NL, a, "]", ")", a) - val inputResult = - List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, a, "]", ")", a) + val inputResult = List(a, NL, a, a, a, "(", a, "[", a, a, a, "]", ")", a) val collect = consumed(many(any)) stripComments(collect) `shouldParseWith` (input, inputWithoutComments) - explicitJoin( - collect - ) `shouldParseWith` (inputWithoutComments, inputWithoutExplicit) + explicitJoin(collect) `shouldParseWith` + (inputWithoutComments, inputWithoutExplicit) implicitJoin(collect) `shouldParseWith` (inputWithoutExplicit, inputResult) preprocess(file_input) `shouldParse` List[Lexeme]( @@ -277,7 +200,6 @@ class 
PythonParserTests ) parse(preprocess(collect), sampleProg2) `shouldBe` List(sampleProg) - preprocess(file_input) `shouldParse` sampleProg2 // https://en.wikibooks.org/wiki/Python_Programming/Decorators @@ -475,90 +397,27 @@ class PythonParserTests EOS ) - argument `shouldParse` List[Lexeme]("*", Id("kwargs")) - argument `shouldParse` List[Lexeme]("**", Id("kwargs")) - arglist `shouldParse` List[Lexeme]("**", Id("kwargs2")) - arglist `shouldParse` List[Lexeme](Id("kwargs"), ",", WS, Id("kwargs")) - arglist `shouldParse` List[Lexeme]( - "*", - Id("kwargs"), - ",", - "*", - Id("kwargs") - ) - arglist `shouldParse` List[Lexeme]( - "**", - Id("kwargs"), - ",", - "**", - Id("kwargs") - ) - arglist `shouldParse` List[Lexeme]( - "*", - Id("kwargs"), - ",", - WS, - "*", - Id("kwargs") - ) - arglist `shouldParse` List[Lexeme]( - "**", - Id("kwargs"), - ",", - WS, - "**", - Id("kwargs") - ) + argument `shouldParse` List("*", Id("kwargs")) + argument `shouldParse` List("**", Id("kwargs")) + arglist `shouldParse` List("**", Id("kwargs2")) + arglist `shouldParse` List(Id("kwargs"), ",", WS, Id("kwargs")) + arglist `shouldParse` List("*", Id("kwargs"), ",", "*", Id("kwargs")) + arglist `shouldParse` List("**", Id("kwargs"), ",", "**", Id("kwargs")) + arglist `shouldParse` List("*", Id("kwargs"), ",", WS, "*", Id("kwargs")) + arglist `shouldParse` List("**", Id("kwargs"), ",", WS, "**", Id("kwargs")) + arglist `shouldParse` List("(", Id("args"), ",", WS, Id("kwargs"), ")") + arglist `shouldParse` List("(", "*", Id("args"), ",", WS, Id("kwargs"), ")") - arglist `shouldParse` List[Lexeme]( - "(", - Id("args"), - ",", - WS, - Id("kwargs"), - ")" - ) - arglist `shouldParse` List[Lexeme]( - "(", - "*", - Id("args"), - ",", - WS, - Id("kwargs"), - ")" - ) - arglist `shouldParse` List[Lexeme]( - "(", - "*", - Id("args"), - ",", - WS, - "*", - Id("kwargs"), - ")" - ) - test `shouldParse` List[Lexeme]( - Id("f"), - "(", - Id("args"), - ",", - WS, - Id("kwargs"), - ")" - ) - test 
`shouldParse` List[Lexeme]( - Id("f"), - "(", - "*", - Id("args"), - ",", - WS, - "**", - Id("kwargs"), - ")" - ) + arglist `shouldParse` + List("(", "*", Id("args"), ",", WS, "*", Id("kwargs"), ")") + + test `shouldParse` + List(Id("f"), "(", Id("args"), ",", WS, Id("kwargs"), ")") + + test `shouldParse` + List(Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")") - test `shouldParse` List[Lexeme]( + test `shouldParse` List( Id("print"), "(", Str("entering function "), @@ -582,7 +441,7 @@ class PythonParserTests // "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, // WS, WS, Id("print"), NL)).size `shouldBe` 1 - stmt `shouldNotParse` List[Lexeme]( + stmt `shouldNotParse` List( "def", WS, Id("__call__"), @@ -813,7 +672,7 @@ class PythonParserTests parse(preprocess(file_input), traceProg2).size `shouldBe` 1 // suite should `parse` this: - val dummyin = List[Lexeme]( + val dummyin = List( NL, WS, "def", @@ -851,8 +710,8 @@ class PythonParserTests // println((suite `parse` dummyin) mkString "\n\n") - stmt `shouldNotParse` List[Lexeme](WS, WS, WS, Id("i"), NL) - atom `shouldNotParse` List[Lexeme](WS, WS, WS, Id("i")) + stmt `shouldNotParse` List(WS, WS, WS, Id("i"), NL) + atom `shouldNotParse` List(WS, WS, WS, Id("i")) // This is the skeleton of the python parsers (and it is unambiguous) lazy val aStmt: NT[Any] = aSimpleStmt | "def" ~> aBlock @@ -938,62 +797,15 @@ class PythonParserTests EOS ) - aInput `shouldNotParse` List[Lexeme]( - "def", - NL, - WS, - WS, - a, - NL, - WS, - a, - NL, - EOS - ) - - aInput `shouldParse` List[Lexeme]( - "def", - NL, - WS, - WS, - a, - NL, - NL, - WS, - WS, - a, - NL, - EOS - ) - - aInput `shouldNotParse` List[Lexeme]( - "def", - NL, - WS, - WS, - a, - NL, - NL, - WS, - a, - NL, - EOS - ) + aInput `shouldNotParse` List("def", NL, WS, WS, a, NL, WS, a, NL, EOS) + aInput `shouldParse` List("def", NL, WS, WS, a, NL, NL, WS, WS, a, NL, EOS) + aInput `shouldNotParse` List("def", NL, WS, WS, a, NL, NL, WS, a, NL, EOS) 
indentBy(WS ~ WS)(collect) `shouldParseWith` (List[Lexeme](WS, WS, a, NL), List[Lexeme](a, NL)) - indentBy(WS ~ WS)(collect) `shouldParseWith` (List[Lexeme]( - WS, - WS, - NL, - NL, - WS, - WS, - a, - NL - ), - List[Lexeme](NL, NL, a, NL)) + indentBy(WS ~ WS)(collect) `shouldParseWith` + (List(WS, WS, NL, NL, WS, WS, a, NL), List(NL, NL, a, NL)) parse(aInput, dummyin2).size `shouldBe` 1 } From e68d081bfdd45fcfd8b5a7ec98917b227048613f Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 4 Dec 2025 01:03:10 +0100 Subject: [PATCH 84/95] Remove lambdas if possible --- .../src/main/scala/examples/PythonParsers.scala | 2 +- .../main/scala/examples/paper/Section4.scala | 11 +++++------ .../src/main/scala/library/DerivedOps.scala | 17 +++++++---------- .../src/test/scala/DerivativeParsersTests.scala | 13 +++++-------- 4 files changed, 18 insertions(+), 25 deletions(-) diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index 9b7f86f..3978968 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -151,7 +151,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { // backslash that is not part of a string literal or comment, it is joined // with the following forming a single logical line, deleting the backslash // and the following end-of-line character. - def explicitJoin[T]: Parser[T] => Parser[T] = p => { + def explicitJoin[T](p: Parser[T]): Parser[T] = { lazy val join: NT[T] = done(p) | switch( _ == Punct("\\"), diff --git a/artifact/src/main/scala/examples/paper/Section4.scala b/artifact/src/main/scala/examples/paper/Section4.scala index 873ea31..855549a 100644 --- a/artifact/src/main/scala/examples/paper/Section4.scala +++ b/artifact/src/main/scala/examples/paper/Section4.scala @@ -117,12 +117,11 @@ trait Section4 { self: Section3 & RichParsers => // arbitrary positions. 
// // We will use this combinator in the following example - def spaced[T]: Parser[T] => Parser[T] = p => - done(p) | eat { - case ' ' => spaced(p) - case '\n' => spaced(p) - case c => spaced(p << c) - } + def spaced[T](p: Parser[T]): Parser[T] = done(p) | eat { + case ' ' => spaced(p) + case '\n' => spaced(p) + case c => spaced(p << c) + } // ### Example Figure 6c. Modular definition of a parser combinator for // ASCII-tables. diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index ebc9150..dbf8256 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -107,20 +107,18 @@ trait DerivedOps { self: Parsers & Syntax => // described by the function `f`. def repeat[T](f: Parser[T] => Parser[Parser[T]]): Parser[T] => Parser[T] = { val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => + def rec(p: Parser[T]): Parser[T] = cache.getOrElseUpdate( - p, { - done(p) | nonterminal(f(p) >> rec) - } + p, + { done(p) | nonterminal(f(p) >> rec) } ) rec } // repeat is just an instance of repeatAll - def repeatAll[T]( - f: List[Parser[T]] => Parser[List[Parser[T]]] - ): List[Parser[T]] => Parser[List[T]] = ps => - collect(ps) | f(ps) >> repeatAll(f) + def repeatAll[T](f: List[Parser[T]] => Parser[List[Parser[T]]])( + ps: List[Parser[T]] + ): Parser[List[T]] = collect(ps) | f(ps) >> repeatAll(f) private def mkList[T] = (_: ~[T, List[T]]) match { case (x, xs) => x :: xs } @@ -168,10 +166,9 @@ trait DerivedOps { self: Parsers & Syntax => // parser combinator here. 
val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => + def rec(p: Parser[T]): Parser[T] = cache.getOrElseUpdate( p, { - lazy val dp = delegate(p) nonterminal( done(p) | biasedAlt(region &> f(dp) >> rec, (any &> dp) >> rec) diff --git a/artifact/src/test/scala/DerivativeParsersTests.scala b/artifact/src/test/scala/DerivativeParsersTests.scala index 1ecc2dc..93b7629 100644 --- a/artifact/src/test/scala/DerivativeParsersTests.scala +++ b/artifact/src/test/scala/DerivativeParsersTests.scala @@ -448,12 +448,11 @@ class DerivativeParsersTests parse(greedySome(some('a')), "") `shouldBe` List() parse(greedyMany(some('a')), "") `shouldBe` List(List()) parse(greedySome(some('a')), "a") `shouldBe` List(List(List('a'))) - parse(greedySome(some('a')), "aaa") `shouldBe` List( - List(List('a', 'a', 'a')) - ) + parse(greedySome(some('a')), "aaa") `shouldBe` + List(List(List('a', 'a', 'a'))) } - it("should also return longest match if other parser succed first") { + it("should also return longest match if other parser succeeded first") { lazy val p = some("ab") | some("a") | some("b") parse(greedySome(p), "ab") `shouldBe` List(List(List("ab"))) parse(greedySome(p), "abab") `shouldBe` List(List(List("ab", "ab"))) @@ -538,15 +537,13 @@ class DerivativeParsersTests // parser combinator here. 
val cache = mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => + def rec(p: Parser[T]): Parser[T] = cache.getOrElseUpdate( p, { - lazy val dp = delegate(p) nonterminal( done(p) | biasedAlt( - (skip &> dp - | region &> f(dp)) >> rec, + (skip &> dp | region &> f(dp)) >> rec, (any &> dp) >> rec ) ) From 4940b6afb464d03069378d3b39292b758ee34e71 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 4 Dec 2025 01:09:52 +0100 Subject: [PATCH 85/95] Use existing mkList to simplify --- .../src/main/scala/library/DerivedOps.scala | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index dbf8256..9c4329d 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -17,17 +17,13 @@ trait DerivedOps { self: Parsers & Syntax => def some[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => - p :: ps - } + lazy val some_v = seq(p, many_v) ^^ mkList some_v } def many[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => - p :: ps - } + lazy val some_v = seq(p, many_v) ^^ mkList many_v } @@ -49,12 +45,12 @@ trait DerivedOps { self: Parsers & Syntax => def manyN[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else p ~ manyN(n - 1, p) ^^ { case (r, rs) => r :: rs } + else p ~ manyN(n - 1, p) ^^ mkList } def atMost[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else (p ~ atMost(n - 1, p) ^^ { case (r, rs) => r :: rs }) | succeed(Nil) + else (p ~ atMost(n - 1, p) ^^ mkList) | succeed(Nil) } def manySep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { @@ -64,9 +60,7 @@ trait 
DerivedOps { self: Parsers & Syntax => // same optimization as above for many and some def someSep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(sep ~> some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) ^^ { case (p, ps) => - p :: ps - } + lazy val some_v = seq(p, many_v) ^^ mkList some_v } @@ -76,9 +70,7 @@ trait DerivedOps { self: Parsers & Syntax => // distributive law - chains a list of parsers // --> in Haskell one would use `traverse` def distr[T](ps: List[Parser[T]]): Parser[List[T]] = - ps.foldRight(succeed[List[T]](Nil)) { (p, l) => - (p ~ l) ^^ { case (a, b) => a :: b } - } + ps.foldRight(succeed[List[T]](Nil)) { (p, l) => (p ~ l) ^^ mkList } def join[T](p: Parser[Parser[T]]): Parser[T] = p >> done @@ -120,7 +112,7 @@ trait DerivedOps { self: Parsers & Syntax => ps: List[Parser[T]] ): Parser[List[T]] = collect(ps) | f(ps) >> repeatAll(f) - private def mkList[T] = (_: ~[T, List[T]]) match { case (x, xs) => x :: xs } + private def mkList[T](xs: (T, List[T])) = xs._1 :: xs._2 lazy val succeedForever: NT[Unit] = succeed(()) | (any ~> succeedForever) @@ -185,7 +177,7 @@ trait DerivedOps { self: Parsers & Syntax => def greedySome[T]: Parser[T] => NT[List[T]] = { p => def withNext(p: Parser[T], ps: Parser[List[T]]) = - done(p) ~ ps ^^ { case (t, ts) => t :: ts } + done(p) ~ ps ^^ mkList def forceRead(curr: Parser[T]): Parser[List[T]] = withNext(curr, succeed(Nil)) | eat { el => From 2c926515b54f5dad87ec4adfbe57b468aa84faa6 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 4 Dec 2025 01:28:28 +0100 Subject: [PATCH 86/95] Fix type for tests." 
--- artifact/src/test/scala/PythonParserTests.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index d46f060..6677df5 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -93,7 +93,8 @@ class PythonParserTests val inputWithoutExplicit = List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, NL, a, "]", ")", a) - val inputResult = List(a, NL, a, a, a, "(", a, "[", a, a, a, "]", ")", a) + val inputResult = + List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, a, "]", ")", a) val collect = consumed(many(any)) From 0637b6ee882225d3b94485f09396a3e09ae3cfcf Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 4 Dec 2025 01:37:01 +0100 Subject: [PATCH 87/95] Fix some more formatting in PythonParserTests --- .../src/test/scala/PythonParserTests.scala | 32 +++---------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index 6677df5..8d76ae0 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -123,17 +123,8 @@ class PythonParserTests EOS ) - preprocess(file_input) `shouldParse` List[Lexeme]( - a, - "=", - a, - ">>", - a, - "*", - a, - NL, - EOS - ) + preprocess(file_input) `shouldParse` + List(a, "=", a, ">>", a, "*", a, NL, EOS) val sampleProg = List[Lexeme]( "def", @@ -784,26 +775,13 @@ class PythonParserTests EOS ) - aInput `shouldParse` List[Lexeme]( - "def", - NL, - WS, - WS, - a, - NL, - WS, - WS, - a, - NL, - EOS - ) - + aInput `shouldParse` List("def", NL, WS, WS, a, NL, WS, WS, a, NL, EOS) aInput `shouldNotParse` List("def", NL, WS, WS, a, NL, WS, a, NL, EOS) aInput `shouldParse` List("def", NL, WS, WS, a, NL, NL, WS, WS, a, NL, EOS) aInput `shouldNotParse` List("def", NL, WS, WS, a, NL, NL, WS, a, NL, EOS) - 
indentBy(WS ~ WS)(collect) `shouldParseWith` (List[Lexeme](WS, WS, a, NL), - List[Lexeme](a, NL)) + indentBy(WS ~ WS)(collect) `shouldParseWith` + (List(WS, WS, a, NL), List(a, NL)) indentBy(WS ~ WS)(collect) `shouldParseWith` (List(WS, WS, NL, NL, WS, WS, a, NL), List(NL, NL, a, NL)) From 8cfd76a7d27db8b354cd5281289a91729c949da1 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 4 Dec 2025 02:08:14 +0100 Subject: [PATCH 88/95] Exclude really bad cases from autoformatting. --- .../src/test/scala/PythonParserTests.scala | 638 +++--------------- 1 file changed, 76 insertions(+), 562 deletions(-) diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index 8d76ae0..fa02439 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -103,55 +103,26 @@ class PythonParserTests (inputWithoutComments, inputWithoutExplicit) implicitJoin(collect) `shouldParseWith` (inputWithoutExplicit, inputResult) + // format: off preprocess(file_input) `shouldParse` List[Lexeme]( - a, - ";", - a, - "=", - "yield", - "from", - a, - "=", - a, - ";", - NL, - NL, - a, - ";", - a, + a, ";", a, "=", "yield", "from", a, "=", a, ";", NL, NL, + a, ";", a, NL, EOS ) + // format: on preprocess(file_input) `shouldParse` List(a, "=", a, ">>", a, "*", a, NL, EOS) + // format: off val sampleProg = List[Lexeme]( - "def", - WS, - Id("fun"), - "(", - WS, - a, - WS, - ")", - ":", - NL, - WS, - WS, - a, - "+=", - WS, - a, - NL, - WS, - WS, - a, - "*=", - a, - NL, + "def", WS, Id("fun"), "(", WS, a, WS, ")", ":", NL, + WS, WS, a, "+=", WS, a, NL, + WS, WS, a, "*=", a, NL, EOS ) + // format: on parse(stripComments(collect), sampleProg) `shouldBe` List(sampleProg) parse(explicitJoin(collect), sampleProg) `shouldBe` List(sampleProg) @@ -159,235 +130,39 @@ class PythonParserTests preprocess(file_input) `shouldParse` sampleProg + // format: off val sampleProg2 = List[Lexeme]( - "def", - WS, - 
Id("fun"), - "(", - NL, - WS, - a, - WS, - NL, - ")", - ":", - NL, - WS, - WS, - a, - "+=", - Comment("Test"), - BS, - NL, - WS, - a, - NL, - WS, - WS, - a, - "*=", - a, - NL, + "def", WS, Id("fun"), "(", NL, + WS, a, WS, NL, + ")", ":", NL, + WS, WS, a, "+=", Comment("Test"), BS, NL, + WS, a, NL, + WS, WS, a, "*=", a, NL, EOS ) + // format: on parse(preprocess(collect), sampleProg2) `shouldBe` List(sampleProg) preprocess(file_input) `shouldParse` sampleProg2 // https://en.wikibooks.org/wiki/Python_Programming/Decorators + // format: off val traceProg = List[Lexeme]( - Comment("define the Trace class that will be "), - NL, - Comment("invoked using decorators"), - NL, - "class", - WS, - Id("Trace"), - "(", - Id("object"), - ")", - ":", - NL, - WS, - WS, - WS, - WS, - "def", - WS, - Id("__init__"), - "(", - Id("self"), - ")", - ":", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - Id("self"), - ".", - Id("f"), - WS, - "=", - WS, - Id("f"), - NL, - WS, - WS, - WS, - WS, - NL, - WS, - WS, - WS, - WS, - WS, - WS, - "def", - WS, - Id("__call__"), - "(", - Id("self"), - WS, - ",", - "*", - Id("args"), - ",", - WS, - "**", - Id("kwargs"), - ")", - ":", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - Id("print"), - "(", - Str("entering function "), - WS, - "+", - WS, - Id("self"), - ".", - Id("f"), - ".", - Id("__name__"), - ")", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - Id("i"), - "=", - Num("0"), - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - "for", - WS, - Id("arg"), - WS, - "in", - WS, - Id("args"), - ":", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - Id("print"), - "(", - Str("arg {0}: {1}"), - ".", - Id("format"), - "(", - Id("i"), - ",", - Id("arg"), - ")", - ")", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - Id("i"), - "=", - Id("i"), - "+", - Num("1"), - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - 
"return", - WS, - Id("self"), - ".", - Id("f"), - "(", - "*", - Id("args"), - ",", - WS, - "**", - Id("kwargs"), - ")", - NL, + Comment("define the Trace class that will be "), NL, + Comment("invoked using decorators"), NL, + "class", WS, Id("Trace"), "(", Id("object"), ")", ":", NL, + WS, WS, WS, WS, "def", WS, Id("__init__"), "(", Id("self"), ")", ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, + WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, EOS ) + // format: on argument `shouldParse` List("*", Id("kwargs")) argument `shouldParse` List("**", Id("kwargs")) @@ -433,272 +208,54 @@ class PythonParserTests // "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, // WS, WS, Id("print"), NL)).size `shouldBe` 1 + // format: off stmt `shouldNotParse` List( - "def", - WS, - Id("__call__"), - "(", - Id("self"), - WS, - ",", - "*", - Id("args"), - ",", - WS, - "**", - Id("kwargs"), - ")", - ":", - NL, - WS, - WS, - "for", - WS, - Id("arg"), - WS, - "in", - WS, - Id("args"), - ":", - NL, - WS, - WS, - WS, - WS, - Id("print"), - NL, - // this line is indented too far - WS, - WS, - WS, - WS, - WS, - WS, - Id("print"), 
- NL + "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, + WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + WS, WS, WS, WS, Id("print"), NL, // this line is indented too far + WS, WS, WS, WS, WS, WS, Id("print"), NL ) + // format: on // with empty lines + // format: off val traceProg2 = List[Lexeme]( - Comment("define the Trace class that will be "), + Comment("define the Trace class that will be "), NL, + Comment("invoked using decorators"), NL, + "class", WS, Id("Trace"), "(", Id("object"), ")", ":", NL, WS, + WS, WS, WS, "def", WS, Id("__init__"), "(", Id("self"), ")", ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, NL, - Comment("invoked using decorators"), + WS, WS, WS, WS, "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), + ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, + WS, WS, NL, NL, - "class", - WS, - Id("Trace"), - "(", - Id("object"), - ")", - ":", NL, - WS, - WS, - WS, - WS, - "def", - WS, - Id("__init__"), - "(", - Id("self"), - ")", - ":", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - Id("self"), - ".", - Id("f"), - WS, - "=", - WS, - Id("f"), - NL, - NL, - WS, - WS, - WS, - WS, - "def", - WS, - Id("__call__"), - "(", - Id("self"), - WS, - ",", - "*", - Id("args"), - ",", - WS, - "**", - Id("kwargs"), - ")", - ":", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - 
Id("print"), - "(", - Str("entering function "), - WS, - "+", - WS, - Id("self"), - ".", - Id("f"), - ".", - Id("__name__"), - ")", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - Id("i"), - "=", - Num("0"), - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - "for", - WS, - Id("arg"), - WS, - "in", - WS, - Id("args"), - ":", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - Id("print"), - "(", - Str("arg {0}: {1}"), - ".", - Id("format"), - "(", - Id("i"), - ",", - Id("arg"), - ")", - ")", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - Id("i"), - "=", - Id("i"), - "+", - Num("1"), - NL, - WS, - WS, - NL, - NL, - NL, - NL, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - WS, - "return", - WS, - Id("self"), - ".", - Id("f"), - "(", - "*", - Id("args"), - ",", - WS, - "**", - Id("kwargs"), - ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, EOS ) + // format: off preprocess(file_input) `shouldParse` traceProg2 parse(preprocess(file_input), traceProg2).size `shouldBe` 1 // suite should `parse` this: + // format: off val dummyin = List( NL, - WS, - "def", - WS, - Id("f"), - "(", - ")", - ":", - NL, - WS, - WS, - "def", - WS, - Id("f"), - "(", - ")", - ":", - NL, - WS, - WS, - WS, - Id("print"), - NL, - WS, - WS, - WS, - Id("print"), - NL, - WS, - WS, - WS, - Id("i"), - NL + WS, "def", WS, Id("f"), "(", ")", ":", NL, + WS, WS, "def", WS, Id("f"), "(", ")", ":", NL, + WS, WS, WS, Id("print"), NL, + WS, WS, WS, Id("print"), NL, + WS, WS, WS, Id("i"), NL ) + // format: on // println((suite `parse` dummyin) mkString "\n\n") @@ -712,68 +269,25 @@ class PythonParserTests aSimpleStmt | NL ~> indented(some(many(emptyLine) ~> aStmt)) lazy val aInput: NT[Any] = NL.* ~> many(aStmt <~ NL.*) <~ EOS + // format: off val dummyin2 = List[Lexeme]( - "def", - NL, - WS, - a, - NL, - WS, - a, - NL, - WS, - "def", - NL, - WS, - WS, - a, - NL, - WS, - WS, 
- a, - NL, - WS, - WS, - a, - NL, - NL, - "def", - NL, - WS, - a, - NL, - WS, - a, - NL, - WS, - "def", - NL, - WS, - WS, - WS, - WS, - WS, - WS, - a, - NL, - WS, - WS, - WS, - WS, - WS, - WS, - a, - NL, - WS, - WS, - WS, - WS, - WS, - WS, - a, - NL, + "def", NL, + WS, a, NL, + WS, a, NL, + WS, "def", NL, + WS, WS, a, NL, + WS, WS, a, NL, + WS, WS, a, NL, + NL, "def", NL, + WS, a, NL, + WS, a, NL, + WS, "def", NL, + WS, WS, WS, WS, WS, WS, a, NL, + WS, WS, WS, WS, WS, WS, a, NL, + WS, WS, WS, WS, WS, WS, a, NL, EOS ) + // format: on aInput `shouldParse` List("def", NL, WS, WS, a, NL, WS, WS, a, NL, EOS) aInput `shouldNotParse` List("def", NL, WS, WS, a, NL, WS, a, NL, EOS) From 835d48196238e4b0715540b297b24265167dfcee Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 4 Dec 2025 02:33:20 +0100 Subject: [PATCH 89/95] Update formatting. Enable unsafe binPack for call site Remove exclusion for simpler formatting cases. --- .scalafmt.conf | 5 +- .../src/test/scala/PythonParserTests.scala | 97 +++---------------- 2 files changed, 17 insertions(+), 85 deletions(-) diff --git a/.scalafmt.conf b/.scalafmt.conf index 0577f2a..54f7147 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -2,4 +2,7 @@ version = "3.7.15" runner.dialect = scala3 assumeStandardLibraryStripMargin = true -align.stripMargin = true \ No newline at end of file +align.stripMargin = true + +binPack.unsafeCallSite = true +binPack.literalArgumentLists = true \ No newline at end of file diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index fa02439..77b0070 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -45,50 +45,11 @@ class PythonParserTests explicitJoin(p) `shouldParse` List(a, a, a, BS, NL, a, a) explicitJoin(p) `shouldParse` List(a, a, a, BS, NL, a, a, BS, NL, a, a) - val input = List[Lexeme]( - a, - NL, - Comment("Hey!!"), - a, - BS, - NL, - a, - a, - "(", - a, - 
"[", - a, - BS, - NL, - a, - NL, - a, - "]", - ")", - a - ) + val input = List[Lexeme](a, NL, Comment("Hey!!"), a, BS, NL, a, a, "(", a, + "[", a, BS, NL, a, NL, a, "]", ")", a) - val inputWithoutComments = List[Lexeme]( - a, - NL, - a, - BS, - NL, - a, - a, - "(", - a, - "[", - a, - BS, - NL, - a, - NL, - a, - "]", - ")", - a - ) + val inputWithoutComments = List[Lexeme](a, NL, a, BS, NL, a, a, "(", a, "[", + a, BS, NL, a, NL, a, "]", ")", a) val inputWithoutExplicit = List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, NL, a, "]", ")", a) @@ -103,26 +64,14 @@ class PythonParserTests (inputWithoutComments, inputWithoutExplicit) implicitJoin(collect) `shouldParseWith` (inputWithoutExplicit, inputResult) - // format: off - preprocess(file_input) `shouldParse` List[Lexeme]( - a, ";", a, "=", "yield", "from", a, "=", a, ";", NL, - NL, - a, ";", a, NL, - EOS - ) - // format: on + preprocess(file_input) `shouldParse` List(a, ";", a, "=", "yield", "from", + a, "=", a, ";", NL, NL, a, ";", a, NL, EOS) preprocess(file_input) `shouldParse` List(a, "=", a, ">>", a, "*", a, NL, EOS) - // format: off - val sampleProg = List[Lexeme]( - "def", WS, Id("fun"), "(", WS, a, WS, ")", ":", NL, - WS, WS, a, "+=", WS, a, NL, - WS, WS, a, "*=", a, NL, - EOS - ) - // format: on + val sampleProg = List[Lexeme]("def", WS, Id("fun"), "(", WS, a, WS, ")", + ":", NL, WS, WS, a, "+=", WS, a, NL, WS, WS, a, "*=", a, NL, EOS) parse(stripComments(collect), sampleProg) `shouldBe` List(sampleProg) parse(explicitJoin(collect), sampleProg) `shouldBe` List(sampleProg) @@ -130,17 +79,9 @@ class PythonParserTests preprocess(file_input) `shouldParse` sampleProg - // format: off - val sampleProg2 = List[Lexeme]( - "def", WS, Id("fun"), "(", NL, - WS, a, WS, NL, - ")", ":", NL, - WS, WS, a, "+=", Comment("Test"), BS, NL, - WS, a, NL, - WS, WS, a, "*=", a, NL, - EOS - ) - // format: on + val sampleProg2 = List[Lexeme]("def", WS, Id("fun"), "(", NL, WS, a, WS, NL, + ")", ":", NL, WS, WS, a, "+=", Comment("Test"), 
BS, NL, WS, a, NL, WS, WS, + a, "*=", a, NL, EOS) parse(preprocess(collect), sampleProg2) `shouldBe` List(sampleProg) preprocess(file_input) `shouldParse` sampleProg2 @@ -184,20 +125,8 @@ class PythonParserTests test `shouldParse` List(Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")") - test `shouldParse` List( - Id("print"), - "(", - Str("entering function "), - WS, - "+", - WS, - Id("self"), - ".", - Id("f"), - ".", - Id("__name__"), - ")" - ) + test `shouldParse` List(Id("print"), "(", Str("entering function "), WS, + "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")") // TODO is already ambiguous // (stmt `parse` List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size `shouldBe` 1 From 8fa30623adadd974201c43b48419a3296494a5dd Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Thu, 15 Jan 2026 21:50:19 +0100 Subject: [PATCH 90/95] Remove unnecessary override --- artifact/src/main/scala/library/DerivativeParsers.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 23696da..6916fde 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -346,6 +346,4 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def prefix: Parser[Any] => Parser[Unit] = _.prefix } -object DerivativeParsers extends RichParsers with DerivativeParsers { - override type Elem = Char -} +object DerivativeParsers extends RichParsers with DerivativeParsers From 532e8b86f384ce8d641e5b8f4eefd961f5c908d4 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 19 Jan 2026 13:29:23 +0100 Subject: [PATCH 91/95] Use infix function instead backticks. 
--- .../src/test/scala/BasicCombinatorsTest.scala | 60 ++-- artifact/src/test/scala/CustomMatchers.scala | 12 +- .../test/scala/DerivativeParsersTests.scala | 283 +++++++++--------- artifact/src/test/scala/LeftrecTests.scala | 250 ++++++++-------- artifact/src/test/scala/NegationTests.scala | 58 ++-- .../src/test/scala/PythonParserTests.scala | 116 +++---- 6 files changed, 389 insertions(+), 390 deletions(-) diff --git a/artifact/src/test/scala/BasicCombinatorsTest.scala b/artifact/src/test/scala/BasicCombinatorsTest.scala index 416813a..3ef7466 100644 --- a/artifact/src/test/scala/BasicCombinatorsTest.scala +++ b/artifact/src/test/scala/BasicCombinatorsTest.scala @@ -12,61 +12,61 @@ trait BasicCombinatorTests { describe("parser \"abc\"") { val p = 'a' ~ 'b' ~ 'c' - p `shouldParse` "abc" - p `shouldNotParse` "abcd" + p shouldParse "abc" + p shouldNotParse "abcd" } describe("parser \"ab | ac\"") { val p = ('a' ~ 'b') | ('a' ~ 'c') - p `shouldParse` "ab" - p `shouldParse` "ac" - p `shouldNotParse` "bc" - p `shouldNotParse` "a" - p `shouldNotParse` "abc" + p shouldParse "ab" + p shouldParse "ac" + p shouldNotParse "bc" + p shouldNotParse "a" + p shouldNotParse "abc" } describe("parser \"baaa | ba\"") { val p = ('b' ~ 'a' ~ 'a' ~ 'a') | 'b' ~ 'a' - p `shouldParse` "baaa" - p `shouldParse` "ba" - ((p ~ 'c' ~ 'o') | (p ~ 'c')) `shouldParse` "bac" - ((p ~ 'c' ~ 'o') | (p ~ 'c')) `shouldParse` "baco" + p shouldParse "baaa" + p shouldParse "ba" + ((p ~ 'c' ~ 'o') | (p ~ 'c')) shouldParse "bac" + ((p ~ 'c' ~ 'o') | (p ~ 'c')) shouldParse "baco" } describe("parser \"(baaa | ba) aa\"") { val p = ("baaa" | "ba") ~ "aa" - p `shouldParse` "baaaaa" - p `shouldParse` "baaa" + p shouldParse "baaaaa" + p shouldParse "baaa" } describe("parser \"succeed(a) b\"") { val p = succ('a') ~ 'b' - p `shouldParse` "b" - p `shouldNotParse` "" + p shouldParse "b" + p shouldNotParse "" } describe("parser \"succeed(a) succeed(b)\"") { val p = succ('a') ~ succ('b') - p `shouldParse` "" + p 
shouldParse "" } describe("parser \"succeed(a) | succeed(b)\"") { val p = succ('a') | succ('b') - p `shouldParse` "" + p shouldParse "" } describe("parser \"(a a a | a a)+") { val p = 'a' ~ 'a' ~ 'a' | 'a' ~ 'a' - describe("some(_)") { some(p) `shouldParse` "aaaa" } - describe("_ ~ 'b'") { (p ~ 'b') `shouldParse` "aaab" } + describe("some(_)") { some(p) shouldParse "aaaa" } + describe("_ ~ 'b'") { (p ~ 'b') shouldParse "aaab" } describe("some(_) ~ 'b'") { - (some(p) ~ 'b') `shouldParse` "aab" - (some(p) ~ 'b') `shouldParse` "aaab" - (some(p) ~ 'b') `shouldParse` "aaaaab" + (some(p) ~ 'b') shouldParse "aab" + (some(p) ~ 'b') shouldParse "aaab" + (some(p) ~ 'b') shouldParse "aaaaab" } describe("some(_ ~ 'a') ~ 'b'") { - (some(p ~ 'a') ~ 'b') `shouldParse` "aaaab" - (some(p ~ 'a') ~ 'b') `shouldParse` "aaab" + (some(p ~ 'a') ~ 'b') shouldParse "aaaab" + (some(p ~ 'a') ~ 'b') shouldParse "aaab" } } @@ -75,12 +75,12 @@ trait BasicCombinatorTests { val largeInput = List.fill(100)('a').mkString - p `shouldParse` "a" - p `shouldParse` "aaaaaa" - p `shouldParse` largeInput - p `shouldNotParse` "" - p `shouldNotParse` "b" + largeInput - p `shouldNotParse` largeInput + "b" + p shouldParse "a" + p shouldParse "aaaaaa" + p shouldParse largeInput + p shouldNotParse "" + p shouldNotParse "b" + largeInput + p shouldNotParse largeInput + "b" } } diff --git a/artifact/src/test/scala/CustomMatchers.scala b/artifact/src/test/scala/CustomMatchers.scala index 8deb129..73d775d 100644 --- a/artifact/src/test/scala/CustomMatchers.scala +++ b/artifact/src/test/scala/CustomMatchers.scala @@ -12,18 +12,18 @@ trait CustomMatchers[+P <: Parsers](val parsers: P) extends Matchers { import parsers.{Elem, Parser, accepts, isSuccess, parse} extension [T](p: => Parser[T]) { - def shouldParse(s: Iterable[Elem], tags: Tag*) = + infix def shouldParse(s: Iterable[Elem], tags: Tag*) = it(s"""should parse "$s" """, tags*) { - accepts(p, s) `shouldBe` true + accepts(p, s) shouldBe true } - def 
shouldNotParse(s: Iterable[Elem], tags: Tag*) = + infix def shouldNotParse(s: Iterable[Elem], tags: Tag*) = it(s"""should not parse "$s" """, tags*) { - accepts(p, s) `shouldBe` false + accepts(p, s) shouldBe false } // for unambiguous parses - def shouldParseWith(s: Iterable[Elem], result: T) = + infix def shouldParseWith(s: Iterable[Elem], result: T) = it(s"""should parse "$s" with correct result""") { - parse(p, s) `shouldBe` List(result) + parse(p, s) shouldBe List(result) } } diff --git a/artifact/src/test/scala/DerivativeParsersTests.scala b/artifact/src/test/scala/DerivativeParsersTests.scala index 93b7629..41ccd5b 100644 --- a/artifact/src/test/scala/DerivativeParsersTests.scala +++ b/artifact/src/test/scala/DerivativeParsersTests.scala @@ -34,15 +34,15 @@ class DerivativeParsersTests describe("Examples in section 3") { import section_3_2._ - number `shouldParse` "42" + number shouldParse "42" } describe("Indentation with feed") { import section_3_4_improved._ val xs = many(some('x') ~ '\n') - indented(xs) `shouldParse` " xxx\n xxxx\n" - indented(xs) `shouldParse` " xxxxxxxxxx\n xxxxxxxxxx\n" + indented(xs) shouldParse " xxx\n xxxx\n" + indented(xs) shouldParse " xxxxxxxxxx\n xxxxxxxxxx\n" lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block @@ -50,16 +50,16 @@ class DerivativeParsersTests lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) - stmt `shouldParse` "while (true):\n xxxxx\n xxxxx\n" - stmt `shouldParse` "while (true):\n while (true):\n xxxxx\n xxxx\n" + stmt shouldParse "while (true):\n xxxxx\n xxxxx\n" + stmt shouldParse "while (true):\n while (true):\n xxxxx\n xxxx\n" } describe("Indentation with delegation") { import section_3_5_improved._ val xs = many(some('x') ~ '\n') - indented(xs) `shouldParse` " xxx\n xxxx\n" - indented(xs) `shouldParse` " xxxxxxxxxx\n xxxxxxxxxx\n" + indented(xs) shouldParse " xxx\n xxxx\n" + indented(xs) shouldParse " xxxxxxxxxx\n xxxxxxxxxx\n" lazy val stmt: NT[Any] = ("while" ~ 
space ~ "(true):" ~ block @@ -67,8 +67,8 @@ class DerivativeParsersTests lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) - stmt `shouldParse` "while (true):\n xxxxx\n xxxxx\n" - stmt `shouldParse` "while (true):\n while (true):\n xxxxx\n xxxx\n" + stmt shouldParse "while (true):\n xxxxx\n xxxxx\n" + stmt shouldParse "while (true):\n while (true):\n xxxxx\n xxxx\n" } describe("Simplified tables for paper") { @@ -76,9 +76,9 @@ class DerivativeParsersTests lazy val xs = many(some('x') ~ '\n') - table(xs) `shouldParse` "+---+\n|xxx|\n+---+\n" + table(xs) shouldParse "+---+\n|xxx|\n+---+\n" - table(xs) `shouldParse` + table(xs) shouldParse """+---+--------+------------+ ^|xxx|xxxxxxxx|xxxxxxxxxxxx| ^|xxx|xxxxxxxx|xxxxxxxxxxxx| @@ -120,13 +120,13 @@ class DerivativeParsersTests lazy val xs = many(some('x') ~ '\n') - table(xs) `shouldParse` + table(xs) shouldParse """+---+ ^|xxx| ^+---+ ^""".stripMargin('^') - table(xs) `shouldParse` + table(xs) shouldParse """+---+--------+------------+ ^|xxx|xxxxxxxx|xxxxxxxxxxxx| ^|xxx|xxxxxxxx|xxxxxxxxxxxx| @@ -137,7 +137,7 @@ class DerivativeParsersTests ^+---+--------+------------+ ^""".stripMargin('^') - table(xs) `shouldNotParse` + table(xs) shouldNotParse """+---+--------+------------+ ^|xxx|xxxxxxxx|xxxxxxxxxxxx| ^|xxx|xxxxxxxx|xxxxxxxxxxxx| @@ -150,7 +150,7 @@ class DerivativeParsersTests lazy val nestedTables: NT[Any] = table(xs | nestedTables) - nestedTables `shouldParse` + nestedTables shouldParse """+---+--------+------------+ ^|xxx|+-+----+|xxxxxxxxxxxx| ^|xxx||x|xxxx||xxxxxxxxxxxx| @@ -161,7 +161,7 @@ class DerivativeParsersTests ^+---+--------+------------+ ^""".stripMargin('^') - nestedTables `shouldNotParse` + nestedTables shouldNotParse """+---+--------+------------+ ^|xxx|+-+----+|xxxxxxxxxxxx| ^|xxx||x|oxxx||xxxxxxxxxxxx| @@ -182,7 +182,7 @@ class DerivativeParsersTests if (n < 5) succ(n + 1) else err } - fm.results.toSet `shouldBe` Set(1, 2, 3, 4, 5) + fm.results.toSet shouldBe Set(1, 
2, 3, 4, 5) } describe("Stream preprocessing") { @@ -194,17 +194,17 @@ class DerivativeParsersTests def bin(p: Parser[Any]): NT[Any] = done(p) | (('a' ~> bin(p << '1')) | ('b' ~> bin(p << '0'))) - ones `shouldParse` "1111" + ones shouldParse "1111" bin(ones).accepts - bin(ones) `shouldParse` "aaaaa" - bin(ones) `shouldNotParse` "aaaaab" - bin(zeros) `shouldParse` "bbbbb" - bin(zeros) `shouldNotParse` "bbbbba" - bin(oneszeros) `shouldParse` "aabb" - bin(oneszeros) `shouldNotParse` "aabbb" - - bin(ones) `shouldNotParse` ("b" `repeat` 50) + bin(ones) shouldParse "aaaaa" + bin(ones) shouldNotParse "aaaaab" + bin(zeros) shouldParse "bbbbb" + bin(zeros) shouldNotParse "bbbbba" + bin(oneszeros) shouldParse "aabb" + bin(oneszeros) shouldNotParse "aabbb" + + bin(ones) shouldNotParse "b".repeat(50) } describe("Results of ambiguous parses") { @@ -246,12 +246,12 @@ class DerivativeParsersTests def IMAP[T](body: Parser[T]): Parser[T] = header >> feedNTimes(body) - IMAP(many('a')) `shouldParse` "{ 1 }a" - IMAP(many('a')) `shouldNotParse` "{ 1 }" - IMAP(many('a')) `shouldNotParse` "{ 1 }aa" - IMAP(many('a')) `shouldParse` "{ 7 }aaaaaaa" - IMAP(many('a')) `shouldNotParse` "{ 7 }aaaaaaaa" - IMAP(many('a')) `shouldNotParse` "{ 7 }" + IMAP(many('a')) shouldParse "{ 1 }a" + IMAP(many('a')) shouldNotParse "{ 1 }" + IMAP(many('a')) shouldNotParse "{ 1 }aa" + IMAP(many('a')) shouldParse "{ 7 }aaaaaaa" + IMAP(many('a')) shouldNotParse "{ 7 }aaaaaaaa" + IMAP(many('a')) shouldNotParse "{ 7 }" } // Usecase. 
interleaving parsers @@ -264,11 +264,11 @@ class DerivativeParsersTests val p = 'a' ~ 'a' ~ 'a' val q = 'b' ~ 'b' ~ 'b' - interleave(p, q) `shouldParse` "ababab" - interleave(p, q) `shouldNotParse` "abababab" - interleave(p, q) `shouldNotParse` "abab" - interleave(p, q) `shouldNotParse` "ab" - interleave(p, q) `shouldNotParse` "" + interleave(p, q) shouldParse "ababab" + interleave(p, q) shouldNotParse "abababab" + interleave(p, q) shouldNotParse "abab" + interleave(p, q) shouldNotParse "ab" + interleave(p, q) shouldNotParse "" } // Usecase. Indentation that also skips empty lines @@ -292,11 +292,11 @@ class DerivativeParsersTests val xs = many(some('x') ~ '\n') - indent(xs) `shouldParse` "" - indent(xs) `shouldParse` " xx\n" - indent(xs) `shouldParse` " xxxxx\n" - indent(xs) `shouldParse` " xxxxx\n xxxxxxx\n" - indent(xs) `shouldParse` + indent(xs) shouldParse "" + indent(xs) shouldParse " xx\n" + indent(xs) shouldParse " xxxxx\n" + indent(xs) shouldParse " xxxxx\n xxxxxxx\n" + indent(xs) shouldParse """ xxxxx | xxxxxxx | xxxxxxxx @@ -316,46 +316,46 @@ class DerivativeParsersTests | xxxxxxxxxxxxxx |""".stripMargin('|') - indent(indent(xs)) `shouldParse` " xx\n" - indent(indent(xs)) `shouldParse` " xxxxx\n" - indent(indent(xs)) `shouldParse` " xxxxx\n xxxxxxx\n" + indent(indent(xs)) shouldParse " xx\n" + indent(indent(xs)) shouldParse " xxxxx\n" + indent(indent(xs)) shouldParse " xxxxx\n xxxxxxx\n" - indent(indent(xs)) `shouldParse` " xxxxx\n\n xxxxxxx\n" - indent(indent(xs)) `shouldParse` " xxxxx\n \n xxxxxxx\n" - indent(indent(xs)) `shouldParse` " xxxxx\n \n\n \n xxxxxxx\n" - indent(indent(xs)) `shouldNotParse` " xxxxx\n \n\n \n xxxxxxx\n" - indent(indent(xs)) `shouldNotParse` " xxxxx\n \n\n \n xxxxxxx\n" + indent(indent(xs)) shouldParse " xxxxx\n\n xxxxxxx\n" + indent(indent(xs)) shouldParse " xxxxx\n \n xxxxxxx\n" + indent(indent(xs)) shouldParse " xxxxx\n \n\n \n xxxxxxx\n" + indent(indent(xs)) shouldNotParse " xxxxx\n \n\n \n xxxxxxx\n" + indent(indent(xs)) 
shouldNotParse " xxxxx\n \n\n \n xxxxxxx\n" } describe("Parens parser") { import section_4_2.parens - parens `shouldParse` "" - parens `shouldParse` "()" - parens `shouldParse` "(())" - parens `shouldNotParse` "(()" + parens shouldParse "" + parens shouldParse "()" + parens shouldParse "(())" + parens shouldNotParse "(()" } describe("Retroactively, allow spaces in arbitrary positions") { import section_4_2.{spaced, parens} val sp = spaced(parens) - sp `shouldParse` "((()))" - sp `shouldParse` "((( )))" - sp `shouldParse` "( (( )))" - sp `shouldParse` "( (( ))) " - sp `shouldParse` "( (\n (\n )) ) " - sp `shouldNotParse` "( ( ( )) " + sp shouldParse "((()))" + sp shouldParse "((( )))" + sp shouldParse "( (( )))" + sp shouldParse "( (( ))) " + sp shouldParse "( (\n (\n )) ) " + sp shouldNotParse "( ( ( )) " } describe("Allowing parens in code blocks") { import section_4_2._ - as `shouldParse` "aaa\n" - as `shouldParse` "\n" - as `shouldParse` "aa\naa\n" + as shouldParse "aaa\n" + as shouldParse "\n" + as shouldParse "aa\naa\n" - both `shouldParse` "a\n" - both `shouldParse` + both shouldParse "a\n" + both shouldParse """aaa |~~~ |() @@ -363,9 +363,9 @@ class DerivativeParsersTests |aaaaa |""".stripMargin('|') - both `shouldParse` "a \n\n~~~ \n()\n~~~\naaa\n" + both shouldParse "a \n\n~~~ \n()\n~~~\naaa\n" - both `shouldNotParse` + both shouldNotParse """aaa |~~~ |( @@ -373,7 +373,7 @@ class DerivativeParsersTests |aaaaa |""".stripMargin('|') - both `shouldParse` + both shouldParse """aaa |~~~ |((()) @@ -390,21 +390,21 @@ class DerivativeParsersTests import section_4_2._ - unescape(many('\n')) `shouldParse` """\n\n\n""" - unescape(many("\n" | "a")) `shouldParse` """\na\n\n""" - unescape(many("\n" | "a")) `shouldParse` """\na\n\naaa""" + unescape(many('\n')) shouldParse """\n\n\n""" + unescape(many("\n" | "a")) shouldParse """\na\n\n""" + unescape(many("\n" | "a")) shouldParse """\na\n\naaa""" } describe("Combined examples") { import section_4_2._ - combined 
`shouldParse` + combined shouldParse """aaa ^""".stripMargin('^') - combined `shouldParse` "+----+\n|aaaa|\n+----+\n" - combined `shouldParse` "+----+\n|aa |\n+----+\n" + combined shouldParse "+----+\n|aaaa|\n+----+\n" + combined shouldParse "+----+\n|aa |\n+----+\n" - combined `shouldParse` + combined shouldParse """+----+ ^|aaaa| ^|~~~ | @@ -414,9 +414,9 @@ class DerivativeParsersTests ^+----+ ^""".stripMargin('^') - combined `shouldParse` "+----+\n|aa |\n|aaaa|\n+----+\n" + combined shouldParse "+----+\n|aa |\n|aaaa|\n+----+\n" - combined `shouldParse` + combined shouldParse """+----+ ^|aa | ^|~~~ | @@ -431,47 +431,47 @@ class DerivativeParsersTests describe("Biased choice") { val p = biasedAlt("foo", some(letter)) ~ "bar" - p `shouldParse` "foobar" - p `shouldNotParse` "foozbar" - p `shouldParse` "barbar" + p shouldParse "foobar" + p shouldNotParse "foozbar" + p shouldParse "barbar" // this test shows, that we can only implement a locally biased choice val q = biasedAlt("foo", "f") ~ "oo" // should actually *not* parse "foo", but does: - q `shouldParse` "foo" + q shouldParse "foo" } describe("Greedy repitition") { it("should return only the result of the longest match") { - parse(greedySome(some('a')), "") `shouldBe` List() - parse(greedyMany(some('a')), "") `shouldBe` List(List()) - parse(greedySome(some('a')), "a") `shouldBe` List(List(List('a'))) - parse(greedySome(some('a')), "aaa") `shouldBe` + parse(greedySome(some('a')), "") shouldBe List() + parse(greedyMany(some('a')), "") shouldBe List(List()) + parse(greedySome(some('a')), "a") shouldBe List(List(List('a'))) + parse(greedySome(some('a')), "aaa") shouldBe List(List(List('a', 'a', 'a'))) } it("should also return longest match if other parser succeeded first") { lazy val p = some("ab") | some("a") | some("b") - parse(greedySome(p), "ab") `shouldBe` List(List(List("ab"))) - parse(greedySome(p), "abab") `shouldBe` List(List(List("ab", "ab"))) - parse(greedySome(p), "abbab") `shouldBe` List( + 
parse(greedySome(p), "ab") shouldBe List(List(List("ab"))) + parse(greedySome(p), "abab") shouldBe List(List(List("ab", "ab"))) + parse(greedySome(p), "abbab") shouldBe List( List(List("ab"), List("b"), List("ab")) ) - parse(greedySome(p), "abbaab") `shouldBe` List( + parse(greedySome(p), "abbaab") shouldBe List( List(List("ab"), List("b"), List("a", "a"), List("b")) ) - parse(greedySome(p), "aaaab") `shouldBe` List( + parse(greedySome(p), "aaaab") shouldBe List( List(List("a", "a", "a", "a"), List("b")) ) lazy val q = "ab" | "a" | "b" - parse(greedySome(q), "ab") `shouldBe` List(List("ab")) - parse(greedySome(q), "abab") `shouldBe` List(List("ab", "ab")) - parse(greedySome(q), "abbab") `shouldBe` List(List("ab", "b", "ab")) - parse(greedySome(q), "abbaab") `shouldBe` List(List("ab", "b", "a", "ab")) - parse(greedySome(q), "aaaab") `shouldBe` List(List("a", "a", "a", "ab")) + parse(greedySome(q), "ab") shouldBe List(List("ab")) + parse(greedySome(q), "abab") shouldBe List(List("ab", "ab")) + parse(greedySome(q), "abbab") shouldBe List(List("ab", "b", "ab")) + parse(greedySome(q), "abbaab") shouldBe List(List("ab", "b", "a", "ab")) + parse(greedySome(q), "aaaab") shouldBe List(List("a", "a", "a", "ab")) } // This shows that our implementation is only locally greedy @@ -494,15 +494,15 @@ class DerivativeParsersTests val r: Parser[Any] = ("oo" | "b") val ex: Parser[Any] = biasedAlt(p, q) ~ r - // ex `shouldNotParse` "foo" //-> fails + // ex shouldNotParse "foo" //-> fails // If the right-hand-side `r` is locally known the parser can be // rewritten to: val rewrite = p ~ r | (neg(p ~ always) &> (q ~ r)) - rewrite `shouldNotParse` "foo" - rewrite `shouldParse` "foooo" - rewrite `shouldParse` "fb" + rewrite shouldNotParse "foo" + rewrite shouldParse "foooo" + rewrite shouldParse "fb" } // Since "lexing" is performed after indentation checking, but indentation @@ -565,10 +565,10 @@ class DerivativeParsersTests val multilineString: Parser[String] = consumed("'''" ~ 
neg(always ~ prefix("'''")) ~ "'''") - singleString `shouldParse` "\"hello world\"" - singleString `shouldNotParse` "\"hello\nworld\"" - singleString `shouldParse` "\"hello'''world\"" - multilineString `shouldParse` "'''Hello \" \n\" world'''" + singleString shouldParse "\"hello world\"" + singleString shouldNotParse "\"hello\nworld\"" + singleString shouldParse "\"hello'''world\"" + multilineString shouldParse "'''Hello \" \n\" world'''" // for testing val collect = consumed(always) ^^ { x => x.mkString } @@ -584,12 +584,12 @@ class DerivativeParsersTests parse( p, "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n" - ) `should` be( + ) should be( List("hello '''foo\"bar''' test\n foo \" bar'''foo \"\n") ) } // here we can already observe performance problems (about 400ms): - p `shouldParse` + p shouldParse """hello '''foo |"bar''' test | foo " bar'''foo " @@ -623,21 +623,21 @@ class DerivativeParsersTests dyck ) - parens `shouldParse` "()" - parens `shouldParse` "(())" - parens `shouldParse` "(()()())" - parens `shouldParse` "(()[]())" - parens `shouldParse` "(()[()[]]())" - parens `shouldNotParse` "(()[()[]())" - parens `shouldNotParse` "a (()) a" - parens `shouldNotParse` "(()" - parens `shouldParse` "( hello world ())" - parens `shouldParse` "( [# foo \"()) \n ()]{\" [ \" hello } world ())" - parens `shouldNotParse` "( [# foo \"()) \n ()]{\" [ \" hello world ())" - parens `shouldNotParse` "( [# foo \"()) \n ()]\" [ \" hello } world ())" - parens `shouldNotParse` "( [# foo \"()) \n )]{\" [ \" hello } world ())" - parens `shouldParse` "( hello \" ) \"world ())" - parens `shouldNotParse` "( hello \" ) \"" + parens shouldParse "()" + parens shouldParse "(())" + parens shouldParse "(()()())" + parens shouldParse "(()[]())" + parens shouldParse "(()[()[]]())" + parens shouldNotParse "(()[()[]())" + parens shouldNotParse "a (()) a" + parens shouldNotParse "(()" + parens shouldParse "( hello world ())" + parens shouldParse "( [# foo \"()) \n ()]{\" [ \" hello } 
world ())" + parens shouldNotParse "( [# foo \"()) \n ()]{\" [ \" hello world ())" + parens shouldNotParse "( [# foo \"()) \n ()]\" [ \" hello } world ())" + parens shouldNotParse "( [# foo \"()) \n )]{\" [ \" hello } world ())" + parens shouldParse "( hello \" ) \"world ())" + parens shouldNotParse "( hello \" ) \"" lazy val escapedNL = '\\' ~ '\n' @@ -659,7 +659,7 @@ class DerivativeParsersTests collect ), " foo'''a \n a'''\n bar\n ( \n )\n" - ) `should` be( + ) should be( List("foo'''a \n a'''\nbar\n( \n )\n") ) parse( @@ -667,18 +667,17 @@ class DerivativeParsersTests collect ), " '''some \n multiline \n'''\n ( # comment (\n ) hello\n test and \\\n escaped\n" - ) `should` be( + ) should be( List( "'''some \n multiline \n'''\n( # comment (\n ) hello\ntest and \\\n escaped\n" ) ) } joiningIndent( - collect - ) `shouldParse` " '''some \n multiline \n'''\n ( # comment (\n )\n" + collect) shouldParse " '''some \n multiline \n'''\n ( # comment (\n )\n" joiningIndent( collect - ) `shouldNotParse` " '''some \n multiline \n''\n ( # comment (\n )\n" + ) shouldNotParse " '''some \n multiline \n''\n ( # comment (\n )\n" val WS: Parser[Any] = ' ' val spacesNoNl = some(WS) @@ -699,17 +698,17 @@ class DerivativeParsersTests lazy val stmts: NT[Any] = someSep(stmt, spaces) lazy val suite: NT[Any] = lineEnd ~> joiningIndent(stmts) - stmt `shouldParse` "def foo():\n '''hello\n '''\n" - stmt `shouldNotParse` "def foo():\n \"'''hello\n '''\"\n" - stmt `shouldParse` "def foo():\n '''hello\n ''' # some comment \n" - stmt `shouldNotParse` "def foo():\n # '''hello\n ''' some comment \n" - stmt `shouldParse` "def foo():\n []\n" - stmt `shouldParse` "def foo():\n [foo, bar]\n" - stmt `shouldParse` "def foo():\n [foo, \nbar]\n" - stmt `shouldNotParse` "def foo():\n \"[foo, \nbar]\"\n" - stmt `shouldParse` "def foo():\n \"[foo, bar]\"\n" - stmt `shouldParse` "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" - stmt `shouldParse` "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" + 
stmt shouldParse "def foo():\n '''hello\n '''\n" + stmt shouldNotParse "def foo():\n \"'''hello\n '''\"\n" + stmt shouldParse "def foo():\n '''hello\n ''' # some comment \n" + stmt shouldNotParse "def foo():\n # '''hello\n ''' some comment \n" + stmt shouldParse "def foo():\n []\n" + stmt shouldParse "def foo():\n [foo, bar]\n" + stmt shouldParse "def foo():\n [foo, \nbar]\n" + stmt shouldNotParse "def foo():\n \"[foo, \nbar]\"\n" + stmt shouldParse "def foo():\n \"[foo, bar]\"\n" + stmt shouldParse "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" + stmt shouldParse "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" } describe( @@ -721,12 +720,12 @@ class DerivativeParsersTests val p_c = p <<< "c" it("should preserve the invariant when performing optimization rewrites") { - p_a.accepts `shouldBe` false - p_a.accepts `shouldBe` (!p_a.results.isEmpty) - p_b.accepts `shouldBe` false - p_b.accepts `shouldBe` (!p_b.results.isEmpty) - p_c.accepts `shouldBe` true - p_c.accepts `shouldBe` (!p_c.results.isEmpty) + p_a.accepts shouldBe false + p_a.accepts shouldBe (!p_a.results.isEmpty) + p_b.accepts shouldBe false + p_b.accepts shouldBe (!p_b.results.isEmpty) + p_c.accepts shouldBe true + p_c.accepts shouldBe (!p_c.results.isEmpty) } } } diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index b0f3f8a..1ba0ed3 100644 --- a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -13,30 +13,30 @@ trait LeftrecTests { describe("p = p | .") { lazy val p: NT[Any] = p | any - p `shouldParse` "a" + p shouldParse "a" } describe("p = p ~ . | .") { lazy val p: NT[Any] = p ~ any | any - p `shouldParse` "a" + p shouldParse "a" } describe("p = . | p ~ .") { lazy val p: NT[Any] = any | p ~ any - p `shouldParse` "a" + p shouldParse "a" } describe("p = (. | .) >> { (. 
| p) ^^ id }") { lazy val p: NT[Any] = (p | any) >> { _ => (any | p) ^^ identity } - p `shouldParse` "aa" - p `shouldParse` "aaaaa" + p shouldParse "aa" + p shouldParse "aaaaa" } describe("p = (. | p) >> { a }") { lazy val p: NT[Any] = (any | p) >> { _ => 'a' } - p `shouldParse` "aa" - p `shouldParse` "aaa" - p `shouldParse` "aaaaaa" + p shouldParse "aa" + p shouldParse "aaa" + p shouldParse "aaaaaa" } } @@ -44,12 +44,12 @@ trait LeftrecTests { describe("p = . ~ p") { lazy val p: NT[Any] = any ~ p - p `shouldNotParse` "a" + p shouldNotParse "a" } describe("p = p ~ .") { lazy val p: NT[Any] = p ~ any - p `shouldNotParse` "a" + p shouldNotParse "a" } } @@ -58,17 +58,17 @@ trait LeftrecTests { describe("A = A ~ a | empty") { lazy val A: NT[Any] = A ~ 'a' | succ(42) - A `shouldParse` "" - A `shouldParse` "a" - A `shouldParse` "aa" + A shouldParse "" + A shouldParse "a" + A shouldParse "aa" } describe("A = empty | A ~ a ") { lazy val A: NT[Any] = succ(42) | A ~ 'a' - A `shouldParse` "" - A `shouldParse` "a" - A `shouldParse` "aa" + A shouldParse "" + A shouldParse "a" + A shouldParse "aa" } // Simple example of indirect leftrecursion from @@ -78,15 +78,15 @@ trait LeftrecTests { lazy val A: NT[Any] = B ~ '-' ~ num | num lazy val B: NT[Any] = succ(()) ~ A - // A `shouldParse` "1" - // A `shouldParse` "12" - // A `shouldParse` "12-32" - // A `shouldParse` "12-32-45" + // A shouldParse "1" + // A shouldParse "12" + // A shouldParse "12-32" + // A shouldParse "12-32-45" - B `shouldParse` "1" - B `shouldParse` "12" - B `shouldParse` "12-32" - B `shouldParse` "12-32-45" + B shouldParse "1" + B shouldParse "12" + B shouldParse "12-32" + B shouldParse "12-32-45" } describe("two levels indirect leftrecursion") { @@ -95,16 +95,16 @@ trait LeftrecTests { lazy val B: NT[Any] = succ(()) ~ C ~ '+' ~ num lazy val C: NT[Any] = succ(()) ~ A - A `shouldParse` "1" - A `shouldParse` "12" - C `shouldParse` "2" - C `shouldParse` "22" - B `shouldParse` "12+32" - A `shouldParse` "12+32-42" - A 
`shouldParse` "12+12-32+45-44" - A `shouldNotParse` "" - A `shouldNotParse` "12+13+14" - A `shouldNotParse` "12+13+14-14-56" + A shouldParse "1" + A shouldParse "12" + C shouldParse "2" + C shouldParse "22" + B shouldParse "12+32" + A shouldParse "12+32-42" + A shouldParse "12+12-32+45-44" + A shouldNotParse "" + A shouldNotParse "12+13+14" + A shouldNotParse "12+13+14-14-56" } // From "Packrat parsers can support left-recursion" @@ -112,32 +112,32 @@ trait LeftrecTests { lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succ(()) lazy val ones: NT[Any] = ones ~ '1' | '1' - start `shouldParse` "" - start `shouldParse` "1" - start `shouldParse` "12" - start `shouldParse` "11112" - start `shouldParse` "111111" - start `shouldParse` "1111112" + start shouldParse "" + start shouldParse "1" + start shouldParse "12" + start shouldParse "11112" + start shouldParse "111111" + start shouldParse "1111112" // Actually computing the result triggers a stackoverflow - // start `shouldParse` ("1" * 200) + // start shouldParse ("1" * 200) } describe("A = A ~ b | c") { lazy val A: NT[Any] = A ~ 'b' | 'c' - A `shouldParse` "c" - A `shouldParse` "cb" - A `shouldParse` "cbb" - A `shouldParse` "cbbbbbbbbbbbbb" - A `shouldNotParse` "cbbbbbbbbbbbbbc" + A shouldParse "c" + A shouldParse "cb" + A shouldParse "cbb" + A shouldParse "cbbbbbbbbbbbbb" + A shouldNotParse "cbbbbbbbbbbbbbc" } describe("A = empty ~ A ~ b | empty") { lazy val A: NT[Any] = succ("done") ~ A ~ 'b' | succ("done") - A `shouldParse` "" - A `shouldParse` "b" - A `shouldParse` "bb" + A shouldParse "" + A shouldParse "b" + A shouldParse "bb" } // should parse at most as many 'd's as it parses 'b's. 
@@ -145,24 +145,24 @@ trait LeftrecTests { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' lazy val B: NT[Any] = 'd' | succ("done") - A `shouldParse` "c" - A `shouldParse` "cb" - A `shouldParse` "dcb" - A `shouldParse` "cbb" - A `shouldParse` "ddcbb" - A `shouldNotParse` "dddcb" - A `shouldParse` "dddddcbbbbbbbbbbbbb" + A shouldParse "c" + A shouldParse "cb" + A shouldParse "dcb" + A shouldParse "cbb" + A shouldParse "ddcbb" + A shouldNotParse "dddcb" + A shouldParse "dddddcbbbbbbbbbbbbb" } describe("many(some(a))") { lazy val p = many(some('a')) - p `shouldParse` "" - p `shouldParse` "a" - p `shouldParse` "aaa" - p `shouldParse` "aaaaaaaaaa" - p `shouldNotParse` "b" - p `shouldNotParse` "aaab" + p shouldParse "" + p shouldParse "a" + p shouldParse "aaa" + p shouldParse "aaaaaaaaaa" + p shouldNotParse "b" + p shouldNotParse "aaab" } describe("del(ones)") { @@ -179,21 +179,21 @@ trait LeftrecTests { lazy val rr: NT[String] = "1" ~> rr | "1" lazy val ll: NT[String] = ll <~ "1" | "1" - ll `shouldParse` ("1" `repeat` 40) - rr `shouldParse` ("1" `repeat` 41) + ll shouldParse "1".repeat(40) + rr shouldParse "1".repeat(41) } // Grammar from Tillmann Rendel's GLL library describe("very ambiguous") { lazy val A: NT[Char] = A ~> A | A ~> A ~> A | 'a' - A `shouldNotParse` "" - A `shouldParse` "a" - A `shouldParse` "aa" - A `shouldParse` "aaa" - A `shouldParse` ("a" `repeat` 100) + A shouldNotParse "" + A shouldParse "a" + A shouldParse "aa" + A shouldParse "aaa" + A shouldParse "a".repeat(100) lazy val A2: Parser[Any] = some(some('a')) - A2 `shouldParse` ("a" `repeat` 1000) + A2 shouldParse "a".repeat(1000) } describe("mixed mutual recursion") { @@ -210,13 +210,13 @@ trait LeftrecTests { lazy val arrayEl: NT[Any] = expression | succ("undefined") - expression `shouldParse` "" - expression `shouldParse` "a" - expression `shouldParse` "aaaaa" - expression `shouldParse` "[" - expression `shouldParse` "[a" - expression `shouldParse` "[aaaaa" - expression `shouldParse` "[[[[a" + 
expression shouldParse "" + expression shouldParse "a" + expression shouldParse "aaaaa" + expression shouldParse "[" + expression shouldParse "[a" + expression shouldParse "[aaaaa" + expression shouldParse "[[[[a" } describe("terms") { @@ -239,9 +239,9 @@ trait LeftrecTests { lazy val num: Parser[Num] = some(digit) ^^ (ns => Num(ns.mkString.toInt)) - num `shouldParse` "12345" - term `shouldParse` "12+31" - term `shouldParse` "12*8+31*45" + num shouldParse "12345" + term shouldParse "12+31" + term shouldParse "12*8+31*45" } // Grammar and testcases from Tillmann Rendel's GLL library. @@ -250,44 +250,44 @@ trait LeftrecTests { lazy val S: NT[Any] = many(az | ' ' | ':' | ':' ~ P | '(' ~ S ~ ')') lazy val P: NT[Any] = alt('(', ')') - S `shouldParse` "" - S `shouldNotParse` ":((" - S `shouldParse` "i am sick today (:()" - S `shouldParse` "(:)" - S `shouldParse` "hacker cup: started :):)" - S `shouldNotParse` ")(" - S `shouldNotParse` "(((a)):()a(()(((:))a((:)():(((()()a)))(:a(::)(a)))(a)((a::():(a)():)a(a(a(:aa(:()(a(((((()))))))))" - S `shouldParse` "():)((()():(:())))::aa((((:(((:)))::a:(:))()a)):(a):::((()a((a(aa(():))(():())((::a)a)):)()" - S `shouldParse` ":(a):(:)aa)a(:()::():))a:aaa:)(:)((()()))a()(((()(:)))(:(aa:()())())a((a)a:(:()))(a((():)))" - S `shouldParse` ":a:)(:))()(()()a)aaa::a()()a:()()a::)((()(a(a))))try implementing sleep sort if you are stuck:(:)a)" - S `shouldNotParse` "(a())(::)(a))():(((a(()(:))a(:)))(:(:(:((():)(a))(:))(a)():(:(()aa):)(a((())a)a((a):)()(:(" - S `shouldParse` "(::a((a)a:()):):a)aa:)a(:::))(a())aa(a():))(:)a)((():)(:a:)a))):a(a)((:()(()())a))()a((()a))" - S `shouldParse` "()(((a)((aa)))a)a()(a)(aa:a)()(((:())aa)):()():():a:(a)(a())a:)::a:(aa:):()((a:)())aa)a(a:)" - S `shouldParse` ":)()((a)):(():a:a:)(:a)):)(()(:)::::(a(::a())(a):(:((((:(aa(()))a)(((((((((()a()a):)))((:)))))))))" - S `shouldParse` "a(a)::(((::)))())((a)(:((:a())):((::(:()(a)))i am trapped in a test case generator :(:(a(:::))" - S `shouldParse` 
"((:):::(()()):)(()()():())aaa)(:(a:)a:((())a(((a(:())aa():a:)((()):)(()(:)(a())a:()a)a():(" - S `shouldNotParse` "(:a))" - S `shouldParse` "::((:))(((:)(aaa)(a())()(a:)(:)(:)()):)a())aa)())(():a):()::):)a()())a()):):(:a)a):()(a)(a)" - S `shouldParse` "()a(:)(a:a):(())):a()():((a(:):a()()::)(a:)(()a((a:)(a)a(a:a:)(a)a(a:(()()()::a()a()(()a:())))" - S `shouldParse` "()((:a(a()()a))())((:a(:a)(()a((((a((a(()(:aa()()()))):)(():):)(:(a))():(())(():()):):(()a))" - S `shouldParse` "(((((((((())))))))))" - S `shouldParse` "(((((((((((((((((((())))))))))))))))))))" - S `shouldParse` "((((((((((:))))))))))" - S `shouldParse` "((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" - S `shouldNotParse` "(((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" - S `shouldParse` "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" - S `shouldParse` "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:))))))))))" - S `shouldParse` "((((((((((((:))))))))))((((((((((:())))))))))))" - S `shouldNotParse` "(((((((((()))))))))))" - S `shouldNotParse` "(((((((((((((((((((()))))))))))))))))))))" - S `shouldParse` "((((((((((:)))))))))))" - S `shouldParse` "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:)))))))))))))))))))))))))))))))))))))))))))))))))))" - S `shouldParse` "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))" - S `shouldParse` "((((((((((((:))))))))))((((((((((:)))))))))))))" - S `shouldNotParse` "((((((((((:))))))))))))" - S `shouldNotParse` "((((((((((((:))))))))))((((((((((:)))))))))))))))" - S `shouldNotParse` "((((((((((:)))))))))) 
((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))))))))" - S `shouldNotParse` "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))))))" + S shouldParse "" + S shouldNotParse ":((" + S shouldParse "i am sick today (:()" + S shouldParse "(:)" + S shouldParse "hacker cup: started :):)" + S shouldNotParse ")(" + S shouldNotParse "(((a)):()a(()(((:))a((:)():(((()()a)))(:a(::)(a)))(a)((a::():(a)():)a(a(a(:aa(:()(a(((((()))))))))" + S shouldParse "():)((()():(:())))::aa((((:(((:)))::a:(:))()a)):(a):::((()a((a(aa(():))(():())((::a)a)):)()" + S shouldParse ":(a):(:)aa)a(:()::():))a:aaa:)(:)((()()))a()(((()(:)))(:(aa:()())())a((a)a:(:()))(a((():)))" + S shouldParse ":a:)(:))()(()()a)aaa::a()()a:()()a::)((()(a(a))))try implementing sleep sort if you are stuck:(:)a)" + S shouldNotParse "(a())(::)(a))():(((a(()(:))a(:)))(:(:(:((():)(a))(:))(a)():(:(()aa):)(a((())a)a((a):)()(:(" + S shouldParse "(::a((a)a:()):):a)aa:)a(:::))(a())aa(a():))(:)a)((():)(:a:)a))):a(a)((:()(()())a))()a((()a))" + S shouldParse "()(((a)((aa)))a)a()(a)(aa:a)()(((:())aa)):()():():a:(a)(a())a:)::a:(aa:):()((a:)())aa)a(a:)" + S shouldParse ":)()((a)):(():a:a:)(:a)):)(()(:)::::(a(::a())(a):(:((((:(aa(()))a)(((((((((()a()a):)))((:)))))))))" + S shouldParse "a(a)::(((::)))())((a)(:((:a())):((::(:()(a)))i am trapped in a test case generator :(:(a(:::))" + S shouldParse "((:):::(()()):)(()()():())aaa)(:(a:)a:((())a(((a(:())aa():a:)((()):)(()(:)(a())a:()a)a():(" + S shouldNotParse "(:a))" + S shouldParse "::((:))(((:)(aaa)(a())()(a:)(:)(:)()):)a())aa)())(():a):()::):)a()())a()):):(:a)a):()(a)(a)" + S shouldParse "()a(:)(a:a):(())):a()():((a(:):a()()::)(a:)(()a((a:)(a)a(a:a:)(a)a(a:(()()()::a()a()(()a:())))" + S shouldParse "()((:a(a()()a))())((:a(:a)(()a((((a((a(()(:aa()()()))):)(():):)(:(a))():(())(():()):):(()a))" + S shouldParse 
"(((((((((())))))))))" + S shouldParse "(((((((((((((((((((())))))))))))))))))))" + S shouldParse "((((((((((:))))))))))" + S shouldParse "((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" + S shouldNotParse "(((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" + S shouldParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" + S shouldParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:))))))))))" + S shouldParse "((((((((((((:))))))))))((((((((((:())))))))))))" + S shouldNotParse "(((((((((()))))))))))" + S shouldNotParse "(((((((((((((((((((()))))))))))))))))))))" + S shouldParse "((((((((((:)))))))))))" + S shouldParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:)))))))))))))))))))))))))))))))))))))))))))))))))))" + S shouldParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))" + S shouldParse "((((((((((((:))))))))))((((((((((:)))))))))))))" + S shouldNotParse "((((((((((:))))))))))))" + S shouldNotParse "((((((((((((:))))))))))((((((((((:)))))))))))))))" + S shouldNotParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))))))))" + S shouldNotParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))))))" } // This is grammar Γ₁ from Scott and Johnstone (2010, Sec. 5). 
@@ -297,13 +297,13 @@ trait LeftrecTests { lazy val B: NT[Any] = succ(()) | 'a' lazy val C: NT[Any] = 'b' | B ~ C ~ 'b' | 'b' ~ 'b' - S `shouldNotParse` "" - S `shouldNotParse` "aba" - S `shouldParse` "d" - S `shouldParse` "ba" - S `shouldParse` "bba" - S `shouldParse` "abba" - S `shouldParse` "aabbba" + S shouldNotParse "" + S shouldNotParse "aba" + S shouldParse "d" + S shouldParse "ba" + S shouldParse "bba" + S shouldParse "abba" + S shouldParse "aabbba" } } } diff --git a/artifact/src/test/scala/NegationTests.scala b/artifact/src/test/scala/NegationTests.scala index ba9e496..1cb92e5 100644 --- a/artifact/src/test/scala/NegationTests.scala +++ b/artifact/src/test/scala/NegationTests.scala @@ -11,50 +11,50 @@ trait NegationTests { describe("parser \"not(aa)\"") { val p = neg("aa") - p `shouldParse` "a" - p `shouldNotParse` "aa" - p `shouldParse` "aac" - p `shouldParse` "abc" + p shouldParse "a" + p shouldNotParse "aa" + p shouldParse "aac" + p shouldParse "abc" } describe("parser \"not(aa) & lower*\"") { val p = neg("aa") & many(lower) - p `shouldParse` "a" - p `shouldParse` "bc" - p `shouldParse` "ab" - p `shouldNotParse` "aa" - p `shouldParse` "abc" - p `shouldParse` "aac" - p `shouldParse` "aacdd" + p shouldParse "a" + p shouldParse "bc" + p shouldParse "ab" + p shouldNotParse "aa" + p shouldParse "abc" + p shouldParse "aac" + p shouldParse "aacdd" } describe("parser \"not(aa ~ .*) & lower*\"") { val p = neg("aa" ~ many(any)) & many(lower) - p `shouldParse` "a" - p `shouldParse` "bc" - p `shouldParse` "ab" - p `shouldNotParse` "aa" - p `shouldParse` "abc" - p `shouldNotParse` "aac" - p `shouldNotParse` "aacadasdasdasd" + p shouldParse "a" + p shouldParse "bc" + p shouldParse "ab" + p shouldNotParse "aa" + p shouldParse "abc" + p shouldNotParse "aac" + p shouldNotParse "aacadasdasdasd" } describe("parser \"not(.* ~ abc ~ .*)\"") { val p = neg(many(any) ~ "abc" ~ many(any)) - p `shouldParse` "" - p `shouldParse` "xx" - p `shouldParse` "xxabxx" - p 
`shouldNotParse` "xxabcxxx" - p `shouldNotParse` "xxabc" - p `shouldNotParse` "abcxxx" + p shouldParse "" + p shouldParse "xx" + p shouldParse "xxabxx" + p shouldNotParse "xxabcxxx" + p shouldNotParse "xxabc" + p shouldNotParse "abcxxx" } describe("parser \"not((baaa | ba) ~ aa ~ .*) & lower*\"") { val p = neg(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) - p `shouldNotParse` "baaa" - p `shouldNotParse` "baaaxx" - p `shouldParse` "" - p `shouldParse` "baba" - p `shouldParse` "baacxx" + p shouldNotParse "baaa" + p shouldNotParse "baaaxx" + p shouldParse "" + p shouldParse "baba" + p shouldParse "baacxx" } } diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index 77b0070..561f85a 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -13,7 +13,7 @@ class PythonParserTests import Lexeme._ describe("indented python parser (lexeme based)") { - indented(many(many(Id("A")) <~ NL)) `shouldParseWith` ( + indented(many(many(Id("A")) <~ NL)) shouldParseWith ( List(WS, WS, Id("A"), Id("A"), NL, WS, WS, Id("A"), NL), List(List(Id("A"), Id("A")), List(Id("A"))) ) @@ -28,22 +28,22 @@ class PythonParserTests val a = Id("A") val BS = Punct("\\") - dyck `shouldParse` List[Lexeme]("(", "(", ")", ")") - dyck `shouldNotParse` List[Lexeme]("(", "(", ")") - extDyck `shouldParse` List("(", a, "(", a, NL, a, ")", a, ")") - extDyck `shouldNotParse` List(a, "(", a, "(", a, NL, a, ")", a, ")", a) + dyck shouldParse List[Lexeme]("(", "(", ")", ")") + dyck shouldNotParse List[Lexeme]("(", "(", ")") + extDyck shouldParse List("(", a, "(", a, NL, a, ")", a, ")") + extDyck shouldNotParse List(a, "(", a, "(", a, NL, a, ")", a, ")", a) - implicitJoin(p) `shouldParse` List(a, a, a, a, a) - implicitJoin(p) `shouldNotParse` List(a, a, a, NL, a, a) - implicitJoin(p) `shouldParse` List(a, a, "(", a, NL, a, ")", a) - implicitJoin(p) `shouldNotParse` List(a, a, "(", a, NL, a, a) - 
implicitJoin(p) `shouldNotParse` List(a, a, "(", a, "(", NL, a, ")", a) - implicitJoin(p) `shouldParse` List(a, a, "(", a, "(", NL, a, ")", ")", a) - implicitJoin(p) `shouldParse` List(a, a, "(", a, "[", NL, a, "]", ")", a) - implicitJoin(p) `shouldNotParse` List(a, a, "(", a, "[", NL, a, ")", "]", a) + implicitJoin(p) shouldParse List(a, a, a, a, a) + implicitJoin(p) shouldNotParse List(a, a, a, NL, a, a) + implicitJoin(p) shouldParse List(a, a, "(", a, NL, a, ")", a) + implicitJoin(p) shouldNotParse List(a, a, "(", a, NL, a, a) + implicitJoin(p) shouldNotParse List(a, a, "(", a, "(", NL, a, ")", a) + implicitJoin(p) shouldParse List(a, a, "(", a, "(", NL, a, ")", ")", a) + implicitJoin(p) shouldParse List(a, a, "(", a, "[", NL, a, "]", ")", a) + implicitJoin(p) shouldNotParse List(a, a, "(", a, "[", NL, a, ")", "]", a) - explicitJoin(p) `shouldParse` List(a, a, a, BS, NL, a, a) - explicitJoin(p) `shouldParse` List(a, a, a, BS, NL, a, a, BS, NL, a, a) + explicitJoin(p) shouldParse List(a, a, a, BS, NL, a, a) + explicitJoin(p) shouldParse List(a, a, a, BS, NL, a, a, BS, NL, a, a) val input = List[Lexeme](a, NL, Comment("Hey!!"), a, BS, NL, a, a, "(", a, "[", a, BS, NL, a, NL, a, "]", ")", a) @@ -59,32 +59,32 @@ class PythonParserTests val collect = consumed(many(any)) - stripComments(collect) `shouldParseWith` (input, inputWithoutComments) - explicitJoin(collect) `shouldParseWith` + stripComments(collect) shouldParseWith (input, inputWithoutComments) + explicitJoin(collect) shouldParseWith (inputWithoutComments, inputWithoutExplicit) - implicitJoin(collect) `shouldParseWith` (inputWithoutExplicit, inputResult) + implicitJoin(collect) shouldParseWith (inputWithoutExplicit, inputResult) - preprocess(file_input) `shouldParse` List(a, ";", a, "=", "yield", "from", - a, "=", a, ";", NL, NL, a, ";", a, NL, EOS) + preprocess(file_input) shouldParse List(a, ";", a, "=", "yield", "from", a, + "=", a, ";", NL, NL, a, ";", a, NL, EOS) - preprocess(file_input) `shouldParse` + 
preprocess(file_input) shouldParse List(a, "=", a, ">>", a, "*", a, NL, EOS) val sampleProg = List[Lexeme]("def", WS, Id("fun"), "(", WS, a, WS, ")", ":", NL, WS, WS, a, "+=", WS, a, NL, WS, WS, a, "*=", a, NL, EOS) - parse(stripComments(collect), sampleProg) `shouldBe` List(sampleProg) - parse(explicitJoin(collect), sampleProg) `shouldBe` List(sampleProg) - parse(implicitJoin(collect), sampleProg) `shouldBe` List(sampleProg) + parse(stripComments(collect), sampleProg) shouldBe List(sampleProg) + parse(explicitJoin(collect), sampleProg) shouldBe List(sampleProg) + parse(implicitJoin(collect), sampleProg) shouldBe List(sampleProg) - preprocess(file_input) `shouldParse` sampleProg + preprocess(file_input) shouldParse sampleProg val sampleProg2 = List[Lexeme]("def", WS, Id("fun"), "(", NL, WS, a, WS, NL, ")", ":", NL, WS, WS, a, "+=", Comment("Test"), BS, NL, WS, a, NL, WS, WS, a, "*=", a, NL, EOS) - parse(preprocess(collect), sampleProg2) `shouldBe` List(sampleProg) - preprocess(file_input) `shouldParse` sampleProg2 + parse(preprocess(collect), sampleProg2) shouldBe List(sampleProg) + preprocess(file_input) shouldParse sampleProg2 // https://en.wikibooks.org/wiki/Python_Programming/Decorators // format: off @@ -105,40 +105,40 @@ class PythonParserTests ) // format: on - argument `shouldParse` List("*", Id("kwargs")) - argument `shouldParse` List("**", Id("kwargs")) - arglist `shouldParse` List("**", Id("kwargs2")) - arglist `shouldParse` List(Id("kwargs"), ",", WS, Id("kwargs")) - arglist `shouldParse` List("*", Id("kwargs"), ",", "*", Id("kwargs")) - arglist `shouldParse` List("**", Id("kwargs"), ",", "**", Id("kwargs")) - arglist `shouldParse` List("*", Id("kwargs"), ",", WS, "*", Id("kwargs")) - arglist `shouldParse` List("**", Id("kwargs"), ",", WS, "**", Id("kwargs")) - arglist `shouldParse` List("(", Id("args"), ",", WS, Id("kwargs"), ")") - arglist `shouldParse` List("(", "*", Id("args"), ",", WS, Id("kwargs"), ")") - - arglist `shouldParse` + argument 
shouldParse List("*", Id("kwargs")) + argument shouldParse List("**", Id("kwargs")) + arglist shouldParse List("**", Id("kwargs2")) + arglist shouldParse List(Id("kwargs"), ",", WS, Id("kwargs")) + arglist shouldParse List("*", Id("kwargs"), ",", "*", Id("kwargs")) + arglist shouldParse List("**", Id("kwargs"), ",", "**", Id("kwargs")) + arglist shouldParse List("*", Id("kwargs"), ",", WS, "*", Id("kwargs")) + arglist shouldParse List("**", Id("kwargs"), ",", WS, "**", Id("kwargs")) + arglist shouldParse List("(", Id("args"), ",", WS, Id("kwargs"), ")") + arglist shouldParse List("(", "*", Id("args"), ",", WS, Id("kwargs"), ")") + + arglist shouldParse List("(", "*", Id("args"), ",", WS, "*", Id("kwargs"), ")") - test `shouldParse` + test shouldParse List(Id("f"), "(", Id("args"), ",", WS, Id("kwargs"), ")") - test `shouldParse` + test shouldParse List(Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")") - test `shouldParse` List(Id("print"), "(", Str("entering function "), WS, - "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")") + test shouldParse List(Id("print"), "(", Str("entering function "), WS, "+", + WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")") // TODO is already ambiguous - // (stmt `parse` List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size `shouldBe` 1 + // (stmt `parse` List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size shouldBe 1 - // preprocess(file_input) `shouldParse` traceProg + // preprocess(file_input) shouldParse traceProg // (stmt `parse` List[Lexeme]( // "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, - // WS, WS, Id("print"), NL)).size `shouldBe` 1 + // WS, WS, Id("print"), NL)).size shouldBe 1 // format: off - stmt `shouldNotParse` List( + stmt shouldNotParse List( "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, WS, WS, WS, WS, 
Id("print"), NL, // this line is indented too far @@ -171,8 +171,8 @@ class PythonParserTests ) // format: off - preprocess(file_input) `shouldParse` traceProg2 - parse(preprocess(file_input), traceProg2).size `shouldBe` 1 + preprocess(file_input) shouldParse traceProg2 + parse(preprocess(file_input), traceProg2).size shouldBe 1 // suite should `parse` this: // format: off @@ -188,8 +188,8 @@ class PythonParserTests // println((suite `parse` dummyin) mkString "\n\n") - stmt `shouldNotParse` List(WS, WS, WS, Id("i"), NL) - atom `shouldNotParse` List(WS, WS, WS, Id("i")) + stmt shouldNotParse List(WS, WS, WS, Id("i"), NL) + atom shouldNotParse List(WS, WS, WS, Id("i")) // This is the skeleton of the python parsers (and it is unambiguous) lazy val aStmt: NT[Any] = aSimpleStmt | "def" ~> aBlock @@ -218,17 +218,17 @@ class PythonParserTests ) // format: on - aInput `shouldParse` List("def", NL, WS, WS, a, NL, WS, WS, a, NL, EOS) - aInput `shouldNotParse` List("def", NL, WS, WS, a, NL, WS, a, NL, EOS) - aInput `shouldParse` List("def", NL, WS, WS, a, NL, NL, WS, WS, a, NL, EOS) - aInput `shouldNotParse` List("def", NL, WS, WS, a, NL, NL, WS, a, NL, EOS) + aInput shouldParse List("def", NL, WS, WS, a, NL, WS, WS, a, NL, EOS) + aInput shouldNotParse List("def", NL, WS, WS, a, NL, WS, a, NL, EOS) + aInput shouldParse List("def", NL, WS, WS, a, NL, NL, WS, WS, a, NL, EOS) + aInput shouldNotParse List("def", NL, WS, WS, a, NL, NL, WS, a, NL, EOS) - indentBy(WS ~ WS)(collect) `shouldParseWith` + indentBy(WS ~ WS)(collect) shouldParseWith (List(WS, WS, a, NL), List(a, NL)) - indentBy(WS ~ WS)(collect) `shouldParseWith` + indentBy(WS ~ WS)(collect) shouldParseWith (List(WS, WS, NL, NL, WS, WS, a, NL), List(NL, NL, a, NL)) - parse(aInput, dummyin2).size `shouldBe` 1 + parse(aInput, dummyin2).size shouldBe 1 } } From 100b4495746678d0b94df03a87e1bb20cdeca0a4 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Mon, 19 Jan 2026 13:32:39 +0100 Subject: [PATCH 92/95] bump version --- 
artifact/build.sbt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/artifact/build.sbt b/artifact/build.sbt index f22061b..5d5e47f 100644 --- a/artifact/build.sbt +++ b/artifact/build.sbt @@ -1,8 +1,8 @@ name := "first-class-derivatives" -version := "1.0" +version := "2.0.0" -scalaVersion := "3.7.3" +scalaVersion := "3.7.4" scalacOptions ++= Seq("-feature", "-deprecation") From e5b30be508dc1cbfdd26444a34b2b67a86c768e3 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 28 Jan 2026 16:53:05 +0100 Subject: [PATCH 93/95] Update formatting --- artifact/src/main/scala/examples/PythonParsers.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index 3978968..8aadae7 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -310,9 +310,8 @@ trait PythonParsers extends PythonLexemes, PythonAst { "if" ␣> test ␣ (":" ␣> suite ~ spacedMany("elif" ␣> test ␣ (":" ␣> suite)) ~ spacedOpt(("else" ␣ ":") ␣> suite)) - lazy val while_stmt = "while" ␣> test ␣ (":" ␣> suite ~ spacedOpt( - ("else" ␣ ":") ␣> suite - )) + lazy val while_stmt = + "while" ␣> test ␣ (":" ␣> suite ~ spacedOpt(("else" ␣ ":") ␣> suite)) lazy val for_stmt = "for" ␣> exprlist ␣ ("in" ␣> testlist ␣ (":" ␣> suite ~ spacedOpt( ("else" ␣> ":") ␣> suite From c89a8f7d91b448a754b7b25625b548a0eb1c5a65 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 28 Jan 2026 16:54:53 +0100 Subject: [PATCH 94/95] Update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6a06f28..e909882 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ project .metals .bsp .bloop +.vagrant # Testing test.png From 833dc761bad29486a19f31f3b4627631a6755273 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Wed, 25 Feb 2026 04:43:38 +0100 Subject: [PATCH 95/95] 
Fix mis-casting of KW and Punct --- .../main/scala/examples/PythonParsers.scala | 150 +++++++++--------- .../src/test/scala/PythonParserTests.scala | 142 +++++++++-------- 2 files changed, 148 insertions(+), 144 deletions(-) diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala index 8aadae7..2889a5c 100644 --- a/artifact/src/main/scala/examples/PythonParsers.scala +++ b/artifact/src/main/scala/examples/PythonParsers.scala @@ -67,8 +67,8 @@ trait PythonLexemes { self: Parsers & DerivedOps & Syntax => type Elem = Lexeme given lex: Conversion[Elem, Parser[Elem]] = accept(_) - given kw: Conversion[Symbol, Parser[Elem]] = { kw => accept(KW(kw.name)) } - given punct: Conversion[String, Parser[Elem]] = { p => accept(Punct(p)) } + given kw: Conversion[String, Parser[Elem]] = kw => accept(KW(kw)) + given punct: Conversion[Char, Parser[Elem]] = p => accept(Punct(p.toString)) lazy val string = any >> { case s: Str => succeed(s) @@ -215,7 +215,7 @@ trait PythonParsers extends PythonLexemes, PythonAst { emptyLine.* ~> many(stmt <~ emptyLine.*) <~ EOS ^^ Program.apply lazy val decorator: Parser[Decorator] = - "@" ~> dotted_name ~ ("(" ~> optArgs <~ ")" | succeed( + '@' ~> dotted_name ~ ('(' ~> optArgs <~ ')' | succeed( Nil )) <~ NL ^^ Decorator.apply lazy val decorators: Parser[List[Decorator]] = some(decorator) @@ -226,32 +226,32 @@ trait PythonParsers extends PythonLexemes, PythonAst { lazy val async_funcdef: Parser[FuncDef] = "async" ␣> funcdef lazy val funcdef: Parser[FuncDef] = "def" ␣> (id ␣ parameters ~ spacedOpt( - "->" ␣> test - )) ␣ (":" ␣> suite) ^^ FuncDef.apply + Punct("->") ␣> test + )) ␣ (':' ␣> suite) ^^ FuncDef.apply - lazy val parameters = "(" ~> spacedOpt(typedargslist) <␣ ")" + lazy val parameters = '(' ~> spacedOpt(typedargslist) <␣ ')' // ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef] def fpdef(p: Parser[Any]): Parser[Any] = - ("*" ~ spacedOpt(p) - ~ spacedOpt("," 
␣> testdefs(p)) - ~ spacedOpt("," ␣> ("**" ␣ p)) - | "**" ␣ p) - def testdefs(p: Parser[Any]) = someSep(p ~ spacedOpt("=" ␣> test), ",") + ('*' ~ spacedOpt(p) + ~ spacedOpt(',' ␣> testdefs(p)) + ~ spacedOpt(',' ␣> (Punct("**") ␣ p)) + | Punct("**") ␣ p) + def testdefs(p: Parser[Any]) = someSep(p ~ spacedOpt('=' ␣> test), ',') lazy val typedargslist = - testdefs(tfpdef) ~ spacedOpt("," ␣> fpdef(tfpdef)) | fpdef(tfpdef) + testdefs(tfpdef) ~ spacedOpt(',' ␣> fpdef(tfpdef)) | fpdef(tfpdef) lazy val varargslist = - testdefs(vfpdef) ~ spacedOpt("," ␣> fpdef(vfpdef)) | fpdef(vfpdef) + testdefs(vfpdef) ~ spacedOpt(',' ␣> fpdef(vfpdef)) | fpdef(vfpdef) - lazy val tfpdef = id ~ spacedOpt(":" ␣> test) + lazy val tfpdef = id ~ spacedOpt(':' ␣> test) lazy val vfpdef = id // --- Statements --- lazy val stmt: NT[Any] = simple_stmt | compound_stmt lazy val simple_stmt = - listOf(small_stmt, ";") <␣ NL ^^ Simple.apply + listOf(small_stmt, ';') <␣ NL ^^ Simple.apply lazy val small_stmt = (expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt @@ -261,14 +261,15 @@ trait PythonParsers extends PythonLexemes, PythonAst { (testlist_star_expr | testlist_star_expr ␣ augassign ␣ (yield_expr | testlist) | testlist_star_expr ~ some( - spaces ~> "=" ␣> (yield_expr | testlist_star_expr) + spaces ~> '=' ␣> (yield_expr | testlist_star_expr) )) ^^ ExprStmt.apply - lazy val testlist_star_expr = listOf(test | star_expr, ",") + lazy val testlist_star_expr = listOf(test | star_expr, ',') - lazy val augassign = ("+=" | "-=" | "*=" | "@=" | "/=" | "%=" - | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" - | "//=") + lazy val augassign = (Punct("+=") | Punct("-=") | Punct("*=") | Punct( + "@=") | Punct("/=") | Punct("%=") + | Punct("&=") | Punct("|=") | Punct("^=") | Punct("<<=") | Punct( + ">>=") | Punct("**=") | Punct("//=")) lazy val del_stmt = "del" ␣> exprlist ^^ Del.apply lazy val pass_stmt = "pass" ^^^ Pass lazy val flow_stmt = @@ -284,46 +285,46 @@ trait PythonParsers extends PythonLexemes, 
PythonAst { // # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS lazy val import_from = - ("from" ~> (spacedMany("." | "...") ~ dotted_name | some( - "." | "..." + ("from" ~> (spacedMany('.' | Punct("...")) ~ dotted_name | some( + '.' | Punct("...") )) ␣ "import" - ␣> ("*" | "(" ␣> import_as_names <␣ ")" | import_as_names)) ^^ { + ␣> ('*' | '(' ␣> import_as_names <␣ ')' | import_as_names)) ^^ { case (from, names) => Import(names, Some(from)) } lazy val import_as_name = id ~ spacedOpt("as" ␣ id) lazy val dotted_as_name = dotted_name ~ spacedOpt("as" ␣ id) - lazy val import_as_names = listOf(test | import_as_name, ",") - lazy val dotted_as_names = someSep(dotted_as_name, ",") - lazy val dotted_name = someSep(id, ".") + lazy val import_as_names = listOf(test | import_as_name, ',') + lazy val dotted_as_names = someSep(dotted_as_name, ',') + lazy val dotted_name = someSep(id, '.') - lazy val global_stmt = "global" ␣> someSep(id, ",") ^^ Global.apply - lazy val nonlocal_stmt = "nonlocal" ␣> someSep(id, ",") ^^ Nonlocal.apply - lazy val assert_stmt = "assert" ␣> someSep(test, ",") ^^ Assert.apply + lazy val global_stmt = "global" ␣> someSep(id, ',') ^^ Global.apply + lazy val nonlocal_stmt = "nonlocal" ␣> someSep(id, ',') ^^ Nonlocal.apply + lazy val assert_stmt = "assert" ␣> someSep(test, ',') ^^ Assert.apply lazy val compound_stmt = if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt lazy val async_stmt = "async" ␣> (funcdef | with_stmt | for_stmt) lazy val if_stmt = - "if" ␣> test ␣ (":" ␣> suite ~ - spacedMany("elif" ␣> test ␣ (":" ␣> suite)) ~ - spacedOpt(("else" ␣ ":") ␣> suite)) + "if" ␣> test ␣ (':' ␣> suite ~ + spacedMany("elif" ␣> test ␣ (':' ␣> suite)) ~ + spacedOpt(("else" ␣ ':') ␣> suite)) lazy val while_stmt = - "while" ␣> test ␣ (":" ␣> suite ~ spacedOpt(("else" ␣ ":") ␣> suite)) + "while" ␣> test ␣ (':' ␣> suite ~ spacedOpt(("else" ␣ ':') ␣> suite)) lazy val for_stmt = - 
"for" ␣> exprlist ␣ ("in" ␣> testlist ␣ (":" ␣> suite ~ spacedOpt( - ("else" ␣> ":") ␣> suite + "for" ␣> exprlist ␣ ("in" ␣> testlist ␣ (':' ␣> suite ~ spacedOpt( + ("else" ␣> ':') ␣> suite ))) ^^ { case (exprs, (tests, (body, default))) => For(exprs, tests, body, default) } lazy val try_stmt = - ("try" ␣ ":") ␣> suite ␣ (some(except_clause ␣ (":" ␣> suite)) ~ - spacedOpt(("else" ␣ ":") ␣> suite) ~ - spacedOpt(("finally" ␣ ":") ␣> suite) - | (("finally" ␣ ":") ␣> suite)) - lazy val with_stmt = "with" ␣> someSep(with_item, ",") ␣ (":" ␣> suite) + ("try" ␣ ':') ␣> suite ␣ (some(except_clause ␣ (':' ␣> suite)) ~ + spacedOpt(("else" ␣ ':') ␣> suite) ~ + spacedOpt(("finally" ␣ ':') ␣> suite) + | (("finally" ␣ ':') ␣> suite)) + lazy val with_stmt = "with" ␣> someSep(with_item, ',') ␣ (':' ␣> suite) lazy val with_item = test ~ spacedOpt("as" ␣> expr) // # NB compile.c makes sure that the default except clause is last @@ -338,69 +339,70 @@ trait PythonParsers extends PythonLexemes, PythonAst { (or_test ~ spacedOpt("if" ␣> or_test ␣ ("else" ␣> test)) | lambdef) lazy val test_nocond: NT[Any] = or_test | lambdef_nocond - lazy val lambdef: NT[Any] = "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test) + lazy val lambdef: NT[Any] = "lambda" ~> spacedOpt(varargslist) ␣ (':' ␣> test) lazy val lambdef_nocond: NT[Any] = - "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test_nocond) + "lambda" ~> spacedOpt(varargslist) ␣ (':' ␣> test_nocond) lazy val or_test: NT[Any] = someSep(and_test, "or") lazy val and_test: NT[Any] = someSep(not_test, "and") lazy val not_test: NT[Any] = "not" ␣> not_test | comparison lazy val comparison: NT[Any] = someSep(expr, comp_op) // # <> isn't actually a valid comparison operator in Python. 
It's here for the // # sake of a __future__ import described in PEP 401 (which really works :-) - lazy val comp_op = ("<" | ">" | "==" | ">=" | "<=" | "<>" | "!=" - | "in" | "not" ␣ "in" | "is" | "is" ␣ "not") + lazy val comp_op = + Punct("<") | ">" | Punct("==") | Punct(">=") | Punct("<=") | Punct( + "<>") | Punct("!=") | "in" | "not" ␣ "in" | "is" | "is" ␣ "not" - lazy val expr: NT[Any] = binOp(xor_expr, "|", BinOp.apply) - lazy val xor_expr: NT[Any] = binOp(and_expr, "^", BinOp.apply) - lazy val and_expr: NT[Any] = binOp(shift_expr, "&", BinOp.apply) + lazy val expr: NT[Any] = binOp(xor_expr, '|', BinOp.apply) + lazy val xor_expr: NT[Any] = binOp(and_expr, '^', BinOp.apply) + lazy val and_expr: NT[Any] = binOp(shift_expr, '&', BinOp.apply) lazy val shift_expr: NT[Any] = - binOp(arith_expr, "<<" | ">>", BinOp.apply) - lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", BinOp.apply) + binOp(arith_expr, Punct("<<") | Punct(">>"), BinOp.apply) + lazy val arith_expr: NT[Any] = binOp(term, Punct("+") | "-", BinOp.apply) lazy val term: NT[Any] = - binOp(factor, "*" | "@" | "/" | "%" | "//", BinOp.apply) - lazy val factor: NT[Any] = ("+" | "-" | "~") ␣ factor | power - lazy val power: NT[Any] = atom_expr | atom_expr ␣ "**" ␣ factor + binOp(factor, Punct("*") | '@' | '/' | '%' | Punct("//"), BinOp.apply) + lazy val factor: NT[Any] = (Punct("+") | '-' | '~') ␣ factor | power + lazy val power: NT[Any] = atom_expr | atom_expr ␣ Punct("**") ␣ factor lazy val atom_expr = opt("await" ~ spaces) ~> atom ~ spacedMany(trailer) - lazy val atom = ("(" ␣> (yield_expr | testlist_comp) <␣ ")" - | "[" ~> spacedOpt(testlist_comp) <␣ "]" - | "{" ~> spacedOpt(dictorsetmaker) <␣ "}" - | id | number | some(string) | "..." 
+ lazy val atom = ('(' ␣> (yield_expr | testlist_comp) <␣ ')' + | '[' ~> spacedOpt(testlist_comp) <␣ ']' + | '{' ~> spacedOpt(dictorsetmaker) <␣ '}' + | id | number | some(string) | Punct("...") | "None" | "True" | "False") - lazy val star_expr = "*" ␣ expr + lazy val star_expr = '*' ␣ expr lazy val yield_expr = "yield" ~ spacedOpt("from" ␣ test | testlist) - lazy val testlist_comp = (listOf(test | star_expr, ",") + lazy val testlist_comp = (listOf(test | star_expr, ',') | (test | star_expr) ␣ comp_for) - lazy val trailer = ("(" ␣> optArgs <␣ ")" - | "[" ␣> subscriptlist <␣ "]" - | "." ␣> id) - lazy val subscriptlist = listOf(subscript, ",") + lazy val trailer = ('(' ␣> optArgs <␣ ')' + | '[' ␣> subscriptlist <␣ ']' + | '.' ␣> id) + lazy val subscriptlist = listOf(subscript, ',') lazy val subscript = - test | spacedOpt(test) ~ ":" ~ spacedOpt(test) ~ spacedOpt( - ":" ~> spacedOpt(test) + test | spacedOpt(test) ~ ':' ~ spacedOpt(test) ~ spacedOpt( + ':' ~> spacedOpt(test) ) - lazy val exprlist = listOf(expr | star_expr, ",") - lazy val testlist = listOf(test, ",") + lazy val exprlist = listOf(expr | star_expr, ',') + lazy val testlist = listOf(test, ',') lazy val dictorsetmaker = - ((listOf(test ␣ (":" ␣> test) | "**" ␣> expr, ",") - | (test ␣ (":" ␣> test) | "**" ␣> expr) ␣ comp_for) - | (listOf(test | star_expr, ",") + ((listOf(test ␣ (':' ␣> test) | Punct("**") ␣> expr, ',') + | (test ␣ (':' ␣> test) | Punct("**") ␣> expr) ␣ comp_for) + | (listOf(test | star_expr, ',') | (test | star_expr) ␣ comp_for)) lazy val classdef = - "class" ␣> (id ~ spacedOpt("(" ␣> optArgs <␣ ")")) ␣ (":" ␣> suite) + "class" ␣> (id ~ spacedOpt('(' ␣> optArgs <␣ ')')) ␣ (':' ␣> suite) - lazy val arglist = listOf(argument, ",") + lazy val arglist = listOf(argument, ',') lazy val optArgs = arglist | succeed(Nil) lazy val argument = (test ~ spacedOpt(comp_for) - | test ␣ "=" ␣ test - | "**" ␣ test - | "*" ␣ test) + | test ␣ '=' ␣ test + | Punct("**") ␣ test + | '*' ␣ test) lazy val 
comp_iter: NT[Any] = comp_for | comp_if lazy val comp_for = diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index 561f85a..d325005 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -20,42 +20,41 @@ class PythonParserTests } describe("implicit line joining") { + given kw: Conversion[String, Lexeme] = KW(_) + given punct: Conversion[Char, Lexeme] = p => Punct(p.toString) - given keyword: Conversion[Symbol, Lexeme] = kw => KW(kw.name) - given punctuation: Conversion[String, Lexeme] = Punct(_) - - val p = many(WS | id | "(" | ")" | "[" | "]") + val p = many(WS | id | '(' | ')' | '[' | ']') val a = Id("A") val BS = Punct("\\") - dyck shouldParse List[Lexeme]("(", "(", ")", ")") - dyck shouldNotParse List[Lexeme]("(", "(", ")") - extDyck shouldParse List("(", a, "(", a, NL, a, ")", a, ")") - extDyck shouldNotParse List(a, "(", a, "(", a, NL, a, ")", a, ")", a) + dyck shouldParse List[Lexeme]('(', '(', ')', ')') + dyck shouldNotParse List[Lexeme]('(', '(', ')') + extDyck shouldParse List('(', a, '(', a, NL, a, ')', a, ')') + extDyck shouldNotParse List(a, '(', a, '(', a, NL, a, ')', a, ')', a) implicitJoin(p) shouldParse List(a, a, a, a, a) implicitJoin(p) shouldNotParse List(a, a, a, NL, a, a) - implicitJoin(p) shouldParse List(a, a, "(", a, NL, a, ")", a) - implicitJoin(p) shouldNotParse List(a, a, "(", a, NL, a, a) - implicitJoin(p) shouldNotParse List(a, a, "(", a, "(", NL, a, ")", a) - implicitJoin(p) shouldParse List(a, a, "(", a, "(", NL, a, ")", ")", a) - implicitJoin(p) shouldParse List(a, a, "(", a, "[", NL, a, "]", ")", a) - implicitJoin(p) shouldNotParse List(a, a, "(", a, "[", NL, a, ")", "]", a) + implicitJoin(p) shouldParse List(a, a, '(', a, NL, a, ')', a) + implicitJoin(p) shouldNotParse List(a, a, '(', a, NL, a, a) + implicitJoin(p) shouldNotParse List(a, a, '(', a, '(', NL, a, ')', a) + implicitJoin(p) shouldParse List(a, a, 
'(', a, '(', NL, a, ')', ')', a) + implicitJoin(p) shouldParse List(a, a, '(', a, '[', NL, a, ']', ')', a) + implicitJoin(p) shouldNotParse List(a, a, '(', a, '[', NL, a, ')', ']', a) explicitJoin(p) shouldParse List(a, a, a, BS, NL, a, a) explicitJoin(p) shouldParse List(a, a, a, BS, NL, a, a, BS, NL, a, a) - val input = List[Lexeme](a, NL, Comment("Hey!!"), a, BS, NL, a, a, "(", a, - "[", a, BS, NL, a, NL, a, "]", ")", a) + val input = List[Lexeme](a, NL, Comment("Hey!!"), a, BS, NL, a, a, '(', a, + '[', a, BS, NL, a, NL, a, ']', ')', a) - val inputWithoutComments = List[Lexeme](a, NL, a, BS, NL, a, a, "(", a, "[", - a, BS, NL, a, NL, a, "]", ")", a) + val inputWithoutComments = List[Lexeme](a, NL, a, BS, NL, a, a, '(', a, '[', + a, BS, NL, a, NL, a, ']', ')', a) val inputWithoutExplicit = - List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, NL, a, "]", ")", a) + List[Lexeme](a, NL, a, a, a, '(', a, '[', a, a, NL, a, ']', ')', a) val inputResult = - List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, a, "]", ")", a) + List[Lexeme](a, NL, a, a, a, '(', a, '[', a, a, a, ']', ')', a) val collect = consumed(many(any)) @@ -64,14 +63,14 @@ class PythonParserTests (inputWithoutComments, inputWithoutExplicit) implicitJoin(collect) shouldParseWith (inputWithoutExplicit, inputResult) - preprocess(file_input) shouldParse List(a, ";", a, "=", "yield", "from", a, - "=", a, ";", NL, NL, a, ";", a, NL, EOS) + preprocess(file_input) shouldParse List(a, ';', a, '=', "yield", "from", a, + '=', a, ';', NL, NL, a, ';', a, NL, EOS) preprocess(file_input) shouldParse - List(a, "=", a, ">>", a, "*", a, NL, EOS) + List(a, '=', a, Punct(">>"), a, '*', a, NL, EOS) - val sampleProg = List[Lexeme]("def", WS, Id("fun"), "(", WS, a, WS, ")", - ":", NL, WS, WS, a, "+=", WS, a, NL, WS, WS, a, "*=", a, NL, EOS) + val sampleProg = List[Lexeme]("def", WS, Id("fun"), '(', WS, a, WS, ')', + ':', NL, WS, WS, a, Punct("+="), WS, a, NL, WS, WS, a, Punct("*="), a, NL, EOS) parse(stripComments(collect), 
sampleProg) shouldBe List(sampleProg) parse(explicitJoin(collect), sampleProg) shouldBe List(sampleProg) @@ -79,9 +78,9 @@ class PythonParserTests preprocess(file_input) shouldParse sampleProg - val sampleProg2 = List[Lexeme]("def", WS, Id("fun"), "(", NL, WS, a, WS, NL, - ")", ":", NL, WS, WS, a, "+=", Comment("Test"), BS, NL, WS, a, NL, WS, WS, - a, "*=", a, NL, EOS) + val sampleProg2 = List[Lexeme]("def", WS, Id("fun"), '(', NL, WS, a, WS, NL, + ')', ':', NL, WS, WS, a, Punct("+="), Comment("Test"), BS, NL, WS, a, NL, WS, WS, + a, Punct("*="), a, NL, EOS) parse(preprocess(collect), sampleProg2) shouldBe List(sampleProg) preprocess(file_input) shouldParse sampleProg2 @@ -91,56 +90,59 @@ class PythonParserTests val traceProg = List[Lexeme]( Comment("define the Trace class that will be "), NL, Comment("invoked using decorators"), NL, - "class", WS, Id("Trace"), "(", Id("object"), ")", ":", NL, - WS, WS, WS, WS, "def", WS, Id("__init__"), "(", Id("self"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, - WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, + "class", WS, Id("Trace"), '(', Id("object"), ')', ':', 
NL, + WS, WS, WS, WS, "def", WS, Id("__init__"), '(', Id("self"), ')', ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), '.', Id("f"), WS, '=', WS, Id("f"), NL, + WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, "def", WS, Id("__call__"), '(', Id("self"), WS, ',', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), '(', Str("entering function "), WS, '+', WS, Id("self"), '.', Id("f"), '.', Id("__name__"), ')', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), '=', Num("0"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), '(', Str("arg {0}: {1}"), '.', Id("format"), '(', Id("i"), ',', Id("arg"), ')', ')', NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), '=', Id("i"), '+', Num("1"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), '.', Id("f"), '(', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', NL, EOS ) // format: on - argument shouldParse List("*", Id("kwargs")) - argument shouldParse List("**", Id("kwargs")) - arglist shouldParse List("**", Id("kwargs2")) - arglist shouldParse List(Id("kwargs"), ",", WS, Id("kwargs")) - arglist shouldParse List("*", Id("kwargs"), ",", "*", Id("kwargs")) - arglist shouldParse List("**", Id("kwargs"), ",", "**", Id("kwargs")) - arglist shouldParse List("*", Id("kwargs"), ",", WS, "*", Id("kwargs")) - arglist shouldParse List("**", Id("kwargs"), ",", WS, "**", Id("kwargs")) - arglist shouldParse List("(", Id("args"), ",", WS, Id("kwargs"), ")") - arglist shouldParse List("(", "*", Id("args"), ",", WS, Id("kwargs"), ")") + argument shouldParse List('*', Id("kwargs")) + argument shouldParse List(Punct("**"), Id("kwargs")) + arglist shouldParse List(Punct("**"), Id("kwargs2")) + arglist shouldParse List(Id("kwargs"), ',', WS, Id("kwargs")) + arglist shouldParse List('*', Id("kwargs"), ',', '*', 
Id("kwargs")) + arglist shouldParse List(Punct("**"), Id("kwargs"), ',', Punct("**"), + Id("kwargs")) + arglist shouldParse List('*', Id("kwargs"), ',', WS, '*', Id("kwargs")) + arglist shouldParse List(Punct("**"), Id("kwargs"), ',', WS, Punct("**"), + Id("kwargs")) + arglist shouldParse List('(', Id("args"), ',', WS, Id("kwargs"), ')') + arglist shouldParse List('(', '*', Id("args"), ',', WS, Id("kwargs"), ')') arglist shouldParse - List("(", "*", Id("args"), ",", WS, "*", Id("kwargs"), ")") + List('(', '*', Id("args"), ',', WS, '*', Id("kwargs"), ')') test shouldParse - List(Id("f"), "(", Id("args"), ",", WS, Id("kwargs"), ")") + List(Id("f"), '(', Id("args"), ',', WS, Id("kwargs"), ')') test shouldParse - List(Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")") + List(Id("f"), '(', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), + ')') - test shouldParse List(Id("print"), "(", Str("entering function "), WS, "+", - WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")") + test shouldParse List(Id("print"), '(', Str("entering function "), WS, '+', + WS, Id("self"), '.', Id("f"), '.', Id("__name__"), ')') // TODO is already ambiguous - // (stmt `parse` List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size shouldBe 1 + // (stmt `parse` List[Lexeme](Id("self"), '.', Id("f"), WS, '=', WS, Id("f"), NL)).size shouldBe 1 // preprocess(file_input) shouldParse traceProg // (stmt `parse` List[Lexeme]( - // "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + // "for", WS, Id("arg"), WS, "in", WS, Id("args"), ':', NL, // WS, WS, Id("print"), NL)).size shouldBe 1 // format: off stmt shouldNotParse List( - "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + "def", WS, Id("__call__"), '(', Id("self"), WS, ',', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', ':', NL, + WS, WS, "for", WS, Id("arg"), 
WS, "in", WS, Id("args"), ':', NL, WS, WS, WS, WS, Id("print"), NL, // this line is indented too far WS, WS, WS, WS, WS, WS, Id("print"), NL ) @@ -151,22 +153,22 @@ class PythonParserTests val traceProg2 = List[Lexeme]( Comment("define the Trace class that will be "), NL, Comment("invoked using decorators"), NL, - "class", WS, Id("Trace"), "(", Id("object"), ")", ":", NL, WS, - WS, WS, WS, "def", WS, Id("__init__"), "(", Id("self"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, + "class", WS, Id("Trace"), '(', Id("object"), ')', ':', NL, WS, + WS, WS, WS, "def", WS, Id("__init__"), '(', Id("self"), ')', ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), '.', Id("f"), WS, '=', WS, Id("f"), NL, NL, - WS, WS, WS, WS, "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), - ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, + WS, WS, WS, WS, "def", WS, Id("__call__"), '(', Id("self"), WS, ',', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), '(', Str("entering function "), WS, '+', WS, Id("self"), '.', Id("f"), '.', Id("__name__"), + ')', NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), '=', Num("0"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ':', NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), '(', Str("arg {0}: {1}"), '.', Id("format"), '(', Id("i"), ',', Id("arg"), 
')', ')', NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), '=', Id("i"), '+', Num("1"), NL, WS, WS, NL, NL, NL, NL, - WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), '.', Id("f"), '(', '*', Id("args"), ',', WS, Punct("**"), Id("kwargs"), ')', NL, EOS ) // format: off @@ -178,8 +180,8 @@ class PythonParserTests // format: off val dummyin = List( NL, - WS, "def", WS, Id("f"), "(", ")", ":", NL, - WS, WS, "def", WS, Id("f"), "(", ")", ":", NL, + WS, "def", WS, Id("f"), '(', ')', ':', NL, + WS, WS, "def", WS, Id("f"), '(', ')', ':', NL, WS, WS, WS, Id("print"), NL, WS, WS, WS, Id("print"), NL, WS, WS, WS, Id("i"), NL