-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce an abstraction for handling stateful traversal of the tree (#…
…1018) While still being able to accumulate errors, produce partial results, etc. The base principle is to build a *representation* of the computation (parsing, visiting, optimizing, generating, etc) that will eventually get `run`. This representation keeps track of a state (of arbitrary type if we wanted to, but we practically stick to `RemorphContext` for now) along with the result it is producing. This way, we can interleave any step of the computation with updates to this state, like counting the statements processed thus far (for any definition of what a statement is, including arbitrarily nested subqueries), the branch of the IR tree currently being processed, etc. Similarly, at any point of the computation, we can inspect the current state, to guide said computation (by customizing error messages with contextual information for example).
- Loading branch information
Showing
31 changed files
with
797 additions
and
741 deletions.
There are no files selected for viewing
23 changes: 23 additions & 0 deletions
23
core/src/main/scala/com/databricks/labs/remorph/Phase.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package com.databricks.labs.remorph | ||
|
||
import com.databricks.labs.remorph.intermediate.{LogicalPlan, TreeNode} | ||
import org.antlr.v4.runtime.ParserRuleContext | ||
|
||
sealed trait Phase | ||
|
||
case object Init extends Phase | ||
|
||
case class SourceCode(source: String, filename: String = "-- test source --") extends Phase | ||
|
||
case class Parsed(tree: ParserRuleContext, sources: Option[SourceCode] = None) extends Phase | ||
|
||
case class Ast(unoptimizedPlan: LogicalPlan, parsed: Option[Parsed] = None) extends Phase | ||
|
||
case class Optimized(optimizedPlan: TreeNode[_], ast: Option[Ast] = None) extends Phase | ||
|
||
case class Generating( | ||
currentNode: TreeNode[_], | ||
totalStatements: Int = 0, | ||
transpiledStatements: Int = 0, | ||
optimized: Option[Optimized] = None) | ||
extends Phase |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 19 additions & 5 deletions
24
core/src/main/scala/com/databricks/labs/remorph/generators/Generator.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,24 @@ | ||
package com.databricks.labs.remorph.generators | ||
|
||
import com.databricks.labs.remorph.{KoResult, Result, WorkflowStage} | ||
import com.databricks.labs.remorph.{Phase, Transformation, TransformationConstructors, WorkflowStage} | ||
import com.databricks.labs.remorph.intermediate.{TreeNode, UnexpectedNode} | ||
|
||
trait Generator[In <: TreeNode[In], Out] { | ||
def generate(ctx: GeneratorContext, tree: In): Result[Out] | ||
def unknown(tree: In): Result[Out] = | ||
KoResult(WorkflowStage.GENERATE, UnexpectedNode(tree.getClass.getSimpleName)) | ||
trait Generator[In <: TreeNode[In], Out] extends TransformationConstructors[Phase] { | ||
def generate(ctx: GeneratorContext, tree: In): Transformation[Phase, Out] | ||
def unknown(tree: In): Transformation[Phase, Nothing] = | ||
ko(WorkflowStage.GENERATE, UnexpectedNode(tree.getClass.getSimpleName)) | ||
} | ||
|
||
trait CodeGenerator[In <: TreeNode[In]] extends Generator[In, String] { | ||
|
||
private def generateAndJoin( | ||
ctx: GeneratorContext, | ||
trees: Seq[In], | ||
separator: String): Transformation[Phase, String] = { | ||
trees.map(generate(ctx, _)).sequence.map(_.mkString(separator)) | ||
} | ||
|
||
def commas(ctx: GeneratorContext, trees: Seq[In]): Transformation[Phase, String] = generateAndJoin(ctx, trees, ", ") | ||
def spaces(ctx: GeneratorContext, trees: Seq[In]): Transformation[Phase, String] = generateAndJoin(ctx, trees, " ") | ||
|
||
} |
68 changes: 68 additions & 0 deletions
68
core/src/main/scala/com/databricks/labs/remorph/generators/package.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
package com.databricks.labs.remorph | ||
|
||
import com.databricks.labs.remorph.intermediate.UncaughtException | ||
|
||
import scala.util.control.NonFatal | ||
|
||
package object generators { | ||
|
||
/**
 * Adds the `code"..."` string interpolator, which assembles a `Transformation[Phase, String]` from literal
 * parts and interpolated arguments.
 */
implicit class TBAInterpolator(sc: StringContext) extends TransformationConstructors[Phase] {

  /**
   * Builds a Transformation that yields the interpolated string.
   *
   * Arguments that are already a `Transformation` are used as-is (unchecked cast to
   * `Transformation[Phase, String]`, since type arguments are erased at runtime); any other argument is
   * turned into a successful Transformation of its `toString`. The arguments are sequenced in order, then
   * interleaved with the literal parts. Both the parts and the argument values go through
   * `StringContext.treatEscapes`.
   *
   * @param args the interpolated values, either Transformations or arbitrary values
   * @return a Transformation producing the assembled string, or a `KoResult` for
   *         `WorkflowStage.GENERATE` wrapping the exception if assembling the string fails
   */
  def code(args: Any*): Transformation[Phase, String] = {
    args
      .map {
        case tba: Transformation[_, _] => tba.asInstanceOf[Transformation[Phase, String]]
        case x => ok(x.toString)
      }
      .sequence
      .flatMap { renderedArgs =>
        // The failure is returned as a value through flatMap rather than with `return` inside the
        // lambda: a non-local return is implemented by throwing a control exception that is only
        // caught while `code` is still on the call stack, which is not guaranteed when this function
        // is invoked later, as part of running the Transformation.
        val rendered: Transformation[Phase, String] =
          try {
            val stringParts = sc.parts.iterator
            val sb = new StringBuilder(StringContext.treatEscapes(stringParts.next()))
            renderedArgs.foreach { arg =>
              sb.append(StringContext.treatEscapes(arg))
              sb.append(StringContext.treatEscapes(stringParts.next()))
            }
            ok(sb.toString())
          } catch {
            // Covers invalid escape sequences and a parts/arguments count mismatch.
            case NonFatal(e) => lift(KoResult(WorkflowStage.GENERATE, UncaughtException(e)))
          }
        rendered
      }
  }
}
|
||
implicit class TBAOps(sql: Transformation[Phase, String]) { | ||
def nonEmpty: Transformation[Phase, Boolean] = sql.map(_.nonEmpty) | ||
def isEmpty: Transformation[Phase, Boolean] = sql.map(_.isEmpty) | ||
} | ||
|
||
implicit class TBASeqOps(tbas: Seq[Transformation[Phase, String]]) extends TransformationConstructors[Phase] { | ||
|
||
def mkCode: Transformation[Phase, String] = mkCode("", "", "") | ||
|
||
def mkCode(sep: String): Transformation[Phase, String] = mkCode("", sep, "") | ||
|
||
def mkCode(start: String, sep: String, end: String): Transformation[Phase, String] = { | ||
tbas.sequence.map(_.mkString(start, sep, end)) | ||
} | ||
|
||
/** | ||
* Combine multiple Transformation[Phase, String] into a Transformation[Phase, Seq[String]]. | ||
* The resulting Transformation will run each individual Transformation in sequence, accumulating all the effects | ||
* along the way. | ||
* | ||
* For example, when a Transformation in the input Seq modifies the state, Transformations that come after it in the input | ||
* Seq will see the modified state. | ||
*/ | ||
def sequence: Transformation[Phase, Seq[String]] = | ||
tbas.foldLeft(ok(Seq.empty[String])) { case (agg, item) => | ||
for { | ||
aggSeq <- agg | ||
i <- item | ||
} yield aggSeq :+ i | ||
} | ||
} | ||
} |
9 changes: 4 additions & 5 deletions
9
core/src/main/scala/com/databricks/labs/remorph/generators/py/BasePythonGenerator.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,13 @@ | ||
package com.databricks.labs.remorph.generators.py | ||
|
||
import com.databricks.labs.remorph.PartialResult | ||
import com.databricks.labs.remorph.generators.{Generator, GeneratorContext} | ||
import com.databricks.labs.remorph.generators._ | ||
import com.databricks.labs.remorph.intermediate.{RemorphError, TreeNode, UnexpectedNode} | ||
|
||
abstract class BasePythonGenerator[In <: TreeNode[In]] extends Generator[In, String] { | ||
def commas(ctx: GeneratorContext, nodes: Seq[In]): Python = nodes.map(generate(ctx, _)).mkPython(", ") | ||
abstract class BasePythonGenerator[In <: TreeNode[In]] extends CodeGenerator[In] { | ||
|
||
def partialResult(tree: In): Python = partialResult(tree, UnexpectedNode(tree.toString)) | ||
def partialResult(trees: Seq[Any], err: RemorphError): Python = | ||
PartialResult(s"# FIXME: ${trees.mkString(" | ")} !!!", err) | ||
def partialResult(tree: Any, err: RemorphError): Python = PartialResult(s"# FIXME: $tree !!!", err) | ||
lift(PartialResult(s"# FIXME: ${trees.mkString(" | ")} !!!", err)) | ||
def partialResult(tree: Any, err: RemorphError): Python = lift(PartialResult(s"# FIXME: $tree !!!", err)) | ||
} |
Oops, something went wrong.