Browse Source

Working on the compiler

Cesar Rodas 9 months ago
parent
commit
1368191040

+ 24 - 0
Cargo.lock

@@ -598,6 +598,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
 
 [[package]]
+name = "convert_case"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
+
+[[package]]
 name = "core-foundation"
 version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -763,8 +769,10 @@ version = "0.99.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce"
 dependencies = [
+ "convert_case",
  "proc-macro2",
  "quote",
+ "rustc_version",
  "syn 2.0.71",
 ]
 
@@ -2211,6 +2219,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
 
 [[package]]
+name = "rustc_version"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
+dependencies = [
+ "semver",
+]
+
+[[package]]
 name = "rustix"
 version = "0.37.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2309,6 +2326,12 @@ dependencies = [
 ]
 
 [[package]]
+name = "semver"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
+
+[[package]]
 name = "serde"
 version = "1.0.204"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -3163,6 +3186,7 @@ dependencies = [
  "borsh",
  "chrono",
  "cucumber",
+ "derive_more",
  "futures",
  "hmac",
  "num",

+ 7 - 121
utxo/src/filter_expr/program.rs → utxo/src/filter_expr/compiler.rs

@@ -1,29 +1,10 @@
-use std::collections::HashMap;
-
 use super::{
     expr::{Expr, ExprOp},
     opcode::OpCode,
-    parser::parse_query,
-    runtime::execute,
     value::Value,
     Addr, Error, Register,
 };
-use crate::Transaction;
-
-#[derive(Debug, Clone)]
-pub struct Program {
-    /// Debug op-codes, with the unresolved labels.
-    dbg_opcodes: Vec<OpCode>,
-    /// The list of opcodes that make up the program
-    opcodes: Vec<OpCode>,
-    /// If the program has some boilerplate that can be skipped. This is non-zero when the initial
-    /// program has been executed at compile time, and the register has been populated to
-    /// `initial_register`. Everytime a new Runtime is created, instead of executing the program,
-    /// it can safely start-off a different Address, and clone the initial_register.
-    start_at: Addr,
-    /// The state of the register
-    initial_register: Vec<Value>,
-}
+use std::collections::HashMap;
 
 pub struct Compiler<'a> {
     expr: &'a Expr,
@@ -58,7 +39,7 @@ impl<'a> Compiler<'a> {
         let mut return_value = self.next_register();
         Ok(match expr {
             Expr::Variable(name) => (
-                vec![OpCode::LOAD_EXTERNAL(return_value, name.into())],
+                vec![OpCode::LOAD_EXTERNAL(return_value, name.clone())],
                 return_value,
             ),
             Expr::String(string) => (
@@ -89,7 +70,6 @@ impl<'a> Compiler<'a> {
                         return_value = term_return_value;
                     }
                     ExprOp::Eq => {
-                        let cmp = self.next_register();
                         let last_value = self.next_register();
                         let mut iter = compiled_terms.into_iter();
                         let (mut term_opcodes, return_from_expr) =
@@ -100,8 +80,8 @@ impl<'a> Compiler<'a> {
 
                         for (mut term_opcodes, term_return) in iter {
                             opcodes.append(&mut term_opcodes);
-                            opcodes.push(OpCode::EQ(cmp, last_value, term_return));
-                            opcodes.push(OpCode::JNE(cmp, exit_label));
+                            opcodes.push(OpCode::EQ(return_value, last_value, term_return));
+                            opcodes.push(OpCode::JNE(return_value, exit_label));
                             opcodes.push(OpCode::MOV(last_value, term_return));
                         }
 
@@ -147,7 +127,7 @@ impl<'a> Compiler<'a> {
         })
     }
 
-    fn resolve_label_to_addr(opcodes: Vec<OpCode>) -> Result<Vec<OpCode>, Error> {
+    pub fn resolve_label_to_addr(opcodes: Vec<OpCode>) -> Result<Vec<OpCode>, Error> {
         let mut pos = 0;
         let used_labels = opcodes
             .iter()
@@ -168,6 +148,7 @@ impl<'a> Compiler<'a> {
             .into_iter()
             .filter(|opcode| !matches!(opcode, OpCode::LABEL(_)))
             .map(|opcode| {
+                // Rewrite jump opcodes so they target resolved addresses instead of labels
                 Ok(match opcode {
                     OpCode::JMP(label) => {
                         OpCode::JMP(*used_labels.get(&label).ok_or(Error::UnknownLabel(*label))?)
@@ -180,7 +161,7 @@ impl<'a> Compiler<'a> {
                         register,
                         *used_labels.get(&label).ok_or(Error::UnknownLabel(*label))?,
                     ),
-                    x => x,
+                    opcode => opcode,
                 })
             })
             .collect()
@@ -194,98 +175,3 @@ impl<'a> Compiler<'a> {
         Ok(opcodes)
     }
 }
-
-impl Program {
-    pub fn new(code: &str) -> Result<Self, Error> {
-        let ast = parse_query(code)?;
-
-        let opcodes = ast.where_clause.map_or_else(
-            || {
-                Ok(vec![
-                    OpCode::LOAD(0.into(), true.into()),
-                    OpCode::HLT(0.into()),
-                ])
-            },
-            |expr| Compiler::new(&expr).compile(),
-        )?;
-
-        Ok(Self {
-            dbg_opcodes: opcodes.clone(),
-            opcodes: Compiler::resolve_label_to_addr(opcodes)?,
-            start_at: 0.into(),
-            initial_register: vec![],
-        })
-    }
-
-    /// Returns a human readable version of the compiled program (generated op-codes)
-    pub fn debug(&self) -> String {
-        self.dbg_opcodes
-            .iter()
-            .map(|x| match x {
-                OpCode::HLT(_) | OpCode::LABEL(_) => x.to_string(),
-                x => format!("\t{}", x.to_string()),
-            })
-            .collect::<Vec<_>>()
-            .join("\n")
-    }
-
-    pub fn dump(&self) -> String {
-        self.opcodes
-            .iter()
-            .enumerate()
-            .map(|(pos, opcode)| format!("{}: {}", pos, opcode.to_string()))
-            .collect::<Vec<_>>()
-            .join("\n")
-    }
-
-    /// Returns the opcodes of the program expression and the usize of the register where the result
-    /// Creates a program tailored for a transaction
-    pub fn execute(&self, transaction: Option<&Transaction>) -> Result<Value, Error> {
-        execute(
-            transaction,
-            &self.opcodes,
-            self.initial_register.clone(),
-            self.start_at,
-        )
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::Program;
-    use crate::filter_expr::{opcode::OpCode, value::Value};
-
-    #[test]
-    fn parse() {
-        let x = Program::new(
-            r#"
-            WHERE
-                $foo = 3
-                AND $bar = "bar"
-                AND ($a = $b OR $b = $c)
-                AND $foo = 1 + 2 + 3 +  ((1+ 2-3*4-5) * $bar.tx.lol)
-            ORDER BY $bar DESC
-        "#,
-        )
-        .unwrap();
-        panic!("{}\n\n{}", x.debug(), x.dump());
-    }
-
-    #[test]
-    fn simple_program() {
-        let program = Program {
-            dbg_opcodes: vec![],
-            opcodes: vec![
-                OpCode::LOAD(1.into(), 12.into()),
-                OpCode::LOAD(2.into(), 13.into()),
-                OpCode::ADD(3.into(), 1.into(), 2.into()),
-                OpCode::ADD(4.into(), 0.into(), 3.into()),
-                OpCode::HLT(4.into()),
-            ],
-            start_at: 0.into(),
-            initial_register: vec![15.into()],
-        };
-        let x = program.execute(None).expect("valid execution");
-        assert_eq!(x, 40.into());
-    }
-}

+ 109 - 0
utxo/src/filter_expr/expr.rs

@@ -0,0 +1,109 @@
+use super::Error;
+use std::{ops::Deref, str::FromStr};
+
+#[derive(Clone, PartialEq, Eq, Debug, Copy)]
+/// Expression operators
+///
+/// Logical, comparison and arithmetic operators used by the expression language.
+pub enum ExprOp {
+    And,
+    Or,
+    Eq,
+    NotEq,
+    Gt,
+    Gte,
+    Not,
+    Lt,
+    Lte,
+    Add,
+    Sub,
+    Mul,
+    Div,
+}
+
+impl From<&str> for ExprOp {
+    fn from(s: &str) -> Self {
+        match s.to_lowercase().as_str() {
+            "+" => ExprOp::Add,
+            "-" => ExprOp::Sub,
+            "*" => ExprOp::Mul,
+            "!" => ExprOp::Not,
+            "/" => ExprOp::Div,
+            "&&" | "and" => ExprOp::And,
+            "||" | "or" => ExprOp::Or,
+            "==" | "=" => ExprOp::Eq,
+            "!=" => ExprOp::NotEq,
+            ">" => ExprOp::Gt,
+            ">=" => ExprOp::Gte,
+            "<" => ExprOp::Lt,
+            "<=" => ExprOp::Lte,
+            x => panic!("{} is not a valid Operation", x),
+        }
+    }
+}
+
+#[derive(Clone, PartialEq, Eq, Debug, Copy)]
+/// Order
+///
+/// The order is used to sort the results of a query.
+pub enum Order {
+    Ascending,
+    Descending,
+}
+
+impl FromStr for Order {
+    type Err = Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.to_lowercase().as_str() {
+            "asc" => Ok(Order::Ascending),
+            "desc" => Ok(Order::Descending),
+            s => Err(Error::InvalidOrder(s.to_owned())),
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+/// Expression
+///
+/// This is the main expression type that is used to represent the AST of the expression.
+///
+/// It can be a simple variable, a string, a number, or an operation (unary, binary or n-ary)
+/// applied to a list of terms.
+pub enum Expr {
+    Op { op: ExprOp, terms: Vec<Box<Expr>> },
+    Variable(Variable),
+    String(String),
+    Number(i128),
+}
+
+#[derive(Clone, Debug, PartialEq, Hash, PartialOrd, Eq)]
+/// Variable
+///
+/// A variable is a list of strings that represent a path to a value in the data.
+pub struct Variable(Vec<String>);
+
+impl Deref for Variable {
+    type Target = [String];
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl From<Vec<String>> for Variable {
+    fn from(v: Vec<String>) -> Self {
+        Variable(v)
+    }
+}
+
+impl From<&str> for Variable {
+    fn from(s: &str) -> Self {
+        Variable(vec![s.to_owned()])
+    }
+}
+
+impl Variable {
+    pub fn add_part(&mut self, part: String) {
+        self.0.push(part);
+    }
+}

+ 9 - 10
utxo/src/filter_expr/mod.rs

@@ -1,14 +1,19 @@
 //! Expression module
 
 #![allow(warnings)]
+use parser::Rule;
+use std::num::ParseIntError;
 
+mod compiler;
 mod expr;
+mod filter;
 mod opcode;
 mod parser;
-mod program;
 mod runtime;
 mod value;
 
+pub use self::{filter::Filter, value::Value};
+
 #[derive(
     Debug,
     Clone,
@@ -51,6 +56,9 @@ impl Addr {
 
 #[derive(thiserror::Error, Debug)]
 pub enum Error {
+    #[error("Variable not found: {0}")]
+    VariableNotFound(String),
+
     #[error("Program is out of bound")]
     OutOfBoundaries,
 
@@ -87,12 +95,3 @@ pub enum Error {
     #[error("Unexpected expression state")]
     UnexpectedExprState,
 }
-
-use std::{
-    num::ParseIntError,
-    ops::{Deref, DerefMut},
-};
-
-use parser::Rule;
-
-pub use self::program::Program;

+ 2 - 2
utxo/src/filter_expr/opcode.rs

@@ -1,4 +1,4 @@
-use super::{value::Value, Addr, Register};
+use super::{expr::Variable, value::Value, Addr, Register};
 
 #[derive(Clone, Debug, PartialEq)]
 /// OpCode for a register based virtual machine
@@ -15,7 +15,7 @@ pub enum OpCode {
     /// LOAD <destination> <value>
     /// Load the <value> variable name from an external source which is unknown at compile time.
     /// The value is stored in the given register
-    LOAD_EXTERNAL(Register, Value),
+    LOAD_EXTERNAL(Register, Variable),
 
     /// CPY <destination> <source>
     /// Copy the value from the source register to the destination register.

+ 1 - 1
utxo/src/filter_expr/parser.rs

@@ -10,7 +10,7 @@ use std::str::FromStr;
 #[grammar = "src/filter_expr/expr.pest"] // relative path to your .pest file
 struct QueryParser;
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Query {
     pub where_clause: Option<Expr>,
     pub limit_clause: Option<i128>,

+ 33 - 57
utxo/src/filter_expr/runtime.rs

@@ -1,4 +1,9 @@
-use super::{opcode::OpCode, value::Value, Addr, Error, Register};
+use super::{
+    expr::Variable,
+    opcode::OpCode,
+    value::{Value, ValueOrRef},
+    Addr, Error, Register,
+};
 use crate::Transaction;
 use num::CheckedAdd;
 use std::{
@@ -6,57 +11,6 @@ use std::{
     ops::Deref,
 };
 
-#[derive(Debug, PartialEq, PartialOrd)]
-/// Value or reference to a value.
-///
-/// A reference to a value is being used to avoid cloning from the source code to the registers,
-/// instead to just use a readonly reference
-enum ValueOrRef<'a> {
-    Ref(&'a Value),
-    Value(Value),
-}
-
-impl<'a> Clone for ValueOrRef<'a> {
-    fn clone(&self) -> Self {
-        match self {
-            ValueOrRef::Ref(value) => ValueOrRef::Value((*value).clone()),
-            ValueOrRef::Value(value) => ValueOrRef::Value(value.clone()),
-        }
-    }
-}
-
-impl<'a> Into<Value> for ValueOrRef<'a> {
-    fn into(self) -> Value {
-        match self {
-            ValueOrRef::Ref(value) => value.clone(),
-            ValueOrRef::Value(value) => value,
-        }
-    }
-}
-
-impl Deref for ValueOrRef<'_> {
-    type Target = Value;
-
-    fn deref(&self) -> &Self::Target {
-        match self {
-            ValueOrRef::Ref(value) => *value,
-            ValueOrRef::Value(value) => &value,
-        }
-    }
-}
-
-impl<'a> From<&'a Value> for ValueOrRef<'a> {
-    fn from(value: &'a Value) -> Self {
-        ValueOrRef::Ref(value)
-    }
-}
-
-impl<'a> From<Value> for ValueOrRef<'a> {
-    fn from(value: Value) -> Self {
-        ValueOrRef::Value(value)
-    }
-}
-
 macro_rules! get {
     ($r:expr,$pos:expr) => {
         ($r.get($pos)
@@ -82,9 +36,9 @@ macro_rules! set {
 }
 
 #[inline]
-pub fn execute(
-    transaction: Option<&Transaction>,
-    code: &[OpCode],
+pub fn execute<'a>(
+    external_variables: &'a HashMap<Variable, ValueOrRef<'a>>,
+    code: &'a [OpCode],
     initial_registers: Vec<Value>,
     start_at: Addr,
 ) -> Result<Value, Error> {
@@ -97,6 +51,13 @@ pub fn execute(
 
     loop {
         match code.get(*execution).ok_or(Error::OutOfBoundaries)? {
+            OpCode::LOAD_EXTERNAL(dst, name) => {
+                let value = external_variables
+                    .get(name)
+                    .ok_or(Error::VariableNotFound((*name).join(".")))?;
+
+                set!(registers, dst, value.clone());
+            }
             OpCode::LOAD(dst, ref val) => set!(registers, dst, val.into()),
             OpCode::CPY(dst, reg2) => {
                 let value = get!(registers, reg2).clone();
@@ -120,6 +81,21 @@ pub fn execute(
                 .into();
                 set!(registers, dst, new_value);
             }
+            OpCode::EQ(dst, reg1, reg2) => {
+                let new_value = Value::Bool(get!(registers, reg1) == get!(registers, reg2)).into();
+                set!(registers, dst, new_value);
+            }
+            OpCode::MUL(dst, reg1, reg2) => {
+                let num1 = get!(registers, reg1).as_number()?;
+                let num2 = get!(registers, reg2).as_number()?;
+                set!(
+                    registers,
+                    dst,
+                    num1.checked_mul(num2)
+                        .map(|number| Value::Number(number).into())
+                        .ok_or(Error::Overflow)?
+                );
+            }
             OpCode::OR(dst1, reg2, reg3) => {
                 todo!()
             }
@@ -150,9 +126,9 @@ pub fn execute(
                 return registers
                     .remove(return_register)
                     .map(|x| x.into())
-                    .ok_or(Error::EmptyRegisters)
+                    .ok_or(Error::EmptyRegisters);
             }
-            _ => todo!(),
+            x => println!("{:?} is not implemented", x),
         }
 
         execution.next();

+ 77 - 7
utxo/src/filter_expr/value.rs

@@ -1,37 +1,56 @@
 use crate::{
-    filter_expr::Error, payment::PaymentTo, AccountId, Amount, Asset, PaymentFrom, RevId, Status,
-    Tag, TxId, Type,
+    filter_expr::{
+        expr::{Expr, Variable},
+        Error,
+    },
+    payment::PaymentTo,
+    AccountId, Amount, Asset, PaymentFrom, RevId, Status, Tag, TxId, Type,
 };
 use chrono::{DateTime, Utc};
 use num::CheckedAdd;
-use std::ops::Add;
-
-use super::expr::{Expr, Variable};
+use std::ops::{Add, Deref};
 
 #[derive(Clone, Debug, PartialEq, PartialOrd)]
 pub enum Value {
+    /// Not a value
     Nil,
+    /// Amount
     Amount(Amount),
+    /// Asset
     Asset(Asset),
+    /// Account ID
     AccountId(AccountId),
+    /// Transaction ID
     TxId(TxId),
+    /// Type
     Type(Type),
+    /// Revision Id
     RevId(RevId),
+    /// Status
     Status(Status),
+    /// Tag
     Tag(Tag),
+    /// Datetime
     Date(DateTime<Utc>),
+    /// String
     String(String),
+    /// Integer number
     Number(i128),
+    /// PaymentFrom
     From(Vec<PaymentFrom>),
+    /// Payment Recipient
     To(Vec<PaymentTo>),
+    /// Tags
     Tags(Vec<Tag>),
+    /// Bool values
     Bool(bool),
-    Variable(Vec<String>),
+    /// A dynamic variable name
+    Variable(Variable),
 }
 
 impl From<&Variable> for Value {
     fn from(value: &Variable) -> Self {
-        Self::Variable(value.to_vec())
+        Self::Variable(value.clone())
     }
 }
 
@@ -88,3 +107,54 @@ impl CheckedAdd for Value {
             .map(Value::Number)
     }
 }
+
+#[derive(Debug, PartialEq, PartialOrd)]
+/// Value or reference to a value.
+///
+/// A reference to a value is used to avoid cloning it from the source code into the registers;
+/// a read-only reference is used instead.
+pub enum ValueOrRef<'a> {
+    Ref(&'a Value),
+    Value(Value),
+}
+
+impl<'a> Clone for ValueOrRef<'a> {
+    fn clone(&self) -> Self {
+        match self {
+            ValueOrRef::Ref(value) => ValueOrRef::Value((*value).clone()),
+            ValueOrRef::Value(value) => ValueOrRef::Value(value.clone()),
+        }
+    }
+}
+
+impl<'a> Into<Value> for ValueOrRef<'a> {
+    fn into(self) -> Value {
+        match self {
+            ValueOrRef::Ref(value) => value.clone(),
+            ValueOrRef::Value(value) => value,
+        }
+    }
+}
+
+impl Deref for ValueOrRef<'_> {
+    type Target = Value;
+
+    fn deref(&self) -> &Self::Target {
+        match self {
+            ValueOrRef::Ref(value) => *value,
+            ValueOrRef::Value(value) => &value,
+        }
+    }
+}
+
+impl<'a> From<&'a Value> for ValueOrRef<'a> {
+    fn from(value: &'a Value) -> Self {
+        ValueOrRef::Ref(value)
+    }
+}
+
+impl<'a> From<Value> for ValueOrRef<'a> {
+    fn from(value: Value) -> Self {
+        ValueOrRef::Value(value)
+    }
+}