Эх сурвалжийг харах

Added ability to rearrange expression terms

Some expressions can be rearranged for more efficient execution. Not every
expression qualifies: only Add, Or, And, Eq and NotEq operations can be rearranged.
Cesar Rodas 9 сар өмнө
parent
commit
75a4bff631

+ 4 - 6
utxo/src/filter_expr/compiler/optimizations/calculate_static_values.rs

@@ -8,19 +8,16 @@ pub fn calculate_static_values(mut opcodes: Vec<OpCode>) -> (Vec<OpCode>, bool)
     let mut register = HashMap::new();
     let mut has_changed = false;
 
+    println!("Calculating static values");
     opcodes.iter_mut().for_each(|opcode| {
         match &opcode {
             OpCode::LOAD(dst, value) => {
                 register.insert(*dst, value.clone());
             }
-            OpCode::JMP(_)
-            | OpCode::HLT(_)
-            | OpCode::JEQ(_, _)
-            | OpCode::JNE(_, _)
-            | OpCode::LABEL(_) => {}
+            OpCode::JMP(_) | OpCode::HLT(_) | OpCode::LABEL(_) => {}
             OpCode::MOV(dst, src) => {
                 if let Some(value) = register.remove(src) {
-                    register.insert(*dst, value);
+                    register.insert(*dst, value.clone());
                 }
             }
             OpCode::LOAD_EXTERNAL(dst, _)
@@ -43,6 +40,7 @@ pub fn calculate_static_values(mut opcodes: Vec<OpCode>) -> (Vec<OpCode>, bool)
             OpCode::JEQ(reg, addr) => {
                 if let Some(Value::Bool(true)) = register.get(reg) {
                     *opcode = OpCode::JMP(*addr);
+                    has_changed = true;
                 }
             }
             OpCode::JNE(reg, addr) => {

+ 58 - 0
utxo/src/filter_expr/expr.rs

@@ -1,3 +1,5 @@
+use crate::id;
+
 use super::Error;
 use std::{ops::Deref, str::FromStr};
 
@@ -78,6 +80,62 @@ pub enum Expr {
     Number(i128),
 }
 
+impl Expr {
+    /// Rearranges the expression tree so that cheaper terms are evaluated first.
+    ///
+    /// Terms are reordered by ascending [`Expr::cost`] (stable for equal costs) only
+    /// when [`Expr::can_be_rearranged`] allows it; every term is then rearranged
+    /// recursively. Non-operator expressions are returned untouched.
+    pub fn rearrange_expression(self) -> Self {
+        // `self` is moved by the `match` below, so decide before destructuring it.
+        let can_be_rearranged = self.can_be_rearranged();
+        match self {
+            Expr::Op { op, mut terms } => {
+                if can_be_rearranged {
+                    // The cost is recursive; cache it so it is computed once per term.
+                    terms.sort_by_cached_key(|term| term.cost());
+                }
+
+                Expr::Op {
+                    op,
+                    terms: terms
+                        .into_iter()
+                        .map(|term| term.rearrange_expression().into())
+                        .collect::<Vec<_>>(),
+                }
+            }
+            other => other,
+        }
+    }
+
+    /// Checks if the terms of this expression can be reordered.
+    ///
+    /// Only `Add`, `Or`, `And`, `Eq` and `NotEq` operations qualify; every other
+    /// operation and every non-operator expression yields `false`.
+    pub fn can_be_rearranged(&self) -> bool {
+        matches!(
+            self,
+            Expr::Op {
+                op: ExprOp::Add | ExprOp::Or | ExprOp::And | ExprOp::Eq | ExprOp::NotEq,
+                ..
+            }
+        )
+    }
+
+    /// Estimated cost of evaluating the expression.
+    ///
+    /// Literals cost `1`, variables cost `1_000` and an operation costs the sum of
+    /// the costs of its terms.
+    pub fn cost(&self) -> usize {
+        match self {
+            Expr::Op { terms, .. } => terms.iter().map(|term| term.cost()).sum::<usize>(),
+            // A variable is priced far above a literal so it sorts after literals.
+            Expr::Variable(_) => 1_000,
+            Expr::String(_) | Expr::Bool(_) | Expr::Number(_) => 1,
+        }
+    }
+}
+
 #[derive(Clone, Debug, PartialEq, Hash, PartialOrd, Eq)]
 /// Variable
 ///

+ 2 - 2
utxo/src/filter_expr/filter.rs

@@ -23,7 +23,7 @@ pub struct Filter<'a> {
     /// The list of opcodes that make up the program, the labels has been converted into addresses
     opcodes_to_execute: Vec<OpCode>,
     /// The state of the register
-    initial_register: HashMap<Register, ValueOrRef<'a>>,
+    pub(crate) initial_register: HashMap<Register, ValueOrRef<'a>>,
     _phantom: std::marker::PhantomData<&'a ()>,
 }
 
@@ -82,7 +82,7 @@ impl<'a> Filter<'a> {
                     }
                     _ => Some(opcode.clone()),
                 })
-                .collect(),
+                .collect::<Vec<_>>(),
         )
         .unwrap();
         self

+ 2 - 1
utxo/src/filter_expr/parser.rs

@@ -156,7 +156,8 @@ pub fn parse_query(query: &str) -> Result<Query, Error> {
                     pair.into_inner()
                         .next()
                         .map(parse_expr)
-                        .ok_or(Error::MissingNextRule)??,
+                        .ok_or(Error::MissingNextRule)??
+                        .rearrange_expression(),
                 );
             }
             Rule::limit_clause => {

+ 11 - 0
utxo/src/filter_expr/tests/always_true.expr

@@ -0,0 +1,11 @@
+/// Tests that the optimizer rearranges the expression so that the cheap terms run first. One of
+/// them (1 = 1) is always true and is evaluated at compile time. Because the top-level expression
+/// is an OR, the whole expression is then true and there is no point in evaluating the
+/// remaining, more expensive terms.
+WHERE
+    (
+        $foo = 3 + 2 * 4 / 2 * 298210 + $bar
+        AND 25 = 5*$five
+    )
+    OR false
+    OR  1 = 1

+ 46 - 17
utxo/src/filter_expr/tests/mod.rs

@@ -1,19 +1,26 @@
-use crate::filter_expr::value::ValueOrRef;
-
 use super::{expr::Variable, Error, Filter, Value};
+use crate::filter_expr::value::ValueOrRef;
 use std::collections::HashMap;
 
+fn external_variables<K: Into<Variable>, V: Into<Value>>(
+    external_variables: Vec<(K, V)>,
+) -> HashMap<Variable, ValueOrRef<'static>> {
+    external_variables
+        .into_iter()
+        .map(|(k, v)| (k.into(), ValueOrRef::Value(v.into())))
+        .collect()
+}
+
 fn testsuite<K: Into<Variable>, V: Into<Value>, R: Into<Value>>(
     code: &str,
-    external_variables: Vec<(K, V)>,
+    variables: Vec<(K, V)>,
     ret: Result<R, Error>,
-) {
+) -> Filter {
     let filter = Filter::new(code).expect("valid filter");
-    let external_variables = external_variables
-        .into_iter()
-        .map(|(k, v)| (k.into(), ValueOrRef::Value(v.into())))
-        .collect();
-    assert_eq!(filter.execute(&external_variables), ret.map(|x| x.into()));
+    let variables = external_variables(variables);
+    println!("{}\n\n{}", filter.debug(), filter.dump());
+    assert_eq!(filter.execute(&variables), ret.map(|x| x.into()));
+    filter
 }
 
 #[test]
@@ -32,19 +39,41 @@ fn test_1() {
 
 #[test]
 fn nested() {
-    testsuite(
+    let vm = testsuite(
         include_str!("nested.expr"),
         vec![("foo", 0), ("bar", 0), ("five", 5)],
         Ok(false),
     );
-    testsuite(
-        include_str!("nested.expr"),
-        vec![("foo", 1192844), ("bar", 1), ("five", 4)],
-        Ok(false),
+
+    assert_eq!(
+        vm.execute(&external_variables(vec![
+            ("foo", 1192844),
+            ("bar", 1),
+            ("five", 4),
+        ])),
+        Ok(false.into())
     );
-    testsuite(
+
+    assert_eq!(
+        vm.execute(&external_variables(vec![
+            ("foo", 1192844),
+            ("bar", 1),
+            ("five", 5),
+        ])),
+        Ok(true.into())
+    );
+}
+
+#[test]
+fn missing_external_variable() {
+    testsuite::<_, _, bool>(
         include_str!("nested.expr"),
-        vec![("foo", 1192844), ("bar", 1), ("five", 5)],
-        Ok(true),
+        vec![("foo", 1192844), ("bar", 1)],
+        Err(Error::VariableNotFound("five".to_owned())),
     );
 }
+
+#[test]
+fn rearrange_expr_from_less_expensive() {
+    testsuite::<&str, i128, _>(include_str!("always_true.expr"), vec![], Ok(true));
+}

+ 1 - 1
utxo/src/filter_expr/tests/nested.expr

@@ -1,3 +1,3 @@
-   WHERE
+WHERE
     $foo = 3 + 2 * 4 / 2 * 298210 + $bar
     AND 25 = 5*$five