diff options
Diffstat (limited to 'src/analysis/scan/grammar.y')
-rw-r--r-- | src/analysis/scan/grammar.y | 394 |
1 files changed, 267 insertions, 127 deletions
diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y index 525c5d1..19a4257 100644 --- a/src/analysis/scan/grammar.y +++ b/src/analysis/scan/grammar.y @@ -18,12 +18,15 @@ typedef void *yyscan_t; #include "scanner.h" #include "exprs/access.h" -#include "exprs/arithmop.h" -#include "exprs/boolop.h" +#include "exprs/arithmetic.h" #include "exprs/call.h" +#include "exprs/counter.h" +#include "exprs/intersect.h" #include "exprs/literal.h" +#include "exprs/logical.h" +#include "exprs/set.h" +#include "exprs/relational.h" #include "exprs/strop.h" -#include "exprs/relop.h" #include "patterns/tokens/plain.h" @@ -48,13 +51,12 @@ typedef void *yyscan_t; unsigned long long unsigned_integer; /* Valeur entière #1 */ signed long long signed_integer; /* Valeur entière #2 */ - double floating_number; /* Valeur à virgule flottante */ + //double floating_number; /* Valeur à virgule flottante */ sized_string_t sized_cstring; /* Chaîne de caractères */ - char byte; /* Octet unique */ + //char byte; /* Octet unique */ - unsigned long long integer; /* Valeur entière */ GScanRule *rule; /* Nouvelle règle à intégrer */ @@ -89,11 +91,17 @@ YY_DECL; } +%token INCLUDE "include" + %token RAW_RULE %token RULE_NAME %token STRINGS CONDITION %token IDENTIFIER +%token BYTES_ID +%token BYTES_ID_COUNTER +%token BYTES_ID_LOCATION +%token BYTES_ID_LENGTH %token NAME %token BRACE_IN BRACE_OUT ASSIGN COLON @@ -104,7 +112,8 @@ YY_DECL; %token TRUE_ "true" %token FALSE_ "false" -%token INTEGER +%token SIGNED_INTEGER +%token UNSIGNED_INTEGER %token STRING %token KB MB GB @@ -132,7 +141,7 @@ YY_DECL; %token PLUS "+" %token MINUS "-" %token MUL "*" -%token DIV "\\" +%token DIV "/" %token MOD "%" %token PAREN_O "(" @@ -145,16 +154,18 @@ YY_DECL; %token ALL "all" %token OF "of" %token THEM "them" +%token IN "in" %type <sized_cstring> RULE_NAME -%type <sized_cstring> IDENTIFIER +%type <sized_cstring> IDENTIFIER BYTES_ID_COUNTER %type <sized_cstring> NAME -%type <integer> INTEGER +%type <signed_integer> SIGNED_INTEGER +%type <unsigned_integer> UNSIGNED_INTEGER %type <sized_cstring> STRING %type <rule> rule @@ -162,16 +173,19 @@ YY_DECL; %type <sized_cstring> PLAIN_STRING %type <pattern> MASKED_STRING -%type <expr> cexpression +%type <expr> cexpression _cexpression +%type <expr> pattern_match %type <expr> literal %type <expr> item_chain %type <args_list> call_args -%type <expr> bool_expr -%type <expr> rel_expr -%type <expr> str_expr +%type <expr> logical_expr +%type <expr> relational_expr +%type <expr> string_op %type <expr> arithm_expr %type <expr> set_counter - +%type <expr> set +%type <expr> set_items +%type <expr> intersection %left OR @@ -181,11 +195,11 @@ YY_DECL; %left LT LE GT GE %left PLUS MINUS %left MUL DIV MOD +%left IN %right NOT - %destructor { printf("-------- Discarding symbol %p.\n", $$); } <rule> @@ -207,12 +221,24 @@ YY_DECL; rules : /* empty */ + | external rules | rule rules { g_content_scanner_add_rule(scanner, $1); } //rule : RAW_RULE RULE_NAME { printf("RULE %s\n", $2); } RAW_BLOCK { printf("BLOCK: %s\n", $4); } + +external : "include" STRING + { + bool __status; + __status = g_content_scanner_include_resource(scanner, $2.data); + if (!__status) + YYERROR; + } + + rule : RAW_RULE RULE_NAME { + //printf("--------built rule '%s'\n", $2.data); *built_rule = g_scan_rule_new($2.data); $<rule>$ = *built_rule; } @@ -220,6 +246,7 @@ rule : RAW_RULE RULE_NAME { $$ = $<rule>3; //printf("RULE %s -> %p\n", $2, $$); + //printf("end of rule\n"); } @@ -274,16 +301,18 @@ string_decl : IDENTIFIER ASSIGN PLAIN_STRING } ; -condition : /* empty */ - | CONDITION COLON cexpression - { - g_scan_rule_set_match_condition(*built_rule, $3); - g_object_ref(G_OBJECT($3)); - } - ; + condition : /* empty */ + | CONDITION COLON cexpression + { + g_scan_rule_set_match_condition(*built_rule, $3); + g_object_unref(G_OBJECT($3)); + } + ; -cexpression : IDENTIFIER - { + cexpression : _cexpression { $$ = $1; if ($$ == NULL) { printf("ERROR !!!\n"); YYERROR; } } + + _cexpression : IDENTIFIER + { printf("named var: %s\n", "$1"); /* GSearchPattern *__pat; @@ -297,112 +326,223 @@ cexpression : IDENTIFIER g_object_unref(G_OBJECT(__pat)); } */ - } - | literal { $$ = $1; } - | item_chain { $$ = $1; } - | bool_expr { $$ = $1; } - | rel_expr { $$ = $1; } - | str_expr { $$ = $1; } - | arithm_expr { $$ = $1; } - | set_counter { $$ = $1; } - | "(" cexpression ")" { $$ = $2; } + } + | literal { $$ = $1; } + | pattern_match { $$ = $1; } + | item_chain { $$ = $1; } + | logical_expr { $$ = $1; } + | relational_expr { $$ = $1; } + | string_op { $$ = $1; } + | arithm_expr { $$ = $1; } + | set_counter { $$ = $1; } + | set { $$ = $1; } + | intersection { $$ = $1; } + | "(" cexpression ")" { $$ = $2; } + ; + + pattern_match : BYTES_ID_COUNTER + { + GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + $$ = g_scan_match_counter_new(__pat); + g_object_unref(G_OBJECT(__pat)); + } + ; + + literal : "true" + { + $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ true }); + } + | "false" + { + $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ false }); + } + | SIGNED_INTEGER + { + $$ = g_scan_literal_expression_new(LVT_SIGNED_INTEGER, &$1); + } + | UNSIGNED_INTEGER + { + $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &$1); + } + | UNSIGNED_INTEGER KB + { + unsigned long long __converted; + __converted = $1 * 1024; + $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &__converted); + } + | UNSIGNED_INTEGER MB + { + unsigned long long __converted; + __converted = $1 * 1048576; + $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &__converted); + } + | UNSIGNED_INTEGER GB + { + unsigned long long __converted; + __converted = $1 * 1073741824; + $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &__converted); + } + | STRING + { + $$ = g_scan_literal_expression_new(LVT_STRING, &$1); + } + ; + + item_chain : NAME { $$ = g_scan_named_access_new(&$1); } + | NAME "(" ")" { $$ = g_scan_pending_call_new(&$1, NULL, 0); } + | NAME "(" call_args ")" + { + size_t __i; + $$ = g_scan_pending_call_new(&$1, $3.args, $3.count); + for (__i = 0; __i < $3.count; __i++) + g_object_unref(G_OBJECT($3.args[__i])); + free($3.args); + } + | item_chain "." NAME + { + GScanExpression *__next; + __next = g_scan_named_access_new(&$3); + g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS(__next)); + $$ = $1; + } + | item_chain "." NAME "(" ")" + { + GScanExpression *__next; + __next = g_scan_pending_call_new(&$3, NULL, 0); + g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS(__next)); + $$ = $1; + } + | item_chain "." NAME "(" call_args ")" + { + GScanExpression *__next; + size_t __i; + __next = g_scan_pending_call_new(&$3, $5.args, $5.count); + for (__i = 0; __i < $5.count; __i++) + g_object_unref(G_OBJECT($5.args[__i])); + free($5.args); + g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS(__next)); + $$ = $1; + } + ; + + call_args : cexpression + { + $$.count = 1; + $$.args = malloc(sizeof(GScanExpression *)); + $$.args[0] = $1; + } + | call_args "," cexpression + { + $1.count++; + $1.args = realloc($1.args, $1.count * sizeof(GScanExpression *)); + $1.args[$1.count - 1] = $3; + $$ = $1; + } + ; + + logical_expr : cexpression "and" cexpression { $$ = g_scan_logical_operation_new(BOT_AND, $1, $3); } + | cexpression "or" cexpression { $$ = g_scan_logical_operation_new(BOT_OR, $1, $3); } + | "not" "(" cexpression ")" { $$ = g_scan_logical_operation_new(BOT_NOT, $3, NULL); } + ; + +relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operation_new(RCO_LT, $1, $3); } + | cexpression "<=" cexpression { $$ = g_scan_relational_operation_new(RCO_LE, $1, $3); } + | cexpression "==" cexpression { $$ = g_scan_relational_operation_new(RCO_EQ, $1, $3); } + | cexpression "!=" cexpression { $$ = g_scan_relational_operation_new(RCO_NE, $1, $3); } + | cexpression ">" cexpression { $$ = g_scan_relational_operation_new(RCO_GT, $1, $3); } + | cexpression ">=" cexpression { $$ = g_scan_relational_operation_new(RCO_GE, $1, $3); } + ; + + string_op : cexpression "contains" cexpression + { + $$ = g_scan_string_operation_new(SOT_CONTAINS, $1, $3, true); + } + | cexpression "startswith" cexpression + { + $$ = g_scan_string_operation_new(SOT_STARTSWITH, $1, $3, true); + } + | cexpression "endswith" cexpression + { + $$ = g_scan_string_operation_new(SOT_ENDSWITH, $1, $3, true); + } + | cexpression "matches" cexpression + { + $$ = g_scan_string_operation_new(SOT_MATCHES, $1, $3, true); + } + | cexpression "icontains" cexpression + { + $$ = g_scan_string_operation_new(SOT_CONTAINS, $1, $3, false); + } + | cexpression "istartswith" cexpression + { + $$ = g_scan_string_operation_new(SOT_STARTSWITH, $1, $3, false); + } + | cexpression "iendswith" cexpression + { + $$ = g_scan_string_operation_new(SOT_ENDSWITH, $1, $3, false); + } + | cexpression "iequals" cexpression + { + $$ = g_scan_string_operation_new(SOT_IEQUALS, $1, $3, false); + } + ; + + arithm_expr : cexpression "+" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_PLUS, $1, $3); } + | cexpression "-" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_MINUS, $1, $3); } + | cexpression "*" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_MUL, $1, $3); } + | cexpression "/" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_DIV, $1, $3); } + | cexpression "%" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_MOD, $1, $3); } + ; + +set_counter : "none" "of" "them" { $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ true }); } + | "any" "of" "them" { $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ true }); } + | "all" "of" "them" { $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ true }); } ; -literal : "true" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); } - | "false" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ false }); } - | INTEGER { $$ = g_literal_expression_new(EVT_INTEGER, &$1); } - | INTEGER KB { $$ = g_literal_expression_new(EVT_INTEGER, (unsigned long long []){ $1 * 1024 }); } - | INTEGER MB { $$ = g_literal_expression_new(EVT_INTEGER, (unsigned long long []){ $1 * 1048576 }); } - | INTEGER GB { $$ = g_literal_expression_new(EVT_INTEGER, (unsigned long long []){ $1 * 1073741824 }); } - | STRING { $$ = g_literal_expression_new(EVT_STRING, &$1); } - ; -item_chain : NAME { $$ = g_named_access_new(&$1); } - | NAME "(" ")" { $$ = g_pending_call_new(&$1, NULL, 0); } - | NAME "(" call_args ")" - { - size_t __i; - $$ = g_pending_call_new(&$1, $3.args, $3.count); - for (__i = 0; __i < $3.count; __i++) - g_object_unref(G_OBJECT($3.args[__i])); - free($3.args); - } - | item_chain "." NAME - { - GScanExpression *__next; - __next = g_named_access_new(&$3); - g_named_access_attach_next(G_NAMED_ACCESS($1), G_NAMED_ACCESS(__next)); - $$ = $1; - } - | item_chain "." NAME "(" ")" - { - GScanExpression *__next; - __next = g_pending_call_new(&$3, NULL, 0); - g_named_access_attach_next(G_NAMED_ACCESS($1), G_NAMED_ACCESS(__next)); - $$ = $1; - } - | item_chain "." NAME "(" call_args ")" - { - GScanExpression *__next; - size_t __i; - __next = g_pending_call_new(&$3, $5.args, $5.count); - for (__i = 0; __i < $5.count; __i++) - g_object_unref(G_OBJECT($5.args[__i])); - free($5.args); - g_named_access_attach_next(G_NAMED_ACCESS($1), G_NAMED_ACCESS(__next)); - $$ = $1; - } - ; - -call_args : cexpression - { - $$.count = 1; - $$.args = malloc(sizeof(GScanExpression *)); - $$.args[0] = $1; - } - | call_args "," cexpression - { - $1.count++; - $1.args = realloc($1.args, $1.count * sizeof(GScanExpression *)); - $1.args[$1.count - 1] = $3; - $$ = $1; - } - ; - -bool_expr : cexpression "and" cexpression { $$ = g_boolean_operation_new(BOT_AND, $1, $3); } - | cexpression "or" cexpression { $$ = g_boolean_operation_new(BOT_OR, $1, $3); } - | "not" "(" cexpression ")" { $$ = g_boolean_operation_new(BOT_NOT, $3, NULL); } - ; - -rel_expr : cexpression "<" cexpression { $$ = g_relational_operation_new(RCO_LT, $1, $3); } - | cexpression "<=" cexpression { $$ = g_relational_operation_new(RCO_LE, $1, $3); } - | cexpression "==" cexpression { $$ = g_relational_operation_new(RCO_EQ, $1, $3); } - | cexpression "!=" cexpression { $$ = g_relational_operation_new(RCO_NE, $1, $3); } - | cexpression ">" cexpression { $$ = g_relational_operation_new(RCO_GT, $1, $3); } - | cexpression ">=" cexpression { $$ = g_relational_operation_new(RCO_GT, $1, $3); } - ; - -str_expr : cexpression "contains" cexpression { $$ = g_string_operation_new(SOT_CONTAINS, $1, $3, true); } - | cexpression "startswith" cexpression { $$ = g_string_operation_new(SOT_STARTSWITH, $1, $3, true); } - | cexpression "endswith" cexpression { $$ = g_string_operation_new(SOT_ENDSWITH, $1, $3, true); } - | cexpression "matches" cexpression { $$ = g_string_operation_new(SOT_MATCHES, $1, $3, true); } - | cexpression "icontains" cexpression { $$ = g_string_operation_new(SOT_CONTAINS, $1, $3, false); } - | cexpression "istartswith" cexpression { $$ = g_string_operation_new(SOT_STARTSWITH, $1, $3, false); } - | cexpression "iendswith" cexpression { $$ = g_string_operation_new(SOT_ENDSWITH, $1, $3, false); } - | cexpression "iequals" cexpression { $$ = g_string_operation_new(SOT_IEQUALS, $1, $3, false); } - ; - -arithm_expr : cexpression "+" cexpression { $$ = g_arithmetic_operation_new(AEO_PLUS, $1, $3); } - | cexpression "-" cexpression { $$ = g_arithmetic_operation_new(AEO_MINUS, $1, $3); } - | cexpression "*" cexpression { $$ = g_arithmetic_operation_new(AEO_MUL, $1, $3); } - | cexpression "\\" cexpression { $$ = g_arithmetic_operation_new(AEO_DIV, $1, $3); } - | cexpression "%" cexpression { $$ = g_arithmetic_operation_new(AEO_MOD, $1, $3); } - ; + set : "(" ")" + { + $$ = g_scan_generic_set_new(); + } + | "(" cexpression "," ")" + { + $$ = g_scan_generic_set_new(); + g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $2); + g_object_unref(G_OBJECT($2)); + } + | "(" set_items ")" + { + $$ = $2; + } + ; + + set_items : cexpression "," cexpression + { + $$ = g_scan_generic_set_new(); + g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $1); + g_object_unref(G_OBJECT($1)); + g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $3); + g_object_unref(G_OBJECT($3)); + } + | set_items "," cexpression + { + $$ = $1; + g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $3); + g_object_unref(G_OBJECT($3)); + } + ; + + intersection : cexpression "in" cexpression + { + $$ = g_scan_sets_intersection_new($1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + ; + + -set_counter : "none" "of" "them" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); } - | "any" "of" "them" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); } - | "all" "of" "them" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); } - ; %% |