diff options
Diffstat (limited to 'src/analysis/scan/grammar.y')
| -rw-r--r-- | src/analysis/scan/grammar.y | 865 |
1 files changed, 670 insertions, 195 deletions
diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y index e1f0e9e..2d985a7 100644 --- a/src/analysis/scan/grammar.y +++ b/src/analysis/scan/grammar.y @@ -6,10 +6,10 @@ /* Affiche un message d'erreur suite à l'analyse en échec. */ -static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *, sized_string_t *, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *); +static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *, sized_string_t *, char *); #define raise_error(msg) \ - yyerror(scanner, yyscanner, built_rule, tmp_0, tmp_1, NULL, buf, allocated, used, msg) + yyerror(scanner, yyscanner, built_rule, tmp_0, tmp_1, msg) %} @@ -30,7 +30,7 @@ typedef void *yyscan_t; #include "exprs/access.h" #include "exprs/arithmetic.h" #include "exprs/call.h" -#include "exprs/counter.h" +#include "exprs/extract.h" #include "exprs/handler.h" #include "exprs/intersect.h" #include "exprs/item.h" @@ -40,8 +40,10 @@ typedef void *yyscan_t; #include "exprs/setcounter.h" #include "exprs/relational.h" #include "exprs/strop.h" +#include "patterns/customizer.h" #include "patterns/modifier.h" #include "patterns/modifiers/list.h" +#include "patterns/modifiers/pipe.h" #include "patterns/tokens/hex.h" #include "patterns/tokens/plain.h" #include "patterns/tokens/nodes/any.h" @@ -74,15 +76,14 @@ typedef void *yyscan_t; } masked; ScanRuleFlags rule_flags; /* Fanions pour règle */ - GScanRule *rule; /* Nouvelle règle à intégrer */ GScanTokenNode *node; /* Bribe de motif à intégrer */ GSearchPattern *pattern; /* Nouveau motif à considérer */ GScanTokenModifier *modifier; /* Modificateur pour texte */ + modifier_arg_t mod_arg; /* Argument pour modificateur */ ScanPlainNodeFlags str_flags; /* Fanions pour texte */ - GScanExpression *expr; /* Expression de condition */ struct { @@ -100,13 +101,13 @@ typedef void *yyscan_t; %define api.pure full -%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used } -%lex-param { yyscan_t yyscanner } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used } +%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { sized_string_t *tmp_0} { sized_string_t *tmp_1} +%lex-param { yyscan_t yyscanner } { sized_string_t *tmp_0} { sized_string_t *tmp_1} %code provides { #define YY_DECL \ - int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used) + int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, sized_string_t *tmp_0, sized_string_t *tmp_1) YY_DECL; @@ -115,8 +116,8 @@ YY_DECL; %token INCLUDE "include" -%token RAW_RULE -%token RULE_NAME +%token RAW_RULE "rule" +%token RULE_IDENTIFIER %token META "meta" %token BYTES "bytes" @@ -129,9 +130,13 @@ YY_DECL; %token BYTES_ID %token BYTES_FUZZY_ID %token BYTES_ID_COUNTER +%token BYTES_FUZZY_ID_COUNTER %token BYTES_ID_START +%token BYTES_FUZZY_ID_START %token BYTES_ID_LENGTH +%token BYTES_FUZZY_ID_LENGTH %token BYTES_ID_END +%token BYTES_FUZZY_ID_END %token NAME @@ -152,7 +157,8 @@ YY_DECL; -%token BRACE_IN BRACE_OUT +%token BRACE_IN "{" +%token BRACE_OUT "}" %token ASSIGN "=" %token COLON ":" @@ -164,7 +170,6 @@ YY_DECL; %token FALSE_ "false" %token SIGNED_INTEGER %token UNSIGNED_INTEGER -%token STRING %token KB MB GB @@ -198,8 +203,6 @@ YY_DECL; %token HOOK_O "[" %token HOOK_C "]" -%token BRACKET_O "{" -%token BRACKET_C "}" %token QUESTION "?" %token PAREN_O "(" @@ -208,6 +211,9 @@ YY_DECL; %token DOT "." %token PIPE "|" +%token MOD_GROUP_O "((" +%token MOD_GROUP_C "))" + %token NONE "none" %token ANY "any" %token ALL "all" @@ -216,26 +222,28 @@ YY_DECL; %token IN "in" -%type <sized_cstring> RULE_NAME +%type <sized_cstring> RULE_IDENTIFIER %type <sized_cstring> INFO_KEY %type <sized_cstring> BYTES_ID %type <sized_cstring> BYTES_FUZZY_ID %type <sized_cstring> BYTES_ID_COUNTER +%type <sized_cstring> BYTES_FUZZY_ID_COUNTER %type <sized_cstring> BYTES_ID_START +%type <sized_cstring> BYTES_FUZZY_ID_START %type <sized_cstring> BYTES_ID_LENGTH +%type <sized_cstring> BYTES_FUZZY_ID_LENGTH %type <sized_cstring> BYTES_ID_END +%type <sized_cstring> BYTES_FUZZY_ID_END %type <sized_cstring> NAME %type <signed_integer> SIGNED_INTEGER %type <unsigned_integer> UNSIGNED_INTEGER -%type <sized_cstring> STRING %type <rule_flags> rule_flags %type <rule_flags> rule_flag -%type <rule> rule %type <sized_cstring> PLAIN_TEXT %type <tmp_cstring> ESCAPED_TEXT @@ -252,15 +260,18 @@ YY_DECL; %type <modifier> modifiers %type <modifier> _modifiers -%type <modifier> chained_modifiers + //%type <modifier> chained_modifiers %type <modifier> mod_stage %type <modifier> modifier +%type <modifier> modifier_args +%type <mod_arg> modifier_arg %type <str_flags> str_flags %type <pattern> hex_pattern %type <node> hex_tokens +%type <node> _hex_tokens %type <node> hex_token %type <node> hex_range %type <node> hex_choices @@ -270,7 +281,8 @@ YY_DECL; %type <expr> cexpression _cexpression %type <expr> literal -%type <expr> item_chain +%type <expr> chain_items +%type <expr> chain_item %type <args_list> call_args %type <expr> logical_expr %type <expr> relational_expr @@ -281,9 +293,9 @@ YY_DECL; %type <expr> pattern_set_items %type <expr> set %type <expr> set_items -%type <expr> set_access %type <expr> intersection %type <expr> pattern_handler +%type <expr> _pattern_handler @@ -308,16 +320,36 @@ YY_DECL; %left HOOK_O HOOK_C + %destructor { g_object_unref(G_OBJECT($$)); } <node> + + %destructor { g_object_unref(G_OBJECT($$)); } <pattern> + + %destructor { g_object_unref(G_OBJECT($$)); } <modifier> + + %destructor { g_object_unref(G_OBJECT($$)); } <expr> + + %destructor { + size_t __i; + + for (__i = 0; __i < $$.count; __i++) + g_object_unref(G_OBJECT($$.args[__i])); + if ($$.args != NULL) + free($$.args); -%destructor { printf("-------- Discarding symbol %p.\n", $$); } <rule> + } <args_list> %% rules : /* empty */ | external rules - | rule rules { g_content_scanner_add_rule(scanner, $1); } + | rule + { + g_content_scanner_add_rule(scanner, *built_rule); + g_clear_object(built_rule); + } + rules ; @@ -346,15 +378,11 @@ YY_DECL; * Définition de règle. */ - rule : rule_flags RAW_RULE RULE_NAME + rule : rule_flags "rule" RULE_IDENTIFIER { *built_rule = g_scan_rule_new($1, $3.data); - $<rule>$ = *built_rule; - } - BRACE_IN meta bytes condition BRACE_OUT - { - $$ = $<rule>4; } + tags "{" meta bytes condition "}" ; @@ -379,6 +407,21 @@ YY_DECL; ; + tags : /* empty */ + | ":" tag_list + ; + + tag_list : RULE_IDENTIFIER + { + g_scan_rule_add_tag(*built_rule, $1.data); + } + | tag_list RULE_IDENTIFIER + { + g_scan_rule_add_tag(*built_rule, $2.data); + } + ; + + /** * Section "meta:" d'une définition de règle. */ @@ -449,7 +492,7 @@ YY_DECL; * Définition de motif en texte brut. */ - str_pattern : BYTES_ID ASSIGN PLAIN_TEXT modifiers str_flags + str_pattern : BYTES_ID "=" PLAIN_TEXT modifiers str_flags { GScanTokenNode *node; @@ -461,7 +504,7 @@ YY_DECL; g_object_unref(G_OBJECT(node)); } - | BYTES_ID ASSIGN ESCAPED_TEXT modifiers str_flags + | BYTES_ID "=" ESCAPED_TEXT modifiers str_flags { GScanTokenNode *node; @@ -488,7 +531,9 @@ YY_DECL; { $$ = $1; + // if (...) useless + // ex : xxx | { yyy zzz } } ; @@ -497,19 +542,45 @@ YY_DECL; { $$ = $1; } - | chained_modifiers + | _modifiers "|" mod_stage { - $$ = $1; + bool status; + + if (G_IS_SCAN_MODIFIER_PIPE($1)) + $$ = $1; + else + { + $$ = g_scan_modifier_pipe_new(); + g_scan_modifier_pipe_add(G_SCAN_MODIFIER_PIPE($$), $1); + g_object_unref(G_OBJECT($1)); + } + + g_scan_modifier_pipe_add(G_SCAN_MODIFIER_PIPE($$), $3); + g_object_unref(G_OBJECT($3)); + } ; - chained_modifiers : _modifiers "|" _modifiers +/* + chained_modifiers : modifiers "|" modifiers + { + printf("need chains....\n"); + + $$ = NULL; + + } ; +*/ mod_stage : modifier { $$ = $1; } + | "((" _modifiers "))" + { + $$ = NULL; + YYERROR; /* TODO */ + } | mod_stage modifier { bool status; @@ -520,6 +591,7 @@ YY_DECL; { $$ = g_scan_modifier_list_new(); g_scan_modifier_list_add(G_SCAN_MODIFIER_LIST($$), $1); + g_object_unref(G_OBJECT($1)); } status = g_scan_modifier_list_add(G_SCAN_MODIFIER_LIST($$), $2); @@ -527,21 +599,22 @@ YY_DECL; { if (1) log_simple_message(LMT_WARNING, "modifier already taken into account!"); - g_object_unref(G_OBJECT($2)); } + g_object_unref(G_OBJECT($2)); + } ; modifier : NAME { - $$ = find_scan_token_modifiers_for_name($1.data); + $$ = find_scan_token_modifiers_for_name(&$1); if ($$ == NULL) { char *_msg; int _ret; - _ret = asprintf(&_msg, _("Unknown modifier: \"%s\""), $1.data); + _ret = asprintf(&_msg, _("Unknown modifier: \"%.*s\""), (int)$1.len, $1.data); if (_ret != -1) { @@ -552,9 +625,74 @@ YY_DECL; YYERROR; } } - | "(" chained_modifiers ")" + | NAME "(" modifier_args ")" { - $$ = $2; + GScanTokenModifier *_mod; + bool _status; + + $$ = $3; + + _mod = find_scan_token_modifiers_for_name(&$1); + if (_mod == NULL) + { + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Unknown modifier: \"%.*s\""), (int)$1.len, $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + g_object_unref(G_OBJECT($$)); + + YYERROR; + } + + _status = g_scan_token_customizer_attach_modifier(G_SCAN_TOKEN_CUSTOMIZER($$), _mod); + + g_object_unref(G_OBJECT(_mod)); + + if (!_status) + { + char *_msg; + int _ret; + + _ret = asprintf(&_msg, + _("Unsupported argument for modifier: \"%.*s\""), + (int)$1.len, $1.data); + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + g_object_unref(G_OBJECT($$)); + + YYERROR; + } + + } + ; + + + modifier_args : modifier_arg + { + $$ = g_scan_token_customizer_new(&$1); + } + | modifier_args "," modifier_arg + { + $$ = $1; + g_scan_token_customizer_add_extra_arg(G_SCAN_TOKEN_CUSTOMIZER($$), &$3); + } + ; + + modifier_arg : PLAIN_TEXT + { + $$.type = MAT_STRING; + $$.value.string = $1; } ; @@ -586,26 +724,49 @@ YY_DECL; * Définition de motif en hexadécimal. */ - hex_pattern : BYTES_ID ASSIGN hex_tokens + hex_pattern : BYTES_ID "=" hex_tokens { $$ = g_scan_hex_bytes_new($3, false); g_search_pattern_set_name($$, $1.data, $1.len); } - | BYTES_ID ASSIGN hex_tokens "private" + | BYTES_ID "=" hex_tokens "private" { $$ = g_scan_hex_bytes_new($3, true); g_search_pattern_set_name($$, $1.data, $1.len); } ; - hex_tokens : hex_token + hex_tokens : _hex_tokens + { + $$ = $1; + + if (G_IS_SCAN_TOKEN_NODE_SEQUENCE($$)) + { + if (g_scan_token_node_sequence_count(G_SCAN_TOKEN_NODE_SEQUENCE($$)) == 1) + { + GScanTokenNode *node; + + node = g_scan_token_node_sequence_get(G_SCAN_TOKEN_NODE_SEQUENCE($$), 0); + + g_object_unref(G_OBJECT($$)); + + $$ = node; + + } + + } + + } + ; + + _hex_tokens : hex_token { if ($1 == NULL) YYERROR; $$ = $1; } - | hex_tokens hex_token + | _hex_tokens hex_token { if ($2 == NULL) YYERROR; @@ -665,10 +826,18 @@ YY_DECL; } | hex_range { + if ($1 == NULL) + { + raise_error(_("Unable to build hexadecimal range")); + YYERROR; + } + $$ = $1; + } | "~" hex_token { + if ($2 == NULL) YYERROR; $$ = g_scan_token_node_not_new($2); } @@ -726,6 +895,9 @@ YY_DECL; hex_choices : hex_token "|" hex_token { + if ($1 == NULL) YYERROR; + if ($3 == NULL) YYERROR; + $$ = g_scan_token_node_choice_new(); g_scan_token_node_choice_add(G_SCAN_TOKEN_NODE_CHOICE($$), $1); g_object_unref(G_OBJECT($1)); @@ -734,6 +906,8 @@ YY_DECL; } | hex_choices "|" hex_token { + if ($3 == NULL) YYERROR; + $$ = $1; g_scan_token_node_choice_add(G_SCAN_TOKEN_NODE_CHOICE($$), $3); g_object_unref(G_OBJECT($3)); @@ -745,7 +919,7 @@ YY_DECL; * Définition de motif sous forme d'expression régulière */ - regex_pattern : BYTES_ID ASSIGN regex_tokens + regex_pattern : BYTES_ID "=" regex_tokens { } @@ -789,7 +963,7 @@ YY_DECL; } ; - _regex_token : DOT + _regex_token : "." { printf("reg dot!\n"); } @@ -851,24 +1025,24 @@ YY_DECL; * Définition des conditions. */ - condition : CONDITION COLON cexpression - { - g_scan_rule_set_match_condition(*built_rule, $3); - g_object_unref(G_OBJECT($3)); - } - ; + condition : "condition" ":" cexpression + { + g_scan_rule_set_match_condition(*built_rule, $3); + g_object_unref(G_OBJECT($3)); + } + ; - cexpression : _cexpression { $$ = $1; if ($$ == NULL) { printf("ERROR !!!\n"); YYERROR; } } + cexpression : _cexpression { $$ = $1; if ($$ == NULL) { printf("ERROR !!!\n"); YYERROR; } } + ; _cexpression : literal { $$ = $1; } - | item_chain { $$ = $1; } + | chain_items { $$ = $1; } | logical_expr { $$ = $1; } | relational_expr { $$ = $1; } | string_op { $$ = $1; } | arithm_expr { $$ = $1; } | set_match_counter { $$ = $1; } | set { $$ = $1; } - | set_access { $$ = $1; } | intersection { $$ = $1; } | pattern_handler { $$ = $1; } | "(" cexpression ")" { $$ = $2; } @@ -908,46 +1082,65 @@ YY_DECL; __converted = $1 * 1073741824; $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &__converted); } - | STRING + | PLAIN_TEXT { $$ = g_scan_literal_expression_new(LVT_STRING, &$1); } + | PLAIN_TEXT "[" cexpression "]" + { + GScanExpression *__src; + __src = g_scan_literal_expression_new(LVT_STRING, &$1); + $$ = g_scan_set_item_new(__src, $3); + g_object_unref(G_OBJECT(__src)); + g_object_unref(G_OBJECT($3)); + } + | ESCAPED_TEXT + { + $$ = g_scan_literal_expression_new(LVT_STRING, $1); + } + | ESCAPED_TEXT "[" cexpression "]" + { + GScanExpression *__src; + __src = g_scan_literal_expression_new(LVT_STRING, $1); + $$ = g_scan_set_item_new(__src, $3); + g_object_unref(G_OBJECT(__src)); + g_object_unref(G_OBJECT($3)); + } ; - item_chain : NAME { $$ = g_scan_named_access_new(&$1); } - | NAME "(" ")" { $$ = g_scan_pending_call_new(&$1, NULL, 0); } - | NAME "(" call_args ")" + + chain_items : chain_item { - size_t __i; - $$ = g_scan_pending_call_new(&$1, $3.args, $3.count); - for (__i = 0; __i < $3.count; __i++) - g_object_unref(G_OBJECT($3.args[__i])); - free($3.args); + $$ = $1; } - | item_chain "." NAME + | chain_items "." chain_item { - GScanExpression *__next; - __next = g_scan_named_access_new(&$3); - g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS(__next)); + g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS($3)); + g_object_unref(G_OBJECT($3)); $$ = $1; } - | item_chain "." NAME "(" ")" + ; + + chain_item : NAME { - GScanExpression *__next; - __next = g_scan_pending_call_new(&$3, NULL, 0); - g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS(__next)); - $$ = $1; + $$ = g_scan_named_access_new(&$1); } - | item_chain "." NAME "(" call_args ")" + | NAME "(" ")" + { + $$ = g_scan_pending_call_new(&$1, NULL, 0); + } + | NAME "(" call_args ")" { - GScanExpression *__next; size_t __i; - __next = g_scan_pending_call_new(&$3, $5.args, $5.count); - for (__i = 0; __i < $5.count; __i++) - g_object_unref(G_OBJECT($5.args[__i])); - free($5.args); - g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS(__next)); - $$ = $1; + $$ = g_scan_pending_call_new(&$1, $3.args, $3.count); + for (__i = 0; __i < $3.count; __i++) + g_object_unref(G_OBJECT($3.args[__i])); + free($3.args); + } + | NAME "[" cexpression "]" + { + $$ = g_scan_pending_extraction_new(&$1, $3); + g_object_unref(G_OBJECT($3)); } ; @@ -966,58 +1159,143 @@ YY_DECL; } ; - logical_expr : cexpression "and" cexpression { $$ = g_scan_logical_operation_new(BOT_AND, $1, $3); } - | cexpression "or" cexpression { $$ = g_scan_logical_operation_new(BOT_OR, $1, $3); } - | "not" "(" cexpression ")" { $$ = g_scan_logical_operation_new(BOT_NOT, $3, NULL); } + logical_expr : cexpression "and" cexpression + { + $$ = g_scan_logical_operation_new(BOT_AND, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression "or" cexpression + { + $$ = g_scan_logical_operation_new(BOT_OR, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | "not" "(" cexpression ")" + { + $$ = g_scan_logical_operation_new(BOT_NOT, $3, NULL); + g_object_unref(G_OBJECT($3)); + } ; -relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operation_new(RCO_LT, $1, $3); } - | cexpression "<=" cexpression { $$ = g_scan_relational_operation_new(RCO_LE, $1, $3); } - | cexpression "==" cexpression { $$ = g_scan_relational_operation_new(RCO_EQ, $1, $3); } - | cexpression "!=" cexpression { $$ = g_scan_relational_operation_new(RCO_NE, $1, $3); } - | cexpression ">" cexpression { $$ = g_scan_relational_operation_new(RCO_GT, $1, $3); } - | cexpression ">=" cexpression { $$ = g_scan_relational_operation_new(RCO_GE, $1, $3); } +relational_expr : cexpression "<" cexpression + { + $$ = g_scan_relational_operation_new(RCO_LT, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression "<=" cexpression + { + $$ = g_scan_relational_operation_new(RCO_LE, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression "==" cexpression + { + $$ = g_scan_relational_operation_new(RCO_EQ, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression "!=" cexpression + { + $$ = g_scan_relational_operation_new(RCO_NE, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression ">" cexpression + { + $$ = g_scan_relational_operation_new(RCO_GT, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression ">=" cexpression + { + $$ = g_scan_relational_operation_new(RCO_GE, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } ; string_op : cexpression "contains" cexpression { $$ = g_scan_string_operation_new(SOT_CONTAINS, $1, $3, true); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); } | cexpression "startswith" cexpression { $$ = g_scan_string_operation_new(SOT_STARTSWITH, $1, $3, true); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); } | cexpression "endswith" cexpression { $$ = g_scan_string_operation_new(SOT_ENDSWITH, $1, $3, true); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); } | cexpression "matches" cexpression { $$ = g_scan_string_operation_new(SOT_MATCHES, $1, $3, true); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); } | cexpression "icontains" cexpression { $$ = g_scan_string_operation_new(SOT_CONTAINS, $1, $3, false); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); } | cexpression "istartswith" cexpression { $$ = g_scan_string_operation_new(SOT_STARTSWITH, $1, $3, false); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); } | cexpression "iendswith" cexpression { $$ = g_scan_string_operation_new(SOT_ENDSWITH, $1, $3, false); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); } | cexpression "iequals" cexpression { $$ = g_scan_string_operation_new(SOT_IEQUALS, $1, $3, false); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); } ; - arithm_expr : cexpression "+" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_PLUS, $1, $3); } - | cexpression "-" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_MINUS, $1, $3); } - | cexpression "*" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_MUL, $1, $3); } - | cexpression "/" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_DIV, $1, $3); } - | cexpression "%" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_MOD, $1, $3); } + arithm_expr : cexpression "+" cexpression + { + $$ = g_scan_arithmetic_operation_new(AEO_PLUS, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression "-" cexpression + { + $$ = g_scan_arithmetic_operation_new(AEO_MINUS, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression "*" cexpression + { + $$ = g_scan_arithmetic_operation_new(AEO_MUL, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression "/" cexpression + { + $$ = g_scan_arithmetic_operation_new(AEO_DIV, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + | cexpression "%" cexpression + { + $$ = g_scan_arithmetic_operation_new(AEO_MOD, $1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } ; @@ -1078,15 +1356,17 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio pattern_set : "them" { size_t __count; - GSearchPattern **__patterns; - size_t __i; + const GSearchPattern **__patterns; __patterns = g_scan_rule_get_local_variables(*built_rule, NULL, &__count); - $$ = g_scan_set_match_counter_new(__patterns, __count); + if (__patterns == NULL) + { + raise_error(_("No pattern found for \"them\"")); + YYERROR; + } - for (__i = 0; __i < __count; __i++) - g_object_unref(G_OBJECT(__patterns[__i])); + $$ = g_scan_set_match_counter_new_shared(__patterns, __count); free(__patterns); @@ -1099,7 +1379,7 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio pattern_set_items : BYTES_ID { - GSearchPattern *__pat; + const GSearchPattern *__pat; __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); @@ -1119,20 +1399,17 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio YYERROR; } - $$ = g_scan_set_match_counter_new((GSearchPattern *[]) { __pat }, 1); - - g_object_unref(G_OBJECT(__pat)); + $$ = g_scan_set_match_counter_new_shared((const GSearchPattern *[]) { __pat }, 1); } | BYTES_FUZZY_ID { size_t __count; - GSearchPattern **__patterns; - size_t __i; + const GSearchPattern **__patterns; __patterns = g_scan_rule_get_local_variables(*built_rule, $1.data, &__count); - if (__count == 0) + if (__patterns == NULL) { char *_msg; int _ret; @@ -1148,17 +1425,14 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio YYERROR; } - $$ = g_scan_set_match_counter_new(__patterns, __count); - - for (__i = 0; __i < __count; __i++) - g_object_unref(G_OBJECT(__patterns[__i])); + $$ = g_scan_set_match_counter_new_shared(__patterns, __count); free(__patterns); } | pattern_set_items "," BYTES_ID { - GSearchPattern *__pat; + const GSearchPattern *__pat; GScanSetMatchCounter *__counter; __pat = g_scan_rule_get_local_variable(*built_rule, $3.data); @@ -1180,9 +1454,8 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio } __counter = G_SCAN_SET_MATCH_COUNTER($1); - g_scan_set_match_counter_add_extra_patterns(__counter, (GSearchPattern *[]) { __pat }, 1); - - g_object_unref(G_OBJECT(__pat)); + g_scan_set_match_counter_add_extra_shared_patterns(__counter, + (const GSearchPattern *[]) { __pat }, 1); $$ = $1; @@ -1190,13 +1463,12 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio | pattern_set_items "," BYTES_FUZZY_ID { size_t __count; - GSearchPattern **__patterns; + const GSearchPattern **__patterns; GScanSetMatchCounter *__counter; - size_t __i; __patterns = g_scan_rule_get_local_variables(*built_rule, $3.data, &__count); - if (__count == 0) + if (__patterns == NULL) { char *_msg; int _ret; @@ -1213,10 +1485,7 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio } __counter = G_SCAN_SET_MATCH_COUNTER($1); - g_scan_set_match_counter_add_extra_patterns(__counter, __patterns, __count); - - for (__i = 0; __i < __count; __i++) - g_object_unref(G_OBJECT(__patterns[__i])); + g_scan_set_match_counter_add_extra_shared_patterns(__counter, __patterns, __count); free(__patterns); @@ -1226,46 +1495,39 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio ; - set : "(" ")" - { - $$ = g_scan_generic_set_new(); - } - | "(" cexpression "," ")" - { - $$ = g_scan_generic_set_new(); - g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $2); - g_object_unref(G_OBJECT($2)); - } - | "(" set_items ")" - { - $$ = $2; - } - ; - - set_items : cexpression "," cexpression - { - $$ = g_scan_generic_set_new(); - g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $1); - g_object_unref(G_OBJECT($1)); - g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $3); - g_object_unref(G_OBJECT($3)); - } - | set_items "," cexpression - { - $$ = $1; - g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $3); - g_object_unref(G_OBJECT($3)); - } - ; + set : "(" ")" + { + $$ = g_scan_generic_set_new(); + } + | "(" cexpression "," ")" + { + $$ = g_scan_generic_set_new(); + g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $2); + g_object_unref(G_OBJECT($2)); + } + | "(" set_items ")" + { + $$ = $2; + } + ; - set_access : cexpression "[" cexpression "]" + set_items : cexpression "," cexpression { - $$ = g_scan_set_item_new($1, $3); + $$ = g_scan_generic_set_new(); + g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $1); g_object_unref(G_OBJECT($1)); + g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $3); + g_object_unref(G_OBJECT($3)); + } + | set_items "," cexpression + { + $$ = $1; + g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $3); g_object_unref(G_OBJECT($3)); } ; + intersection : cexpression "in" cexpression { $$ = g_scan_sets_intersection_new($1, $3); @@ -1274,65 +1536,290 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio } ; - pattern_handler : BYTES_ID + + pattern_handler : _pattern_handler + { + $$ = $1; + } + | _pattern_handler "[" cexpression "]" + { + if (g_scan_pattern_handler_get_handler_type(G_SCAN_PATTERN_HANDLER($1)) == SHT_COUNTER) + { + raise_error("Match counts can not get indexed"); + YYERROR; + } + + $$ = g_scan_set_item_new($1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + + } + ; + + _pattern_handler : BYTES_ID { - GSearchPattern *__pat; + const GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) - $$ = NULL; - else { - $$ = g_scan_pattern_handler_new(__pat, SHT_RAW); - g_object_unref(G_OBJECT(__pat)); + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Pattern not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; + } + + $$ = g_scan_pattern_handler_new_shared((const GSearchPattern *[]) { __pat }, 1, SHT_RAW); + + } + | BYTES_FUZZY_ID + { + size_t __count; + const GSearchPattern **__patterns; + + __patterns = g_scan_rule_get_local_variables(*built_rule, $1.data, &__count); + + if (__patterns == NULL) + { + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Patterns not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; } + + $$ = g_scan_pattern_handler_new_shared(__patterns, __count, SHT_RAW); + + free(__patterns); + } | BYTES_ID_COUNTER { - GSearchPattern *__pat; + const GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) - $$ = NULL; - else { - $$ = g_scan_match_counter_new(__pat); - g_object_unref(G_OBJECT(__pat)); + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Pattern not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; + } + + $$ = g_scan_pattern_handler_new_shared((const GSearchPattern *[]) { __pat }, 1, SHT_COUNTER); + + } + | BYTES_FUZZY_ID_COUNTER + { + size_t __count; + const GSearchPattern **__patterns; + + __patterns = g_scan_rule_get_local_variables(*built_rule, $1.data, &__count); + + if (__patterns == NULL) + { + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Patterns not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; } + + $$ = g_scan_pattern_handler_new_shared(__patterns, __count, SHT_COUNTER); + + free(__patterns); + } | BYTES_ID_START { - GSearchPattern *__pat; + const GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) - $$ = NULL; - else { - $$ = g_scan_pattern_handler_new(__pat, SHT_START); - g_object_unref(G_OBJECT(__pat)); + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Pattern not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; + } + + $$ = g_scan_pattern_handler_new_shared((const GSearchPattern *[]) { __pat }, 1, SHT_START); + + } + | BYTES_FUZZY_ID_START + { + size_t __count; + const GSearchPattern **__patterns; + + __patterns = g_scan_rule_get_local_variables(*built_rule, $1.data, &__count); + + if (__patterns == NULL) + { + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Patterns not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; } + + $$ = g_scan_pattern_handler_new_shared(__patterns, __count, SHT_START); + + free(__patterns); + } | BYTES_ID_LENGTH { - GSearchPattern *__pat; + const GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) - $$ = NULL; - else { - $$ = g_scan_pattern_handler_new(__pat, SHT_LENGTH); - g_object_unref(G_OBJECT(__pat)); + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Pattern not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; } + + $$ = g_scan_pattern_handler_new_shared((const GSearchPattern *[]) { __pat }, 1, SHT_LENGTH); + + } + | BYTES_FUZZY_ID_LENGTH + { + size_t __count; + const GSearchPattern **__patterns; + + __patterns = g_scan_rule_get_local_variables(*built_rule, $1.data, &__count); + + if (__patterns == NULL) + { + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Patterns not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; + } + + $$ = g_scan_pattern_handler_new_shared(__patterns, __count, SHT_LENGTH); + + free(__patterns); + } | BYTES_ID_END { - GSearchPattern *__pat; + const GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) - $$ = NULL; - else { - $$ = g_scan_pattern_handler_new(__pat, SHT_END); - g_object_unref(G_OBJECT(__pat)); + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Pattern not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; + } + + $$ = g_scan_pattern_handler_new_shared((const GSearchPattern *[]) { __pat }, 1, SHT_END); + + } + | BYTES_FUZZY_ID_END + { + size_t __count; + const GSearchPattern **__patterns; + + __patterns = g_scan_rule_get_local_variables(*built_rule, $1.data, &__count); + + if (__patterns == NULL) + { + char *_msg; + int _ret; + + _ret = asprintf(&_msg, _("Patterns not found: \"%s\""), $1.data); + + if (_ret != -1) + { + raise_error(_msg); + free(_msg); + } + + YYERROR; } + + $$ = g_scan_pattern_handler_new_shared(__patterns, __count, SHT_END); + + free(__patterns); + } ; @@ -1353,9 +1840,9 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio * * ******************************************************************************/ -static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg) +static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, sized_string_t *tmp_0, sized_string_t *tmp_1, char *msg) { - printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg); + //printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg); return 0; @@ -1382,10 +1869,6 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_ GScanRule *built_rule; /* Règle en construction */ sized_string_t tmp_0; /* Zone tampon #1 */ sized_string_t tmp_1; /* Zone tampon #2 */ - void /*GBytesPattern*/ *built_pattern; /* Motif en construction */ - char *buf; /* Zone de travail temporaire */ - size_t allocated; /* Taille de mémoire allouée */ - size_t used; /* Quantité utilisée */ yyscan_t lexstate; /* Gestion d'analyse lexicale */ YY_BUFFER_STATE state; /* Contexte d'analyse */ int status; /* Bilan d'une analyse */ @@ -1400,19 +1883,11 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_ tmp_1.data = malloc((length + 1) * sizeof(bin_t)); tmp_1.len = 0; - built_pattern = NULL; - - allocated = 256; - used = 0; - - buf = malloc(allocated * sizeof(char)); - buf[0] = '\0'; - rost_lex_init(&lexstate); state = rost__scan_bytes(text, length, lexstate); - status = yyparse(scanner, lexstate, &built_rule, &tmp_0, &tmp_1, &built_pattern, &buf, &allocated, &used); + status = yyparse(scanner, lexstate, &built_rule, &tmp_0, &tmp_1); result = (status == EXIT_SUCCESS); @@ -1423,7 +1898,7 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_ exit_szstr(&tmp_0); exit_szstr(&tmp_1); - free(buf); + g_clear_object(&built_rule); return result; |
