diff options
Diffstat (limited to 'src/analysis/scan/grammar.y')
-rw-r--r-- | src/analysis/scan/grammar.y | 559 |
1 files changed, 475 insertions, 84 deletions
diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y index 10e1d42..c0aa52d 100644 --- a/src/analysis/scan/grammar.y +++ b/src/analysis/scan/grammar.y @@ -6,7 +6,7 @@ /* Affiche un message d'erreur suite à l'analyse en échec. */ -static int yyerror(GContentScanner *, yyscan_t, GScanRule **, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *); +static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *, sized_string_t *, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *); %} @@ -16,34 +16,26 @@ static int yyerror(GContentScanner *, yyscan_t, GScanRule **, void/*GBytesPatter #define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; +#include "core.h" #include "scanner.h" #include "exprs/access.h" #include "exprs/arithmetic.h" #include "exprs/call.h" #include "exprs/counter.h" +#include "exprs/handler.h" #include "exprs/intersect.h" +#include "exprs/item.h" #include "exprs/literal.h" #include "exprs/logical.h" #include "exprs/set.h" #include "exprs/relational.h" #include "exprs/strop.h" +#include "patterns/modifier.h" +#include "patterns/modifiers/list.h" +#include "patterns/tokens/hex.h" #include "patterns/tokens/plain.h" - - -#if 0 /////////////////////////////////////////////////////////////////////////:: -#define handle_coder_conversions(c, r) \ - ({ \ - encoding_spec *__spec; \ - encoding_syntax *__syntax; \ - conv_list *__list; \ - bool __status; \ - __spec = get_current_encoding_spec(c); \ - __syntax = get_current_encoding_syntax(__spec); \ - __list = get_conversions_in_encoding_syntax(__syntax); \ - __status = load_convs_from_raw_block(__list, r); \ - if (!__status) YYABORT; \ - }) -#endif /////////////////////////////////////////////////////////////////////////// +#include "patterns/tokens/nodes/plain.h" +#include "../../core/logs.h" } @@ -58,9 +50,25 @@ typedef void *yyscan_t; + sized_string_t *tmp_cstring; /* Série d'octets reconstituée */ + + struct { + bin_t byte; /* Valeur partielle recherchée */ + uint8_t mask; /* Masque associé */ + } semi_mask; + + GScanRule *rule; /* Nouvelle règle à intégrer */ - void/*GBytesPattern*/ *pattern; /* Nouveau motif à considérer */ + + + + GScanTokenNode *node; /* Bribe de motif à intégrer */ + GSearchPattern *pattern; /* Nouveau motif à considérer */ + + GScanTokenModifier *modifier; + + GScanExpression *expr; /* Expression de condition */ struct { @@ -78,13 +86,13 @@ typedef void *yyscan_t; %define api.pure full -%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used } -%lex-param { yyscan_t yyscanner } { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used } +%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used } +%lex-param { yyscan_t yyscanner } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used } %code provides { #define YY_DECL \ - int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used) + int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used) YY_DECL; @@ -97,13 +105,26 @@ YY_DECL; %token RULE_NAME %token STRINGS CONDITION -%token IDENTIFIER + %token BYTES_ID %token BYTES_ID_COUNTER -%token BYTES_ID_LOCATION +%token BYTES_ID_START %token BYTES_ID_LENGTH +%token BYTES_ID_END %token NAME + +%token HEX_BYTES +%token FULL_MASK +%token SEMI_MASK + + +%token REGEX_BYTES +%token REGEX_CLASSES +%token REGEX_RANGE + + + %token BRACE_IN BRACE_OUT ASSIGN COLON @@ -143,11 +164,20 @@ YY_DECL; %token MUL "*" %token DIV "/" %token MOD "%" +%token TILDE "~" + +%token HOOK_O "[" +%token HOOK_C "]" + +%token BRACKET_O "{" +%token BRACKET_C "}" +%token QUESTION "?" %token PAREN_O "(" %token PAREN_C ")" %token COMMA "," %token DOT "." +%token PIPE "|" %token NONE "none" %token ANY "any" @@ -160,7 +190,11 @@ YY_DECL; %type <sized_cstring> RULE_NAME -%type <sized_cstring> IDENTIFIER BYTES_ID_COUNTER +%type <sized_cstring> BYTES_ID +%type <sized_cstring> BYTES_ID_COUNTER +%type <sized_cstring> BYTES_ID_START +%type <sized_cstring> BYTES_ID_LENGTH +%type <sized_cstring> BYTES_ID_END %type <sized_cstring> NAME @@ -173,8 +207,27 @@ YY_DECL; %type <sized_cstring> PLAIN_STRING %type <pattern> MASKED_STRING +%type <tmp_cstring> HEX_BYTES +%type <unsigned_integer> FULL_MASK +%type <semi_mask> SEMI_MASK + +%type <tmp_cstring> REGEX_BYTES + + +%type <modifier> modifiers +%type <modifier> _modifiers +%type <modifier> chained_modifiers +%type <modifier> mod_stage +%type <modifier> modifier + +%type <pattern> hex_pattern +%type <node> hex_tokens +%type <node> hex_token + + + %type <expr> cexpression _cexpression -%type <expr> pattern_match + %type <expr> literal %type <expr> item_chain %type <args_list> call_args @@ -185,7 +238,16 @@ YY_DECL; %type <expr> set_counter %type <expr> set %type <expr> set_items +%type <expr> set_access %type <expr> intersection +%type <expr> pattern_handler + + + + + +%left PIPE + %left OR @@ -200,6 +262,11 @@ YY_DECL; +%left HOOK_O HOOK_C + + + + %destructor { printf("-------- Discarding symbol %p.\n", $$); } <rule> @@ -257,14 +324,28 @@ strings : /* empty */ ; -string_decls : string_decl - | string_decls string_decl - ; + string_decls : string_decl + | hex_pattern + { + if ($1 == NULL) YYERROR; + g_scan_rule_add_local_variable(*built_rule, $1); + g_object_unref(G_OBJECT($1)); + } + | regex_pattern + | string_decls string_decl + | string_decls hex_pattern + { + if ($2 == NULL) YYERROR; + g_scan_rule_add_local_variable(*built_rule, $2); + g_object_unref(G_OBJECT($2)); + } + | string_decls regex_pattern + ; -string_decl : IDENTIFIER ASSIGN PLAIN_STRING +string_decl : BYTES_ID ASSIGN PLAIN_STRING modifiers { GSearchPattern *__pat; - __pat = g_plain_bytes_new((uint8_t *)$3.data, $3.len); + __pat = g_scan_plain_bytes_new(&$3, NULL, SPBF_NONE); g_search_pattern_set_name(__pat, $1.data, $1.len); g_scan_rule_add_local_variable(*built_rule, __pat); g_object_unref(G_OBJECT(__pat)); @@ -281,7 +362,7 @@ string_decl : IDENTIFIER ASSIGN PLAIN_STRING g_object_unref(G_OBJECT(__pat)); */ } - | IDENTIFIER ASSIGN MASKED_STRING + | BYTES_ID ASSIGN MASKED_STRING { printf("built %p\n", $3); /* @@ -301,6 +382,267 @@ string_decl : IDENTIFIER ASSIGN PLAIN_STRING } ; + +/** + * Prise en charge des modificateurs. + */ + + modifiers : /* empty */ + { + $$ = NULL; + } + | _modifiers + { + + // if (...) useless + + } + ; + + _modifiers : mod_stage + { + $$ = $1; + } + | chained_modifiers + { + $$ = $1; + } + ; + + chained_modifiers : _modifiers "|" _modifiers + ; + + mod_stage : modifier + { + $$ = $1; + } + | mod_stage modifier + { + bool status; + + if (G_IS_SCAN_MODIFIER_LIST($1)) + $$ = $1; + else + { + $$ = g_scan_modifier_list_new(); + g_scan_modifier_list_add(G_SCAN_MODIFIER_LIST($$), $1); + } + + status = g_scan_modifier_list_add(G_SCAN_MODIFIER_LIST($$), $2); + if (!status) + { + if (1) + log_simple_message(LMT_WARNING, "modifier already taken into account!"); + g_object_unref(G_OBJECT($2)); + } + + } + ; + + modifier : NAME + { + $$ = find_scan_token_modifiers_for_name($1.data); + if ($$ == NULL) YYERROR; + } + | "(" chained_modifiers ")" + { + $$ = $2; + } + ; + +/** + * Définition de motif en hexadécimal. + */ + + hex_pattern : BYTES_ID ASSIGN hex_tokens + { + $$ = g_scan_hex_bytes_new($3); + g_search_pattern_set_name($$, $1.data, $1.len); + } + ; + + hex_tokens : hex_token + { + $$ = $1; + } + | hex_tokens hex_token + { + + } + ; + + hex_token : HEX_BYTES + { + $$ = g_scan_token_node_plain_new($1, NULL, SPNF_NONE); + } + | FULL_MASK + { + printf("mask len: %llu\n", $1); + } + | SEMI_MASK + { + printf("semi mask: %hhx / %hhx \n", $1.byte, $1.mask); + } + | hex_range + { + printf("...range...\n"); + } + | "~" hex_token + { + + printf("hex -- NOT --\n"); + + } + | "(" hex_token "|" hex_token ")" + { + + printf("hex -- OR --\n"); + + } + ; + + hex_range : "[" "-" "]" + { + + printf("got inf range\n"); + + } + | "[" UNSIGNED_INTEGER "]" + { + + printf("got range [%llu]\n", $2); + + } + | "[" UNSIGNED_INTEGER "-" "]" + { + + printf("got range [%llu -> ]\n", $2); + + } + | "[" "-" UNSIGNED_INTEGER "]" + { + + printf("got range [ -> %llu]\n", $3); + + } + | "[" UNSIGNED_INTEGER "-" UNSIGNED_INTEGER "]" + { + + printf("got range [%llu -> %llu]\n", $2, $4); + + } + ; + +/** + * Définition de motif sous forme d'expression régulière + */ + + regex_pattern : BYTES_ID ASSIGN regex_tokens + { + + } + ; + + regex_tokens : regex_token + { + + } + | regex_tokens regex_token + { + + } + | "(" regex_tokens_list ")" + { + + printf("regex -- OR --\n"); + + } + | regex_tokens "(" regex_tokens_list ")" + { + + printf("regex -- OR --\n"); + + } + ; + + + regex_tokens_list : regex_tokens + | regex_tokens_list "|" regex_tokens + ; + + + regex_token : _regex_token + { + + } + | _regex_token regex_repeat + { + + } + ; + + _regex_token : DOT + { + printf("reg dot!\n"); + } + | REGEX_BYTES + { + printf("reg bytes: '%s' (l=%zu)\n", $1->data, $1->len); + } + | REGEX_CLASSES + { + printf("reg class!\n"); + } + | "[" REGEX_RANGE "]" + { + printf("reg range!\n"); + } + ; + + regex_repeat : "*" + { + printf(" .. repeat: *\n"); + } + | "+" + { + printf(" .. repeat: +\n"); + } + | "?" + { + printf(" .. repeat: ?\n"); + } + | "{" UNSIGNED_INTEGER "}" + { + + printf(" .. repeat {%llu}\n", $2); + + } + | "{" UNSIGNED_INTEGER "," "}" + { + + printf(" .. repeat {%llu,}\n", $2); + + } + | "{" "," UNSIGNED_INTEGER "}" + { + + printf(" .. repeat {,%llu}\n", $3); + + } + | "{" UNSIGNED_INTEGER "," UNSIGNED_INTEGER "}" + { + + printf(" .. repeat {%llu,%llu}\n", $2, $4); + + } + ; + + + +/** + * Définition des conditions. + */ + condition : CONDITION COLON cexpression { g_scan_rule_set_match_condition(*built_rule, $3); @@ -310,49 +652,19 @@ string_decl : IDENTIFIER ASSIGN PLAIN_STRING cexpression : _cexpression { $$ = $1; if ($$ == NULL) { printf("ERROR !!!\n"); YYERROR; } } - _cexpression : IDENTIFIER - { - printf("named var: %s\n", "$1"); - $$ = NULL; - /* - GSearchPattern *__pat; - GMatchCounter *__counter; - __pat = g_scan_rule_get_local_variable(*built_rule, $1); - if (__pat != NULL) - { - __counter = g_match_counter_new(__pat); - g_scan_rule_add_condition(*built_rule, G_MATCH_CONDITION(__counter)); - g_object_unref(G_OBJECT(__counter)); - g_object_unref(G_OBJECT(__pat)); - } - */ - } - | literal { $$ = $1; } - | pattern_match { $$ = $1; } - | item_chain { $$ = $1; } - | logical_expr { $$ = $1; } - | relational_expr { $$ = $1; } - | string_op { $$ = $1; } - | arithm_expr { $$ = $1; } - | set_counter { $$ = $1; } - | set { $$ = $1; } - | intersection { $$ = $1; } - | "(" cexpression ")" { $$ = $2; } - ; - - pattern_match : BYTES_ID_COUNTER - { - GSearchPattern *__pat; - __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); - if (__pat == NULL) - $$ = NULL; - else - { - $$ = g_scan_match_counter_new(__pat); - g_object_unref(G_OBJECT(__pat)); - } - } - ; + _cexpression : literal { $$ = $1; } + | item_chain { $$ = $1; } + | logical_expr { $$ = $1; } + | relational_expr { $$ = $1; } + | string_op { $$ = $1; } + | arithm_expr { $$ = $1; } + | set_counter { $$ = $1; } + | set { $$ = $1; } + | set_access { $$ = $1; } + | intersection { $$ = $1; } + | pattern_handler { $$ = $1; } + | "(" cexpression ")" { $$ = $2; } + ; literal : "true" { @@ -538,16 +850,83 @@ set_counter : "none" "of" "them" { $$ = g_scan_literal_expression_new(LVT_BOOLEA } ; - intersection : cexpression "in" cexpression - { - $$ = g_scan_sets_intersection_new($1, $3); - g_object_unref(G_OBJECT($1)); - g_object_unref(G_OBJECT($3)); - } - ; - + set_access : cexpression "[" cexpression "]" + { + $$ = g_scan_set_item_new($1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + ; + intersection : cexpression "in" cexpression + { + $$ = g_scan_sets_intersection_new($1, $3); + g_object_unref(G_OBJECT($1)); + g_object_unref(G_OBJECT($3)); + } + ; + pattern_handler : BYTES_ID + { + GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) + $$ = NULL; + else + { + $$ = g_scan_pattern_handler_new(__pat, SHT_RAW); + g_object_unref(G_OBJECT(__pat)); + } + } + | BYTES_ID_COUNTER + { + GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) + $$ = NULL; + else + { + $$ = g_scan_match_counter_new(__pat); + g_object_unref(G_OBJECT(__pat)); + } + } + | BYTES_ID_START + { + GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) + $$ = NULL; + else + { + $$ = g_scan_pattern_handler_new(__pat, SHT_START); + g_object_unref(G_OBJECT(__pat)); + } + } + | BYTES_ID_LENGTH + { + GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) + $$ = NULL; + else + { + $$ = g_scan_pattern_handler_new(__pat, SHT_LENGTH); + g_object_unref(G_OBJECT(__pat)); + } + } + | BYTES_ID_END + { + GSearchPattern *__pat; + __pat = g_scan_rule_get_local_variable(*built_rule, $1.data); + if (__pat == NULL) + $$ = NULL; + else + { + $$ = g_scan_pattern_handler_new(__pat, SHT_END); + g_object_unref(G_OBJECT(__pat)); + } + } + ; %% @@ -566,7 +945,7 @@ set_counter : "none" "of" "them" { $$ = g_scan_literal_expression_new(LVT_BOOLEA * * ******************************************************************************/ -static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg) +static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg) { printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg); @@ -593,6 +972,8 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_ { bool result; /* Bilan à renvoyer */ GScanRule *built_rule; /* Règle en construction */ + sized_string_t tmp_0; /* Zone tampon #1 */ + sized_string_t tmp_1; /* Zone tampon #2 */ void /*GBytesPattern*/ *built_pattern; /* Motif en construction */ char *buf; /* Zone de travail temporaire */ size_t allocated; /* Taille de mémoire allouée */ @@ -604,6 +985,13 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_ result = false; built_rule = NULL; + + tmp_0.data = malloc((length + 1) * sizeof(bin_t)); + tmp_0.len = 0; + + tmp_1.data = malloc((length + 1) * sizeof(bin_t)); + tmp_1.len = 0; + built_pattern = NULL; allocated = 256; @@ -616,7 +1004,7 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_ state = rost__scan_bytes(text, length, lexstate); - status = yyparse(scanner, lexstate, &built_rule, &built_pattern, &buf, &allocated, &used); + status = yyparse(scanner, lexstate, &built_rule, &tmp_0, &tmp_1, &built_pattern, &buf, &allocated, &used); result = (status == EXIT_SUCCESS); @@ -624,6 +1012,9 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_ rost_lex_destroy(lexstate); + exit_szstr(&tmp_0); + exit_szstr(&tmp_1); + free(buf); return result; |