diff options
author | Cyrille Bagard <nocbos@gmail.com> | 2023-09-12 04:43:02 (GMT) |
---|---|---|
committer | Cyrille Bagard <nocbos@gmail.com> | 2023-09-12 04:45:25 (GMT) |
commit | fed4c10b9bb1c6f99440dba3839a1e7b56b40359 (patch) | |
tree | d544bde345c16a5eff19c0f9a074c09b366f7dbe /src/analysis/scan/grammar.y | |
parent | 155c500b8933d2c7269215ea1d141d341de0a44f (diff) |
Save current state with some ROST extra features.
Diffstat (limited to 'src/analysis/scan/grammar.y')
-rw-r--r-- | src/analysis/scan/grammar.y | 288 |
1 file changed, 209 insertions, 79 deletions
diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y index c0aa52d..64bcd5b 100644 --- a/src/analysis/scan/grammar.y +++ b/src/analysis/scan/grammar.y @@ -16,6 +16,8 @@ static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *, #define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; +#include <assert.h> + #include "core.h" #include "scanner.h" #include "exprs/access.h" @@ -34,7 +36,12 @@ typedef void *yyscan_t; #include "patterns/modifiers/list.h" #include "patterns/tokens/hex.h" #include "patterns/tokens/plain.h" +#include "patterns/tokens/nodes/any.h" +#include "patterns/tokens/nodes/choice.h" +#include "patterns/tokens/nodes/masked.h" +#include "patterns/tokens/nodes/not.h" #include "patterns/tokens/nodes/plain.h" +#include "patterns/tokens/nodes/sequence.h" #include "../../core/logs.h" } @@ -52,11 +59,11 @@ typedef void *yyscan_t; sized_string_t *tmp_cstring; /* Série d'octets reconstituée */ - struct { - bin_t byte; /* Valeur partielle recherchée */ - uint8_t mask; /* Masque associé */ - } semi_mask; - + struct + { + sized_string_t *tmp_values; /* Série d'octets partiels */ + sized_string_t *tmp_masks; /* Masques associés */ + } masked; GScanRule *rule; /* Nouvelle règle à intégrer */ @@ -66,7 +73,8 @@ typedef void *yyscan_t; GScanTokenNode *node; /* Bribe de motif à intégrer */ GSearchPattern *pattern; /* Nouveau motif à considérer */ - GScanTokenModifier *modifier; + GScanTokenModifier *modifier; /* Modificateur pour texte */ + ScanPlainNodeFlags str_flags; /* Fanions pour texte */ GScanExpression *expr; /* Expression de condition */ @@ -114,6 +122,11 @@ YY_DECL; %token NAME +%token NOCASE "nocase" +%token FULLWORD "fullword" +%token PRIVATE "private" + + %token HEX_BYTES %token FULL_MASK %token SEMI_MASK @@ -128,8 +141,8 @@ YY_DECL; %token BRACE_IN BRACE_OUT ASSIGN COLON -%token PLAIN_STRING -%token MASKED_STRING +%token PLAIN_TEXT +%token ESCAPED_TEXT %token TRUE_ "true" %token FALSE_ "false" @@ -204,25 +217,33 @@ 
YY_DECL; %type <rule> rule -%type <sized_cstring> PLAIN_STRING -%type <pattern> MASKED_STRING +%type <sized_cstring> PLAIN_TEXT +%type <tmp_cstring> ESCAPED_TEXT %type <tmp_cstring> HEX_BYTES %type <unsigned_integer> FULL_MASK -%type <semi_mask> SEMI_MASK +%type <masked> SEMI_MASK %type <tmp_cstring> REGEX_BYTES + +%type <pattern> str_pattern + %type <modifier> modifiers %type <modifier> _modifiers %type <modifier> chained_modifiers %type <modifier> mod_stage %type <modifier> modifier +%type <str_flags> str_flags + + %type <pattern> hex_pattern %type <node> hex_tokens %type <node> hex_token +%type <node> hex_range +%type <node> hex_choices @@ -294,13 +315,21 @@ rules : /* empty */ //rule : RAW_RULE RULE_NAME { printf("RULE %s\n", $2); } RAW_BLOCK { printf("BLOCK: %s\n", $4); } -external : "include" STRING - { - bool __status; - __status = g_content_scanner_include_resource(scanner, $2.data); - if (!__status) - YYERROR; - } + external : "include" PLAIN_TEXT + { + bool __status; + __status = g_content_scanner_include_resource(scanner, $2.data); + if (!__status) + YYERROR; + } + | "include" ESCAPED_TEXT + { + bool __status; + __status = g_content_scanner_include_resource(scanner, $2->data); + if (!__status) + YYERROR; + } + ; rule : RAW_RULE RULE_NAME @@ -320,11 +349,16 @@ rule : RAW_RULE RULE_NAME strings : /* empty */ - | STRINGS COLON string_decls + | STRINGS COLON bytes_decls ; - string_decls : string_decl + bytes_decls : str_pattern + { + if ($1 == NULL) YYERROR; + g_scan_rule_add_local_variable(*built_rule, $1); + g_object_unref(G_OBJECT($1)); + } | hex_pattern { if ($1 == NULL) YYERROR; @@ -332,55 +366,51 @@ strings : /* empty */ g_object_unref(G_OBJECT($1)); } | regex_pattern - | string_decls string_decl - | string_decls hex_pattern + | bytes_decls str_pattern + { + if ($2 == NULL) YYERROR; + g_scan_rule_add_local_variable(*built_rule, $2); + g_object_unref(G_OBJECT($2)); + } + | bytes_decls hex_pattern { if ($2 == NULL) YYERROR; 
g_scan_rule_add_local_variable(*built_rule, $2); g_object_unref(G_OBJECT($2)); } - | string_decls regex_pattern + | bytes_decls regex_pattern ; -string_decl : BYTES_ID ASSIGN PLAIN_STRING modifiers - { - GSearchPattern *__pat; - __pat = g_scan_plain_bytes_new(&$3, NULL, SPBF_NONE); - g_search_pattern_set_name(__pat, $1.data, $1.len); - g_scan_rule_add_local_variable(*built_rule, __pat); - g_object_unref(G_OBJECT(__pat)); - - /* - string_token_t *__token; - //printf("built plain %s\n", $3.cstring); - GBytesPattern *__pat; - __token = create_plain_string_token($3.cstring, $3.len); - printf("token: %p\n", __token); - __pat = g_bytes_pattern_new(); - g_bytes_pattern_append_string(__pat, $3.cstring, $3.len); - g_scan_rule_add_local_variable(*built_rule, $1, G_SEARCH_PATTERN(__pat)); - g_object_unref(G_OBJECT(__pat)); - */ - } - | BYTES_ID ASSIGN MASKED_STRING - { - printf("built %p\n", $3); - /* - GBytesPattern *__pat; - __pat = g_bytes_pattern_new(); - g_search_pattern_set_name(__pat, $1.cstring, $1.len); - g_bytes_pattern_append_string(__pat, "\xd9\x74\x24\xf4", 4); - g_scan_rule_add_local_variable(*built_rule, G_SEARCH_PATTERN(__pat)); - */ - /* - GSearchPattern *__pat; - __pat = G_SEARCH_PATTERN($3); - if (g_search_pattern_prepare(__pat)) - g_scan_rule_add_local_variable(*built_rule, $1, __pat); - g_clear_object(built_pattern); - */ - } - ; + +/** + * Définition de motif en texte brut. 
+ */ + + str_pattern : BYTES_ID ASSIGN PLAIN_TEXT modifiers str_flags + { + GScanTokenNode *node; + + node = g_scan_token_node_plain_new(&$3, $4, $5); + + $$ = g_scan_plain_bytes_new(node); + g_search_pattern_set_name($$, $1.data, $1.len); + + g_object_unref(G_OBJECT(node)); + + } + | BYTES_ID ASSIGN ESCAPED_TEXT modifiers str_flags + { + GScanTokenNode *node; + + node = g_scan_token_node_plain_new($3, $4, $5); + + $$ = g_scan_plain_bytes_new(node); + g_search_pattern_set_name($$, $1.data, $1.len); + + g_object_unref(G_OBJECT(node)); + + } + ; /** @@ -450,23 +480,70 @@ string_decl : BYTES_ID ASSIGN PLAIN_STRING modifiers } ; + +/** + * Prise en charge des fanions pour texte. + */ + + str_flags : /* empty */ + { + $$ = SPNF_NONE; + } + | str_flags "nocase" + { + $$ = $1 | SPNF_CASE_INSENSITIVE; + } + | str_flags "fullword" + { + $$ = $1 | SPNF_FULLWORD; + } + | str_flags "private" + { + $$ = $1 | SPNF_PRIVATE; + } + ; + + /** * Définition de motif en hexadécimal. */ hex_pattern : BYTES_ID ASSIGN hex_tokens { - $$ = g_scan_hex_bytes_new($3); + $$ = g_scan_hex_bytes_new($3, false); + g_search_pattern_set_name($$, $1.data, $1.len); + } + | BYTES_ID ASSIGN hex_tokens "private" + { + $$ = g_scan_hex_bytes_new($3, true); g_search_pattern_set_name($$, $1.data, $1.len); } ; hex_tokens : hex_token { + if ($1 == NULL) YYERROR; + $$ = $1; + } | hex_tokens hex_token { + if ($2 == NULL) YYERROR; + + if (!G_IS_SCAN_TOKEN_NODE_SEQUENCE($1)) + { + $$ = g_scan_token_node_sequence_new($1); + g_object_unref(G_OBJECT($1)); + g_scan_token_node_sequence_add(G_SCAN_TOKEN_NODE_SEQUENCE($$), $2); + g_object_unref(G_OBJECT($2)); + } + else + { + $$ = $1; + g_scan_token_node_sequence_add(G_SCAN_TOKEN_NODE_SEQUENCE($$), $2); + g_object_unref(G_OBJECT($2)); + } } ; @@ -477,62 +554,115 @@ string_decl : BYTES_ID ASSIGN PLAIN_STRING modifiers } | FULL_MASK { - printf("mask len: %llu\n", $1); + phys_t min; + phys_t max; + + min = $1; + max = $1; + + $$ = g_scan_token_node_any_new(&min, &max); + } | 
SEMI_MASK { - printf("semi mask: %hhx / %hhx \n", $1.byte, $1.mask); + size_t i; + masked_byte_t byte; + + assert($1.tmp_values->len == $1.tmp_masks->len); + + byte.value = $1.tmp_values->data[0]; + byte.mask = $1.tmp_masks->data[0]; + + $$ = g_scan_token_node_masked_new(&byte); + + for (i = 1; i < $1.tmp_values->len; i++) + { + byte.value = $1.tmp_values->data[i]; + byte.mask = $1.tmp_masks->data[i]; + + g_scan_token_node_masked_add(G_SCAN_TOKEN_NODE_MASKED($$), &byte); + + } + } | hex_range { - printf("...range...\n"); + $$ = $1; } | "~" hex_token { - - printf("hex -- NOT --\n"); + $$ = g_scan_token_node_not_new($2); } - | "(" hex_token "|" hex_token ")" + | "(" hex_choices ")" { - - printf("hex -- OR --\n"); - + $$ = $2; } ; hex_range : "[" "-" "]" { - - printf("got inf range\n"); - + $$ = g_scan_token_node_any_new(NULL, NULL); } | "[" UNSIGNED_INTEGER "]" { + phys_t min; + phys_t max; + + min = $2; + max = $2; - printf("got range [%llu]\n", $2); + $$ = g_scan_token_node_any_new(&min, &max); } | "[" UNSIGNED_INTEGER "-" "]" { + phys_t min; + + min = $2; - printf("got range [%llu -> ]\n", $2); + $$ = g_scan_token_node_any_new(&min, NULL); } | "[" "-" UNSIGNED_INTEGER "]" { + phys_t max; - printf("got range [ -> %llu]\n", $3); + max = $3; + + $$ = g_scan_token_node_any_new(NULL, &max); } | "[" UNSIGNED_INTEGER "-" UNSIGNED_INTEGER "]" { + phys_t min; + phys_t max; + + min = $2; + max = $4; - printf("got range [%llu -> %llu]\n", $2, $4); + $$ = g_scan_token_node_any_new(&min, &max); } ; + hex_choices : hex_token "|" hex_token + { + $$ = g_scan_token_node_choice_new(); + g_scan_token_node_choice_add(G_SCAN_TOKEN_NODE_CHOICE($$), $1); + g_object_unref(G_OBJECT($1)); + g_scan_token_node_choice_add(G_SCAN_TOKEN_NODE_CHOICE($$), $3); + g_object_unref(G_OBJECT($3)); + } + | hex_choices "|" hex_token + { + $$ = $1; + g_scan_token_node_choice_add(G_SCAN_TOKEN_NODE_CHOICE($$), $3); + g_object_unref(G_OBJECT($3)); + } + ; + + /** * Définition de motif sous forme d'expression 
régulière */ |