summary | refs | log | tree | commit | diff
path: root/src/analysis/scan/grammar.y
diff options
context:
space:
mode:
Diffstat (limited to 'src/analysis/scan/grammar.y')
-rw-r--r-- src/analysis/scan/grammar.y 288
1 files changed, 209 insertions, 79 deletions
diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y
index c0aa52d..64bcd5b 100644
--- a/src/analysis/scan/grammar.y
+++ b/src/analysis/scan/grammar.y
@@ -16,6 +16,8 @@ static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *,
#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;
+#include <assert.h>
+
#include "core.h"
#include "scanner.h"
#include "exprs/access.h"
@@ -34,7 +36,12 @@ typedef void *yyscan_t;
#include "patterns/modifiers/list.h"
#include "patterns/tokens/hex.h"
#include "patterns/tokens/plain.h"
+#include "patterns/tokens/nodes/any.h"
+#include "patterns/tokens/nodes/choice.h"
+#include "patterns/tokens/nodes/masked.h"
+#include "patterns/tokens/nodes/not.h"
#include "patterns/tokens/nodes/plain.h"
+#include "patterns/tokens/nodes/sequence.h"
#include "../../core/logs.h"
}
@@ -52,11 +59,11 @@ typedef void *yyscan_t;
sized_string_t *tmp_cstring; /* Rebuilt series of bytes */
- struct {
- bin_t byte; /* Valeur partielle recherchée */
- uint8_t mask; /* Masque associé */
- } semi_mask;
-
+ struct
+ {
+ sized_string_t *tmp_values; /* Series of partial bytes */
+ sized_string_t *tmp_masks; /* Associated masks */
+ } masked;
GScanRule *rule; /* New rule to integrate */
@@ -66,7 +73,8 @@ typedef void *yyscan_t;
GScanTokenNode *node; /* Pattern fragment to integrate */
GSearchPattern *pattern; /* New pattern to consider */
- GScanTokenModifier *modifier;
+ GScanTokenModifier *modifier; /* Modifier for text */
+ ScanPlainNodeFlags str_flags; /* Flags for text */
GScanExpression *expr; /* Condition expression */
@@ -114,6 +122,11 @@ YY_DECL;
%token NAME
+%token NOCASE "nocase"
+%token FULLWORD "fullword"
+%token PRIVATE "private"
+
+
%token HEX_BYTES
%token FULL_MASK
%token SEMI_MASK
@@ -128,8 +141,8 @@ YY_DECL;
%token BRACE_IN BRACE_OUT ASSIGN COLON
-%token PLAIN_STRING
-%token MASKED_STRING
+%token PLAIN_TEXT
+%token ESCAPED_TEXT
%token TRUE_ "true"
%token FALSE_ "false"
@@ -204,25 +217,33 @@ YY_DECL;
%type <rule> rule
-%type <sized_cstring> PLAIN_STRING
-%type <pattern> MASKED_STRING
+%type <sized_cstring> PLAIN_TEXT
+%type <tmp_cstring> ESCAPED_TEXT
%type <tmp_cstring> HEX_BYTES
%type <unsigned_integer> FULL_MASK
-%type <semi_mask> SEMI_MASK
+%type <masked> SEMI_MASK
%type <tmp_cstring> REGEX_BYTES
+
+%type <pattern> str_pattern
+
%type <modifier> modifiers
%type <modifier> _modifiers
%type <modifier> chained_modifiers
%type <modifier> mod_stage
%type <modifier> modifier
+%type <str_flags> str_flags
+
+
%type <pattern> hex_pattern
%type <node> hex_tokens
%type <node> hex_token
+%type <node> hex_range
+%type <node> hex_choices
@@ -294,13 +315,21 @@ rules : /* empty */
//rule : RAW_RULE RULE_NAME { printf("RULE %s\n", $2); } RAW_BLOCK { printf("BLOCK: %s\n", $4); }
-external : "include" STRING
- {
- bool __status;
- __status = g_content_scanner_include_resource(scanner, $2.data);
- if (!__status)
- YYERROR;
- }
+/**
+ * "include" directive: the text following the keyword is handed to
+ * g_content_scanner_include_resource(); a failure reported by that call
+ * is turned into a parse error. PLAIN_TEXT carries its string by value,
+ * ESCAPED_TEXT through a pointer, hence the two access forms.
+ */
+
+  external : "include" PLAIN_TEXT
+           {
+               if (!g_content_scanner_include_resource(scanner, $2.data))
+                   YYERROR;
+           }
+           | "include" ESCAPED_TEXT
+           {
+               if (!g_content_scanner_include_resource(scanner, $2->data))
+                   YYERROR;
+           }
+           ;
rule : RAW_RULE RULE_NAME
@@ -320,11 +349,16 @@ rule : RAW_RULE RULE_NAME
strings : /* empty */ /* the "strings:" section of a rule is optional */
- | STRINGS COLON string_decls
+ | STRINGS COLON bytes_decls
;
- string_decls : string_decl
+ bytes_decls : str_pattern /* one or more pattern declarations, in any order */
+ {
+ if ($1 == NULL) YYERROR; /* a NULL pattern is rejected as a parse error */
+ g_scan_rule_add_local_variable(*built_rule, $1); /* register the pattern with the rule being built */
+ g_object_unref(G_OBJECT($1)); /* drop the parser's own reference once registered */
+ }
| hex_pattern
{
if ($1 == NULL) YYERROR; /* same NULL rejection as for text patterns */
@@ -332,55 +366,51 @@ strings : /* empty */
g_object_unref(G_OBJECT($1));
}
| regex_pattern /* no action here */
- | string_decls string_decl
- | string_decls hex_pattern
+ | bytes_decls str_pattern
+ {
+ if ($2 == NULL) YYERROR; /* a NULL pattern is rejected as a parse error */
+ g_scan_rule_add_local_variable(*built_rule, $2);
+ g_object_unref(G_OBJECT($2)); /* drop the parser's own reference once registered */
+ }
+ | bytes_decls hex_pattern
{
if ($2 == NULL) YYERROR;
g_scan_rule_add_local_variable(*built_rule, $2);
g_object_unref(G_OBJECT($2));
}
- | string_decls regex_pattern
+ | bytes_decls regex_pattern /* no action here */
;
-string_decl : BYTES_ID ASSIGN PLAIN_STRING modifiers
- {
- GSearchPattern *__pat;
- __pat = g_scan_plain_bytes_new(&$3, NULL, SPBF_NONE);
- g_search_pattern_set_name(__pat, $1.data, $1.len);
- g_scan_rule_add_local_variable(*built_rule, __pat);
- g_object_unref(G_OBJECT(__pat));
-
- /*
- string_token_t *__token;
- //printf("built plain %s\n", $3.cstring);
- GBytesPattern *__pat;
- __token = create_plain_string_token($3.cstring, $3.len);
- printf("token: %p\n", __token);
- __pat = g_bytes_pattern_new();
- g_bytes_pattern_append_string(__pat, $3.cstring, $3.len);
- g_scan_rule_add_local_variable(*built_rule, $1, G_SEARCH_PATTERN(__pat));
- g_object_unref(G_OBJECT(__pat));
- */
- }
- | BYTES_ID ASSIGN MASKED_STRING
- {
- printf("built %p\n", $3);
- /*
- GBytesPattern *__pat;
- __pat = g_bytes_pattern_new();
- g_search_pattern_set_name(__pat, $1.cstring, $1.len);
- g_bytes_pattern_append_string(__pat, "\xd9\x74\x24\xf4", 4);
- g_scan_rule_add_local_variable(*built_rule, G_SEARCH_PATTERN(__pat));
- */
- /*
- GSearchPattern *__pat;
- __pat = G_SEARCH_PATTERN($3);
- if (g_search_pattern_prepare(__pat))
- g_scan_rule_add_local_variable(*built_rule, $1, __pat);
- g_clear_object(built_pattern);
- */
- }
- ;
+
+/**
+ * Plain text pattern definition.
+ *
+ * Builds a plain-text token node from the text, its modifiers and its
+ * flags, wraps it into a search pattern and names that pattern after the
+ * BYTES_ID identifier. The resulting pattern may be NULL; the consuming
+ * rules (bytes_decls) test for that and raise a parse error.
+ */
+
+  str_pattern : BYTES_ID ASSIGN PLAIN_TEXT modifiers str_flags
+              {
+                  GScanTokenNode *node;
+
+                  /* PLAIN_TEXT holds its string by value: pass its address. */
+                  node = g_scan_token_node_plain_new(&$3, $4, $5);
+
+                  $$ = g_scan_plain_bytes_new(node);
+
+                  /* Only name a pattern which was actually built. */
+                  if ($$ != NULL)
+                      g_search_pattern_set_name($$, $1.data, $1.len);
+
+                  /* Release the local reference on the node. */
+                  g_object_unref(G_OBJECT(node));
+
+              }
+              | BYTES_ID ASSIGN ESCAPED_TEXT modifiers str_flags
+              {
+                  GScanTokenNode *node;
+
+                  /* ESCAPED_TEXT already is a pointer to its string. */
+                  node = g_scan_token_node_plain_new($3, $4, $5);
+
+                  $$ = g_scan_plain_bytes_new(node);
+
+                  /* Only name a pattern which was actually built. */
+                  if ($$ != NULL)
+                      g_search_pattern_set_name($$, $1.data, $1.len);
+
+                  /* Release the local reference on the node. */
+                  g_object_unref(G_OBJECT(node));
+
+              }
+              ;
/**
@@ -450,23 +480,70 @@ string_decl : BYTES_ID ASSIGN PLAIN_STRING modifiers
}
;
+
+/**
+ * Flags attached to a text pattern.
+ *
+ * The list is left-recursive: it starts from SPNF_NONE and each keyword
+ * met ("nocase", "fullword", "private") sets one more bit.
+ */
+
+  str_flags : /* empty */
+            {
+                $$ = SPNF_NONE;
+            }
+            | str_flags "nocase"
+            {
+                $$ = $1;
+                $$ |= SPNF_CASE_INSENSITIVE;
+            }
+            | str_flags "fullword"
+            {
+                $$ = $1;
+                $$ |= SPNF_FULLWORD;
+            }
+            | str_flags "private"
+            {
+                $$ = $1;
+                $$ |= SPNF_PRIVATE;
+            }
+            ;
+
+
/**
 * Hexadecimal pattern definition.
 *
 * Wraps the token tree into a search pattern named after the BYTES_ID
 * identifier; the trailing "private" keyword selects the boolean passed
 * to g_scan_hex_bytes_new(). The resulting pattern may be NULL; the
 * consuming rules (bytes_decls) test for that and raise a parse error.
 *
 * NOTE(review): str_pattern releases its node once the pattern is built,
 * whereas $3 is never released here -- confirm whether
 * g_scan_hex_bytes_new() takes ownership of the tree or whether a
 * reference is leaked.
 */
hex_pattern : BYTES_ID ASSIGN hex_tokens
            {
- $$ = g_scan_hex_bytes_new($3);
+               $$ = g_scan_hex_bytes_new($3, false);
+
+               /* Only name a pattern which was actually built. */
+               if ($$ != NULL)
+                   g_search_pattern_set_name($$, $1.data, $1.len);
+           }
+           | BYTES_ID ASSIGN hex_tokens "private"
+           {
+               $$ = g_scan_hex_bytes_new($3, true);
+
+               /* Only name a pattern which was actually built. */
+               if ($$ != NULL)
                    g_search_pattern_set_name($$, $1.data, $1.len);
            }
            ;
hex_tokens : hex_token
           {
+              /* hex_token may yield NULL: nothing is held yet, just fail. */
+              if ($1 == NULL) YYERROR;
+
               $$ = $1;
+
           }
           | hex_tokens hex_token
           {
+              /*
+               * Bison does not discard the right-hand side symbols of a
+               * rule which calls YYERROR explicitly: the reference held
+               * on the tokens gathered so far has to be dropped here.
+               */
+              if ($2 == NULL)
+              {
+                  g_object_unref(G_OBJECT($1));
+                  YYERROR;
+              }
+
+              /* The first extra token promotes a lone node to a sequence. */
+              if (G_IS_SCAN_TOKEN_NODE_SEQUENCE($1))
+                  $$ = $1;
+              else
+              {
+                  $$ = g_scan_token_node_sequence_new($1);
+                  g_object_unref(G_OBJECT($1));
+              }
+
+              /* Append the new token, then release the local reference. */
+              g_scan_token_node_sequence_add(G_SCAN_TOKEN_NODE_SEQUENCE($$), $2);
+              g_object_unref(G_OBJECT($2));
           }
           ;
@@ -477,62 +554,115 @@ string_decl : BYTES_ID ASSIGN PLAIN_STRING modifiers
}
| FULL_MASK
          {
- printf("mask len: %llu\n", $1);
+             /* The mask length is used as both bounds of the "any" node. */
+             phys_t min;
+             phys_t max;
+
+             min = $1;
+             max = $1;
+
+             $$ = g_scan_token_node_any_new(&min, &max);
+
          }
          | SEMI_MASK
          {
- printf("semi mask: %hhx / %hhx \n", $1.byte, $1.mask);
+             size_t i;
+             masked_byte_t byte;
+
+             /* Values and masks go by pairs: one mask per partial byte. */
+             assert($1.tmp_values->len == $1.tmp_masks->len);
+
+             /* The first pair creates the node... */
+             byte.value = $1.tmp_values->data[0];
+             byte.mask = $1.tmp_masks->data[0];
+
+             $$ = g_scan_token_node_masked_new(&byte);
+
+             /* ... and the remaining ones are appended to it. */
+             for (i = 1; i < $1.tmp_values->len; i++)
+             {
+                 byte.value = $1.tmp_values->data[i];
+                 byte.mask = $1.tmp_masks->data[i];
+
+                 g_scan_token_node_masked_add(G_SCAN_TOKEN_NODE_MASKED($$), &byte);
+
+             }
+
          }
          | hex_range
          {
- printf("...range...\n");
+             $$ = $1;
          }
          | "~" hex_token
          {
-
- printf("hex -- NOT --\n");
+             /*
+              * hex_tokens checks this symbol against NULL, so it can be
+              * NULL here too: do not build a negation around nothing.
+              *
+              * NOTE(review): $2 is not released after the call, whereas
+              * hex_tokens releases its node after
+              * g_scan_token_node_sequence_new() -- confirm the ownership
+              * rule of g_scan_token_node_not_new().
+              */
+             if ($2 == NULL) YYERROR;
+
+             $$ = g_scan_token_node_not_new($2);
          }
- | "(" hex_token "|" hex_token ")"
+         | "(" hex_choices ")"
          {
-
- printf("hex -- OR --\n");
-
+             $$ = $2;
          }
          ;
hex_range : "[" "-" "]"
          {
-
- printf("got inf range\n");
-
+             /* No bound provided on either side. */
+             $$ = g_scan_token_node_any_new(NULL, NULL);
          }
          | "[" UNSIGNED_INTEGER "]"
          {
+             /* A single value is used as lower and upper bound. */
+             phys_t lower = $2;
+             phys_t upper = $2;
+
- printf("got range [%llu]\n", $2);
+             $$ = g_scan_token_node_any_new(&lower, &upper);
          }
          | "[" UNSIGNED_INTEGER "-" "]"
          {
+             /* Lower bound only. */
+             phys_t lower = $2;
+
- printf("got range [%llu -> ]\n", $2);
+             $$ = g_scan_token_node_any_new(&lower, NULL);
          }
          | "[" "-" UNSIGNED_INTEGER "]"
          {
+             /* Upper bound only. */
+             phys_t upper = $3;
- printf("got range [ -> %llu]\n", $3);
+
+             $$ = g_scan_token_node_any_new(NULL, &upper);
          }
          | "[" UNSIGNED_INTEGER "-" UNSIGNED_INTEGER "]"
          {
+             /* Both bounds provided. */
+             phys_t lower = $2;
+             phys_t upper = $4;
+
- printf("got range [%llu -> %llu]\n", $2, $4);
+             $$ = g_scan_token_node_any_new(&lower, &upper);
          }
          ;
+/**
+ * Alternatives between hexadecimal tokens: a | b | c ...
+ *
+ * The first two operands create the choice node; each further operand
+ * is appended to it. The local reference on an operand is released once
+ * it has been added. As hex_token may be NULL (hex_tokens checks for
+ * it), a NULL operand raises a parse error; Bison does not discard the
+ * right-hand side symbols on an explicit YYERROR, so whatever is already
+ * held gets released first.
+ */
+
+  hex_choices : hex_token "|" hex_token
+              {
+                  if ($1 == NULL || $3 == NULL)
+                  {
+                      if ($1 != NULL) g_object_unref(G_OBJECT($1));
+                      if ($3 != NULL) g_object_unref(G_OBJECT($3));
+                      YYERROR;
+                  }
+
+                  $$ = g_scan_token_node_choice_new();
+                  g_scan_token_node_choice_add(G_SCAN_TOKEN_NODE_CHOICE($$), $1);
+                  g_object_unref(G_OBJECT($1));
+                  g_scan_token_node_choice_add(G_SCAN_TOKEN_NODE_CHOICE($$), $3);
+                  g_object_unref(G_OBJECT($3));
+              }
+              | hex_choices "|" hex_token
+              {
+                  if ($3 == NULL)
+                  {
+                      g_object_unref(G_OBJECT($1));
+                      YYERROR;
+                  }
+
+                  $$ = $1;
+                  g_scan_token_node_choice_add(G_SCAN_TOKEN_NODE_CHOICE($$), $3);
+                  g_object_unref(G_OBJECT($3));
+              }
+              ;
+
+
/**
* Définition de motif sous forme d'expression régulière
*/