summary | refs | log | tree | commit | diff
path: root/src/analysis/scan/grammar.y
diff options
context:
space:
mode:
Diffstat (limited to 'src/analysis/scan/grammar.y')
-rw-r--r-- src/analysis/scan/grammar.y 559
1 files changed, 475 insertions, 84 deletions
diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y
index 10e1d42..c0aa52d 100644
--- a/src/analysis/scan/grammar.y
+++ b/src/analysis/scan/grammar.y
@@ -6,7 +6,7 @@
/* Affiche un message d'erreur suite à l'analyse en échec. */
-static int yyerror(GContentScanner *, yyscan_t, GScanRule **, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *);
+static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *, sized_string_t *, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *);
%}
@@ -16,34 +16,26 @@ static int yyerror(GContentScanner *, yyscan_t, GScanRule **, void/*GBytesPatter
#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;
+#include "core.h"
#include "scanner.h"
#include "exprs/access.h"
#include "exprs/arithmetic.h"
#include "exprs/call.h"
#include "exprs/counter.h"
+#include "exprs/handler.h"
#include "exprs/intersect.h"
+#include "exprs/item.h"
#include "exprs/literal.h"
#include "exprs/logical.h"
#include "exprs/set.h"
#include "exprs/relational.h"
#include "exprs/strop.h"
+#include "patterns/modifier.h"
+#include "patterns/modifiers/list.h"
+#include "patterns/tokens/hex.h"
#include "patterns/tokens/plain.h"
-
-
-#if 0 /////////////////////////////////////////////////////////////////////////::
-#define handle_coder_conversions(c, r) \
- ({ \
- encoding_spec *__spec; \
- encoding_syntax *__syntax; \
- conv_list *__list; \
- bool __status; \
- __spec = get_current_encoding_spec(c); \
- __syntax = get_current_encoding_syntax(__spec); \
- __list = get_conversions_in_encoding_syntax(__syntax); \
- __status = load_convs_from_raw_block(__list, r); \
- if (!__status) YYABORT; \
- })
-#endif ///////////////////////////////////////////////////////////////////////////
+#include "patterns/tokens/nodes/plain.h"
+#include "../../core/logs.h"
}
@@ -58,9 +50,25 @@ typedef void *yyscan_t;
+ sized_string_t *tmp_cstring; /* Série d'octets reconstituée */
+
+ struct {
+ bin_t byte; /* Valeur partielle recherchée */
+ uint8_t mask; /* Masque associé */
+ } semi_mask;
+
+
GScanRule *rule; /* Nouvelle règle à intégrer */
- void/*GBytesPattern*/ *pattern; /* Nouveau motif à considérer */
+
+
+
+ GScanTokenNode *node; /* Bribe de motif à intégrer */
+ GSearchPattern *pattern; /* Nouveau motif à considérer */
+
+ GScanTokenModifier *modifier;
+
+
GScanExpression *expr; /* Expression de condition */
struct {
@@ -78,13 +86,13 @@ typedef void *yyscan_t;
%define api.pure full
-%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
-%lex-param { yyscan_t yyscanner } { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
+%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
+%lex-param { yyscan_t yyscanner } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
%code provides {
#define YY_DECL \
- int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used)
+ int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used)
YY_DECL;
@@ -97,13 +105,26 @@ YY_DECL;
%token RULE_NAME
%token STRINGS CONDITION
-%token IDENTIFIER
+
%token BYTES_ID
%token BYTES_ID_COUNTER
-%token BYTES_ID_LOCATION
+%token BYTES_ID_START
%token BYTES_ID_LENGTH
+%token BYTES_ID_END
%token NAME
+
+%token HEX_BYTES
+%token FULL_MASK
+%token SEMI_MASK
+
+
+%token REGEX_BYTES
+%token REGEX_CLASSES
+%token REGEX_RANGE
+
+
+
%token BRACE_IN BRACE_OUT ASSIGN COLON
@@ -143,11 +164,20 @@ YY_DECL;
%token MUL "*"
%token DIV "/"
%token MOD "%"
+%token TILDE "~"
+
+%token HOOK_O "["
+%token HOOK_C "]"
+
+%token BRACKET_O "{"
+%token BRACKET_C "}"
+%token QUESTION "?"
%token PAREN_O "("
%token PAREN_C ")"
%token COMMA ","
%token DOT "."
+%token PIPE "|"
%token NONE "none"
%token ANY "any"
@@ -160,7 +190,11 @@ YY_DECL;
%type <sized_cstring> RULE_NAME
-%type <sized_cstring> IDENTIFIER BYTES_ID_COUNTER
+%type <sized_cstring> BYTES_ID
+%type <sized_cstring> BYTES_ID_COUNTER
+%type <sized_cstring> BYTES_ID_START
+%type <sized_cstring> BYTES_ID_LENGTH
+%type <sized_cstring> BYTES_ID_END
%type <sized_cstring> NAME
@@ -173,8 +207,27 @@ YY_DECL;
%type <sized_cstring> PLAIN_STRING
%type <pattern> MASKED_STRING
+%type <tmp_cstring> HEX_BYTES
+%type <unsigned_integer> FULL_MASK
+%type <semi_mask> SEMI_MASK
+
+%type <tmp_cstring> REGEX_BYTES
+
+
+%type <modifier> modifiers
+%type <modifier> _modifiers
+%type <modifier> chained_modifiers
+%type <modifier> mod_stage
+%type <modifier> modifier
+
+%type <pattern> hex_pattern
+%type <node> hex_tokens
+%type <node> hex_token
+
+
+
%type <expr> cexpression _cexpression
-%type <expr> pattern_match
+
%type <expr> literal
%type <expr> item_chain
%type <args_list> call_args
@@ -185,7 +238,16 @@ YY_DECL;
%type <expr> set_counter
%type <expr> set
%type <expr> set_items
+%type <expr> set_access
%type <expr> intersection
+%type <expr> pattern_handler
+
+
+
+
+
+%left PIPE
+
%left OR
@@ -200,6 +262,11 @@ YY_DECL;
+%left HOOK_O HOOK_C
+
+
+
+
%destructor { printf("-------- Discarding symbol %p.\n", $$); } <rule>
@@ -257,14 +324,28 @@ strings : /* empty */
;
-string_decls : string_decl
- | string_decls string_decl
- ;
+ /* List of pattern declarations: plain strings, hex patterns or regular expressions. */
+ string_decls : string_decl
+              | hex_pattern
+                {
+                    /* A hex pattern which could not be built is a parse error. */
+                    if ($1 == NULL)
+                        YYERROR;
+
+                    g_scan_rule_add_local_variable(*built_rule, $1);
+
+                    /* Drop the reference carried by the semantic value. */
+                    g_object_unref(G_OBJECT($1));
+                }
+              | regex_pattern
+              | string_decls string_decl
+              | string_decls hex_pattern
+                {
+                    /* Same handling as above, for a pattern following previous declarations. */
+                    if ($2 == NULL)
+                        YYERROR;
+
+                    g_scan_rule_add_local_variable(*built_rule, $2);
+
+                    g_object_unref(G_OBJECT($2));
+                }
+              | string_decls regex_pattern
+              ;
-string_decl : IDENTIFIER ASSIGN PLAIN_STRING
+string_decl : BYTES_ID ASSIGN PLAIN_STRING modifiers
{
GSearchPattern *__pat;
- __pat = g_plain_bytes_new((uint8_t *)$3.data, $3.len);
+ __pat = g_scan_plain_bytes_new(&$3, NULL, SPBF_NONE);
g_search_pattern_set_name(__pat, $1.data, $1.len);
g_scan_rule_add_local_variable(*built_rule, __pat);
g_object_unref(G_OBJECT(__pat));
@@ -281,7 +362,7 @@ string_decl : IDENTIFIER ASSIGN PLAIN_STRING
g_object_unref(G_OBJECT(__pat));
*/
}
- | IDENTIFIER ASSIGN MASKED_STRING
+ | BYTES_ID ASSIGN MASKED_STRING
{
printf("built %p\n", $3);
/*
@@ -301,6 +382,267 @@ string_decl : IDENTIFIER ASSIGN PLAIN_STRING
}
;
+
+/**
+ * Prise en charge des modificateurs.
+ */
+
+ /* Optional modifiers attached to a pattern; NULL when none is given. */
+ modifiers : /* empty */
+             {
+                 $$ = NULL;
+             }
+           | _modifiers
+             {
+                 /*
+                  * Forward the value explicitly: Bison only supplies the
+                  * default "$$ = $1" when a rule has no action at all, and
+                  * this rule used to carry an empty one.
+                  */
+                 $$ = $1;
+             }
+           ;
+
+ /* Either a plain sequence of modifiers or alternatives separated by "|". */
+ _modifiers : mod_stage          { $$ = $1; }
+            | chained_modifiers  { $$ = $1; }
+            ;
+
+ /*
+  * Alternatives of modifiers separated by "|".
+  *
+  * NOTE(review): only the left operand is propagated (this is what the
+  * implicit default action already did); the right operand ($3) is neither
+  * used nor released here. Confirm the intended combination.
+  */
+ chained_modifiers : _modifiers "|" _modifiers
+                     {
+                         $$ = $1;
+                     }
+                   ;
+
+ /* Sequence of modifiers applied together; several modifiers are gathered into a list. */
+ mod_stage : modifier
+             {
+                 $$ = $1;
+             }
+           | mod_stage modifier
+             {
+                 /* Wrap a lone modifier into a fresh list; reuse an existing list as is. */
+                 if (!G_IS_SCAN_MODIFIER_LIST($1))
+                 {
+                     $$ = g_scan_modifier_list_new();
+                     g_scan_modifier_list_add(G_SCAN_MODIFIER_LIST($$), $1);
+                 }
+                 else
+                     $$ = $1;
+
+                 /* A rejected modifier is reported and released here. */
+                 if (!g_scan_modifier_list_add(G_SCAN_MODIFIER_LIST($$), $2))
+                 {
+                     log_simple_message(LMT_WARNING, "modifier already taken into account!");
+                     g_object_unref(G_OBJECT($2));
+                 }
+             }
+           ;
+
+ /* Single modifier, given by name or as a parenthesized chain. */
+ modifier : NAME
+            {
+                $$ = find_scan_token_modifiers_for_name($1.data);
+
+                /* No modifier found for this name: parse error. */
+                if ($$ == NULL)
+                    YYERROR;
+            }
+          | "(" chained_modifiers ")"
+            {
+                $$ = $2;
+            }
+          ;
+
+/**
+ * Définition de motif en hexadécimal.
+ */
+
+ /*
+  * Named hexadecimal pattern: "<id> = <hex tokens>".
+  *
+  * The value is the new search pattern, or NULL when it could not be
+  * built; the rules using hex_pattern test for NULL and raise the error.
+  */
+ hex_pattern : BYTES_ID ASSIGN hex_tokens
+               {
+                   $$ = g_scan_hex_bytes_new($3);
+
+                   /* Only name a pattern which actually exists. */
+                   if ($$ != NULL)
+                       g_search_pattern_set_name($$, $1.data, $1.len);
+               }
+             ;
+
+ /* Sequence of hexadecimal tokens building one pattern. */
+ hex_tokens : hex_token
+              {
+                  $$ = $1;
+              }
+            | hex_tokens hex_token
+              {
+                  /*
+                   * Keep the head of the sequence as the value instead of
+                   * leaving $$ unassigned in a non-empty action.
+                   *
+                   * TODO: the new token ($2) is not chained to the previous
+                   * ones yet.
+                   */
+                  $$ = $1;
+              }
+            ;
+
+ /*
+  * One element of a hexadecimal pattern.
+  *
+  * Only plain bytes produce a node so far. The other alternatives are
+  * tracing stubs; they now yield NULL so that no unrelated semantic value
+  * (integer, mask structure, untyped slot) gets read as a node pointer.
+  */
+ hex_token : HEX_BYTES
+             {
+                 $$ = g_scan_token_node_plain_new($1, NULL, SPNF_NONE);
+             }
+           | FULL_MASK
+             {
+                 printf("mask len: %llu\n", $1);
+                 $$ = NULL;
+             }
+           | SEMI_MASK
+             {
+                 printf("semi mask: %hhx / %hhx \n", $1.byte, $1.mask);
+                 $$ = NULL;
+             }
+           | hex_range
+             {
+                 printf("...range...\n");
+                 $$ = NULL;
+             }
+           | "~" hex_token
+             {
+                 /* TODO: the negated token ($2) is not used yet. */
+                 printf("hex -- NOT --\n");
+                 $$ = NULL;
+             }
+           | "(" hex_token "|" hex_token ")"
+             {
+                 /* TODO: both alternatives ($2, $4) are not used yet. */
+                 printf("hex -- OR --\n");
+                 $$ = NULL;
+             }
+           ;
+
+ /* Jump of a variable number of bytes inside a hex pattern; only traced for now. */
+ hex_range : "[" "-" "]"
+             {
+                 /* Unbounded range. */
+                 printf("got inf range\n");
+             }
+           | "[" UNSIGNED_INTEGER "]"
+             {
+                 /* Single value. */
+                 printf("got range [%llu]\n", $2);
+             }
+           | "[" UNSIGNED_INTEGER "-" "]"
+             {
+                 /* Lower bound only. */
+                 printf("got range [%llu -> ]\n", $2);
+             }
+           | "[" "-" UNSIGNED_INTEGER "]"
+             {
+                 /* Upper bound only. */
+                 printf("got range [ -> %llu]\n", $3);
+             }
+           | "[" UNSIGNED_INTEGER "-" UNSIGNED_INTEGER "]"
+             {
+                 /* Both bounds. */
+                 printf("got range [%llu -> %llu]\n", $2, $4);
+             }
+           ;
+
+/**
+ * Définition de motif sous forme d'expression régulière
+ */
+
+ /* Named regular expression: "<id> = <regex tokens>"; nothing is built yet. */
+ regex_pattern : BYTES_ID ASSIGN regex_tokens
+                 {
+                     /* Placeholder: no pattern is created for now. */
+                 }
+               ;
+
+ /* Sequence of regex tokens, possibly mixed with parenthesized alternatives. */
+ regex_tokens : regex_token
+                {
+                    /* Placeholder. */
+                }
+              | regex_tokens regex_token
+                {
+                    /* Placeholder. */
+                }
+              | "(" regex_tokens_list ")"
+                {
+                    printf("regex -- OR --\n");
+                }
+              | regex_tokens "(" regex_tokens_list ")"
+                {
+                    printf("regex -- OR --\n");
+                }
+              ;
+
+
+ /* Alternatives separated by "|" inside parentheses. */
+ regex_tokens_list : regex_tokens
+                   | regex_tokens_list "|" regex_tokens
+                   ;
+
+
+ /* Elementary regex token, optionally followed by a repetition. */
+ regex_token : _regex_token
+               {
+                   /* Placeholder. */
+               }
+             | _regex_token regex_repeat
+               {
+                   /* Placeholder. */
+               }
+             ;
+
+ /* Elementary regex token without repetition; only traced for now. */
+ _regex_token : DOT
+                {
+                    printf("reg dot!\n");
+                }
+              | REGEX_BYTES
+                {
+                    /* Dump the content and the length of the byte sequence. */
+                    printf("reg bytes: '%s' (l=%zu)\n", $1->data, $1->len);
+                }
+              | REGEX_CLASSES
+                {
+                    printf("reg class!\n");
+                }
+              | "[" REGEX_RANGE "]"
+                {
+                    printf("reg range!\n");
+                }
+              ;
+
+ /* Repetition suffix applied to a regex token; only traced for now. */
+ regex_repeat : "*"
+                {
+                    printf(" .. repeat: *\n");
+                }
+              | "+"
+                {
+                    printf(" .. repeat: +\n");
+                }
+              | "?"
+                {
+                    printf(" .. repeat: ?\n");
+                }
+              | "{" UNSIGNED_INTEGER "}"
+                {
+                    /* Single count. */
+                    printf(" .. repeat {%llu}\n", $2);
+                }
+              | "{" UNSIGNED_INTEGER "," "}"
+                {
+                    /* Lower bound only. */
+                    printf(" .. repeat {%llu,}\n", $2);
+                }
+              | "{" "," UNSIGNED_INTEGER "}"
+                {
+                    /* Upper bound only. */
+                    printf(" .. repeat {,%llu}\n", $3);
+                }
+              | "{" UNSIGNED_INTEGER "," UNSIGNED_INTEGER "}"
+                {
+                    /* Both bounds. */
+                    printf(" .. repeat {%llu,%llu}\n", $2, $4);
+                }
+              ;
+
+
+
+/**
+ * Définition des conditions.
+ */
+
condition : CONDITION COLON cexpression
{
g_scan_rule_set_match_condition(*built_rule, $3);
@@ -310,49 +652,19 @@ string_decl : IDENTIFIER ASSIGN PLAIN_STRING
cexpression : _cexpression { $$ = $1; if ($$ == NULL) { printf("ERROR !!!\n"); YYERROR; } }
- _cexpression : IDENTIFIER
- {
- printf("named var: %s\n", "$1");
- $$ = NULL;
- /*
- GSearchPattern *__pat;
- GMatchCounter *__counter;
- __pat = g_scan_rule_get_local_variable(*built_rule, $1);
- if (__pat != NULL)
- {
- __counter = g_match_counter_new(__pat);
- g_scan_rule_add_condition(*built_rule, G_MATCH_CONDITION(__counter));
- g_object_unref(G_OBJECT(__counter));
- g_object_unref(G_OBJECT(__pat));
- }
- */
- }
- | literal { $$ = $1; }
- | pattern_match { $$ = $1; }
- | item_chain { $$ = $1; }
- | logical_expr { $$ = $1; }
- | relational_expr { $$ = $1; }
- | string_op { $$ = $1; }
- | arithm_expr { $$ = $1; }
- | set_counter { $$ = $1; }
- | set { $$ = $1; }
- | intersection { $$ = $1; }
- | "(" cexpression ")" { $$ = $2; }
- ;
-
- pattern_match : BYTES_ID_COUNTER
- {
- GSearchPattern *__pat;
- __pat = g_scan_rule_get_local_variable(*built_rule, $1.data);
- if (__pat == NULL)
- $$ = NULL;
- else
- {
- $$ = g_scan_match_counter_new(__pat);
- g_object_unref(G_OBJECT(__pat));
- }
- }
- ;
+ /* Any condition expression; each alternative simply forwards its value. */
+ _cexpression : literal
+                { $$ = $1; }
+              | item_chain
+                { $$ = $1; }
+              | logical_expr
+                { $$ = $1; }
+              | relational_expr
+                { $$ = $1; }
+              | string_op
+                { $$ = $1; }
+              | arithm_expr
+                { $$ = $1; }
+              | set_counter
+                { $$ = $1; }
+              | set
+                { $$ = $1; }
+              | set_access
+                { $$ = $1; }
+              | intersection
+                { $$ = $1; }
+              | pattern_handler
+                { $$ = $1; }
+              | "(" cexpression ")"
+                { $$ = $2; }
+              ;
literal : "true"
{
@@ -538,16 +850,83 @@ set_counter : "none" "of" "them" { $$ = g_scan_literal_expression_new(LVT_BOOLEA
}
;
- intersection : cexpression "in" cexpression
- {
- $$ = g_scan_sets_intersection_new($1, $3);
- g_object_unref(G_OBJECT($1));
- g_object_unref(G_OBJECT($3));
- }
- ;
-
+ /* Indexed access: "<expression> [ <expression> ]". */
+ set_access : cexpression "[" cexpression "]"
+              {
+                  $$ = g_scan_set_item_new($1, $3);
+
+                  /* Both operand values are released once the item is created. */
+                  g_object_unref(G_OBJECT($3));
+                  g_object_unref(G_OBJECT($1));
+              }
+            ;
+ /* Membership test: "<expression> in <expression>". */
+ intersection : cexpression "in" cexpression
+                {
+                    $$ = g_scan_sets_intersection_new($1, $3);
+
+                    /* Both operand values are released once the intersection is created. */
+                    g_object_unref(G_OBJECT($3));
+                    g_object_unref(G_OBJECT($1));
+                }
+              ;
+ /*
+  * Reference to a declared pattern from the condition.
+  *
+  * Each alternative looks the pattern up among the local variables of the
+  * rule being built, then wraps it into the matching expression. The value
+  * is NULL when the lookup fails.
+  */
+ pattern_handler : BYTES_ID
+                   {
+                       GSearchPattern *pat = g_scan_rule_get_local_variable(*built_rule, $1.data);
+
+                       $$ = NULL;
+
+                       if (pat != NULL)
+                       {
+                           $$ = g_scan_pattern_handler_new(pat, SHT_RAW);
+                           g_object_unref(G_OBJECT(pat));
+                       }
+                   }
+                 | BYTES_ID_COUNTER
+                   {
+                       GSearchPattern *pat = g_scan_rule_get_local_variable(*built_rule, $1.data);
+
+                       $$ = NULL;
+
+                       if (pat != NULL)
+                       {
+                           /* The counter form uses a dedicated expression. */
+                           $$ = g_scan_match_counter_new(pat);
+                           g_object_unref(G_OBJECT(pat));
+                       }
+                   }
+                 | BYTES_ID_START
+                   {
+                       GSearchPattern *pat = g_scan_rule_get_local_variable(*built_rule, $1.data);
+
+                       $$ = NULL;
+
+                       if (pat != NULL)
+                       {
+                           $$ = g_scan_pattern_handler_new(pat, SHT_START);
+                           g_object_unref(G_OBJECT(pat));
+                       }
+                   }
+                 | BYTES_ID_LENGTH
+                   {
+                       GSearchPattern *pat = g_scan_rule_get_local_variable(*built_rule, $1.data);
+
+                       $$ = NULL;
+
+                       if (pat != NULL)
+                       {
+                           $$ = g_scan_pattern_handler_new(pat, SHT_LENGTH);
+                           g_object_unref(G_OBJECT(pat));
+                       }
+                   }
+                 | BYTES_ID_END
+                   {
+                       GSearchPattern *pat = g_scan_rule_get_local_variable(*built_rule, $1.data);
+
+                       $$ = NULL;
+
+                       if (pat != NULL)
+                       {
+                           $$ = g_scan_pattern_handler_new(pat, SHT_END);
+                           g_object_unref(G_OBJECT(pat));
+                       }
+                   }
+                 ;
%%
@@ -566,7 +945,7 @@ set_counter : "none" "of" "them" { $$ = g_scan_literal_expression_new(LVT_BOOLEA
* *
******************************************************************************/
-static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg)
+static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg)
{
printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg);
@@ -593,6 +972,8 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_
{
bool result; /* Bilan à renvoyer */
GScanRule *built_rule; /* Règle en construction */
+ sized_string_t tmp_0; /* Zone tampon #1 */
+ sized_string_t tmp_1; /* Zone tampon #2 */
void /*GBytesPattern*/ *built_pattern; /* Motif en construction */
char *buf; /* Zone de travail temporaire */
size_t allocated; /* Taille de mémoire allouée */
@@ -604,6 +985,13 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_
result = false;
built_rule = NULL;
+
+ tmp_0.data = malloc((length + 1) * sizeof(bin_t));
+ tmp_0.len = 0;
+
+ tmp_1.data = malloc((length + 1) * sizeof(bin_t));
+ tmp_1.len = 0;
+
built_pattern = NULL;
allocated = 256;
@@ -616,7 +1004,7 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_
state = rost__scan_bytes(text, length, lexstate);
- status = yyparse(scanner, lexstate, &built_rule, &built_pattern, &buf, &allocated, &used);
+ status = yyparse(scanner, lexstate, &built_rule, &tmp_0, &tmp_1, &built_pattern, &buf, &allocated, &used);
result = (status == EXIT_SUCCESS);
@@ -624,6 +1012,9 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_
rost_lex_destroy(lexstate);
+ exit_szstr(&tmp_0);
+ exit_szstr(&tmp_1);
+
free(buf);
return result;