diff options
author | Cyrille Bagard <nocbos@gmail.com> | 2023-01-30 06:59:35 (GMT) |
---|---|---|
committer | Cyrille Bagard <nocbos@gmail.com> | 2023-01-30 06:59:35 (GMT) |
commit | db3b204dd7a71b2f74a4e69b2159a96e3ab66614 (patch) | |
tree | 34174311b7ac504f03a10a889ada7f28db7a06c0 /src/analysis/scan/grammar.y | |
parent | 34ee1bfca78e8423cfa29329fdc756569d6b1960 (diff) |
Save an initial version of rost.
Diffstat (limited to 'src/analysis/scan/grammar.y')
-rw-r--r-- | src/analysis/scan/grammar.y | 487 |
1 files changed, 487 insertions, 0 deletions
/* Provenance: diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y, new file mode 100644, index 0000000..ab64ad8 (hunk -0,0 +1,487). */

%{

#include "decl.h"
#include "tokens.h"


/* Print an error message after a failed parsing. */
static int yyerror(GContentScanner *, yyscan_t, GScanRule **, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *);

%}


%code requires {

/* The lexer state type is declared here, ahead of the generated lexer header. */
#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;

#include "scanner.h"
#include "conds/counter.h"
#include "exprs/arithmop.h"
#include "exprs/boolop.h"
#include "exprs/call.h"
#include "exprs/literal.h"
#include "exprs/str.h"
#include "exprs/relop.h"
#include "patterns/tokens/plain.h"


#if 0 /* Disabled helper, kept as found. */
#define handle_coder_conversions(c, r)                              \
    ({                                                              \
        encoding_spec *__spec;                                      \
        encoding_syntax *__syntax;                                  \
        conv_list *__list;                                          \
        bool __status;                                              \
        __spec = get_current_encoding_spec(c);                      \
        __syntax = get_current_encoding_syntax(__spec);             \
        __list = get_conversions_in_encoding_syntax(__syntax);      \
        __status = load_convs_from_raw_block(__list, r);            \
        if (!__status) YYABORT;                                     \
    })
#endif

}

%union {

    const char *cstring;                    /* Plain C string              */
    unsigned long long integer;             /* Integer value               */

    struct {
        const char *cstring;                /* String with explicit length */
        size_t len;                         /* Length of that string       */
    } sized_cstring;

    GScanRule *rule;                        /* New rule to register        */
    void/*GBytesPattern*/ *pattern;         /* New pattern to consider     */
    GScanExpression *expr;                  /* Condition expression        */

    struct {
        GScanExpression **args;             /* Arguments of a call         */
        size_t count;                       /* Number of these arguments   */
    } args_list;

}


/**
 * See
 * http://stackoverflow.com/questions/34418381/how-to-reference-lex-or-parse-parameters-in-flex-rules/34420950
 */

%define api.pure full

%parse-param { GContentScanner *scanner }
             { yyscan_t yyscanner }
             { GScanRule **built_rule }
             { void /*GBytesPattern*/ **built_pattern }
             { char **buf }
             { size_t *allocated }
             { size_t *used }

%lex-param { yyscan_t yyscanner }
           { void/*GBytesPattern*/ **built_pattern }
           { char **buf }
           { size_t *allocated }
           { size_t *used }

%code provides {

/* Prototype of the lexer entry point, matching the %lex-param list above. */
#define YY_DECL \
     int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used)

YY_DECL;

}


/* Rule structure. */

%token RAW_RULE RULE_NAME

%token STRINGS CONDITION IDENTIFIER NAME

%token BRACE_IN BRACE_OUT ASSIGN COLON

%token RAW_BLOCK

/* Pattern definitions. */

%token PLAIN_STRING MASKED_STRING

/* Literals and size suffixes. */

%token TRUE_ "true"
%token FALSE_ "false"
%token INTEGER STRING

%token KB MB GB

/* Boolean operators. */

%token AND "and"
%token OR "or"
%token NOT "not"

/* Relational operators. */

%token LT "<"
%token LE "<="
%token EQ "=="
%token NE "!="
%token GT ">"
%token GE ">="

/* String operators. */

%token CONTAINS "contains"
%token STARTSWITH "startswith"
%token ENDSWITH "endswith"
%token MATCHES "matches"
%token ICONTAINS "icontains"
%token ISTARTSWITH "istartswith"
%token IENDSWITH "iendswith"
%token IEQUALS "iequals"

/* Arithmetic operators. */

%token PLUS "+"
%token MINUS "-"
%token MUL "*"
%token DIV "\\"
%token MOD "%"

/* Punctuation. */

%token PAREN_O "("
%token PAREN_C ")"
%token COMMA ","
%token DOT "."

/* Keywords of the "<quantifier> of them" conditions. */

%token NONE "none"
%token ANY "any"
%token ALL "all"
%token OF "of"
%token THEM "them"


/* Semantic value types of the tokens carrying data. */

%type <cstring> RULE_NAME
%type <cstring> RAW_BLOCK

%type <sized_cstring> IDENTIFIER
%type <sized_cstring> NAME

%type <integer> INTEGER
%type <cstring> STRING

%type <sized_cstring> PLAIN_STRING
%type <pattern> MASKED_STRING

/* Semantic value types of the nonterminals. */

%type <rule> rule

%type <expr> cexpression
%type <expr> literal
%type <expr> callable
%type <args_list> call_args
%type <expr> bool_expr
%type <expr> rel_expr
%type <expr> str_expr
%type <expr> arithm_expr
%type <expr> set_counter


/* Operator precedence, from the lowest to the highest. */

%left OR
%left AND
%left EQ NE
%left CONTAINS STARTSWITH ENDSWITH MATCHES ICONTAINS ISTARTSWITH IENDSWITH IEQUALS
%left LT LE GT GE
%left PLUS MINUS
%left MUL DIV MOD
%right NOT


/* A discarded rule is only traced here; this destructor does not release it. */

%destructor { printf("-------- Discarding symbol %p.\n", $$); } <rule>


%%


/**
 * Sequence of rules.
 *
 * Left recursion keeps the parser stack bounded whatever the number of rules
 * and registers each rule as soon as it is complete, in source order.
 */

rules : /* empty */
      | rules rule { g_content_scanner_add_rule(scanner, $2); }
      ;


/**
 * One rule: the rule object is created by the mid-rule action, as soon as
 * its name is known, so that the 'strings' and 'condition' sections can
 * fill it through *built_rule.
 */

rule : RAW_RULE RULE_NAME
     {
         *built_rule = g_scan_rule_new($2);
         $<rule>$ = *built_rule;
     }
     BRACE_IN strings condition BRACE_OUT
     {
         /* Forward the value produced by the mid-rule action (third component). */
         $$ = $<rule>3;
     }
     ;


strings : /* empty */
        | STRINGS COLON string_decls
        ;


string_decls : string_decl
             | string_decls string_decl
             ;


/* Declaration of one named pattern, attached to the rule under construction. */

string_decl : IDENTIFIER ASSIGN PLAIN_STRING
            {
                GSearchPattern *pat;

                pat = g_plain_bytes_new((uint8_t *)$3.cstring, $3.len);
                g_search_pattern_set_name(pat, $1.cstring, $1.len);

                g_scan_rule_add_local_variable(*built_rule, pat);

                /* Drop the local reference once the pattern has been handed over. */
                g_object_unref(G_OBJECT(pat));

            }
            | IDENTIFIER ASSIGN MASKED_STRING
            {
                /* TODO: masked patterns are only traced for now; they are not registered with the rule. */
                printf("built %p\n", $3);
            }
            ;


condition : /* empty */
          | CONDITION COLON cexpression
          {
              g_scan_rule_set_match_condition(*built_rule, $3);

              /**
               * NOTE(review): an extra reference is taken on the expression
               * after it has been handed to the rule. Whether the setter keeps
               * its own reference cannot be told from this file; confirm that
               * this is not a leak.
               */
              g_object_ref(G_OBJECT($3));

          }
          ;


cexpression : IDENTIFIER
            {
                printf("named var: %s\n", "$1");

                /**
                 * TODO: pattern references are not resolved yet.
                 *
                 * Without an explicit assignment, $$ would alias the string
                 * pointer of the identifier through the semantic value union.
                 *
                 * NOTE(review): NULL is a conservative placeholder; confirm
                 * how consumers of the expression are expected to handle it.
                 */
                $$ = NULL;

            }
            | literal             { $$ = $1; }
            | callable            { $$ = $1; }
            | bool_expr           { $$ = $1; }
            | rel_expr            { $$ = $1; }
            | str_expr            { $$ = $1; }
            | arithm_expr         { $$ = $1; }
            | set_counter         { $$ = $1; }
            | "(" cexpression ")" { $$ = $2; }
            ;


/* Constant values; the KB, MB and GB suffixes scale the integer by 2^10, 2^20 and 2^30. */

literal : "true"     { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
        | "false"    { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ false }); }
        | INTEGER    { $$ = g_literal_expression_new(EVT_INTEGER, &$1); }
        | INTEGER KB { $$ = g_literal_expression_new(EVT_INTEGER, (unsigned long long []){ $1 * 1024 }); }
        | INTEGER MB { $$ = g_literal_expression_new(EVT_INTEGER, (unsigned long long []){ $1 * 1048576 }); }
        | INTEGER GB { $$ = g_literal_expression_new(EVT_INTEGER, (unsigned long long []){ $1 * 1073741824 }); }
        | STRING     { $$ = g_literal_expression_new(EVT_STRING, $1); }
        ;


/**
 * Named item, with or without arguments, optionally followed by a chain of
 * "."-separated items. Each chained item is attached to the head of the
 * chain, which remains the value of the whole expression.
 */

callable : NAME         { $$ = g_pending_call_new($1.cstring, $1.len, NULL, 0); }
         | NAME "(" ")" { $$ = g_pending_call_new($1.cstring, $1.len, NULL, 0); }
         | NAME "(" call_args ")"
         {
             size_t i;

             $$ = g_pending_call_new($1.cstring, $1.len, $3.args, $3.count);

             /* The temporary argument list is released once the call is built. */
             for (i = 0; i < $3.count; i++)
                 g_object_unref(G_OBJECT($3.args[i]));

             free($3.args);

         }
         | callable "." NAME
         {
             GScanExpression *next;

             next = g_pending_call_new($3.cstring, $3.len, NULL, 0);
             g_pending_call_attach_next(G_PENDING_CALL($1), G_PENDING_CALL(next));

             $$ = $1;

         }
         | callable "." NAME "(" ")"
         {
             GScanExpression *next;

             next = g_pending_call_new($3.cstring, $3.len, NULL, 0);
             g_pending_call_attach_next(G_PENDING_CALL($1), G_PENDING_CALL(next));

             $$ = $1;

         }
         | callable "." NAME "(" call_args ")"
         {
             GScanExpression *next;
             size_t i;

             next = g_pending_call_new($3.cstring, $3.len, $5.args, $5.count);

             /* The temporary argument list is released once the call is built. */
             for (i = 0; i < $5.count; i++)
                 g_object_unref(G_OBJECT($5.args[i]));

             free($5.args);

             g_pending_call_attach_next(G_PENDING_CALL($1), G_PENDING_CALL(next));

             $$ = $1;

         }
         ;


/**
 * Comma-separated list of arguments, collected into a heap-allocated array.
 * The parsing is aborted if the array cannot be allocated or extended.
 */

call_args : cexpression
          {
              $$.args = malloc(sizeof(GScanExpression *));

              if ($$.args == NULL)
              {
                  if ($1 != NULL)
                      g_object_unref(G_OBJECT($1));

                  YYABORT;

              }

              $$.args[0] = $1;
              $$.count = 1;

          }
          | call_args "," cexpression
          {
              GScanExpression **grown;
              size_t i;

              /* Keep the previous array reachable until the extension is known to have succeeded. */
              grown = realloc($1.args, ($1.count + 1) * sizeof(GScanExpression *));

              if (grown == NULL)
              {
                  for (i = 0; i < $1.count; i++)
                      if ($1.args[i] != NULL)
                          g_object_unref(G_OBJECT($1.args[i]));

                  free($1.args);

                  if ($3 != NULL)
                      g_object_unref(G_OBJECT($3));

                  YYABORT;

              }

              grown[$1.count] = $3;

              $$.args = grown;
              $$.count = $1.count + 1;

          }
          ;


bool_expr : cexpression "and" cexpression { $$ = g_boolean_operation_new(BOT_AND, $1, $3); }
          | cexpression "or" cexpression  { $$ = g_boolean_operation_new(BOT_OR, $1, $3); }
          | "not" "(" cexpression ")"     { $$ = g_boolean_operation_new(BOT_NOT, $3, NULL); }
          ;


rel_expr : cexpression "<" cexpression  { $$ = g_relational_operation_new(RCO_LT, $1, $3); }
         | cexpression "<=" cexpression { $$ = g_relational_operation_new(RCO_LE, $1, $3); }
         | cexpression "==" cexpression { $$ = g_relational_operation_new(RCO_EQ, $1, $3); }
         | cexpression "!=" cexpression { $$ = g_relational_operation_new(RCO_NE, $1, $3); }
         | cexpression ">" cexpression  { $$ = g_relational_operation_new(RCO_GT, $1, $3); }
         | cexpression ">=" cexpression { $$ = g_relational_operation_new(RCO_GE, $1, $3); }
         ;


/* The last argument is true for the plain operators and false for their "i"-prefixed variants. */

str_expr : cexpression "contains" cexpression    { $$ = g_string_operation_new(SOT_CONTAINS, $1, $3, true); }
         | cexpression "startswith" cexpression  { $$ = g_string_operation_new(SOT_STARTSWITH, $1, $3, true); }
         | cexpression "endswith" cexpression    { $$ = g_string_operation_new(SOT_ENDSWITH, $1, $3, true); }
         | cexpression "matches" cexpression     { $$ = g_string_operation_new(SOT_MATCHES, $1, $3, true); }
         | cexpression "icontains" cexpression   { $$ = g_string_operation_new(SOT_CONTAINS, $1, $3, false); }
         | cexpression "istartswith" cexpression { $$ = g_string_operation_new(SOT_STARTSWITH, $1, $3, false); }
         | cexpression "iendswith" cexpression   { $$ = g_string_operation_new(SOT_ENDSWITH, $1, $3, false); }
         | cexpression "iequals" cexpression     { $$ = g_string_operation_new(SOT_IEQUALS, $1, $3, false); }
         ;


/* The division relies on the "\\" alias declared for the DIV token. */

arithm_expr : cexpression "+" cexpression  { $$ = g_arithmetic_operation_new(AEO_PLUS, $1, $3); }
            | cexpression "-" cexpression  { $$ = g_arithmetic_operation_new(AEO_MINUS, $1, $3); }
            | cexpression "*" cexpression  { $$ = g_arithmetic_operation_new(AEO_MUL, $1, $3); }
            | cexpression "\\" cexpression { $$ = g_arithmetic_operation_new(AEO_DIV, $1, $3); }
            | cexpression "%" cexpression  { $$ = g_arithmetic_operation_new(AEO_MOD, $1, $3); }
            ;


/* TODO: placeholders; every form currently yields a constant 'true' literal. */

set_counter : "none" "of" "them" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
            | "any" "of" "them"  { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
            | "all" "of" "them"  { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
            ;

%%


+/****************************************************************************** +* * +* Paramètres : scanner = décodeur impliqué dans le processus. * +* temp = zone de travail à destination des lectures. * +* msg = message d'erreur. * +* * +* Description : Affiche un message d'erreur suite à l'analyse en échec. * +* * +* Retour : 0 * +* * +* Remarques : - * +* * +******************************************************************************/ + +static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg) +{ + printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg); + + return 0; + +} + + +/****************************************************************************** +* * +* Paramètres : scanner = chercheur de motifs à préparer. * +* text = définitions des règles à charger. * +* length = longueur de ces définitions. * +* * +* Description : Complète une recherche de motifs avec des règles. * +* * +* Retour : Bilan à retourner. 
* +* * +* Remarques : - * +* * +******************************************************************************/ + +bool process_rules_definitions(GContentScanner *scanner, const char *text, size_t length) +{ + bool result; /* Bilan à renvoyer */ + GScanRule *built_rule; /* Règle en construction */ + void /*GBytesPattern*/ *built_pattern; /* Motif en construction */ + char *buf; /* Zone de travail temporaire */ + size_t allocated; /* Taille de mémoire allouée */ + size_t used; /* Quantité utilisée */ + yyscan_t lexstate; /* Gestion d'analyse lexicale */ + YY_BUFFER_STATE state; /* Contexte d'analyse */ + int status; /* Bilan d'une analyse */ + + result = false; + + built_rule = NULL; + built_pattern = NULL; + + allocated = 256; + used = 0; + + buf = malloc(allocated * sizeof(char)); + buf[0] = '\0'; + + rost_lex_init(&lexstate); + + state = rost__scan_bytes(text, length, lexstate); + + status = yyparse(scanner, lexstate, &built_rule, &built_pattern, &buf, &allocated, &used); + + result = (status == EXIT_SUCCESS); + + yy_delete_buffer(state, lexstate); + + rost_lex_destroy(lexstate); + + free(buf); + + return result; + +} |