%{

#include "decl.h"
#include "tokens.h"


/* Print an error message after a parsing failure. */
static int yyerror(GContentScanner *, yyscan_t, GScanRule **, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *);

%}


%code requires {

#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;

#include "scanner.h"
#include "exprs/access.h"
#include "exprs/arithmetic.h"
#include "exprs/call.h"
#include "exprs/counter.h"
#include "exprs/intersect.h"
#include "exprs/literal.h"
#include "exprs/logical.h"
#include "exprs/set.h"
#include "exprs/relational.h"
#include "exprs/strop.h"
#include "patterns/tokens/plain.h"


#if 0 /////////////////////////////////////////////////////////////////////////::

#define handle_coder_conversions(c, r)                                  \
    ({                                                                  \
        encoding_spec *__spec;                                          \
        encoding_syntax *__syntax;                                      \
        conv_list *__list;                                              \
        bool __status;                                                  \
        __spec = get_current_encoding_spec(c);                          \
        __syntax = get_current_encoding_syntax(__spec);                 \
        __list = get_conversions_in_encoding_syntax(__syntax);          \
        __status = load_convs_from_raw_block(__list, r);                \
        if (!__status) YYABORT;                                         \
    })

#endif ///////////////////////////////////////////////////////////////////////////

}


/* Semantic values carried by tokens and non-terminals. */
%union {

    unsigned long long unsigned_integer;    /* Integer value #1            */
    signed long long signed_integer;        /* Integer value #2            */
    //double floating_number;               /* Floating point value        */
    sized_string_t sized_cstring;           /* Character string            */
    //char byte;                            /* Single byte                 */

    GScanRule *rule;                        /* New rule to register        */
    void/*GBytesPattern*/ *pattern;         /* New pattern to consider     */
    GScanExpression *expr;                  /* Condition expression        */

    struct {
        GScanExpression **args;             /* List of arguments to provide*/
        size_t count;                       /* Number of these arguments   */
    } args_list;

}


/**
 * See:
 * http://stackoverflow.com/questions/34418381/how-to-reference-lex-or-parse-parameters-in-flex-rules/34420950
 */

%define api.pure full

%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
%lex-param { yyscan_t yyscanner } { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }

%code provides {

#define YY_DECL \
    int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used)

YY_DECL;

}


%token INCLUDE "include"

%token RAW_RULE
%token RULE_NAME

%token STRINGS CONDITION
%token IDENTIFIER
%token BYTES_ID
%token BYTES_ID_COUNTER
%token BYTES_ID_LOCATION
%token BYTES_ID_LENGTH
%token NAME

%token BRACE_IN BRACE_OUT ASSIGN COLON

%token PLAIN_STRING
%token MASKED_STRING

%token TRUE_ "true"
%token FALSE_ "false"
%token SIGNED_INTEGER
%token UNSIGNED_INTEGER
%token STRING

%token KB MB GB

%token AND "and"
%token OR "or"
%token NOT "not"

%token LT "<"
%token LE "<="
%token EQ "=="
%token NE "!="
%token GT ">"
%token GE ">="

%token CONTAINS "contains"
%token STARTSWITH "startswith"
%token ENDSWITH "endswith"
%token MATCHES "matches"
%token ICONTAINS "icontains"
%token ISTARTSWITH "istartswith"
%token IENDSWITH "iendswith"
%token IEQUALS "iequals"

%token PLUS "+"
%token MINUS "-"
%token MUL "*"
%token DIV "/"
%token MOD "%"

%token PAREN_O "("
%token PAREN_C ")"
%token COMMA ","
%token DOT "."

%token NONE "none"
%token ANY "any"
%token ALL "all"
%token OF "of"
%token THEM "them"
%token IN "in"


/*
 * Each symbol is bound to the %union member its actions actually use
 * (".data"/".len" accesses, "&$n" handed to the expression constructors,
 * ".args"/".count" accesses).
 *
 * NOTE(review): MASKED_STRING is only ever printed with "%p" in this file;
 * the <pattern> tag is deduced from that single use -- confirm against the
 * lexer.
 */

%type <sized_cstring> RULE_NAME
%type <sized_cstring> IDENTIFIER BYTES_ID_COUNTER
%type <sized_cstring> NAME

%type <signed_integer> SIGNED_INTEGER
%type <unsigned_integer> UNSIGNED_INTEGER
%type <sized_cstring> STRING

%type <rule> rule

%type <sized_cstring> PLAIN_STRING
%type <pattern> MASKED_STRING

%type <expr> cexpression _cexpression

%type <expr> pattern_match
%type <expr> literal
%type <expr> item_chain
%type <args_list> call_args
%type <expr> logical_expr
%type <expr> relational_expr
%type <expr> string_op
%type <expr> arithm_expr
%type <expr> set_counter
%type <expr> set
%type <expr> set_items
%type <expr> intersection


/* Operators, from the lowest precedence to the highest one. */

%left OR
%left AND
%left EQ NE
%left CONTAINS STARTSWITH ENDSWITH MATCHES ICONTAINS ISTARTSWITH IENDSWITH IEQUALS
%left LT LE GT GE
%left PLUS MINUS
%left MUL DIV MOD
%left IN
%right NOT


/* Only traces the discarded rule; nothing is released here. */
%destructor { printf("-------- Discarding symbol %p.\n", $$); } <rule>


%%

/*

[ \t\n]+ { }

"{" {
    read_block(temp);
    yylvalp->cstring = temp; return RAW_BLOCK;
}

"}" { yy_pop_state(); }

*/


/* A definitions file: any sequence of inclusions and rules. */
rules : /* empty */
      | external rules
      | rule rules
        {
            g_content_scanner_add_rule(scanner, $1);
        }
      ;


//rule : RAW_RULE RULE_NAME { printf("RULE %s\n", $2); } RAW_BLOCK { printf("BLOCK: %s\n", $4); }


/* Inclusion of another resource; a failed inclusion is a parse error. */
external : "include" STRING
           {
               bool __status;

               __status = g_content_scanner_include_resource(scanner, $2.data);

               if (!__status)
                   YYERROR;

           }
         ;


/* One rule: a name, optional strings and a mandatory condition. */
rule : RAW_RULE RULE_NAME
       {
           //printf("--------built rule '%s'\n", $2.data);

           /*
            * The rule is created before its body gets parsed, as the actions
            * for the strings and the condition fill it through *built_rule.
            */
           *built_rule = g_scan_rule_new($2.data);
           $<rule>$ = *built_rule;
       }
       BRACE_IN strings condition BRACE_OUT
       {
           /* $3 is the value produced by the mid-rule action above. */
           $$ = $<rule>3;
           //printf("RULE %s -> %p\n", $2, $$);

           //printf("end of rule\n");
       }
     ;


strings : /* empty */
        | STRINGS COLON string_decls
        ;


string_decls : string_decl
             | string_decls string_decl
             ;


/* Declaration of one named pattern attached to the rule being built. */
string_decl : IDENTIFIER ASSIGN PLAIN_STRING
              {
                  GSearchPattern *__pat;

                  __pat = g_plain_bytes_new((uint8_t *)$3.data, $3.len);
                  g_search_pattern_set_name(__pat, $1.data, $1.len);

                  g_scan_rule_add_local_variable(*built_rule, __pat);

                  /* Release the local reference once the pattern is handed over. */
                  g_object_unref(G_OBJECT(__pat));

                  /*
                  string_token_t *__token;

                  //printf("built plain %s\n", $3.cstring);

                  GBytesPattern *__pat;

                  __token = create_plain_string_token($3.cstring, $3.len);

                  printf("token: %p\n", __token);

                  __pat = g_bytes_pattern_new();

                  g_bytes_pattern_append_string(__pat, $3.cstring, $3.len);

                  g_scan_rule_add_local_variable(*built_rule, $1, G_SEARCH_PATTERN(__pat));

                  g_object_unref(G_OBJECT(__pat));
                  */

              }
            | IDENTIFIER ASSIGN MASKED_STRING
              {
                  /* Masked strings are only traced for now; nothing is registered. */
                  printf("built %p\n", $3);

                  /*
                  GBytesPattern *__pat;

                  __pat = g_bytes_pattern_new();
                  g_search_pattern_set_name(__pat, $1.cstring, $1.len);

                  g_bytes_pattern_append_string(__pat, "\xd9\x74\x24\xf4", 4);

                  g_scan_rule_add_local_variable(*built_rule, G_SEARCH_PATTERN(__pat));
                  */

                  /*
                  GSearchPattern *__pat;

                  __pat = G_SEARCH_PATTERN($3);

                  if (g_search_pattern_prepare(__pat))
                      g_scan_rule_add_local_variable(*built_rule, $1, __pat);

                  g_clear_object(built_pattern);
                  */

              }
            ;


/* Match condition of the rule being built. */
condition : CONDITION COLON cexpression
            {
                g_scan_rule_set_match_condition(*built_rule, $3);
                g_object_unref(G_OBJECT($3));
            }
          ;


/* Checked expression: a NULL sub-expression becomes a parse error. */
cexpression : _cexpression
              {
                  $$ = $1;

                  if ($$ == NULL)
                  {
                      printf("ERROR !!!\n");
                      YYERROR;
                  }

              }
            ;


_cexpression : IDENTIFIER
               {
                   printf("named var: %s\n", "$1");

                   /*
                    * No expression is built for a bare identifier yet: the
                    * value is explicitly set to NULL so that the check in
                    * cexpression rejects it instead of reading an
                    * indeterminate pointer.
                    */
                   $$ = NULL;

                   /*
                   GSearchPattern *__pat;
                   GMatchCounter *__counter;

                   __pat = g_scan_rule_get_local_variable(*built_rule, $1);

                   if (__pat != NULL)
                   {
                       __counter = g_match_counter_new(__pat);

                       g_scan_rule_add_condition(*built_rule, G_MATCH_CONDITION(__counter));

                       g_object_unref(G_OBJECT(__counter));
                       g_object_unref(G_OBJECT(__pat));

                   }
                   */

               }
             | literal { $$ = $1; }
             | pattern_match { $$ = $1; }
             | item_chain { $$ = $1; }
             | logical_expr { $$ = $1; }
             | relational_expr { $$ = $1; }
             | string_op { $$ = $1; }
             | arithm_expr { $$ = $1; }
             | set_counter { $$ = $1; }
             | set { $$ = $1; }
             | intersection { $$ = $1; }
             | "(" cexpression ")" { $$ = $2; }
             ;


/* Match counter for a declared pattern; NULL if the name is unknown. */
pattern_match : BYTES_ID_COUNTER
                {
                    GSearchPattern *__pat;

                    __pat = g_scan_rule_get_local_variable(*built_rule, $1.data);

                    if (__pat == NULL)
                        $$ = NULL;

                    else
                    {
                        $$ = g_scan_match_counter_new(__pat);
                        g_object_unref(G_OBJECT(__pat));
                    }

                }
              ;


/* Constant values; the KB, MB and GB suffixes scale by powers of 1024. */
literal : "true"
          {
              $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ true });
          }
        | "false"
          {
              $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ false });
          }
        | SIGNED_INTEGER
          {
              $$ = g_scan_literal_expression_new(LVT_SIGNED_INTEGER, &$1);
          }
        | UNSIGNED_INTEGER
          {
              $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &$1);
          }
        | UNSIGNED_INTEGER KB
          {
              unsigned long long __converted;
              __converted = $1 * 1024;
              $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &__converted);
          }
        | UNSIGNED_INTEGER MB
          {
              unsigned long long __converted;
              __converted = $1 * 1048576;
              $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &__converted);
          }
        | UNSIGNED_INTEGER GB
          {
              unsigned long long __converted;
              __converted = $1 * 1073741824;
              $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &__converted);
          }
        | STRING
          {
              $$ = g_scan_literal_expression_new(LVT_STRING, &$1);
          }
        ;


/*
 * Dotted chain of named accesses and calls. The head of the chain is the
 * value of the whole chain; each further item is attached to it.
 *
 * NOTE(review): __next is never released here after being attached --
 * confirm whether g_scan_named_access_attach_next() takes ownership.
 */
item_chain : NAME
             {
                 $$ = g_scan_named_access_new(&$1);
             }
           | NAME "(" ")"
             {
                 $$ = g_scan_pending_call_new(&$1, NULL, 0);
             }
           | NAME "(" call_args ")"
             {
                 size_t __i;

                 $$ = g_scan_pending_call_new(&$1, $3.args, $3.count);

                 /* The temporary list and its references are no longer needed. */
                 for (__i = 0; __i < $3.count; __i++)
                     g_object_unref(G_OBJECT($3.args[__i]));

                 free($3.args);

             }
           | item_chain "." NAME
             {
                 GScanExpression *__next;

                 __next = g_scan_named_access_new(&$3);

                 g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS(__next));

                 $$ = $1;

             }
           | item_chain "." NAME "(" ")"
             {
                 GScanExpression *__next;

                 __next = g_scan_pending_call_new(&$3, NULL, 0);

                 g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS(__next));

                 $$ = $1;

             }
           | item_chain "." NAME "(" call_args ")"
             {
                 GScanExpression *__next;
                 size_t __i;

                 __next = g_scan_pending_call_new(&$3, $5.args, $5.count);

                 for (__i = 0; __i < $5.count; __i++)
                     g_object_unref(G_OBJECT($5.args[__i]));

                 free($5.args);

                 g_scan_named_access_attach_next(G_SCAN_NAMED_ACCESS($1), G_SCAN_NAMED_ACCESS(__next));

                 $$ = $1;

             }
           ;


/*
 * Comma-separated call arguments, gathered into a heap-allocated array.
 * The array holds one reference per expression; both are released by the
 * item_chain actions consuming the list. On allocation failure, everything
 * held so far is released and the parsing is aborted.
 */
call_args : cexpression
            {
                $$.count = 1;
                $$.args = malloc(sizeof(GScanExpression *));

                if ($$.args == NULL)
                {
                    g_object_unref(G_OBJECT($1));
                    YYABORT;
                }

                $$.args[0] = $1;

            }
          | call_args "," cexpression
            {
                GScanExpression **__extended;
                size_t __i;

                /* Keep the original array reachable in case realloc() fails. */
                __extended = realloc($1.args, ($1.count + 1) * sizeof(GScanExpression *));

                if (__extended == NULL)
                {
                    for (__i = 0; __i < $1.count; __i++)
                        g_object_unref(G_OBJECT($1.args[__i]));

                    free($1.args);

                    g_object_unref(G_OBJECT($3));

                    YYABORT;

                }

                __extended[$1.count] = $3;

                $$.args = __extended;
                $$.count = $1.count + 1;

            }
          ;


/*
 * NOTE(review): unlike the set and intersection actions below, the logical,
 * relational, string and arithmetic actions do not unref their operands
 * after building the operation -- confirm the ownership rules of the
 * g_scan_*_operation_new() constructors.
 */

logical_expr : cexpression "and" cexpression { $$ = g_scan_logical_operation_new(BOT_AND, $1, $3); }
             | cexpression "or" cexpression  { $$ = g_scan_logical_operation_new(BOT_OR, $1, $3); }
             | "not" "(" cexpression ")"     { $$ = g_scan_logical_operation_new(BOT_NOT, $3, NULL); }
             ;


relational_expr : cexpression "<" cexpression  { $$ = g_scan_relational_operation_new(RCO_LT, $1, $3); }
                | cexpression "<=" cexpression { $$ = g_scan_relational_operation_new(RCO_LE, $1, $3); }
                | cexpression "==" cexpression { $$ = g_scan_relational_operation_new(RCO_EQ, $1, $3); }
                | cexpression "!=" cexpression { $$ = g_scan_relational_operation_new(RCO_NE, $1, $3); }
                | cexpression ">" cexpression  { $$ = g_scan_relational_operation_new(RCO_GT, $1, $3); }
                | cexpression ">=" cexpression { $$ = g_scan_relational_operation_new(RCO_GE, $1, $3); }
                ;


/* The last argument is true for the plain operators, false for the "i" ones. */
string_op : cexpression "contains" cexpression
            {
                $$ = g_scan_string_operation_new(SOT_CONTAINS, $1, $3, true);
            }
          | cexpression "startswith" cexpression
            {
                $$ = g_scan_string_operation_new(SOT_STARTSWITH, $1, $3, true);
            }
          | cexpression "endswith" cexpression
            {
                $$ = g_scan_string_operation_new(SOT_ENDSWITH, $1, $3, true);
            }
          | cexpression "matches" cexpression
            {
                $$ = g_scan_string_operation_new(SOT_MATCHES, $1, $3, true);
            }
          | cexpression "icontains" cexpression
            {
                $$ = g_scan_string_operation_new(SOT_CONTAINS, $1, $3, false);
            }
          | cexpression "istartswith" cexpression
            {
                $$ = g_scan_string_operation_new(SOT_STARTSWITH, $1, $3, false);
            }
          | cexpression "iendswith" cexpression
            {
                $$ = g_scan_string_operation_new(SOT_ENDSWITH, $1, $3, false);
            }
          | cexpression "iequals" cexpression
            {
                $$ = g_scan_string_operation_new(SOT_IEQUALS, $1, $3, false);
            }
          ;


arithm_expr : cexpression "+" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_PLUS, $1, $3); }
            | cexpression "-" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_MINUS, $1, $3); }
            | cexpression "*" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_MUL, $1, $3); }
            | cexpression "/" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_DIV, $1, $3); }
            | cexpression "%" cexpression { $$ = g_scan_arithmetic_operation_new(AEO_MOD, $1, $3); }
            ;


/*
 * NOTE(review): the three alternatives all build the literal "true",
 * whatever the quantifier is -- this looks like a placeholder; confirm.
 */
set_counter : "none" "of" "them"
              {
                  $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ true });
              }
            | "any" "of" "them"
              {
                  $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ true });
              }
            | "all" "of" "them"
              {
                  $$ = g_scan_literal_expression_new(LVT_BOOLEAN, (bool []){ true });
              }
            ;


/* Sets: "()", "(item,)" or "(item, item, ...)". */
set : "(" ")"
      {
          $$ = g_scan_generic_set_new();
      }
    | "(" cexpression "," ")"
      {
          $$ = g_scan_generic_set_new();

          g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $2);
          g_object_unref(G_OBJECT($2));

      }
    | "(" set_items ")"
      {
          $$ = $2;
      }
    ;


set_items : cexpression "," cexpression
            {
                $$ = g_scan_generic_set_new();

                g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $1);
                g_object_unref(G_OBJECT($1));

                g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $3);
                g_object_unref(G_OBJECT($3));

            }
          | set_items "," cexpression
            {
                $$ = $1;

                g_scan_generic_set_add_item(G_SCAN_GENERIC_SET($$), $3);
                g_object_unref(G_OBJECT($3));

            }
          ;


intersection : cexpression "in" cexpression
               {
                   $$ = g_scan_sets_intersection_new($1, $3);

                   g_object_unref(G_OBJECT($1));
                   g_object_unref(G_OBJECT($3));

               }
             ;


%%


/******************************************************************************
*                                                                             *
*  Parameters  : scanner       = content scanner involved in the parsing.     *
*                yyscanner     = state of the lexical analysis.               *
*                built_rule    = rule being built (unused here).              *
*                built_pattern = pattern being built (unused here).           *
*                buf           = temporary working area (unused here).        *
*                allocated     = size of this working area (unused here).     *
*                used          = used quantity of this area (unused here).    *
*                msg           = error message.                               *
*                                                                             *
*  Description : Print an error message after a parsing failure.              *
*                                                                             *
*  Return      : 0                                                            *
*                                                                             *
*  Remarks     : The message goes to the standard output, prefixed by the     *
*                current line number of the lexer.                            *
*                                                                             *
******************************************************************************/

static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg)
{
    printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg);

    return 0;

}


/******************************************************************************
*                                                                             *
*  Parameters  : scanner = content scanner to complete.                       *
*                text    = definitions of the rules to load.                  *
*                length  = length of these definitions.                       *
*                                                                             *
*  Description : Complete a content scanner with rules.                       *
*                                                                             *
*  Return      : true if the definitions got parsed without error, false      *
*                otherwise (including when the setup of the parsing failed).  *
*                                                                             *
*  Remarks     : -                                                            *
*                                                                             *
******************************************************************************/

bool process_rules_definitions(GContentScanner *scanner, const char *text, size_t length)
{
    bool result;                            /* Status to return            */
    GScanRule *built_rule;                  /* Rule being built            */
    void /*GBytesPattern*/ *built_pattern;  /* Pattern being built         */
    char *buf;                              /* Temporary working area      */
    size_t allocated;                       /* Size of allocated memory    */
    size_t used;                            /* Used quantity               */
    yyscan_t lexstate;                      /* Lexical analysis handling   */
    YY_BUFFER_STATE state;                  /* Scanning context            */
    int status;                             /* Outcome of the parsing      */

    result = false;

    built_rule = NULL;
    built_pattern = NULL;

    allocated = 256;
    used = 0;

    buf = malloc(allocated * sizeof(char));
    if (buf == NULL) goto exit;

    buf[0] = '\0';

    /* A non-zero status means no lexer state is available. */
    if (rost_lex_init(&lexstate) != 0)
        goto free_buffer;

    state = rost__scan_bytes(text, length, lexstate);

    status = yyparse(scanner, lexstate, &built_rule, &built_pattern, &buf, &allocated, &used);

    /* yyparse() returns 0 on success only. */
    result = (status == 0);

    yy_delete_buffer(state, lexstate);

    rost_lex_destroy(lexstate);

 free_buffer:

    /* The parser receives &buf, so release its current value. */
    free(buf);

 exit:

    return result;

}