summaryrefslogtreecommitdiff
path: root/src/analysis/scan/grammar.y
diff options
context:
space:
mode:
Diffstat (limited to 'src/analysis/scan/grammar.y')
-rw-r--r--src/analysis/scan/grammar.y487
1 files changed, 487 insertions, 0 deletions
diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y
new file mode 100644
index 0000000..ab64ad8
--- /dev/null
+++ b/src/analysis/scan/grammar.y
@@ -0,0 +1,487 @@
+
+%{
+
+#include "decl.h"
+#include "tokens.h"
+
+
+/* Prints an error message after a failed parse. */
+static int yyerror(GContentScanner *, yyscan_t, GScanRule **, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *);
+
+%}
+
+
+%code requires {
+
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void *yyscan_t;
+
+#include "scanner.h"
+#include "conds/counter.h"
+#include "exprs/arithmop.h"
+#include "exprs/boolop.h"
+#include "exprs/call.h"
+#include "exprs/literal.h"
+#include "exprs/str.h"
+#include "exprs/relop.h"
+#include "patterns/tokens/plain.h"
+
+
+/*
+ * Disabled helper, compiled out by the "#if 0" guard below.
+ *
+ * It walks from the coder `c` to its current encoding specification, then to
+ * that specification's current syntax and its conversion list, loads the
+ * conversions from the raw block `r` into that list and aborts the parse
+ * with YYABORT when loading fails.
+ *
+ * The body is written as a GNU statement expression.
+ */
+#if 0 /////////////////////////////////////////////////////////////////////////::
+#define handle_coder_conversions(c, r) \
+ ({ \
+ encoding_spec *__spec; \
+ encoding_syntax *__syntax; \
+ conv_list *__list; \
+ bool __status; \
+ __spec = get_current_encoding_spec(c); \
+ __syntax = get_current_encoding_syntax(__spec); \
+ __list = get_conversions_in_encoding_syntax(__syntax); \
+ __status = load_convs_from_raw_block(__list, r); \
+ if (!__status) YYABORT; \
+ })
+#endif ///////////////////////////////////////////////////////////////////////////
+
+}
+
+/* Semantic values carried by the tokens and non-terminals of this grammar. */
+%union {
+
+ //char *string; /* Character string #1 */
+ const char *cstring; /* Character string #2 */
+ unsigned long long integer; /* Integer value */
+
+ struct {
+ const char *cstring; /* Character string #3 */
+ size_t len; /* Matching length */
+ } sized_cstring;
+
+ GScanRule *rule; /* New rule to register */
+ void/*GBytesPattern*/ *pattern; /* New pattern to consider */
+ GScanExpression *expr; /* Condition expression */
+
+ struct {
+ GScanExpression **args; /* List of arguments to provide */
+ size_t count; /* Number of these arguments */
+ } args_list;
+
+}
+
+
+/**
+ * Cf.
+ * http://stackoverflow.com/questions/34418381/how-to-reference-lex-or-parse-parameters-in-flex-rules/34420950
+ */
+
+/* Ask Bison for a pure (reentrant) parser: no global yylval. */
+%define api.pure full
+
+/* Extra arguments received by yyparse() and forwarded to yyerror(). */
+%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
+/* Extra arguments passed by the parser to each lexer call. */
+%lex-param { yyscan_t yyscanner } { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
+
+%code provides {
+
+/*
+ * Prototype of the lexer entry point: the semantic value slot comes first,
+ * followed by the parameters listed in %lex-param above.
+ */
+#define YY_DECL \
+ int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used)
+
+YY_DECL;
+
+}
+
+
+/* Tokens opening a rule definition. */
+%token RAW_RULE
+%token RULE_NAME
+
+/* Section keywords and names used inside a rule body. */
+%token STRINGS CONDITION
+%token IDENTIFIER
+%token NAME
+
+/* Structural punctuation. */
+%token BRACE_IN BRACE_OUT ASSIGN COLON
+
+
+/* Declared and typed, but not referenced by any production of this grammar. */
+%token RAW_BLOCK
+
+/* Pattern bodies accepted on the right-hand side of a string declaration. */
+%token PLAIN_STRING
+%token MASKED_STRING
+
+/* Literal values. */
+%token TRUE_ "true"
+%token FALSE_ "false"
+%token INTEGER
+%token STRING
+
+/* Size suffixes which may follow an INTEGER literal. */
+%token KB MB GB
+
+/* Boolean operators. */
+%token AND "and"
+%token OR "or"
+%token NOT "not"
+
+/* Relational operators. */
+%token LT "<"
+%token LE "<="
+%token EQ "=="
+%token NE "!="
+%token GT ">"
+%token GE ">="
+
+/* String operators; the i-prefixed names are separate tokens. */
+%token CONTAINS "contains"
+%token STARTSWITH "startswith"
+%token ENDSWITH "endswith"
+%token MATCHES "matches"
+%token ICONTAINS "icontains"
+%token ISTARTSWITH "istartswith"
+%token IENDSWITH "iendswith"
+%token IEQUALS "iequals"
+
+/*
+ * Arithmetic operators.
+ * NOTE(review): the alias of DIV is a single backslash, not a slash;
+ * confirm this is what the lexer emits for division.
+ */
+%token PLUS "+"
+%token MINUS "-"
+%token MUL "*"
+%token DIV "\\"
+%token MOD "%"
+
+/* Call and grouping punctuation. */
+%token PAREN_O "("
+%token PAREN_C ")"
+%token COMMA ","
+%token DOT "."
+
+/* Keywords of the "<quantifier> of them" construct. */
+%token NONE "none"
+%token ANY "any"
+%token ALL "all"
+%token OF "of"
+%token THEM "them"
+
+
+/* Semantic value type (member of the %union) attached to each symbol. */
+%type <cstring> RULE_NAME
+%type <cstring> RAW_BLOCK
+
+
+%type <sized_cstring> IDENTIFIER
+%type <sized_cstring> NAME
+
+
+%type <integer> INTEGER
+%type <cstring> STRING
+
+%type <rule> rule
+
+%type <sized_cstring> PLAIN_STRING
+%type <pattern> MASKED_STRING
+
+%type <expr> cexpression
+%type <expr> literal
+%type <expr> callable
+%type <args_list> call_args
+%type <expr> bool_expr
+%type <expr> rel_expr
+%type <expr> str_expr
+%type <expr> arithm_expr
+%type <expr> set_counter
+
+
+
+/*
+ * Operator precedence and associativity: Bison gives later lines a higher
+ * precedence, so OR binds the loosest and NOT the tightest.
+ */
+%left OR
+%left AND
+%left EQ NE
+%left CONTAINS STARTSWITH ENDSWITH MATCHES ICONTAINS ISTARTSWITH IENDSWITH IEQUALS
+%left LT LE GT GE
+%left PLUS MINUS
+%left MUL DIV MOD
+%right NOT
+
+
+
+
+/*
+ * Run when the parser discards a <rule> value: only prints its address.
+ * NOTE(review): the discarded rule object is not released here.
+ */
+%destructor { printf("-------- Discarding symbol %p.\n", $$); } <rule>
+
+
+%%
+
+
+
+ /*
+
+
+<raw_block>[ \t\n]+ { }
+<raw_block>"{" {
+ read_block(temp);
+ yylvalp->cstring = temp; return RAW_BLOCK;
+ }
+<raw_block>"}" { yy_pop_state(); }
+
+ */
+
+
+/*
+ * Start symbol: a possibly empty sequence of rules, each one registered in
+ * the content scanner.
+ *
+ * The list is right-recursive, so the action of a given rule only runs once
+ * all the following rules have been reduced: rules are therefore registered
+ * from the last one of the input back to the first one.
+ */
+rules : /* empty */
+ | rule rules { g_content_scanner_add_rule(scanner, $1); }
+
+ //rule : RAW_RULE RULE_NAME { printf("RULE %s\n", $2); } RAW_BLOCK { printf("BLOCK: %s\n", $4); }
+
+/*
+ * One complete rule: its name, then a braced body holding an optional
+ * strings section followed by an optional condition section.
+ */
+rule : RAW_RULE RULE_NAME
+ {
+ /*
+ * Mid-rule action: the rule object is created as soon as its name
+ * is known and published through built_rule, so that the nested
+ * strings and condition productions can fill it.
+ */
+ *built_rule = g_scan_rule_new($2);
+ $<rule>$ = *built_rule;
+ }
+ BRACE_IN strings condition BRACE_OUT
+ {
+ /* The mid-rule action above counts as the third component. */
+ $$ = $<rule>3;
+ //printf("RULE %s -> %p\n", $2, $$);
+ }
+
+
+
+
+/* Optional strings section: the STRINGS keyword, a colon, then declarations. */
+strings : /* empty */
+ | STRINGS COLON string_decls
+ ;
+
+
+/* One or more pattern declarations (left-recursive list). */
+string_decls : string_decl
+ | string_decls string_decl
+ ;
+
+/* One pattern declaration of the strings section: an identifier bound to a pattern. */
+string_decl : IDENTIFIER ASSIGN PLAIN_STRING
+              {
+                  /*
+                   * Build a plain byte pattern from the token content, name it
+                   * after the identifier and attach it to the rule being built.
+                   */
+                  GSearchPattern *plain = g_plain_bytes_new((uint8_t *)$3.cstring, $3.len);
+
+                  g_search_pattern_set_name(plain, $1.cstring, $1.len);
+                  g_scan_rule_add_local_variable(*built_rule, plain);
+
+                  /* Drop the reference held by this action. */
+                  g_object_unref(G_OBJECT(plain));
+              }
+            | IDENTIFIER ASSIGN MASKED_STRING
+              {
+                  /*
+                   * TODO: masked patterns are only traced for now; nothing is
+                   * named or attached to the rule under construction.
+                   */
+                  printf("built %p\n", $3);
+              }
+            ;
+
+/*
+ * Optional condition section: the CONDITION keyword, a colon, then the
+ * expression deciding whether the rule matches.
+ */
+condition : /* empty */
+          | CONDITION COLON cexpression
+            {
+                g_scan_rule_set_match_condition(*built_rule, $3);
+
+                /*
+                 * Release the reference owned by the parser, as done for every
+                 * other object handed over in this grammar; taking an extra
+                 * reference here kept the expression alive forever.
+                 * NOTE(review): assumes the setter acquires its own reference,
+                 * like g_scan_rule_add_local_variable() is used above.
+                 */
+                g_object_unref(G_OBJECT($3));
+            }
+          ;
+
+/*
+ * Any condition expression: a pattern identifier, a literal, a call chain,
+ * one of the operator families, a set counter or a parenthesized expression.
+ */
+cexpression : IDENTIFIER
+              {
+                  /*
+                   * TODO: references to local patterns are not implemented yet;
+                   * the identifier is only traced. The length is given
+                   * explicitly since the token carries its own size.
+                   */
+                  printf("named var: %.*s\n", (int)$1.len, $1.cstring);
+
+                  /*
+                   * The result must be set explicitly: left untouched, it would
+                   * alias the string pointer of the token through the union and
+                   * be used later as an expression object.
+                   */
+                  $$ = NULL;
+              }
+            | literal               { $$ = $1; }
+            | callable              { $$ = $1; }
+            | bool_expr             { $$ = $1; }
+            | rel_expr              { $$ = $1; }
+            | str_expr              { $$ = $1; }
+            | arithm_expr           { $$ = $1; }
+            | set_counter           { $$ = $1; }
+            | "(" cexpression ")"   { $$ = $2; }
+            ;
+
+/*
+ * Literal values: booleans, integers (optionally scaled by a KB, MB or GB
+ * suffix) and strings. Each value is passed by address, except the string
+ * which is passed as is.
+ */
+literal : "true"
+          {
+              $$ = g_literal_expression_new(EVT_BOOLEAN, &(bool){ true });
+          }
+        | "false"
+          {
+              $$ = g_literal_expression_new(EVT_BOOLEAN, &(bool){ false });
+          }
+        | INTEGER
+          {
+              $$ = g_literal_expression_new(EVT_INTEGER, &$1);
+          }
+        | INTEGER KB
+          {
+              /* 2^10 bytes */
+              $$ = g_literal_expression_new(EVT_INTEGER, &(unsigned long long){ $1 * (1ULL << 10) });
+          }
+        | INTEGER MB
+          {
+              /* 2^20 bytes */
+              $$ = g_literal_expression_new(EVT_INTEGER, &(unsigned long long){ $1 * (1ULL << 20) });
+          }
+        | INTEGER GB
+          {
+              /* 2^30 bytes */
+              $$ = g_literal_expression_new(EVT_INTEGER, &(unsigned long long){ $1 * (1ULL << 30) });
+          }
+        | STRING
+          {
+              $$ = g_literal_expression_new(EVT_STRING, $1);
+          }
+        ;
+
+/*
+ * A call chain: a name with optional parenthesized arguments, optionally
+ * followed by dot-separated further calls attached to the head of the chain.
+ * The value of the production is always the head call.
+ */
+callable : NAME
+           {
+               $$ = g_pending_call_new($1.cstring, $1.len, NULL, 0);
+           }
+         | NAME "(" ")"
+           {
+               $$ = g_pending_call_new($1.cstring, $1.len, NULL, 0);
+           }
+         | NAME "(" call_args ")"
+           {
+               size_t idx;
+
+               $$ = g_pending_call_new($1.cstring, $1.len, $3.args, $3.count);
+
+               /* The temporary argument list is released once the call is built. */
+               for (idx = 0; idx != $3.count; ++idx)
+                   g_object_unref(G_OBJECT($3.args[idx]));
+
+               free($3.args);
+           }
+         | callable "." NAME
+           {
+               GScanExpression *link = g_pending_call_new($3.cstring, $3.len, NULL, 0);
+
+               g_pending_call_attach_next(G_PENDING_CALL($1), G_PENDING_CALL(link));
+
+               $$ = $1;
+           }
+         | callable "." NAME "(" ")"
+           {
+               GScanExpression *link = g_pending_call_new($3.cstring, $3.len, NULL, 0);
+
+               g_pending_call_attach_next(G_PENDING_CALL($1), G_PENDING_CALL(link));
+
+               $$ = $1;
+           }
+         | callable "." NAME "(" call_args ")"
+           {
+               GScanExpression *link = g_pending_call_new($3.cstring, $3.len, $5.args, $5.count);
+               size_t idx;
+
+               /* The temporary argument list is released once the call is built. */
+               for (idx = 0; idx != $5.count; ++idx)
+                   g_object_unref(G_OBJECT($5.args[idx]));
+
+               free($5.args);
+
+               g_pending_call_attach_next(G_PENDING_CALL($1), G_PENDING_CALL(link));
+
+               $$ = $1;
+           }
+         ;
+
+/*
+ * Comma-separated list of call arguments, collected into a heap array which
+ * the enclosing callable production releases once the call is built.
+ *
+ * On allocation failure, the expressions collected so far are released and
+ * the parse is aborted.
+ */
+call_args : cexpression
+            {
+                $$.args = malloc(sizeof(GScanExpression *));
+
+                if ($$.args == NULL)
+                {
+                    g_object_unref(G_OBJECT($1));
+                    YYABORT;
+                }
+
+                $$.args[0] = $1;
+                $$.count = 1;
+            }
+          | call_args "," cexpression
+            {
+                GScanExpression **grown;
+                size_t idx;
+
+                /* Keep the previous array reachable until the growth succeeded. */
+                grown = realloc($1.args, ($1.count + 1) * sizeof(GScanExpression *));
+
+                if (grown == NULL)
+                {
+                    for (idx = 0; idx < $1.count; idx++)
+                        g_object_unref(G_OBJECT($1.args[idx]));
+
+                    free($1.args);
+                    g_object_unref(G_OBJECT($3));
+                    YYABORT;
+                }
+
+                grown[$1.count] = $3;
+
+                $$.args = grown;
+                $$.count = $1.count + 1;
+            }
+          ;
+
+/*
+ * Boolean operators. The negation has a single operand (the second one is
+ * NULL) and its syntax requires parentheses around that operand.
+ */
+bool_expr : cexpression "and" cexpression { $$ = g_boolean_operation_new(BOT_AND, $1, $3); }
+ | cexpression "or" cexpression { $$ = g_boolean_operation_new(BOT_OR, $1, $3); }
+ | "not" "(" cexpression ")" { $$ = g_boolean_operation_new(BOT_NOT, $3, NULL); }
+ ;
+
+/*
+ * Relational comparisons between two expressions; each operator maps to its
+ * own comparison kind.
+ */
+rel_expr : cexpression "<" cexpression  { $$ = g_relational_operation_new(RCO_LT, $1, $3); }
+         | cexpression "<=" cexpression { $$ = g_relational_operation_new(RCO_LE, $1, $3); }
+         | cexpression "==" cexpression { $$ = g_relational_operation_new(RCO_EQ, $1, $3); }
+         | cexpression "!=" cexpression { $$ = g_relational_operation_new(RCO_NE, $1, $3); }
+         | cexpression ">" cexpression  { $$ = g_relational_operation_new(RCO_GT, $1, $3); }
+           /* Greater-or-equal must not be built as a strict comparison. */
+         | cexpression ">=" cexpression { $$ = g_relational_operation_new(RCO_GE, $1, $3); }
+         ;
+
+/*
+ * String operators. The last argument is true for the plain operators and
+ * false for the i-prefixed ones -- presumably a case-sensitivity flag, to be
+ * confirmed against exprs/str.h. The i-prefixed contains, startswith and
+ * endswith reuse the operation kind of their plain counterpart, while
+ * "iequals" has a kind of its own.
+ */
+str_expr : cexpression "contains" cexpression { $$ = g_string_operation_new(SOT_CONTAINS, $1, $3, true); }
+ | cexpression "startswith" cexpression { $$ = g_string_operation_new(SOT_STARTSWITH, $1, $3, true); }
+ | cexpression "endswith" cexpression { $$ = g_string_operation_new(SOT_ENDSWITH, $1, $3, true); }
+ | cexpression "matches" cexpression { $$ = g_string_operation_new(SOT_MATCHES, $1, $3, true); }
+ | cexpression "icontains" cexpression { $$ = g_string_operation_new(SOT_CONTAINS, $1, $3, false); }
+ | cexpression "istartswith" cexpression { $$ = g_string_operation_new(SOT_STARTSWITH, $1, $3, false); }
+ | cexpression "iendswith" cexpression { $$ = g_string_operation_new(SOT_ENDSWITH, $1, $3, false); }
+ | cexpression "iequals" cexpression { $$ = g_string_operation_new(SOT_IEQUALS, $1, $3, false); }
+ ;
+
+/*
+ * Binary arithmetic operators. The division token is spelled with a
+ * backslash alias in this grammar.
+ */
+arithm_expr : cexpression "+" cexpression { $$ = g_arithmetic_operation_new(AEO_PLUS, $1, $3); }
+ | cexpression "-" cexpression { $$ = g_arithmetic_operation_new(AEO_MINUS, $1, $3); }
+ | cexpression "*" cexpression { $$ = g_arithmetic_operation_new(AEO_MUL, $1, $3); }
+ | cexpression "\\" cexpression { $$ = g_arithmetic_operation_new(AEO_DIV, $1, $3); }
+ | cexpression "%" cexpression { $$ = g_arithmetic_operation_new(AEO_MOD, $1, $3); }
+ ;
+
+/*
+ * Quantifiers over the patterns of the rule.
+ * NOTE(review): the three forms are placeholders for now, each one builds
+ * the same constant boolean literal (true) whatever the quantifier.
+ */
+set_counter : "none" "of" "them" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
+ | "any" "of" "them" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
+ | "all" "of" "them" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
+ ;
+
+%%
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : scanner       = content scanner involved in the process.     *
+*                yyscanner     = lexer state, used to get the current line.   *
+*                built_rule    = rule under construction (unused here).       *
+*                built_pattern = pattern under construction (unused here).    *
+*                buf           = temporary work area (unused here).           *
+*                allocated     = size of that work area (unused here).        *
+*                used          = used quantity of that area (unused here).    *
+*                msg           = error message.                               *
+*                                                                             *
+*  Description : Prints an error message after a failed parse.                *
+*                                                                             *
+*  Return      : 0                                                            *
+*                                                                             *
+*  Remarks     : The message goes to the standard output, prefixed with the   *
+*                current line number of the lexer.                            *
+*                                                                             *
+******************************************************************************/
+
+static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg)
+{
+ printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg);
+
+ return 0;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : scanner = content scanner to complete.                       *
+*                text    = definitions of the rules to load.                  *
+*                length  = length of these definitions.                       *
+*                                                                             *
+*  Description : Completes a content scanner with rules read from a text.     *
+*                                                                             *
+*  Return      : true if the whole text has been parsed, false on a syntax    *
+*                error or if a resource needed by the parsing is missing.     *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+bool process_rules_definitions(GContentScanner *scanner, const char *text, size_t length)
+{
+    bool result;                            /* Status to return            */
+    GScanRule *built_rule;                  /* Rule under construction     */
+    void /*GBytesPattern*/ *built_pattern;  /* Pattern under construction  */
+    char *buf;                              /* Temporary work area         */
+    size_t allocated;                       /* Size of allocated memory    */
+    size_t used;                            /* Used quantity               */
+    yyscan_t lexstate;                      /* Lexer state                 */
+    YY_BUFFER_STATE state;                  /* Scanning context            */
+    int status;                             /* Outcome of the parsing      */
+
+    result = false;
+
+    built_rule = NULL;
+    built_pattern = NULL;
+
+    allocated = 256;
+    used = 0;
+
+    /* The work area is written right away: stop here without memory. */
+    buf = malloc(allocated * sizeof(char));
+    if (buf == NULL) goto exit;
+
+    buf[0] = '\0';
+
+    /* The lexer initialization reports a failure with a non-zero value. */
+    if (rost_lex_init(&lexstate) != 0)
+        goto free_buf;
+
+    state = rost__scan_bytes(text, length, lexstate);
+
+    status = yyparse(scanner, lexstate, &built_rule, &built_pattern, &buf, &allocated, &used);
+
+    /* yyparse() returns 0 on success and a non-zero value otherwise. */
+    result = (status == 0);
+
+    yy_delete_buffer(state, lexstate);
+
+    rost_lex_destroy(lexstate);
+
+ free_buf:
+
+    free(buf);
+
+ exit:
+
+    return result;
+
+}