1 files changed, 487 insertions, 0 deletions
diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y
new file mode 100644
index 0000000..ab64ad8
--- /dev/null
+++ b/src/analysis/scan/grammar.y
@@ -0,0 +1,487 @@
+
+%{
+/* Prologue: project headers and the forward declaration of the error callback. */
+#include "decl.h"
+#include "tokens.h"
+
+
+/* Prints an error message after a failed parsing. */
+static int yyerror(GContentScanner *, yyscan_t, GScanRule **, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *);
+
+%}
+
+
+%code requires {
+/* Types and headers needed to declare the semantic values and the parse parameters. */
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void *yyscan_t;
+
+#include "scanner.h"
+#include "conds/counter.h"
+#include "exprs/arithmop.h"
+#include "exprs/boolop.h"
+#include "exprs/call.h"
+#include "exprs/literal.h"
+#include "exprs/str.h"
+#include "exprs/relop.h"
+#include "patterns/tokens/plain.h"
+
+/* Disabled (#if 0) leftover macro: calls load_convs_from_raw_block() and aborts the parse when it fails. */
+#if 0 /////////////////////////////////////////////////////////////////////////::
+#define handle_coder_conversions(c, r)                              \
+    ({                                                              \
+        encoding_spec *__spec;                                      \
+        encoding_syntax *__syntax;                                  \
+        conv_list *__list;                                          \
+        bool __status;                                              \
+        __spec = get_current_encoding_spec(c);                      \
+        __syntax = get_current_encoding_syntax(__spec);             \
+        __list = get_conversions_in_encoding_syntax(__syntax);      \
+        __status = load_convs_from_raw_block(__list, r);            \
+        if (!__status) YYABORT;                                     \
+    })
+#endif ///////////////////////////////////////////////////////////////////////////
+
+}
+
+%union {
+    /* Semantic values carried by tokens and non-terminals. */
+    //char *string;                           /* String #1                   */
+    const char *cstring;                    /* String #2                   */
+    unsigned long long integer;             /* Integer value               */
+
+    struct {
+        const char *cstring;                /* String #3                   */
+        size_t len;                         /* Corresponding size          */
+    } sized_cstring;
+
+    GScanRule *rule;                        /* New rule to integrate       */
+    void/*GBytesPattern*/ *pattern;                 /* New pattern to consider     */
+    GScanExpression *expr;                  /* Condition expression        */
+
+    struct {
+        GScanExpression **args;             /* Arguments to provide        */
+        size_t count;                       /* Number of these arguments   */
+    } args_list;
+
+}
+
+
+/**
+ * Pure (reentrant) parser setup; about passing extra parameters to the parser and lexer, see
+ * http://stackoverflow.com/questions/34418381/how-to-reference-lex-or-parse-parameters-in-flex-rules/34420950
+ */
+
+%define api.pure full
+
+%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
+%lex-param { yyscan_t yyscanner } { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
+
+%code provides {
+/* Prototype of the lexer entry point: the semantic value slot followed by the %lex-param list. */
+#define YY_DECL \
+    int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used)
+
+YY_DECL;
+
+}
+
+/* Terminal symbols expected from the lexer. */
+%token RAW_RULE
+%token RULE_NAME
+
+%token STRINGS CONDITION
+%token IDENTIFIER
+%token NAME
+
+%token BRACE_IN BRACE_OUT ASSIGN COLON
+
+
+%token RAW_BLOCK
+
+%token PLAIN_STRING
+%token MASKED_STRING
+/* Literal values. */
+%token TRUE_            "true"
+%token FALSE_           "false"
+%token INTEGER
+%token STRING
+/* Size suffixes accepted after an integer. */
+%token KB MB GB
+/* Boolean operators. */
+%token AND              "and"
+%token OR               "or"
+%token NOT              "not"
+/* Relational operators. */
+%token LT               "<"
+%token LE               "<="
+%token EQ               "=="
+%token NE               "!="
+%token GT               ">"
+%token GE               ">="
+/* String operators, in plain and "i"-prefixed variants. */
+%token CONTAINS     "contains"
+%token STARTSWITH   "startswith"
+%token ENDSWITH     "endswith"
+%token MATCHES      "matches"
+%token ICONTAINS    "icontains"
+%token ISTARTSWITH  "istartswith"
+%token IENDSWITH    "iendswith"
+%token IEQUALS      "iequals"
+/* Arithmetic operators. NOTE(review): DIV is aliased to a backslash rather than "/" -- confirm against the lexer. */
+%token PLUS             "+"
+%token MINUS            "-"
+%token MUL              "*"
+%token DIV              "\\"
+%token MOD              "%"
+/* Punctuation used for grouping and calls. */
+%token PAREN_O          "("
+%token PAREN_C          ")"
+%token COMMA            ","
+%token DOT              "."
+/* Keywords of the "none/any/all of them" conditions. */
+%token NONE             "none"
+%token ANY              "any"
+%token ALL              "all"
+%token OF               "of"
+%token THEM             "them"
+
+/* Member of the %union attached to each valued token and non-terminal. */
+%type <cstring> RULE_NAME
+%type <cstring> RAW_BLOCK
+
+
+%type <sized_cstring> IDENTIFIER
+%type <sized_cstring> NAME
+
+
+%type <integer> INTEGER
+%type <cstring> STRING
+
+%type <rule> rule
+
+%type <sized_cstring> PLAIN_STRING
+%type <pattern> MASKED_STRING
+
+%type <expr> cexpression
+%type <expr> literal
+%type <expr> callable
+%type <args_list> call_args
+%type <expr> bool_expr
+%type <expr> rel_expr
+%type <expr> str_expr
+%type <expr> arithm_expr
+%type <expr> set_counter
+
+/* Operator precedence, from the lowest (first line) to the highest (last line). */
+%left OR
+%left AND
+%left EQ NE
+%left CONTAINS STARTSWITH ENDSWITH MATCHES ICONTAINS ISTARTSWITH IENDSWITH IEQUALS
+%left LT LE GT GE
+%left PLUS MINUS
+%left MUL DIV MOD
+%right NOT
+
+
+/* Debug trace for <rule> values discarded by the parser (error recovery or abort). */
+/* NOTE(review): the discarded rule object is only printed, never released -- confirm ownership. */
+%destructor { printf("-------- Discarding symbol %p.\n", $$); } <rule>
+
+
+%%
+
+
+
+ /*
+
+
+<raw_block>[ \t\n]+             { }
+<raw_block>"{"                  {
+                                    read_block(temp);
+                                    yylvalp->cstring = temp; return RAW_BLOCK;
+                                }
+<raw_block>"}"                  { yy_pop_state(); }
+
+  */
+/* Start symbol: a possibly empty list of rules, each one registered with the scanner. */
+/* The rule is right-recursive, so registrations run from the last parsed rule back to the first. */
+rules : /* empty */
+      | rule rules { g_content_scanner_add_rule(scanner, $1); }
+
+        //rule : RAW_RULE RULE_NAME { printf("RULE %s\n", $2); } RAW_BLOCK { printf("BLOCK: %s\n", $4); }
+/* One rule definition; the object is created as soon as its name is read so later actions can fill it. */
+rule : RAW_RULE RULE_NAME
+     {
+         *built_rule = g_scan_rule_new($2);
+         $<rule>$ = *built_rule;                /* value of this mid-rule action */
+     }
+     BRACE_IN strings condition BRACE_OUT
+     {
+         $$ = $<rule>3;                         /* hand the mid-rule value over as the rule value */
+         //printf("RULE %s -> %p\n", $2, $$);
+     } 
+
+
+
+/* Optional section listing the pattern declarations of a rule. */
+strings : /* empty */
+        | STRINGS COLON string_decls
+        ;
+
+/* One or more pattern declarations. */
+string_decls : string_decl
+             | string_decls string_decl
+             ;
+/* One "IDENTIFIER ASSIGN string" declaration; only plain strings are turned into patterns for now. */
+string_decl : IDENTIFIER ASSIGN PLAIN_STRING
+            {
+                GSearchPattern *__pat;
+                __pat = g_plain_bytes_new((uint8_t *)$3.cstring, $3.len);
+                g_search_pattern_set_name(__pat, $1.cstring, $1.len);
+                g_scan_rule_add_local_variable(*built_rule, __pat);
+                g_object_unref(G_OBJECT(__pat));    /* drop the local reference once handed to the rule */
+
+                /*
+                string_token_t *__token;
+                //printf("built plain %s\n", $3.cstring);
+                GBytesPattern *__pat;
+                __token = create_plain_string_token($3.cstring, $3.len);
+                printf("token: %p\n", __token);
+                __pat = g_bytes_pattern_new();
+                g_bytes_pattern_append_string(__pat, $3.cstring, $3.len);
+                g_scan_rule_add_local_variable(*built_rule, $1, G_SEARCH_PATTERN(__pat));
+                g_object_unref(G_OBJECT(__pat));
+                */
+            }
+            | IDENTIFIER ASSIGN MASKED_STRING
+            {
+                printf("built %p\n", $3);    /* debug trace only: masked strings are not registered yet */
+                /*
+                GBytesPattern *__pat;
+                __pat = g_bytes_pattern_new();
+                g_search_pattern_set_name(__pat, $1.cstring, $1.len);
+                g_bytes_pattern_append_string(__pat, "\xd9\x74\x24\xf4", 4);
+                g_scan_rule_add_local_variable(*built_rule, G_SEARCH_PATTERN(__pat));
+                */
+                /*
+                GSearchPattern *__pat;
+                __pat = G_SEARCH_PATTERN($3);
+                if (g_search_pattern_prepare(__pat))
+                    g_scan_rule_add_local_variable(*built_rule, $1, __pat);
+                g_clear_object(built_pattern);
+                */
+            }
+            ;
+/* Optional match condition; assumes the rule takes its own reference, as the other hand-over sites here do. */
+condition : /* empty */
+          | CONDITION COLON cexpression
+          {
+              g_scan_rule_set_match_condition(*built_rule, $3);
+              g_object_unref(G_OBJECT($3));    /* release the parser's reference instead of leaking an extra one */
+          }
+          ;
+/* Any condition expression. A bare IDENTIFIER yields a placeholder true literal, like set_counter, until counters are wired in. */
+cexpression : IDENTIFIER
+            {
+                printf("named var: %s\n", "$1");
+                $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true });    /* never leave the value unset */
+                /* GSearchPattern *__pat;
+                   GMatchCounter *__counter;
+                   __pat = g_scan_rule_get_local_variable(*built_rule, $1);
+                   if (__pat != NULL)
+                   {
+                       __counter = g_match_counter_new(__pat);
+                       g_scan_rule_add_condition(*built_rule, G_MATCH_CONDITION(__counter));
+                       g_object_unref(G_OBJECT(__counter));
+                       g_object_unref(G_OBJECT(__pat));
+                   }
+                */
+            }
+            | literal { $$ = $1; }
+            | callable { $$ = $1; }
+            | bool_expr { $$ = $1; }
+            | rel_expr { $$ = $1; }
+            | str_expr { $$ = $1; }
+            | arithm_expr { $$ = $1; }
+            | set_counter { $$ = $1; }
+            | "(" cexpression ")" { $$ = $2; }
+            ;
+/* Literal values; the KB, MB and GB suffixes scale an integer by 1024, 1048576 and 1073741824. */
+literal : "true" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
+        | "false" { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ false }); }
+        | INTEGER { $$ = g_literal_expression_new(EVT_INTEGER, &$1); }
+        | INTEGER KB { $$ = g_literal_expression_new(EVT_INTEGER, (unsigned long long []){ $1 * 1024 }); }
+        | INTEGER MB { $$ = g_literal_expression_new(EVT_INTEGER, (unsigned long long []){ $1 * 1048576 }); }
+        | INTEGER GB { $$ = g_literal_expression_new(EVT_INTEGER, (unsigned long long []){ $1 * 1073741824 }); }
+        | STRING { $$ = g_literal_expression_new(EVT_STRING, $1); }
+        ;
+/* Named calls, with or without arguments, optionally chained with "."; argument references are released after hand-over. */
+callable : NAME { $$ = g_pending_call_new($1.cstring, $1.len, NULL, 0); }
+         | NAME "(" ")" { $$ = g_pending_call_new($1.cstring, $1.len, NULL, 0); }
+         | NAME "(" call_args ")"
+         {
+             size_t __i;
+             $$ = g_pending_call_new($1.cstring, $1.len, $3.args, $3.count);
+             for (__i = 0; __i < $3.count; __i++)
+                 g_object_unref(G_OBJECT($3.args[__i]));
+             free($3.args);    /* the array itself was allocated by call_args */
+         }
+         | callable "." NAME
+         {
+             GScanExpression *__next;
+             __next = g_pending_call_new($3.cstring, $3.len, NULL, 0);
+             g_pending_call_attach_next(G_PENDING_CALL($1), G_PENDING_CALL(__next));
+             $$ = $1;    /* the chain keeps its first call as value */
+         }
+         | callable "." NAME "(" ")"
+         {
+             GScanExpression *__next;
+             __next = g_pending_call_new($3.cstring, $3.len, NULL, 0);
+             g_pending_call_attach_next(G_PENDING_CALL($1), G_PENDING_CALL(__next));
+             $$ = $1;
+         }
+         | callable "." NAME "(" call_args ")"
+         {
+             GScanExpression *__next;
+             size_t __i;
+             __next = g_pending_call_new($3.cstring, $3.len, $5.args, $5.count);
+             for (__i = 0; __i < $5.count; __i++)
+                 g_object_unref(G_OBJECT($5.args[__i]));
+             free($5.args);
+             g_pending_call_attach_next(G_PENDING_CALL($1), G_PENDING_CALL(__next));
+             $$ = $1;
+         }
+         ;
+/* Comma-separated arguments gathered in a growing array. NOTE(review): malloc()/realloc() results are used unchecked. */
+call_args : cexpression
+          {
+              $$.count = 1;
+              $$.args = malloc(sizeof(GScanExpression *));
+              $$.args[0] = $1;
+          }
+          | call_args "," cexpression
+          {
+              $1.count++;    /* the list value is extended in place, then passed on */
+              $1.args = realloc($1.args, $1.count * sizeof(GScanExpression *));
+              $1.args[$1.count - 1] = $3;
+              $$ = $1;
+          }
+          ;
+/* Boolean combinations; "not" requires a parenthesized operand and takes no second operand. */
+bool_expr : cexpression "and" cexpression { $$ = g_boolean_operation_new(BOT_AND, $1, $3); }
+          | cexpression "or" cexpression  { $$ = g_boolean_operation_new(BOT_OR, $1, $3); }
+          | "not" "(" cexpression ")"     { $$ = g_boolean_operation_new(BOT_NOT, $3, NULL); }
+          ;
+/* Relational comparisons between two expressions; each operator maps to its own RCO_* code. */
+rel_expr : cexpression "<" cexpression  { $$ = g_relational_operation_new(RCO_LT, $1, $3); }
+         | cexpression "<=" cexpression { $$ = g_relational_operation_new(RCO_LE, $1, $3); }
+         | cexpression "==" cexpression { $$ = g_relational_operation_new(RCO_EQ, $1, $3); }
+         | cexpression "!=" cexpression { $$ = g_relational_operation_new(RCO_NE, $1, $3); }
+         | cexpression ">" cexpression  { $$ = g_relational_operation_new(RCO_GT, $1, $3); }
+         | cexpression ">=" cexpression { $$ = g_relational_operation_new(RCO_GE, $1, $3); }
+         ;
+/* String comparisons; the last argument is true for the plain operators and false for the "i"-prefixed ones. */
+str_expr : cexpression "contains" cexpression    { $$ = g_string_operation_new(SOT_CONTAINS, $1, $3, true); }
+         | cexpression "startswith" cexpression  { $$ = g_string_operation_new(SOT_STARTSWITH, $1, $3, true); }
+         | cexpression "endswith" cexpression    { $$ = g_string_operation_new(SOT_ENDSWITH, $1, $3, true); }
+         | cexpression "matches" cexpression     { $$ = g_string_operation_new(SOT_MATCHES, $1, $3, true); }
+         | cexpression "icontains" cexpression   { $$ = g_string_operation_new(SOT_CONTAINS, $1, $3, false); }
+         | cexpression "istartswith" cexpression { $$ = g_string_operation_new(SOT_STARTSWITH, $1, $3, false); }
+         | cexpression "iendswith" cexpression   { $$ = g_string_operation_new(SOT_ENDSWITH, $1, $3, false); }
+         | cexpression "iequals" cexpression     { $$ = g_string_operation_new(SOT_IEQUALS, $1, $3, false); }
+         ;
+/* Arithmetic operations between two expressions; the division token is referenced through its backslash alias. */
+arithm_expr : cexpression "+" cexpression  { $$ = g_arithmetic_operation_new(AEO_PLUS, $1, $3); }
+            | cexpression "-" cexpression  { $$ = g_arithmetic_operation_new(AEO_MINUS, $1, $3); }
+            | cexpression "*" cexpression  { $$ = g_arithmetic_operation_new(AEO_MUL, $1, $3); }
+            | cexpression "\\" cexpression { $$ = g_arithmetic_operation_new(AEO_DIV, $1, $3); }
+            | cexpression "%" cexpression  { $$ = g_arithmetic_operation_new(AEO_MOD, $1, $3); }
+            ;
+/* "none/any/all of them" conditions; all three currently build the same constant true literal. */
+set_counter : "none" "of" "them"  { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
+            | "any" "of" "them"  { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
+            | "all" "of" "them"  { $$ = g_literal_expression_new(EVT_BOOLEAN, (bool []){ true }); }
+            ;
+
+%%
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : scanner   = content scanner receiving the rules (unused).    *
+*                yyscanner = lexer state, queried for the line number.        *
+*                msg       = error message to display.                        *
+*                                                                             *
+*  Description : Prints an error message after a failed parsing.              *
+*                                                                             *
+*  Return      : 0                                                            *
+*                                                                             *
+*  Remarks     : the remaining parameters are not used.                       *
+*                                                                             *
+******************************************************************************/
+
+static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg)
+{
+    const int line = yyget_lineno(yyscanner);
+
+    printf("YYERROR line %d: %s\n", line, msg);
+    return 0;
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : scanner = pattern scanner to prepare.                        *
+*                text    = definitions of the rules to load.                  *
+*                length  = length of these definitions.                       *
+*                                                                             *
+*  Description : Completes a pattern scanner with parsed rules.               *
+*                                                                             *
+*  Return      : true if everything was parsed, false otherwise.              *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+bool process_rules_definitions(GContentScanner *scanner, const char *text, size_t length)
+{
+    bool result;                            /* Status to return            */
+    GScanRule *built_rule;                  /* Rule being built            */
+    void /*GBytesPattern*/ *built_pattern;           /* Pattern being built         */
+    char *buf;                              /* Temporary work area         */
+    size_t allocated;                       /* Allocated memory size       */
+    size_t used;                            /* Used quantity               */
+    yyscan_t lexstate;                      /* Lexical analysis state      */
+    YY_BUFFER_STATE state;                  /* Scanning context            */
+    int status;                             /* Outcome of the parsing      */
+
+    result = false;
+
+    built_rule = NULL;
+    built_pattern = NULL;
+
+    allocated = 256;
+    used = 0;
+
+    /* Without a work buffer or a lexer state nothing can be parsed: report a failure. */
+    buf = malloc(allocated * sizeof(char));
+    if (buf == NULL) goto done;
+    buf[0] = '\0';
+
+    if (rost_lex_init(&lexstate) != 0) goto free_buf;
+
+    state = rost__scan_bytes(text, length, lexstate);
+    status = yyparse(scanner, lexstate, &built_rule, &built_pattern, &buf, &allocated, &used);
+    result = (status == EXIT_SUCCESS);
+
+    yy_delete_buffer(state, lexstate);
+    rost_lex_destroy(lexstate);
+
+ free_buf:
+    free(buf);
+ done:
+    return result;
+
+}