From 72b3287318b203ebcdc5bf6e8f07bf99c9b06a71 Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Tue, 12 Sep 2023 08:11:47 +0200
Subject: Handle the meta section in ROST rule definitions.

---
 src/analysis/scan/grammar.y |  94 ++++++++++++++++++++-------------
 src/analysis/scan/tokens.l  | 123 ++++++++++++++++++++++++++++++++++++++------
 2 files changed, 166 insertions(+), 51 deletions(-)

diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y
index 64bcd5b..66d3f87 100644
--- a/src/analysis/scan/grammar.y
+++ b/src/analysis/scan/grammar.y
@@ -112,7 +112,13 @@ YY_DECL;
 %token RAW_RULE
 %token RULE_NAME
 
-%token STRINGS CONDITION
+%token META "meta"
+%token STRINGS "strings"
+%token CONDITION "condition"
+
+%token INFO_KEY
+
+
 
 %token BYTES_ID
 %token BYTES_ID_COUNTER
@@ -138,7 +144,9 @@ YY_DECL;
 
 
 
-%token BRACE_IN BRACE_OUT ASSIGN COLON
+%token BRACE_IN BRACE_OUT
+%token ASSIGN "="
+%token COLON ":"
 
 
 %token PLAIN_TEXT
@@ -202,6 +210,7 @@ YY_DECL;
 
 %type <sized_cstring> RULE_NAME
 
+%type <sized_cstring> INFO_KEY
 
 %type <sized_cstring> BYTES_ID
 %type <sized_cstring> BYTES_ID_COUNTER
@@ -293,27 +302,15 @@ YY_DECL;
 
 %%
 
+             rules : /* empty: a definition file may hold no rule at all */
+                   | rules external  /* left recursion: the parser stack stays bounded whatever the number of items */
+                   | rules rule { g_content_scanner_add_rule(scanner, $2); /* each rule is registered as soon as it is parsed, in source order */ }
+                   ;
 
 
- /*
-
-
-<raw_block>[ \t\n]+             { }
-<raw_block>"{"                  {
-                                    read_block(temp);
-                                    yylvalp->cstring = temp; return RAW_BLOCK;
-                                }
-<raw_block>"}"                  { yy_pop_state(); }
-
-  */
-
-
-rules : /* empty */
-      | external rules
-      | rule rules { g_content_scanner_add_rule(scanner, $1); }
-
-        //rule : RAW_RULE RULE_NAME { printf("RULE %s\n", $2); } RAW_BLOCK { printf("BLOCK: %s\n", $4); }
-
+/**
+ * Inclusion d'une règle externe.
+ */
 
           external : "include" PLAIN_TEXT
                    {
@@ -332,26 +329,51 @@ rules : /* empty */
                    ;
 
 
-rule : RAW_RULE RULE_NAME
-     {
-         //printf("--------built rule '%s'\n", $2.data);
-         *built_rule = g_scan_rule_new($2.data);
-         $<rule>$ = *built_rule;
-     }
-     BRACE_IN strings condition BRACE_OUT
-     {
-         $$ = $<rule>3;
-         //printf("RULE %s -> %p\n", $2, $$);
-         //printf("end of rule\n");
-     } 
+/**
+ * Définition de règle.
+ */
+
+              rule : RAW_RULE RULE_NAME  /* one rule definition: keyword, name, then a braced body */
+                   {
+                       *built_rule = g_scan_rule_new($2.data);  /* created before the body is parsed; built_rule is declared outside this hunk */
+                       $<rule>$ = *built_rule;  /* value of this mid-rule action, read back below as $<rule>3 */
+                   }
+                   BRACE_IN meta strings condition BRACE_OUT  /* sections are accepted in this fixed order only */
+                   {
+                       $$ = $<rule>3;  /* component 3 is the mid-rule action above */
+                   }
+                   ;
+
+
+/**
+ * Section "meta:" d'une définition de règle.
+ */
+
+              meta : /* empty: the "meta:" section is optional */
+                   | "meta" ":"             /* section header alone, without any entry */
+                   | "meta" ":" meta_list   /* section header followed by its entries */
+                   ;
 
+         meta_list : meta_info              /* one or more entries, with no separator token between them */
+                   | meta_list meta_info    /* left-recursive accumulation */
+                   ;
 
+         meta_info : INFO_KEY "=" "true"             /* key = value entry; no semantic action is attached to any alternative, */
+                   | INFO_KEY "=" "false"            /* so entries are accepted here without being stored by this rule */
+                   | INFO_KEY "=" SIGNED_INTEGER
+                   | INFO_KEY "=" UNSIGNED_INTEGER
+                   | INFO_KEY "=" PLAIN_TEXT         /* quoted string without escape sequence */
+                   | INFO_KEY "=" ESCAPED_TEXT       /* quoted string which went through unescaping in the lexer */
+                   ;
 
 
-strings : /* empty */
-        | STRINGS COLON bytes_decls
-        ;
+/**
+ * Section "strings:" d'une définition de règle.
+ */
 
+           strings : /* empty: the section is optional */
+                   | "strings" ":" bytes_decls  /* same tokens as STRINGS COLON, spelled through their string aliases */
+                   ;
 
        bytes_decls : str_pattern
                    {
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l
index 594d4d9..1cf65fb 100644
--- a/src/analysis/scan/tokens.l
+++ b/src/analysis/scan/tokens.l
@@ -327,6 +327,9 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out
 %x rule_intro
 %x raw_block
 
+%x meta
+%x meta_value
+
 %x strings
 %x bytes_value
 %x bytes_value_raw
@@ -399,25 +402,119 @@ bytes_id [A-Za-z_][A-Za-z0-9_]*
 
 %{ /* Définition locale d'une règle */ %}
 
+                             "rule" { /* Rule keyword: what follows is lexed by the <rule_intro> rules. */
+                                        PUSH_STATE(rule_intro);
+                                        return RAW_RULE;
+                                    }
 
-"rule"                          { PUSH_STATE(rule_intro); return RAW_RULE; }
+          <rule_intro>[A-Za-z0-9_]+ { /* Rule name; note that this pattern also accepts a leading digit. */
+                                        yylval->sized_cstring.data = yytext; /* points at yytext itself: no copy is made here */
+                                        yylval->sized_cstring.len = yyleng;
+                                        return RULE_NAME;
+                                    }
 
-<rule_intro>[A-Za-z0-9_]+       {
-                                    yylval->sized_cstring.data = yytext;
-                                    yylval->sized_cstring.len = yyleng;
-                                    return RULE_NAME;
-                                }
+                 <rule_intro>[ \t]* { }
+
+                    <rule_intro>"{" { /* Opening brace of the rule body. */
+                                        POP_STATE;             /* done with rule_intro */
+                                        PUSH_STATE(raw_block); /* state in which the section keywords below are first recognised */
+                                        return BRACE_IN;
+                                    }
+
+                  <raw_block>"meta" { /* "meta" section keyword, only recognised as the first section of a rule body. */
+                                        POP_STATE;                  /* leave raw_block */
+                                        PUSH_STATE(meta);           /* current again once wait_for_colon has been popped (assuming stack semantics for these macros) */
+                                        PUSH_STATE(wait_for_colon); /* only the ':' rule is declared for this state in the visible hunks */
+                                        return META;
+                                    }
+          <raw_block,meta>"strings" { /* Placed before the <meta>{bytes_id} rule: on an equal-length match in <meta>, this keyword rule is the one selected. */
+                                        POP_STATE;                  /* leave raw_block or meta */
+                                        PUSH_STATE(strings);
+                                        PUSH_STATE(wait_for_colon); /* the ':' must be consumed before the section content */
+                                        return STRINGS;
+                                    }
+<raw_block,meta,strings>"condition" { /* Reachable from any earlier section; also listed before <meta>{bytes_id}, so it wins an equal-length match there. */
+                                        POP_STATE;                  /* leave the previous section state */
+                                        PUSH_STATE(condition);
+                                        PUSH_STATE(wait_for_colon); /* the ':' must be consumed before the section content */
+                                        return CONDITION;
+                                    }
+
+                <wait_for_colon>":" { /* Colon closing a section header. */
+                                        POP_STATE; /* uncovers the section state pushed just before wait_for_colon */
+                                        return COLON;
+                                    }
+
+<raw_block,meta,strings,condition>"}" { /* End of a rule body, whichever section (or none) was being read. */
+                                        POP_STATE; /* drop the state of the body or of its last section */
+                                        return BRACE_OUT;
+                                    }
+
+
+%{ /* Définitions communes pour la section "meta:" */ %}
+
+                   <meta>{bytes_id} { /* Key of a meta entry; it reuses the {bytes_id} identifier pattern. */
+                                        yylval->sized_cstring.data = yytext; /* points at yytext itself: no copy is made here */
+                                        yylval->sized_cstring.len = yyleng;
+                                        return INFO_KEY;
+                                    }
+
+                          <meta>"=" { /* every <meta_value> rule pops this state once it has matched a value */ PUSH_STATE(meta_value); return ASSIGN; }
+
+                 <meta_value>"true" { /* boolean value; pops meta_value */ POP_STATE; return TRUE_; }
+                <meta_value>"false" { /* boolean value; pops meta_value */ POP_STATE; return FALSE_; }
+
+       <meta_value>-(0|[1-9][0-9]*) { /* Negative decimal value, without superfluous leading zero. */
+                                        POP_STATE;
+                                        yylval->signed_integer = strtoll(yytext, NULL, 10); /* yytext still starts with '-'; NOTE(review): no range check on the result */
+                                        return SIGNED_INTEGER;
+                                    }
+
+           <meta_value>-0x[0-9a-f]+ { /* Negative hexadecimal value; only lowercase digits and prefix are matched. */
+                                        POP_STATE;
+                                        yylval->signed_integer = strtoll(yytext, NULL, 16); /* with base 16, strtoll() accepts the "0x" prefix after the sign */
+                                        return SIGNED_INTEGER;
+                                    }
+
+        <meta_value>(0|[1-9][0-9]*) { /* Unsigned decimal value, without superfluous leading zero. */
+                                        POP_STATE;
+                                        yylval->unsigned_integer = strtoull(yytext, NULL, 10); /* NOTE(review): no range check on the result */
+                                        return UNSIGNED_INTEGER;
+                                    }
+
+            <meta_value>0x[0-9a-f]+ { /* Unsigned hexadecimal value; only lowercase digits and prefix are matched. */
+                                        POP_STATE;
+                                        yylval->unsigned_integer = strtoull(yytext, NULL, 16); /* with base 16, strtoull() accepts the "0x" prefix */
+                                        return UNSIGNED_INTEGER;
+                                    }
+
+ <meta_value>\"{str_not_escaped}+\" {
+                                        POP_STATE;
+
+                                        yylval->sized_cstring.data = yytext + 1;
+                                        yylval->sized_cstring.len = yyleng - 2;
 
-<rule_intro>[ \t]*              {  }
-<rule_intro>"{"                 { POP_STATE; PUSH_STATE(raw_block); return BRACE_IN; }
+                                        return PLAIN_TEXT;
+                                    }
 
-<raw_block>"strings"            { POP_STATE; PUSH_STATE(strings); PUSH_STATE(wait_for_colon); return STRINGS; }
-<raw_block,strings>"condition"          { POP_STATE; PUSH_STATE(condition); PUSH_STATE(wait_for_colon); return CONDITION; }
+       <meta_value>\"{str_mixed}+\" {
+                                        POP_STATE;
 
+                                        rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0);
 
+#ifndef NDEBUG
+                                        /* Pour rendre plus lisibles les impressions de débogage */
+                                        tmp_0->data[tmp_0->len] = '\0';
+#endif
 
+                                        yylval->tmp_cstring = tmp_0;
+
+                                        return ESCAPED_TEXT;
+                                    }
 
 
+%{ /* A déplacer... */ %}
+
 
 <condition>"true"               { return TRUE_; }
 <condition>"false"              { return FALSE_; }
@@ -713,7 +810,7 @@ bytes_id [A-Za-z_][A-Za-z0-9_]*
                                         return HOOK_O;
                                     }
 
-               <bytes_regex_range>"]" {
+             <bytes_regex_range>"]" {
                                         POP_STATE;
                                         printf(" !! exiting range\n");
                                         return HOOK_C;
@@ -870,10 +967,6 @@ bytes_id [A-Za-z_][A-Za-z0-9_]*
 
 
 
-<wait_for_colon>":"             { POP_STATE; return COLON; }
-
-<raw_block,strings,condition>"}"                  { POP_STATE; return BRACE_OUT; }
-
 
 
 %{ /* Commentaires */ %}
-- 
cgit v0.11.2-87-g4458