diff options
| author | Cyrille Bagard <nocbos@gmail.com> | 2023-01-30 06:59:35 (GMT) | 
|---|---|---|
| committer | Cyrille Bagard <nocbos@gmail.com> | 2023-01-30 06:59:35 (GMT) | 
| commit | db3b204dd7a71b2f74a4e69b2159a96e3ab66614 (patch) | |
| tree | 34174311b7ac504f03a10a889ada7f28db7a06c0 /src/analysis/scan/tokens.l | |
| parent | 34ee1bfca78e8423cfa29329fdc756569d6b1960 (diff) | |
Save an initial version of rost.
Diffstat (limited to 'src/analysis/scan/tokens.l')
| -rw-r--r-- | src/analysis/scan/tokens.l | 306 | 
1 files changed, 306 insertions, 0 deletions
/*
 * Lexical scanner (flex) for ROST scan rules.
 *
 * Origin: src/analysis/scan/tokens.l, added as a new file (blob 92a5340).
 *
 * Start condition flow for one rule:
 *
 *   INITIAL --"rule"--> rule_intro --"{"--> raw_block
 *   raw_block --"strings"--> strings      (after the ':' is consumed)
 *   raw_block or strings --"condition"--> condition (after the ':')
 *   raw_block, strings or condition --"}"--> INITIAL
 *
 * A section keyword replaces the current section state instead of nesting
 * into it, so that the closing brace of a rule always pops back to INITIAL
 * and another rule can follow in the same input.
 *
 * The actions rely on three names which are not declared in this file and
 * are expected to be provided by the scanner prototype: 'buf', 'used' and
 * 'allocated'. NOTE(review): they are assumed to be pointers to a growable
 * character buffer, its used length and its capacity -- confirm against the
 * YY_DECL definition.
 */

%top {

#include "grammar.h"

}


%{

//#include "manual.h"

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>


/*
 * Read raw input into 'tmp' up to (and excluding) the '}' matching an already
 * consumed '{'. Braces inside double-quoted strings are ignored. The final
 * '}' is pushed back to the input and the output is NUL-terminated.
 *
 * The macro is currently not expanded by any rule; using it requires removing
 * the 'nounput' and 'noinput' options below. The caller is responsible for
 * providing a large enough 'tmp' buffer: no bound is checked here.
 */
#define read_block(tmp)                                                     \
    ({                                                                      \
        unsigned int __depth;                                               \
        bool __is_string;                                                   \
        char *__iter;                                                       \
        int __c;                                                            \
                                                                            \
        __depth = 1;                                                        \
        __is_string = false;                                                \
                                                                            \
        for (__iter = (tmp); __depth > 0; __iter += (__depth > 0 ? 1 : 0))  \
        {                                                                   \
            __c = input(yyscanner);                                         \
                                                                            \
            /* End of input: stop instead of looping forever. */            \
            if (__c <= 0) break;                                            \
                                                                            \
            *__iter = __c;                                                  \
                                                                            \
            switch (*__iter)                                                \
            {                                                               \
                case '"':                                                   \
                    __is_string = !__is_string;                             \
                    break;                                                  \
                                                                            \
                case '{':                                                   \
                    if (!__is_string) __depth++;                            \
                    break;                                                  \
                                                                            \
                case '}':                                                   \
                    if (!__is_string)                                       \
                    {                                                       \
                        __depth--;                                          \
                        if (__depth == 0) unput('}');                       \
                    }                                                       \
                    break;                                                  \
                                                                            \
                default:                                                    \
                    break;                                                  \
                                                                            \
            }                                                               \
                                                                            \
        }                                                                   \
                                                                            \
        *__iter = '\0';                                                     \
                                                                            \
    })




#define PUSH_STATE(s) yy_push_state(s, yyscanner)
#define POP_STATE     yy_pop_state(yyscanner)



/*
 * Make sure at least 'extra' more bytes can be appended to *buf.
 *
 * The capacity is doubled until the request fits (a single doubling is not
 * enough for a long chunk or for a zero capacity), and the scanner aborts on
 * allocation failure instead of storing a NULL pointer into *buf.
 */
#define EXTEND_BUFFER_IF_NEEDED(extra)                                      \
    do                                                                      \
    {                                                                       \
        size_t __needed = *used + (extra);                                  \
                                                                            \
        if (__needed > *allocated)                                          \
        {                                                                   \
            size_t __size = (*allocated > 0 ? *allocated : 16);             \
            void *__grown;                                                  \
                                                                            \
            while (__size < __needed)                                       \
                __size *= 2;                                                \
                                                                            \
            __grown = realloc(*buf, __size);                                \
            if (__grown == NULL)                                            \
                YY_FATAL_ERROR("Out of memory while reading a string");     \
                                                                            \
            *buf = __grown;                                                 \
            *allocated = __size;                                            \
        }                                                                   \
    }                                                                       \
    while (0)


%}


%option bison-bridge reentrant
%option stack
%option nounput
%option noinput
%option noyywrap
%option noyy_top_state
%option yylineno
%option never-interactive


%x rule_intro
%x raw_block

%x strings
%x strval
%x strval_raw
%x strval_hex

%x condition
%x strlit

%x wait_for_colon


%x comment


%%


%{ /* Rule header */ %}

"rule"                          { PUSH_STATE(rule_intro); return RAW_RULE; }

<rule_intro>[A-Za-z0-9_]+       {
                                    /* yytext is only valid until the next token is read. */
                                    yylval->cstring = yytext;
                                    return RULE_NAME;
                                }

<rule_intro>[ \t]+              { }
<rule_intro>"{"                 { POP_STATE; PUSH_STATE(raw_block); return BRACE_IN; }


%{ /* Section keywords: the current section state is replaced, not nested */ %}

<raw_block>"strings"            {
                                    POP_STATE;
                                    PUSH_STATE(strings);
                                    PUSH_STATE(wait_for_colon);
                                    return STRINGS;
                                }

<raw_block,strings>"condition"  {
                                    POP_STATE;
                                    PUSH_STATE(condition);
                                    PUSH_STATE(wait_for_colon);
                                    return CONDITION;
                                }


%{ /* Condition literals */ %}

<condition>"true"               { return TRUE_; }
<condition>"false"              { return FALSE_; }

<condition>(0|[1-9][0-9]*)      { yylval->integer = strtoull(yytext, NULL, 10); return INTEGER; }
<condition>0x[0-9a-fA-F]+       { yylval->integer = strtoull(yytext, NULL, 16); return INTEGER; }

<condition>[kK][bB]             { return KB; }
<condition>[mM][bB]             { return MB; }
<condition>[gG][bB]             { return GB; }


%{ /* Quoted string literal inside a condition, accumulated into *buf */ %}

<condition>"\""                 {
                                    *used = 0;
                                    PUSH_STATE(strlit);
                                }

<strlit>"\""                    {
                                    POP_STATE;
                                    EXTEND_BUFFER_IF_NEEDED(1);
                                    (*buf)[(*used)++] = '\0';
                                    yylval->cstring = *buf;
                                    return STRING;
                                }

<strlit>"\\\""                  { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '"'; }
<strlit>"\\t"                   { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\t'; }
<strlit>"\\r"                   { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\r'; }
<strlit>"\\n"                   { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\n'; }
<strlit>"\\\\"                  { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\\'; }

<strlit>\\x[0-9a-fA-F]{2}       {
                                    char __ch;
                                    __ch = strtol(yytext + 2, NULL, 16);
                                    EXTEND_BUFFER_IF_NEEDED(1);
                                    (*buf)[(*used)++] = __ch;
                                }

<strlit>[^\\\"]+                {
                                    /*
                                     * Copy exactly the matched bytes: the terminating NUL is
                                     * appended when the closing quote is met, so no extra byte
                                     * is written past the reserved space here.
                                     */
                                    size_t __len;
                                    __len = yyleng;
                                    EXTEND_BUFFER_IF_NEEDED(__len);
                                    memcpy(&(*buf)[*used], yytext, __len);
                                    *used += __len;
                                }


%{ /* Condition operators and keywords */ %}

<condition>"and"                { return AND; }
<condition>"or"                 { return OR; }
<condition>"not"                { return NOT; }

<condition>"<"                  { return LT; }
<condition>"<="                 { return LE; }
<condition>"=="                 { return EQ; }
<condition>"!="                 { return NE; }
<condition>">"                  { return GT; }
<condition>">="                 { return GE; }

<condition>"contains"           { return CONTAINS; }
<condition>"startswith"         { return STARTSWITH; }
<condition>"endswith"           { return ENDSWITH; }
<condition>"matches"            { return MATCHES; }
<condition>"icontains"          { return ICONTAINS; }
<condition>"istartswith"        { return ISTARTSWITH; }
<condition>"iendswith"          { return IENDSWITH; }
<condition>"iequals"            { return IEQUALS; }

<condition>"+"                  { return PLUS; }
<condition>"-"                  { return MINUS; }
<condition>"*"                  { return MUL; }
<condition>"\\"                 { /* A single backslash is the division operator. */ return DIV; }
<condition>"%"                  { return MOD; }

<condition>"("                  { return PAREN_O; }
<condition>")"                  { return PAREN_C; }
<condition>","                  { return COMMA; }
<condition>"."                  { return DOT; }

<condition>"none"               { return NONE; }
<condition>"any"                { return ANY; }
<condition>"all"                { return ALL; }
<condition>"of"                 { return OF; }
<condition>"them"               { return THEM; }


%{ /* Identifiers: the leading dollar sign is not part of the returned name */ %}

<strings,condition>"$"[A-Za-z0-9_]* {
                                    yylval->sized_cstring.cstring = yytext + 1;
                                    yylval->sized_cstring.len = yyleng - 1;
                                    return IDENTIFIER;
                                }
<condition>[A-Za-z_][A-Za-z0-9_]* {
                                    yylval->sized_cstring.cstring = yytext;
                                    yylval->sized_cstring.len = yyleng;
                                    return NAME;
                                }


%{ /* Pattern definitions in the strings section */ %}

<strings>"="                    { PUSH_STATE(strval); return ASSIGN; }


<strval>\"[^\"\\]+\"            {
                                    /* Escape-free string: returned without its quotes. */
                                    POP_STATE;
                                    yylval->sized_cstring.cstring = yytext + 1;
                                    yylval->sized_cstring.len = yyleng - 2;
                                    return PLAIN_STRING;
                                }

<strval>"\""                    {
                                    POP_STATE;
                                    /* TODO: *built_pattern = g_bytes_pattern_new(); */
                                    PUSH_STATE(strval_raw);
                                }
<strval>"{"                     {
                                    POP_STATE;
                                    /* TODO: *built_pattern = g_bytes_pattern_new(); */
                                    PUSH_STATE(strval_hex);
                                }

<strval_raw>"\""                { POP_STATE; /*yylval->pattern = *built_pattern*/; return MASKED_STRING; }

<strval_raw>"\\\""              { /* TODO: g_bytes_pattern_append_data(*built_pattern, '"', 0xff); */ }
<strval_raw>"\\t"               { /* TODO: g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); */ }
<strval_raw>"\\r"               { /* TODO: g_bytes_pattern_append_data(*built_pattern, '\r', 0xff); */ }
<strval_raw>"\\n"               { /* TODO: g_bytes_pattern_append_data(*built_pattern, '\n', 0xff); */ }
<strval_raw>"\\\\"              { /* TODO: g_bytes_pattern_append_data(*built_pattern, '\\', 0xff); */ }

<strval_raw>\\x[0-9a-fA-F]{2}   {
                                    uint8_t __ch;
                                    __ch = strtol(yytext + 2, NULL, 16);
                                    //g_bytes_pattern_append_data(*built_pattern, __ch, 0xff);
                                }

<strval_raw>(.|\n)              { /* TODO: g_bytes_pattern_append_data(*built_pattern, *yytext, 0xff); */ }

<strval_hex>"}"                 { POP_STATE; /*yylval->pattern = *built_pattern;*/ return MASKED_STRING; }

<strval_hex>[0-9a-fA-F]{2}      {
                                    uint8_t __ch;
                                    __ch = strtol(yytext, NULL, 16);
                                    //g_bytes_pattern_append_data(*built_pattern, __ch, 0xff);
                                }

<strval_hex>"??"                { /*g_bytes_pattern_insert_space(*built_pattern, 1, 1);*/ }


%{ /* Section punctuation */ %}

<wait_for_colon>":"             { POP_STATE; return COLON; }

<raw_block,strings,condition>"}"                  { POP_STATE; return BRACE_OUT; }



%{ /* Comments: not recognized inside string literals or inside another comment */ %}

<INITIAL,rule_intro,raw_block,strings,strval,strval_hex,condition,wait_for_colon>"/*"          { PUSH_STATE(comment); }
<comment>"*/"                   { POP_STATE; }
<comment>(.|\n)                 { }

<INITIAL,rule_intro,raw_block,strings,strval,strval_hex,condition,wait_for_colon>"//"[^\n]*    { }


%{ /* Default actions */ %}

<INITIAL,rule_intro,raw_block,strings,strval,strval_hex,condition,wait_for_colon>[ \t\n]+      { }

<*>.                            {
                                    char *msg;
                                    int ret;
                                    ret = asprintf(&msg, "Unhandled token in rule definition: '%s'", yytext);
                                    if (ret == -1)
                                        YY_FATAL_ERROR("Unhandled token in undisclosed rule definition");
                                    else
                                    {
                                        YY_FATAL_ERROR(msg);
                                        free(msg);
                                    }
                                }


%%
