diff options
Diffstat (limited to 'src/analysis/scan/tokens.l')
-rw-r--r-- | src/analysis/scan/tokens.l | 306 |
1 files changed, 306 insertions, 0 deletions
%top {

#include "grammar.h"

}


%{

/*
 * Lexical scanner for scan rule definitions (src/analysis/scan/tokens.l).
 *
 * Start conditions used below:
 *   rule_intro      after the "rule" keyword, up to the opening brace
 *   raw_block       inside the rule body, between sections
 *   strings         inside the "strings" section
 *   strval          right after '=' in the strings section
 *   strval_raw      inside a quoted pattern that contains escapes
 *   strval_hex      inside a { .. } hexadecimal pattern
 *   condition       inside the "condition" section
 *   strlit          inside a quoted string literal of a condition
 *   wait_for_colon  after a section keyword, until ':' is read
 *   comment         inside a block comment
 *
 * NOTE(review): `buf`, `used` and `allocated` are not declared in this file;
 * they are presumably extra yylex() arguments provided through YY_DECL.
 * The code only requires `*buf` to be a byte buffer and `*used` /
 * `*allocated` to be integer byte counts -- confirm against the declaration.
 *
 * NOTE(review): RULE_NAME, IDENTIFIER, NAME and PLAIN_STRING hand pointers
 * into yytext to the parser; such pointers are only valid until the next
 * yylex() call, so the grammar is expected to copy them right away.
 *
 * NOTE(review): the whitespace-skipping rule at the end of the file is active
 * in every start condition, strval_raw included. This is harmless while the
 * pattern-building calls are disabled, but must be revisited when they are
 * enabled (runs of blanks inside a raw pattern would be dropped).
 */

//#include "manual.h"

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>


/*
 * Copy raw input into `tmp` until the '}' matching an already consumed '{'
 * is reached. Braces located between double quotes are ignored. The closing
 * brace is pushed back to the scanner and replaced by a NUL byte in `tmp`.
 *
 * Currently unused: it relies on input() and unput(), which are disabled by
 * the `noinput` / `nounput` options below. There is no bound check on `tmp`
 * and no end-of-input check.
 */
#define read_block(tmp)                                                     \
    ({                                                                      \
        unsigned int rb_depth_;                                             \
        bool rb_in_string_;                                                 \
        char *rb_iter_;                                                     \
                                                                            \
        rb_depth_ = 1;                                                      \
        rb_in_string_ = false;                                              \
                                                                            \
        for (rb_iter_ = (tmp);                                              \
             rb_depth_ > 0;                                                 \
             rb_iter_ += (rb_depth_ > 0 ? 1 : 0))                           \
        {                                                                   \
            *rb_iter_ = input();                                            \
                                                                            \
            switch (*rb_iter_)                                              \
            {                                                               \
                case '"':                                                   \
                    rb_in_string_ = !rb_in_string_;                         \
                    break;                                                  \
                                                                            \
                case '{':                                                   \
                    if (!rb_in_string_) rb_depth_++;                        \
                    break;                                                  \
                                                                            \
                case '}':                                                   \
                    if (!rb_in_string_)                                     \
                    {                                                       \
                        rb_depth_--;                                        \
                        if (rb_depth_ == 0) unput('}');                     \
                    }                                                       \
                    break;                                                  \
                                                                            \
                default:                                                    \
                    break;                                                  \
                                                                            \
            }                                                               \
                                                                            \
        }                                                                   \
                                                                            \
        *rb_iter_ = '\0';                                                   \
                                                                            \
    })


/* Start-condition stack helpers for the reentrant scanner. */
#define PUSH_STATE(s) yy_push_state(s, yyscanner)
#define POP_STATE yy_pop_state(yyscanner)


/*
 * Make sure `extra` more bytes can be stored at (*buf)[*used].
 *
 * The capacity is doubled until the request fits (starting from 16 bytes if
 * the buffer is still empty), so a single long chunk cannot overflow the
 * buffer. The previous pointer is kept until realloc() succeeds; running out
 * of memory is reported through YY_FATAL_ERROR.
 */
#define EXTEND_BUFFER_IF_NEEDED(extra)                                      \
    do                                                                      \
    {                                                                       \
        size_t ebin_needed_ = (size_t)*used + (size_t)(extra);              \
                                                                            \
        if (ebin_needed_ > (size_t)*allocated)                              \
        {                                                                   \
            size_t ebin_grown_ = (*allocated > 0 ? (size_t)*allocated : 16);\
            void *ebin_moved_;                                              \
                                                                            \
            while (ebin_grown_ < ebin_needed_)                              \
                ebin_grown_ *= 2;                                           \
                                                                            \
            ebin_moved_ = realloc(*buf, ebin_grown_);                       \
                                                                            \
            if (ebin_moved_ == NULL)                                        \
                YY_FATAL_ERROR("Out of memory while reading a string");     \
            else                                                            \
            {                                                               \
                *buf = ebin_moved_;                                         \
                *allocated = ebin_grown_;                                   \
            }                                                               \
        }                                                                   \
    }                                                                       \
    while (0)


%}


%option bison-bridge reentrant
%option stack
%option nounput
%option noinput
%option noyywrap
%option noyy_top_state
%option yylineno
%option never-interactive


%x rule_intro
%x raw_block

%x strings
%x strval
%x strval_raw
%x strval_hex

%x condition
%x strlit

%x wait_for_colon


%x comment


%%


"rule"                              { PUSH_STATE(rule_intro); return RAW_RULE; }

<rule_intro>[A-Za-z0-9_]+           { yylval->cstring = yytext; return RULE_NAME; }

<rule_intro>[ \t]*                  { }
<rule_intro>"{"                     { POP_STATE; PUSH_STATE(raw_block); return BRACE_IN; }

<raw_block>"strings"                { PUSH_STATE(strings); PUSH_STATE(wait_for_colon); return STRINGS; }
<raw_block,strings>"condition"      { PUSH_STATE(condition); PUSH_STATE(wait_for_colon); return CONDITION; }


%{ /* Condition section: literals */ %}

<condition>"true"                   { return TRUE_; }
<condition>"false"                  { return FALSE_; }

<condition>(0|[1-9][0-9]*)          { yylval->integer = strtoull(yytext, NULL, 10); return INTEGER; }
<condition>0x[0-9a-fA-F]+           { yylval->integer = strtoull(yytext, NULL, 16); return INTEGER; }

<condition>[kK][bB]                 { return KB; }
<condition>[mM][bB]                 { return MB; }
<condition>[gG][bB]                 { return GB; }

<condition>"\""                     {
                                        *used = 0;
                                        PUSH_STATE(strlit);
                                    }

<strlit>"\""                        {
                                        POP_STATE;
                                        EXTEND_BUFFER_IF_NEEDED(1);
                                        (*buf)[(*used)++] = '\0';
                                        yylval->cstring = *buf;
                                        return STRING;
                                    }

<strlit>"\\\""                      { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '"'; }
<strlit>"\\t"                       { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\t'; }
<strlit>"\\r"                       { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\r'; }
<strlit>"\\n"                       { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\n'; }
<strlit>"\\\\"                      { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\\'; }

<strlit>\\x[0-9a-fA-F]{2}           {
                                        char decoded;
                                        decoded = strtol(yytext + 2, NULL, 16);
                                        EXTEND_BUFFER_IF_NEEDED(1);
                                        (*buf)[(*used)++] = decoded;
                                    }

<strlit>[^\\\"]+                    {
                                        /* Copy exactly yyleng bytes: the terminating NUL is  */
                                        /* only appended once the closing quote is reached.   */
                                        size_t chunk;
                                        chunk = (size_t)yyleng;
                                        EXTEND_BUFFER_IF_NEEDED(chunk);
                                        memcpy(&(*buf)[*used], yytext, chunk);
                                        *used += chunk;
                                    }


%{ /* Condition section: operators and keywords */ %}

<condition>"and"                    { return AND; }
<condition>"or"                     { return OR; }
<condition>"not"                    { return NOT; }

<condition>"<"                      { return LT; }
<condition>"<="                     { return LE; }
<condition>"=="                     { return EQ; }
<condition>"!="                     { return NE; }
<condition>">"                      { return GT; }
<condition>">="                     { return GE; }

<condition>"contains"               { return CONTAINS; }
<condition>"startswith"             { return STARTSWITH; }
<condition>"endswith"               { return ENDSWITH; }
<condition>"matches"                { return MATCHES; }
<condition>"icontains"              { return ICONTAINS; }
<condition>"istartswith"            { return ISTARTSWITH; }
<condition>"iendswith"              { return IENDSWITH; }
<condition>"iequals"                { return IEQUALS; }

<condition>"+"                      { return PLUS; }
<condition>"-"                      { return MINUS; }
<condition>"*"                      { return MUL; }
<condition>"\\"                     { return DIV; }
<condition>"%"                      { return MOD; }

<condition>"("                      { return PAREN_O; }
<condition>")"                      { return PAREN_C; }
<condition>","                      { return COMMA; }
<condition>"."                      { return DOT; }

<condition>"none"                   { return NONE; }
<condition>"any"                    { return ANY; }
<condition>"all"                    { return ALL; }
<condition>"of"                     { return OF; }
<condition>"them"                   { return THEM; }


%{ /* Identifiers: the leading '$' is not part of the reported text */ %}

<strings,condition>\$[A-Za-z0-9_]*  {
                                        yylval->sized_cstring.cstring = yytext + 1;
                                        yylval->sized_cstring.len = yyleng - 1;
                                        return IDENTIFIER;
                                    }

<condition>[A-Za-z_][A-Za-z0-9_]*   {
                                        yylval->sized_cstring.cstring = yytext;
                                        yylval->sized_cstring.len = yyleng;
                                        return NAME;
                                    }


%{ /* Strings section: pattern values */ %}

<strings>"="                        { PUSH_STATE(strval); return ASSIGN; }

<strval>\"[^\"\\]+\"                {
                                        /* Escape-free text: report it without the quotes. */
                                        POP_STATE;
                                        yylval->sized_cstring.cstring = yytext + 1;
                                        yylval->sized_cstring.len = yyleng - 2;
                                        return PLAIN_STRING;
                                    }

<strval>"\""                        {
                                        POP_STATE;
                                        /* *built_pattern = g_bytes_pattern_new(); */
                                        PUSH_STATE(strval_raw);
                                    }

<strval>"{"                         {
                                        POP_STATE;
                                        /* *built_pattern = g_bytes_pattern_new(); */
                                        PUSH_STATE(strval_hex);
                                    }

<strval_raw>"\""                    { POP_STATE; /* yylval->pattern = *built_pattern; */ return MASKED_STRING; }

<strval_raw>"\\\""                  { /* g_bytes_pattern_append_data(*built_pattern, '"', 0xff); */ }
<strval_raw>"\\t"                   { /* g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); */ }
<strval_raw>"\\r"                   { /* g_bytes_pattern_append_data(*built_pattern, '\r', 0xff); */ }
<strval_raw>"\\n"                   { /* g_bytes_pattern_append_data(*built_pattern, '\n', 0xff); */ }
<strval_raw>"\\\\"                  { /* g_bytes_pattern_append_data(*built_pattern, '\\', 0xff); */ }

<strval_raw>\\x[0-9a-fA-F]{2}       {
                                        uint8_t byte;
                                        byte = strtol(yytext + 2, NULL, 16);
                                        (void)byte;
                                        /* g_bytes_pattern_append_data(*built_pattern, byte, 0xff); */
                                    }

<strval_raw>.                       { /* g_bytes_pattern_append_data(*built_pattern, *yytext, 0xff); */ }

<strval_hex>"}"                     { POP_STATE; /* yylval->pattern = *built_pattern; */ return MASKED_STRING; }

<strval_hex>[0-9a-fA-F]{2}          {
                                        uint8_t byte;
                                        byte = strtol(yytext, NULL, 16);
                                        (void)byte;
                                        /* g_bytes_pattern_append_data(*built_pattern, byte, 0xff); */
                                    }

<strval_hex>"??"                    { /* g_bytes_pattern_insert_space(*built_pattern, 1, 1); */ }


%{ /* Section and rule delimiters */ %}

<wait_for_colon>":"                 { POP_STATE; return COLON; }

<raw_block,strings,condition>"}"    {
                                        /* The brace closes the whole rule: unwind every section  */
                                        /* state still stacked so that a following "rule" keyword */
                                        /* is scanned from the INITIAL state again.               */
                                        while (YY_START != INITIAL)
                                            POP_STATE;
                                        return BRACE_OUT;
                                    }


%{ /* Comments: never recognized inside quoted text (strlit, strval_raw) */ %}

<INITIAL,rule_intro,raw_block,strings,strval,strval_hex,condition,wait_for_colon,comment>"/*"   { PUSH_STATE(comment); }
<comment>"*/"                       { POP_STATE; }
<comment>(.|\n)                     { }

<INITIAL,rule_intro,raw_block,strings,strval,strval_hex,condition,wait_for_colon>"//"[^\n]*     { }


%{ /* Default actions */ %}

<*>[ \t\n]+                         { }

<*>.                                {
                                        char *msg;
                                        int ret;

                                        ret = asprintf(&msg, "Unhandled token in rule definition: '%s'", yytext);

                                        if (ret == -1)
                                            YY_FATAL_ERROR("Unhandled token in undisclosed rule definition");
                                        else
                                        {
                                            YY_FATAL_ERROR(msg);
                                            free(msg);
                                        }
                                    }


%%