%top{

#include "grammar.h"

}


%{

/*
 * Lexer for rule definitions: a "rule" keyword, a rule name, then a braced
 * block holding an optional "strings:" section and a "condition:" section.
 *
 * NOTE(review): in the copy of this file under review, every "<...>" sequence
 * other than "<*>" was missing: the names of the three system headers and all
 * the start-condition prefixes of the rules. Both have been reconstructed
 * here. The headers are the ones the code below actually needs; the start
 * conditions were derived from the PUSH_STATE()/POP_STATE transitions.
 * Please confirm both against the upstream file and the grammar.
 *
 * The actions rely on `buf`, `used` and `allocated`, which are not declared
 * in this file (presumably extra yylex() arguments provided through YY_DECL
 * by grammar.h -- to be confirmed).
 */

//#include "manual.h"

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/*
 * Copy raw input into `tmp` up to the '}' closing the current block.
 *
 * Braces found between double quotes are ignored. The final '}' is pushed
 * back to the input and replaced by a terminating '\0' in `tmp`.
 *
 * NOTE(review): no rule below uses this macro, and "%option noinput" /
 * "%option nounput" disable the helpers it calls. It also performs no bound
 * check on `tmp` and does not detect the end of input; both need handling
 * before this macro gets used.
 */
#define read_block(tmp)                                                     \
    ({                                                                      \
        unsigned int __depth;                                               \
        bool __is_string;                                                   \
        char *__iter;                                                       \
                                                                            \
        __depth = 1;                                                        \
        __is_string = false;                                                \
                                                                            \
        for (__iter = (tmp); __depth > 0; __iter += (__depth > 0 ? 1 : 0))  \
        {                                                                   \
            *__iter = input();                                              \
                                                                            \
            switch (*__iter)                                                \
            {                                                               \
                case '"':                                                   \
                    __is_string = !__is_string;                             \
                    break;                                                  \
                                                                            \
                case '{':                                                   \
                    if (!__is_string) __depth++;                            \
                    break;                                                  \
                                                                            \
                case '}':                                                   \
                    if (!__is_string)                                       \
                    {                                                       \
                        __depth--;                                          \
                        if (__depth == 0) unput('}');                       \
                    }                                                       \
                    break;                                                  \
                                                                            \
            }                                                               \
                                                                            \
        }                                                                   \
                                                                            \
        *__iter = '\0';                                                     \
                                                                            \
    })


/* Shortcuts for the start-condition stack of the reentrant scanner. */
#define PUSH_STATE(s) yy_push_state((s), yyscanner)
#define POP_STATE     yy_pop_state(yyscanner)


/*
 * Make sure `extra` more bytes can be stored after the *used first bytes of
 * *buf.
 *
 * The capacity is doubled as many times as required (a single doubling is
 * not enough for a long run of characters, and does nothing when the current
 * capacity is 0). The result of realloc() is checked before the previous
 * pointer gets overwritten.
 */
#define EXTEND_BUFFER_IF_NEEDED(extra)                                      \
    do                                                                      \
    {                                                                       \
        size_t __needed = *used + (size_t)(extra);                          \
                                                                            \
        if (__needed > *allocated)                                          \
        {                                                                   \
            size_t __new_size = (*allocated > 0 ? *allocated : 16);         \
            void *__new_buf;                                                \
                                                                            \
            while (__new_size < __needed)                                   \
                __new_size *= 2;                                            \
                                                                            \
            __new_buf = realloc(*buf, __new_size);                          \
                                                                            \
            if (__new_buf == NULL)                                          \
                YY_FATAL_ERROR("Out of memory while reading a string");     \
            else                                                            \
            {                                                               \
                *buf = __new_buf;                                           \
                *allocated = __new_size;                                    \
            }                                                               \
        }                                                                   \
    }                                                                       \
    while (0)

%}


%option bison-bridge reentrant
%option stack
%option nounput
%option noinput
%option noyywrap
%option noyy_top_state
%option yylineno
%option never-interactive


%x rule_intro
%x raw_block
%x strings
%x strval
%x strval_raw
%x strval_hex
%x condition
%x strlit
%x wait_for_colon
%x comment


%%


"rule"                          { PUSH_STATE(rule_intro); return RAW_RULE; }

<rule_intro>[A-Za-z0-9_]+       {
                                    yylval->sized_cstring.data = yytext;
                                    yylval->sized_cstring.len = yyleng;
                                    return RULE_NAME;
                                }

<rule_intro>[ \t]*              { }

<rule_intro>"{"                 { POP_STATE; PUSH_STATE(raw_block); return BRACE_IN; }

<raw_block>"strings"            { PUSH_STATE(strings); PUSH_STATE(wait_for_colon); return STRINGS; }

%{
    /*
     * NOTE(review): nothing pops the `strings` state before the condition
     * section starts, so the keyword is accepted from both states here.
     */
%}

<raw_block,strings>"condition"  { PUSH_STATE(condition); PUSH_STATE(wait_for_colon); return CONDITION; }


%{
    /* Literals inside a condition */
%}

<condition>"true"               { return TRUE_; }
<condition>"false"              { return FALSE_; }

<condition>(0|[1-9][0-9]*)      {
                                    yylval->integer = strtoull(yytext, NULL, 10);
                                    return INTEGER;
                                }

<condition>0x[0-9a-fA-F]+       {
                                    yylval->integer = strtoull(yytext, NULL, 16);
                                    return INTEGER;
                                }

<condition>[kK][bB]             { return KB; }
<condition>[mM][bB]             { return MB; }
<condition>[gG][bB]             { return GB; }

<condition>"\""                 {
                                    *used = 0;
                                    PUSH_STATE(strlit);
                                }


%{
    /*
     * Body of a quoted string literal, accumulated into *buf.
     *
     * NOTE(review): the length reported for STRING counts the final '\0',
     * unlike the other sized strings built from yyleng. This is kept as is
     * because the consumer is not visible from here.
     */
%}

<strlit>"\""                    {
                                    POP_STATE;

                                    EXTEND_BUFFER_IF_NEEDED(1);
                                    (*buf)[(*used)++] = '\0';

                                    yylval->sized_cstring.data = *buf;
                                    yylval->sized_cstring.len = *used;
                                    return STRING;
                                }

<strlit>"\\\""                  { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '"'; }
<strlit>"\\t"                   { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\t'; }
<strlit>"\\r"                   { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\r'; }
<strlit>"\\n"                   { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\n'; }
<strlit>"\\\\"                  { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\\'; }

<strlit>\\x[0-9a-fA-F]{2}       {
                                    EXTEND_BUFFER_IF_NEEDED(1);
                                    (*buf)[(*used)++] = (char)strtol(yytext + 2, NULL, 16);
                                }

<strlit>[^\\\"]+                {
                                    /*
                                     * Copy exactly the matched bytes: the
                                     * terminator is appended by the closing
                                     * quote, so no room is needed for it here.
                                     */
                                    size_t run_len = (size_t)yyleng;

                                    EXTEND_BUFFER_IF_NEEDED(run_len);
                                    memcpy(&(*buf)[*used], yytext, run_len);
                                    *used += run_len;
                                }


%{
    /* Operators and keywords inside a condition */
%}

<condition>"and"                { return AND; }
<condition>"or"                 { return OR; }
<condition>"not"                { return NOT; }

<condition>"<"                  { return LT; }
<condition>"<="                 { return LE; }
<condition>"=="                 { return EQ; }
<condition>"!="                 { return NE; }
<condition>">"                  { return GT; }
<condition>">="                 { return GE; }

<condition>"contains"           { return CONTAINS; }
<condition>"startswith"         { return STARTSWITH; }
<condition>"endswith"           { return ENDSWITH; }
<condition>"matches"            { return MATCHES; }
<condition>"icontains"          { return ICONTAINS; }
<condition>"istartswith"        { return ISTARTSWITH; }
<condition>"iendswith"          { return IENDSWITH; }
<condition>"iequals"            { return IEQUALS; }

<condition>"+"                  { return PLUS; }
<condition>"-"                  { return MINUS; }
<condition>"*"                  { return MUL; }
<condition>"\\"                 { return DIV; }
<condition>"%"                  { return MOD; }

<condition>"("                  { return PAREN_O; }
<condition>")"                  { return PAREN_C; }
<condition>","                  { return COMMA; }
<condition>"."                  { return DOT; }

<condition>"none"               { return NONE; }
<condition>"any"                { return ANY; }
<condition>"all"                { return ALL; }
<condition>"of"                 { return OF; }
<condition>"them"               { return THEM; }


%{
    /*
     * NOTE(review): "$name" identifiers are accepted both where patterns get
     * defined and where they get referenced; plain names are only accepted
     * inside conditions. Confirm these scopes against the grammar.
     */
%}

<strings,condition>\$[A-Za-z0-9_]* {
                                    yylval->sized_cstring.data = yytext + 1;
                                    yylval->sized_cstring.len = yyleng - 1;
                                    return IDENTIFIER;
                                }

<condition>[A-Za-z_][A-Za-z0-9_]* {
                                    yylval->sized_cstring.data = yytext;
                                    yylval->sized_cstring.len = yyleng;
                                    return NAME;
                                }


%{
    /* Value of a pattern defined in the "strings" section */
%}

<strings>"="                    { PUSH_STATE(strval); return ASSIGN; }

<strval>\"[^\"\\]+\"            {
                                    POP_STATE;
                                    yylval->sized_cstring.data = yytext + 1;
                                    yylval->sized_cstring.len = yyleng - 2;
                                    return PLAIN_STRING;
                                }

<strval>"\""                    { POP_STATE; PUSH_STATE(strval_raw); }

<strval>"{"                     { POP_STATE; PUSH_STATE(strval_hex); }


%{
    /*
     * TODO: the construction of masked patterns (the g_bytes_pattern_new(),
     * g_bytes_pattern_append_data() and g_bytes_pattern_insert_space() calls)
     * is currently disabled. The two states below only consume their input,
     * and MASKED_STRING is returned without any semantic value.
     */
%}

<strval_raw>"\""                { POP_STATE; return MASKED_STRING; }

<strval_raw>"\\\""              { }
<strval_raw>"\\t"               { }
<strval_raw>"\\r"               { }
<strval_raw>"\\n"               { }
<strval_raw>"\\\\"              { }
<strval_raw>\\x[0-9a-fA-F]{2}   { }
<strval_raw>.                   { }

<strval_hex>"}"                 { POP_STATE; return MASKED_STRING; }

<strval_hex>[0-9a-fA-F]{2}      { }
<strval_hex>"??"                { }


<wait_for_colon>":"             { POP_STATE; return COLON; }

%{
    /*
     * NOTE(review): only one state is popped when the rule block gets closed;
     * the set of states accepting the closing brace is a reconstruction.
     */
%}

<raw_block,strings,condition>"}" { POP_STATE; return BRACE_OUT; }


%{
    /* Comments */
%}

<*>"/*"                         { PUSH_STATE(comment); }
<comment>"*/"                   { POP_STATE; }
<comment>(.|\n)                 { }

<*>"//"[^\n]*                   { }


%{
    /* Default actions */
%}

<*>[ \t\n]+                     { }

<*>.                            {
                                    char *msg;
                                    int ret;

                                    ret = asprintf(&msg, "Unhandled token in rule definition: '%s'", yytext);

                                    if (ret == -1)
                                        YY_FATAL_ERROR("Unhandled token in undisclosed rule definition");

                                    else
                                    {
                                        YY_FATAL_ERROR(msg);
                                        free(msg);
                                    }

                                }


%%