diff options
Diffstat (limited to 'src/analysis/scan/tokens.l')
-rw-r--r-- | src/analysis/scan/tokens.l | 1236 |
1 files changed, 1236 insertions, 0 deletions
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l new file mode 100644 index 0000000..e075cee --- /dev/null +++ b/src/analysis/scan/tokens.l @@ -0,0 +1,1236 @@

%top {

#include "grammar.h"

}


%{


#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>


/**
 * Description : Converts one ASCII hexadecimal digit into its numeric value.
 *
 * Parameters  : digit = character among '0'-'9', 'a'-'f' or 'A'-'F'.
 *
 * Return      : Value of the digit, between 0x0 and 0xf.
 *
 * Remarks     : The lexer patterns only let hexadecimal digits reach this
 *               point, so no validation is performed here.
 */

static bin_t rost_hex_digit_value(bin_t digit)
{
    /* '0' ... '9' */
    if (digit <= '9')
        return (bin_t)(digit - '0');

    /* 'A' ... 'F' || 'a' ... 'f': fold upper case onto lower case first */
    return (bin_t)(0xa + ((digit | 0x20) - 'a'));

}


/**
 * Description : Copies a series of bytes while decoding escape sequences.
 *
 * Parameters  : src   = bytes to process.
 *               len   = number of bytes to process.
 *               regex = also decode "\{" and "\}" when true.
 *               out   = raw bytes produced. [OUT]
 *
 * Return      : -
 *
 * Remarks     : Decoded sequences are \a \b \t \n \v \f \r \e \" \\ and \xHH.
 *               Any other backslash is copied verbatim, together with the
 *               character following it.
 *
 *               The input is read one byte at a time and never beyond
 *               src + len: no alignment is required and the delimiter which
 *               follows the lexeme is never looked at.
 *
 *               The output is never longer than the input; out->bin_data is
 *               written without any further size check.
 */

static void rost_unescape_bytes(const char *src, size_t len, bool regex, sized_string_t *out)
{
    const bin_t *reader;                    /* Read head                      */
    const bin_t *max;                       /* End of the input               */
    bin_t *writer;                          /* Write head                     */
    size_t left;                            /* Bytes still available          */
    size_t consumed;                        /* Input bytes used by an escape  */
    bin_t decoded;                          /* Byte produced by an escape     */

    reader = (const bin_t *)src;
    max = reader + len;

    writer = out->bin_data;

    while (reader < max)
    {
        left = (size_t)(max - reader);

        /* Plain byte, or lone trailing backslash: nothing to decode */
        if (*reader != '\\' || left < 2)
        {
            *writer++ = *reader++;
            continue;
        }

        decoded = 0;
        consumed = 2;

        switch (reader[1])
        {
            case 'a':  decoded = '\a'; break;
            case 'b':  decoded = '\b'; break;
            case 't':  decoded = '\t'; break;
            case 'n':  decoded = '\n'; break;
            case 'v':  decoded = '\v'; break;
            case 'f':  decoded = '\f'; break;
            case 'r':  decoded = '\r'; break;

            /* ESC, spelled numerically as '\e' is not standard C */
            case 'e':  decoded = 0x1b; break;

            case '"':  decoded = '"';  break;
            case '\\': decoded = '\\'; break;

            case 'x':
                /* Both hexadecimal digits have to lie inside the input */
                if (left < 4)
                    consumed = 0;
                else
                {
                    decoded = (bin_t)((rost_hex_digit_value(reader[2]) << 4)
                                      | rost_hex_digit_value(reader[3]));
                    consumed = 4;
                }
                break;

            case '{':
            case '}':
                /* Braces are only escaped inside regular expressions */
                if (regex)
                    decoded = reader[1];
                else
                    consumed = 0;
                break;

            default:
                consumed = 0;
                break;

        }

        if (consumed == 0)
            /* Unknown sequence: keep the backslash, go on with what follows */
            *writer++ = *reader++;

        else
        {
            *writer++ = decoded;
            reader += consumed;
        }

    }

    out->len = writer - out->bin_data;

}


/**
 * Description : Decodes the escape sequences of a quoted string.
 *
 * Parameters  : src = bytes to process.
 *               len = number of bytes to process.
 *               out = raw bytes produced. [OUT]
 *
 * Return      : -
 *
 * Remarks     : -
 */

static void rost_unescape_string(const char *src, size_t len, sized_string_t *out)
{
    rost_unescape_bytes(src, len, false, out);

}


/**
 * Description : Decodes the escape sequences of a regular expression chunk.
 *
 * Parameters  : src = bytes to process.
 *               len = number of bytes to process.
 *               out = raw bytes produced. [OUT]
 *
 * Return      : -
 *
 * Remarks     : On top of the string escapes, "\{" and "\}" are turned into
 *               plain braces. Other escaped regex metacharacters keep their
 *               backslash.
 */

static void rost_unescape_regex(const char *src, size_t len, sized_string_t *out)
{
    rost_unescape_bytes(src, len, true, out);

}


/* Start condition stack handling for the reentrant scanner */
#define PUSH_STATE(s) yy_push_state(s, yyscanner)
#define POP_STATE yy_pop_state(yyscanner)


/**
 * Aborts the scan with a message quoting the current lexeme; the fixed
 * fallback message is used when the full message can not be allocated.
 */
#define STOP_LEXER(msg, fbmsg)                                          \
    do                                                                  \
    {                                                                   \
        char *_stop_text;                                               \
        int _stop_ret;                                                  \
        _stop_ret = asprintf(&_stop_text, "%s: '%s'", msg, yytext);     \
        if (_stop_ret == -1)                                            \
            YY_FATAL_ERROR(fbmsg);                                      \
        else                                                            \
        {                                                               \
            YY_FATAL_ERROR(_stop_text);                                 \
            free(_stop_text);                                           \
        }                                                               \
    }                                                                   \
    while (0)

#define HANDLE_UNCOMPLETED_TOKEN \
    STOP_LEXER("Uncompleted token in rule definition", "Undisclosed uncompleted token in rule definition")


%}


%option bison-bridge reentrant
%option stack
%option nounput
%option noinput
%option noyywrap
%option noyy_top_state
%option yylineno
%option never-interactive

%x inc_path

%x rule_intro
%x raw_block

%x meta
%x meta_value

%x bytes
%x bytes_value
%x bytes_value_raw

%x bytes_hex
%x bytes_hex_range

%x bytes_regex
%x bytes_regex_quantifier
%x bytes_regex_range

%x condition

%x wait_for_colon


%x comment


str_not_escaped     [^\"\\]
str_escaped         \\a|\\b|\\t|\\n|\\v|\\f|\\r|\\e|\\\"|\\\\|\\x{hbyte}
str_mixed           ({str_not_escaped}|{str_escaped})

hbyte               [0-9a-fA-F]{2}
mbyte               (\?[0-9a-fA-F]|[0-9a-fA-F]\?)

reg_allowed         [^^$.|/{}()\[\]*+?\\]
reg_allowed_escaped \\^|\\$|\\\.|\\\||\\\/|\\\{|\\\}|\\\(|\\\)|\\\[|\\\]|\\\*|\\\+|\\\?|\\\\
reg_escaped         \\a|\\t|\\n|\\v|\\f|\\r
reg_byte            \\x[0-9a-fA-F]{2}

regular_chars       {reg_allowed}|{reg_allowed_escaped}|{reg_escaped}|{reg_byte}

reg_classes         \\w|\\W|\\s|\\S|\\d|\\D|\\b|\\B


bytes_id            [A-Za-z_][A-Za-z0-9_]*
bytes_fuzzy_id      [\*A-Za-z_][\*A-Za-z0-9_]*


%%


"include" { PUSH_STATE(inc_path); return INCLUDE; }

<inc_path>\"{str_not_escaped}+\" {
    POP_STATE;

    /* Expose the text without its surrounding quotes */
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 2;

    return PLAIN_TEXT;
}

<inc_path>\"{str_mixed}+\" {
    POP_STATE;

    rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);

#ifndef NDEBUG
    /* Makes debug printouts easier to read */
    tmp_0->data[tmp_0->len] = '\0';
#endif

    yylval->tmp_cstring = tmp_0;

    return ESCAPED_TEXT;
}


%{ /* Local definition of a rule */ %}

"global" { return GLOBAL; }
"private" { return PRIVATE; }

"rule" {
    PUSH_STATE(rule_intro);
    return RAW_RULE;
}

<rule_intro>{bytes_id} {
    yylval->sized_cstring.data = yytext;
    yylval->sized_cstring.len = yyleng;
    return RULE_IDENTIFIER;
}

<rule_intro>":" { return COLON; }

<rule_intro>[ \t]* { }

<rule_intro>"{" {
    POP_STATE;
    PUSH_STATE(raw_block);
    return BRACE_IN;
}

<raw_block>"meta" {
    POP_STATE;
    PUSH_STATE(meta);
    PUSH_STATE(wait_for_colon);
    return META;
}

<raw_block,meta>"bytes" {
    POP_STATE;
    PUSH_STATE(bytes);
    PUSH_STATE(wait_for_colon);
    return BYTES;
}

<raw_block,meta,bytes>"condition" {
    POP_STATE;
    PUSH_STATE(condition);
    PUSH_STATE(wait_for_colon);
    return CONDITION;
}

<wait_for_colon>":" {
    POP_STATE;
    return COLON;
}

<raw_block,meta,bytes,condition>"}" {
    POP_STATE;
    return BRACE_OUT;
}


%{ /* Common definitions for the "meta:" section */ %}

<meta>{bytes_id} {
    yylval->sized_cstring.data = yytext;
    yylval->sized_cstring.len = yyleng;
    return INFO_KEY;
}

<meta>"=" { PUSH_STATE(meta_value); return ASSIGN; }

<meta_value>"true" { POP_STATE; return TRUE_; }
<meta_value>"false" { POP_STATE; return FALSE_; }

<meta_value>-(0|[1-9][0-9]*) {
    POP_STATE;
    yylval->signed_integer = strtoll(yytext, NULL, 10);
    return SIGNED_INTEGER;
}

<meta_value>-0x[0-9a-f]+ {
    POP_STATE;
    yylval->signed_integer = strtoll(yytext, NULL, 16);
    return SIGNED_INTEGER;
}

<meta_value>(0|[1-9][0-9]*) {
    POP_STATE;
    yylval->unsigned_integer = strtoull(yytext, NULL, 10);
    return UNSIGNED_INTEGER;
}

<meta_value>0x[0-9a-f]+ {
    POP_STATE;
    yylval->unsigned_integer = strtoull(yytext, NULL, 16);
    return UNSIGNED_INTEGER;
}

<meta_value>\"{str_not_escaped}*\" {
    POP_STATE;

    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 2;

    return PLAIN_TEXT;
}

<meta_value>\"{str_mixed}*\" {
    POP_STATE;

    rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);

#ifndef NDEBUG
    /* Makes debug printouts easier to read */
    tmp_0->data[tmp_0->len] = '\0';
#endif

    yylval->tmp_cstring = tmp_0;

    return ESCAPED_TEXT;
}


%{ /* To be moved... */ %}

<condition>"true" { return TRUE_; }
<condition>"false" { return FALSE_; }

<condition>-(0|[1-9][0-9]*) { yylval->signed_integer = strtoll(yytext, NULL, 10); return SIGNED_INTEGER; }
<condition>-0x[0-9a-f]+ { yylval->signed_integer = strtoll(yytext, NULL, 16); return SIGNED_INTEGER; }

<bytes_hex_range,bytes_regex_quantifier,condition>(0|[1-9][0-9]*) { yylval->unsigned_integer = strtoull(yytext, NULL, 10); return UNSIGNED_INTEGER; }
<bytes_hex_range,bytes_regex_quantifier,condition>0x[0-9a-f]+ { yylval->unsigned_integer = strtoull(yytext, NULL, 16); return UNSIGNED_INTEGER; }

<condition>[kK][bB] { return KB; }
<condition>[mM][bB] { return MB; }
<condition>[gG][bB] { return GB; }

<condition>\"{str_not_escaped}*\" {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 2;

    return PLAIN_TEXT;
}

<condition>\"{str_mixed}*\" {
    rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);

#ifndef NDEBUG
    /* Makes debug printouts easier to read */
    tmp_0->data[tmp_0->len] = '\0';
#endif

    yylval->tmp_cstring = tmp_0;

    return ESCAPED_TEXT;
}


%{ /* Common definitions for the "bytes:" section */ %}

<bytes>"fullword" { return FULLWORD; }
<bytes>"nocase" { return NOCASE; }
<bytes>"private" { return PRIVATE; }

<bytes>"=" { PUSH_STATE(bytes_value); return ASSIGN; }


%{ /* Plain text pattern definition */ %}

<bytes_value>\"{str_not_escaped}+\" {
    POP_STATE;

    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 2;

    return PLAIN_TEXT;
}

<bytes_value>\"{str_mixed}+\" {
    POP_STATE;

    rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);

#ifndef NDEBUG
    /* Makes debug printouts easier to read */
    tmp_0->data[tmp_0->len] = '\0';
#endif

    yylval->tmp_cstring = tmp_0;

    return ESCAPED_TEXT;
}

<bytes>[A-Za-z_][A-Za-z0-9_]* {
    yylval->sized_cstring.data = yytext;
    yylval->sized_cstring.len = yyleng;
    return NAME;
}

<bytes>"((" { return MOD_GROUP_O; }

<bytes>"))" { return MOD_GROUP_C; }

<bytes>"(" { return PAREN_O; }

<bytes>")" { return PAREN_C; }

<bytes>"," { return COMMA; }

<bytes>\"{str_not_escaped}+\" {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 2;

    return PLAIN_TEXT;
}


%{ /* Hexadecimal pattern definition */ %}

<bytes_value>"{" {
    POP_STATE;
    PUSH_STATE(bytes_hex);
}

<bytes_hex>"}" { POP_STATE; }

<bytes_hex>"[" {
    PUSH_STATE(bytes_hex_range);
    return HOOK_O;
}

<bytes_hex_range>"-" { return MINUS; }

<bytes_hex_range>"]" {
    POP_STATE;
    return HOOK_C;
}

<bytes_hex>"(" { return PAREN_O; }

<bytes_hex>")" { return PAREN_C; }

<bytes_hex>"|" { return PIPE; }

<bytes_hex>"~" { return TILDE; }

<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]* {
    bool even;                              /* Next digit is a high nibble   */
    size_t i;
    bin_t byte;
    bin_t value;

    tmp_0->len = 0;

    even = true;

    for (i = 0; i < (size_t)yyleng; i++)
    {
        byte = yytext[i];

        switch (byte)
        {
            case ' ':
                continue;

            case '0' ... '9':
                value = (byte - '0');
                break;

            case 'A' ... 'F':
                value = 0xa + (byte - 'A');
                break;

            case 'a' ... 'f':
                value = 0xa + (byte - 'a');
                break;

            default:
                /* Unreachable: the pattern only lets spaces and hex digits in */
                assert(false);
                continue;

        }

        /* The byte is only counted once its low nibble has been stored */
        if (even)
            tmp_0->data[tmp_0->len] = (value << 4);
        else
            tmp_0->data[tmp_0->len++] |= value;

        even = !even;

    }

    assert(even);

#ifndef NDEBUG
    /* Makes debug printouts easier to read */
    tmp_0->data[tmp_0->len] = '\0';
#endif

    yylval->tmp_cstring = tmp_0;
    return HEX_BYTES;

}

<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]* {
    unsigned long long counter;             /* Number of '?' characters      */
    size_t i;

    counter = 0;

    for (i = 0; i < (size_t)yyleng; i++)
        if (yytext[i] == '?')
            counter++;

    assert(counter % 2 == 0);

    /* Two '?' stand for one fully masked byte */
    yylval->unsigned_integer = counter / 2;
    return FULL_MASK;

}

<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]* {
    bool even;                              /* Next item is a high nibble    */
    size_t i;
    bin_t byte;
    bin_t value;

    tmp_0->len = 0;
    tmp_1->len = 0;

    even = true;

    for (i = 0; i < (size_t)yyleng; i++)
    {
        byte = yytext[i];

        switch (byte)
        {
            case ' ':
                continue;

            case '?':
                /* Masked nibble: nothing to store, only the position moves */
                even = !even;
                continue;

            case '0' ... '9':
                value = (byte - '0');
                break;

            case 'A' ... 'F':
                value = 0xa + (byte - 'A');
                break;

            case 'a' ... 'f':
                value = 0xa + (byte - 'a');
                break;

            default:
                /* Unreachable: the pattern only lets ' ', '?' and hex digits in */
                assert(false);
                continue;

        }

        /* One value byte and one mask byte are emitted per known nibble */
        if (even)
        {
            tmp_0->data[tmp_0->len++] = (value << 4);
            tmp_1->data[tmp_1->len++] = 0xf0;
        }
        else
        {
            tmp_0->data[tmp_0->len++] = value;
            tmp_1->data[tmp_1->len++] = 0x0f;
        }

        even = !even;

    }

#ifndef NDEBUG
    /* Makes debug printouts easier to read */
    tmp_0->data[tmp_0->len] = '\0';
    tmp_1->data[tmp_1->len] = '\0';
#endif

    yylval->masked.tmp_values = tmp_0;
    yylval->masked.tmp_masks = tmp_1;
    return SEMI_MASK;

}


%{ /* Regular expression definition */ %}

<bytes_value>"/" {
    POP_STATE;
    PUSH_STATE(bytes_regex);
}

<bytes_regex>"/" { POP_STATE; }

<bytes_regex>"." { return DOT; }

<bytes_regex>({regular_chars})+ {
    rost_unescape_regex(yytext, yyleng, tmp_0);

#ifndef NDEBUG
    /* Makes debug printouts easier to read */
    tmp_0->data[tmp_0->len] = '\0';
#endif

    yylval->tmp_cstring = tmp_0;
    return REGEX_BYTES;

}

<bytes_regex>({reg_classes})+ {
    /* NOTE(review): no semantic value is set for this token -- confirm the parser does not need one */
    return REGEX_CLASSES;
}

<bytes_regex>"[" {
    PUSH_STATE(bytes_regex_range);
    return HOOK_O;
}

<bytes_regex_range>"]" {
    POP_STATE;
    return HOOK_C;
}

<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+ {
    /* NOTE(review): no semantic value is set for this token -- confirm the parser does not need one */
    return REGEX_RANGE;
}

<bytes_regex>"(" { return PAREN_O; }

<bytes_regex>")" { return PAREN_C; }

<bytes_regex>"|" { return PIPE; }

<bytes_regex>"*" { return MUL; }
<bytes_regex>"+" { return PLUS; }
<bytes_regex>"?" { return QUESTION; }

<bytes_regex>"{" {
    PUSH_STATE(bytes_regex_quantifier);
    return BRACE_IN;
}

<bytes_regex_quantifier>"," { return COMMA; }

<bytes_regex_quantifier>"}" {
    POP_STATE;
    return BRACE_OUT;
}


%{ /* Match condition */ %}

<condition>"and" { return AND; }
<condition>"or" { return OR; }
<condition>"not" { return NOT; }

<condition>"<" { return LT; }
<condition>"<=" { return LE; }
<condition>"==" { return EQ; }
<condition>"!=" { return NE; }
<condition>">" { return GT; }
<condition>">=" { return GE; }

<condition>"contains" { return CONTAINS; }
<condition>"startswith" { return STARTSWITH; }
<condition>"endswith" { return ENDSWITH; }
<condition>"matches" { return MATCHES; }
<condition>"icontains" { return ICONTAINS; }
<condition>"istartswith" { return ISTARTSWITH; }
<condition>"iendswith" { return IENDSWITH; }
<condition>"iequals" { return IEQUALS; }

<condition>"+" { return PLUS; }
<condition>"-" { return MINUS; }
<condition>"*" { return MUL; }
<condition>"/" { return DIV; }
<condition>"%" { return MOD; }

<condition>"(" { return PAREN_O; }
<condition>")" { return PAREN_C; }
<condition>"," { return COMMA; }

<condition>"[" { return HOOK_O; }
<condition>"]" { return HOOK_C; }

<condition>"." { return DOT; }
<bytes>"|" { return PIPE; }

<condition>"none" { return NONE; }
<condition>"any" { return ANY; }
<condition>"all" { return ALL; }
<condition>"of" { return OF; }
<condition>"them" { return THEM; }
<condition>"in" { return IN; }

%{ /* Pattern references: the one-character prefix is stripped from the value */ %}

<bytes,condition>${bytes_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_ID;
}

<condition>${bytes_fuzzy_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_FUZZY_ID;
}

<condition>#{bytes_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_ID_COUNTER;
}

<condition>#{bytes_fuzzy_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_FUZZY_ID_COUNTER;
}

<condition>@{bytes_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_ID_START;
}

<condition>@{bytes_fuzzy_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_FUZZY_ID_START;
}

<condition>!{bytes_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_ID_LENGTH;
}

<condition>!{bytes_fuzzy_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_FUZZY_ID_LENGTH;
}

<condition>~{bytes_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_ID_END;
}

<condition>~{bytes_fuzzy_id} {
    yylval->sized_cstring.data = yytext + 1;
    yylval->sized_cstring.len = yyleng - 1;
    return BYTES_FUZZY_ID_END;
}

<condition>[A-Za-z_][A-Za-z0-9_]* {
    yylval->sized_cstring.data = yytext;
    yylval->sized_cstring.len = yyleng;
    return NAME;
}


%{ /* Comments */ %}

<*>"/*" { PUSH_STATE(comment); }
<comment>"*/" { POP_STATE; }
<comment>(.|\n) { }

<*>"//"[^\n]* { }


%{ /* Removes the need for backing up: every strict prefix of a token gets a rule of its own */ %}

"i"         { HANDLE_UNCOMPLETED_TOKEN; }
"in"        { HANDLE_UNCOMPLETED_TOKEN; }
"inc"       { HANDLE_UNCOMPLETED_TOKEN; }
"incl"      { HANDLE_UNCOMPLETED_TOKEN; }
"inclu"     { HANDLE_UNCOMPLETED_TOKEN; }
"includ"    { HANDLE_UNCOMPLETED_TOKEN; }

<inc_path>\"                                { HANDLE_UNCOMPLETED_TOKEN; }
<inc_path>\"{str_not_escaped}+              { HANDLE_UNCOMPLETED_TOKEN; }

<inc_path>\"\\                              { HANDLE_UNCOMPLETED_TOKEN; }
<inc_path>\"\\x                             { HANDLE_UNCOMPLETED_TOKEN; }
<inc_path>\"\\x[0-9a-fA-F]                  { HANDLE_UNCOMPLETED_TOKEN; }
<inc_path>\"{str_mixed}+                    { HANDLE_UNCOMPLETED_TOKEN; }
<inc_path>\"{str_mixed}+\\                  { HANDLE_UNCOMPLETED_TOKEN; }
<inc_path>\"{str_mixed}+\\x                 { HANDLE_UNCOMPLETED_TOKEN; }
<inc_path>\"{str_mixed}+\\x[0-9a-fA-F]      { HANDLE_UNCOMPLETED_TOKEN; }

"g"         { HANDLE_UNCOMPLETED_TOKEN; }
"gl"        { HANDLE_UNCOMPLETED_TOKEN; }
"glo"       { HANDLE_UNCOMPLETED_TOKEN; }
"glob"      { HANDLE_UNCOMPLETED_TOKEN; }
"globa"     { HANDLE_UNCOMPLETED_TOKEN; }

"p"         { HANDLE_UNCOMPLETED_TOKEN; }
"pr"        { HANDLE_UNCOMPLETED_TOKEN; }
"pri"       { HANDLE_UNCOMPLETED_TOKEN; }
"priv"      { HANDLE_UNCOMPLETED_TOKEN; }
"priva"     { HANDLE_UNCOMPLETED_TOKEN; }
"privat"    { HANDLE_UNCOMPLETED_TOKEN; }

"r"         { HANDLE_UNCOMPLETED_TOKEN; }
"ru"        { HANDLE_UNCOMPLETED_TOKEN; }
"rul"       { HANDLE_UNCOMPLETED_TOKEN; }

<raw_block>"m"      { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block>"me"     { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block>"met"    { HANDLE_UNCOMPLETED_TOKEN; }

<raw_block,meta>"b"     { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta>"by"    { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta>"byt"   { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta>"byte"  { HANDLE_UNCOMPLETED_TOKEN; }

<raw_block,meta,bytes>"c"           { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta,bytes>"co"          { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta,bytes>"con"         { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta,bytes>"cond"        { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta,bytes>"condi"       { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta,bytes>"condit"      { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta,bytes>"conditi"     { HANDLE_UNCOMPLETED_TOKEN; }
<raw_block,meta,bytes>"conditio"    { HANDLE_UNCOMPLETED_TOKEN; }


<meta_value>"t"     { HANDLE_UNCOMPLETED_TOKEN; }
<meta_value>"tr"    { HANDLE_UNCOMPLETED_TOKEN; }
<meta_value>"tru"   { HANDLE_UNCOMPLETED_TOKEN; }

<meta_value>"f"     { HANDLE_UNCOMPLETED_TOKEN; }
<meta_value>"fa"    { HANDLE_UNCOMPLETED_TOKEN; }
<meta_value>"fal"   { HANDLE_UNCOMPLETED_TOKEN; }
<meta_value>"fals"  { HANDLE_UNCOMPLETED_TOKEN; }

<meta_value>-0x     { HANDLE_UNCOMPLETED_TOKEN; }

<meta_value>0x      { HANDLE_UNCOMPLETED_TOKEN; }

<meta_value>\"{str_mixed}*                  { HANDLE_UNCOMPLETED_TOKEN; }
<meta_value>\"{str_mixed}*\\                { HANDLE_UNCOMPLETED_TOKEN; }
<meta_value>\"{str_mixed}*\\x               { HANDLE_UNCOMPLETED_TOKEN; }
<meta_value>\"{str_mixed}*\\x[0-9a-fA-F]    { HANDLE_UNCOMPLETED_TOKEN; }


<condition>-0x      { HANDLE_UNCOMPLETED_TOKEN; }


<bytes_hex_range,bytes_regex_quantifier,condition>0x { HANDLE_UNCOMPLETED_TOKEN; }


<condition>\"{str_not_escaped}*             { HANDLE_UNCOMPLETED_TOKEN; }

<condition>\"                               { HANDLE_UNCOMPLETED_TOKEN; }
<condition>\"\\                             { HANDLE_UNCOMPLETED_TOKEN; }
<condition>\"\\x                            { HANDLE_UNCOMPLETED_TOKEN; }
<condition>\"\\x[0-9a-fA-F]                 { HANDLE_UNCOMPLETED_TOKEN; }
<condition>\"{str_mixed}+                   { HANDLE_UNCOMPLETED_TOKEN; }
<condition>\"{str_mixed}+\\                 { HANDLE_UNCOMPLETED_TOKEN; }
<condition>\"{str_mixed}+\\x                { HANDLE_UNCOMPLETED_TOKEN; }
<condition>\"{str_mixed}+\\x[0-9a-fA-F]     { HANDLE_UNCOMPLETED_TOKEN; }


<bytes_value>\"                             { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_value>\"\\                           { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_value>\"\\x                          { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_value>\"\\x[0-9a-fA-F]               { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_value>\"{str_mixed}+                 { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_value>\"{str_mixed}+\\               { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_value>\"{str_mixed}+\\x              { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_value>\"{str_mixed}+\\x[0-9a-fA-F]   { HANDLE_UNCOMPLETED_TOKEN; }


<bytes>\"{str_not_escaped}+                 { HANDLE_UNCOMPLETED_TOKEN; }


<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F]/[^?] { HANDLE_UNCOMPLETED_TOKEN; }


<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]*[\?]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }


<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*\?/[^?] { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }


<bytes_regex>\\                                 { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_regex>\\x                                { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_regex>\\x[0-9a-fA-F]                     { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_regex>({regular_chars})+\\               { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_regex>({regular_chars})+\\x              { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_regex>({regular_chars})+\\x[0-9a-fA-F]   { HANDLE_UNCOMPLETED_TOKEN; }


%{ /* This rule used to have no action at all: flex then silently drops the matched text */ %}
%{ /* NOTE(review): longest match also selects these "...\\" rules when a valid escape follows (e.g. "\w\x41"); giving the backslash back with yyless(yyleng - 1) may be the better long-term answer -- confirm */ %}

<bytes_regex>({reg_classes})+\\                 { HANDLE_UNCOMPLETED_TOKEN; }


<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\ { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\x { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_regex_range>\\x                          { HANDLE_UNCOMPLETED_TOKEN; }
<bytes_regex_range>\\x[0-9a-fA-F]               { HANDLE_UNCOMPLETED_TOKEN; }


%{ /* Default actions */ %}

<*>[ \t]+ { }

<*>[\n] { }

<*>. {
    /* Same report as the hand-written version: "<message>: '<lexeme>'" */
    STOP_LEXER("Unhandled token in rule definition", "Unhandled token in undisclosed rule definition");
}


%%