diff options
Diffstat (limited to 'src/analysis/scan/tokens.l')
| -rw-r--r-- | src/analysis/scan/tokens.l | 961 |
1 files changed, 592 insertions, 369 deletions
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l index 1a17344..e075cee 100644 --- a/src/analysis/scan/tokens.l +++ b/src/analysis/scan/tokens.l @@ -8,13 +8,35 @@ %{ -//#include "manual.h" #include <assert.h> #include <stdbool.h> #include <stdlib.h> +/* Tête de lecture pour conversions */ +typedef union _read_ptr_t +{ + const uint8_t *byte_pos; /* Lecture par blocs de 8 bits */ + const uint16_t *hword_pos; /* Lecture par blocs de 16 bits*/ + +} read_ptr_t; + + +#if __BYTE_ORDER == __LITTLE_ENDIAN + +# define MAKE_HWORD(ch1, ch2) ((uint16_t)(ch2 << 8 | ch1)) + +#elif __BYTE_ORDER == __BIG_ENDIAN + +# define MAKE_HWORD(ch1, ch2) ((uint16_t)(ch1 << 8 | ch2)) + +#else + + /* __PDP_ENDIAN et Cie... */ +# error "Congratulations! Your byte order is not supported!" + +#endif @@ -32,124 +54,143 @@ * * ******************************************************************************/ -static void rost_unescape_string_bytes(const char *src, size_t len, sized_string_t *out) +static void rost_unescape_string(const char *src, size_t len, sized_string_t *out) { - size_t i; /* Boucle de parcours */ + read_ptr_t reader; /* Tête de lecture */ + const bin_t *max; /* Fin du parcours */ + uint16_t half; /* Moitié de mot */ bin_t byte; /* Octet à analyser */ - bin_t next; /* Octet suivant */ + bin_t *writer; /* Tête d'écriture */ - out->len = 0; + reader.byte_pos = (const uint8_t *)src; + max = reader.byte_pos + len; - for (i = 0; i < len; i++) - { - byte = src[i]; + writer = out->bin_data; - switch (byte) + while (reader.byte_pos < max) + { + /** + * La lecture par groupes de deux octets n'est pas forcément toujours + * logique : pour "\nabc", la dernière lecture va considérer 'c"', + * incluant ainsi le caractère '"' qui a été écarté pour l'appel. + * + * Le code est cependant suffisamment souple pour ignore le superflu. + */ + switch (*reader.hword_pos) { - case '\\': - - next = src[i + 1]; - - switch (next) - { - case 'a': - out->data[out->len++] = '\a'; - break; - - case 'b': - out->data[out->len++] = '\b'; - break; - - case 't': - out->data[out->len++] = '\t'; - break; - - case 'n': - out->data[out->len++] = '\n'; - break; - - case 'v': - out->data[out->len++] = '\v'; - break; - - case 'f': - out->data[out->len++] = '\f'; - break; - - case 'r': - out->data[out->len++] = '\r'; - break; - - case 'e': - out->data[out->len++] = '\e'; - break; - - case '"': - out->data[out->len++] = '\"'; - break; + case MAKE_HWORD('\\', 'a'): + reader.hword_pos++; + *writer++ = '\a'; + break; - case '\\': - out->data[out->len++] = '\\'; - break; + case MAKE_HWORD('\\', 'b'): + reader.hword_pos++; + *writer++ = '\b'; + break; - case 'x': + case MAKE_HWORD('\\', 't'): + reader.hword_pos++; + *writer++ = '\t'; + break; - next = src[i + 2]; + case MAKE_HWORD('\\', 'n'): + reader.hword_pos++; + *writer++ = '\n'; + break; - switch (next) - { - case '0' ... '9': - out->data[out->len] = (next - '0'); - break; + case MAKE_HWORD('\\', 'v'): + reader.hword_pos++; + *writer++ = '\v'; + break; - case 'A' ... 'F': - out->data[out->len] = 0xa + (next - 'A'); - break; + case MAKE_HWORD('\\', 'f'): + reader.hword_pos++; + *writer++ = '\f'; + break; - case 'a' ... 'f': - out->data[out->len] = 0xa + (next - 'a'); - break; + case MAKE_HWORD('\\', 'r'): + reader.hword_pos++; + *writer++ = '\r'; + break; - } + case MAKE_HWORD('\\', 'e'): + reader.hword_pos++; + *writer++ = '\e'; + break; - out->data[out->len] <<= 4; + case MAKE_HWORD('\\', '"'): + reader.hword_pos++; + *writer++ = '\"'; + break; - next = src[i + 3]; + case MAKE_HWORD('\\', '\\'): + reader.hword_pos++; + *writer++ = '\\'; + break; - switch (next) - { - case '0' ... '9': - out->data[out->len] |= (next - '0'); - break; + case MAKE_HWORD('\\', 'x'): + reader.hword_pos++; + + /** + * Le jeu des expressions régulières qui amène à l'appel de + * cette fonction limite les caractères possibles à trois + * ensembles : chiffres et lettres en majuscules et minuscules. + * + * La bascule des lettres en minuscules ramène les possibles + * à deux ensembles uniquement, simplifiant ainsi les règles + * de filtrage : aucun switch case n'est ainsi requis ! + */ + + half = *reader.hword_pos++; + +#if __BYTE_ORDER == __LITTLE_ENDIAN + byte = (half & 0xff); +#elif __BYTE_ORDER == __BIG_ENDIAN + byte = (half >> 8); +#endif - case 'A' ... 'F': - out->data[out->len] |= 0xa + (next - 'A'); - break; + /* '0' ... '9' */ + if (byte <= '9') + *writer = (byte - '0'); - case 'a' ... 'f': - out->data[out->len] |= 0xa + (next - 'a'); - break; + /* 'A' ... 'F' || 'a' ... 'f' */ + else + { + byte |= 0x20; + *writer = 0xa + (byte - 'a'); + } - } + *writer <<= 4; - out->len++; +#if __BYTE_ORDER == __LITTLE_ENDIAN + byte = (half >> 8); +#elif __BYTE_ORDER == __BIG_ENDIAN + byte = (half & 0xff); +#endif - i += 2; - break; + /* '0' ... '9' */ + if (byte <= '9') + *writer++ |= (byte - '0'); + /* 'A' ... 'F' || 'a' ... 'f' */ + else + { + byte |= 0x20; + *writer++ |= 0xa + (byte - 'a'); } - i++; break; default: - out->data[out->len++] = byte; + *writer++ = *reader.byte_pos++; break; } } + out->len = writer - out->bin_data; + } @@ -167,147 +208,178 @@ static void rost_unescape_string_bytes(const char *src, size_t len, sized_string * * ******************************************************************************/ -static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out) +static void rost_unescape_regex(const char *src, size_t len, sized_string_t *out) { - size_t i; /* Boucle de parcours */ + read_ptr_t reader; /* Tête de lecture */ + const bin_t *max; /* Fin du parcours */ + uint16_t half; /* Moitié de mot */ bin_t byte; /* Octet à analyser */ - bin_t next; /* Octet suivant */ + bin_t *writer; /* Tête d'écriture */ - out->len = 0; + reader.byte_pos = (const uint8_t *)src; + max = reader.byte_pos + len; - for (i = 0; i < len; i++) - { - byte = src[i]; + writer = out->bin_data; - switch (byte) + while (reader.byte_pos < max) + { + /** + * La lecture par groupes de deux octets n'est pas forcément toujours + * logique : pour "\nabc", la dernière lecture va considérer 'c"', + * incluant ainsi le caractère '"' qui a été écarté pour l'appel. + * + * Le code est cependant suffisamment souple pour ignore le superflu. + */ + switch (*reader.hword_pos) { - case '\\': - - next = src[i + 1]; - - switch (next) - { - case 'a': - out->data[out->len++] = '\a'; - break; - - case 'b': - out->data[out->len++] = '\b'; - break; - - case 't': - out->data[out->len++] = '\t'; - break; - - case 'n': - out->data[out->len++] = '\n'; - break; - - case 'v': - out->data[out->len++] = '\v'; - break; - - case 'f': - out->data[out->len++] = '\f'; - break; - - case 'r': - out->data[out->len++] = '\r'; - break; - - case 'e': - out->data[out->len++] = '\e'; - break; - - case '"': - out->data[out->len++] = '\"'; - break; + case MAKE_HWORD('\\', 'a'): + reader.hword_pos++; + *writer++ = '\a'; + break; - case '\\': - out->data[out->len++] = '\\'; - break; + case MAKE_HWORD('\\', 'b'): + reader.hword_pos++; + *writer++ = '\b'; + break; - case 'x': + case MAKE_HWORD('\\', 't'): + reader.hword_pos++; + *writer++ = '\t'; + break; - next = src[i + 2]; + case MAKE_HWORD('\\', 'n'): + reader.hword_pos++; + *writer++ = '\n'; + break; - switch (next) - { - case '0' ... '9': - out->data[out->len] = (next - '0'); - break; + case MAKE_HWORD('\\', 'v'): + reader.hword_pos++; + *writer++ = '\v'; + break; - case 'A' ... 'F': - out->data[out->len] = 0xa + (next - 'A'); - break; + case MAKE_HWORD('\\', 'f'): + reader.hword_pos++; + *writer++ = '\f'; + break; - case 'a' ... 'f': - out->data[out->len] = 0xa + (next - 'a'); - break; + case MAKE_HWORD('\\', 'r'): + reader.hword_pos++; + *writer++ = '\r'; + break; - } + case MAKE_HWORD('\\', 'e'): + reader.hword_pos++; + *writer++ = '\e'; + break; - out->data[out->len] <<= 4; + case MAKE_HWORD('\\', '"'): + reader.hword_pos++; + *writer++ = '\"'; + break; - next = src[i + 3]; + case MAKE_HWORD('\\', '\\'): + reader.hword_pos++; + *writer++ = '\\'; + break; - switch (next) - { - case '0' ... '9': - out->data[out->len] |= (next - '0'); - break; + case MAKE_HWORD('\\', 'x'): + reader.hword_pos++; + + /** + * Le jeu des expressions régulières qui amène à l'appel de + * cette fonction limite les caractères possibles à trois + * ensembles : chiffres et lettres en majuscules et minuscules. + * + * La bascule des lettres en minuscules ramène les possibles + * à deux ensembles uniquement, simplifiant ainsi les règles + * de filtrage : aucun switch case n'est ainsi requis ! + */ + + half = *reader.hword_pos++; + +#if __BYTE_ORDER == __LITTLE_ENDIAN + byte = (half & 0xff); +#elif __BYTE_ORDER == __BIG_ENDIAN + byte = (half >> 8); +#endif - case 'A' ... 'F': - out->data[out->len] |= 0xa + (next - 'A'); - break; + /* '0' ... '9' */ + if (byte <= '9') + *writer = (byte - '0'); - case 'a' ... 'f': - out->data[out->len] |= 0xa + (next - 'a'); - break; + /* 'A' ... 'F' || 'a' ... 'f' */ + else + { + byte |= 0x20; + *writer = 0xa + (byte - 'a'); + } - } + *writer <<= 4; - out->len++; +#if __BYTE_ORDER == __LITTLE_ENDIAN + byte = (half >> 8); +#elif __BYTE_ORDER == __BIG_ENDIAN + byte = (half & 0xff); +#endif - i += 2; - break; + /* '0' ... '9' */ + if (byte <= '9') + *writer++ |= (byte - '0'); - case '{': - out->data[out->len++] = '{'; - break; + /* 'A' ... 'F' || 'a' ... 'f' */ + else + { + byte |= 0x20; + *writer++ |= 0xa + (byte - 'a'); + } - case '}': - out->data[out->len++] = '}'; - break; + break; - } + case MAKE_HWORD('\\', '{'): + reader.hword_pos++; + *writer++ = '{'; + break; - i++; + case MAKE_HWORD('\\', '}'): + reader.hword_pos++; + *writer++ = '}'; break; default: - out->data[out->len++] = byte; + *writer++ = *reader.byte_pos++; break; } } -} + out->len = writer - out->bin_data; +} #define PUSH_STATE(s) yy_push_state(s, yyscanner) #define POP_STATE yy_pop_state(yyscanner) +#define STOP_LEXER(msg, fbmsg) \ + do \ + { \ + char *__text; \ + int __ret; \ + __ret = asprintf(&__text, "%s: '%s'", msg, yytext); \ + if (__ret == -1) \ + YY_FATAL_ERROR(fbmsg); \ + else \ + { \ + YY_FATAL_ERROR(__text); \ + free(__text); \ + } \ + } \ + while (0) -#define EXTEND_BUFFER_IF_NEEDED(extra) \ - if ((*used + extra) > *allocated) \ - { \ - *allocated *= 2; \ - *buf = realloc(*buf, *allocated); \ - } +#define HANDLE_UNCOMPLETED_TOKEN \ + STOP_LEXER("Uncompleted token in rule definition", "Undisclosed uncompleted token in rule definition") %} @@ -342,7 +414,6 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out %x bytes_regex_range %x condition -%x strlit %x wait_for_colon @@ -388,7 +459,7 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* <inc_path>\"{str_mixed}+\" { POP_STATE; - rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0); + rost_unescape_string(yytext + 1, yyleng - 2, tmp_0); #ifndef NDEBUG /* Pour rendre plus lisibles les impressions de débogage */ @@ -411,12 +482,14 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* return RAW_RULE; } - <rule_intro>[A-Za-z0-9_]+ { + <rule_intro>{bytes_id} { yylval->sized_cstring.data = yytext; yylval->sized_cstring.len = yyleng; - return RULE_NAME; + return RULE_IDENTIFIER; } + <rule_intro>":" { return COLON; } + <rule_intro>[ \t]* { } <rule_intro>"{" { @@ -494,7 +567,7 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* return UNSIGNED_INTEGER; } - <meta_value>\"{str_not_escaped}+\" { + <meta_value>\"{str_not_escaped}*\" { POP_STATE; yylval->sized_cstring.data = yytext + 1; @@ -503,10 +576,10 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* return PLAIN_TEXT; } - <meta_value>\"{str_mixed}+\" { + <meta_value>\"{str_mixed}*\" { POP_STATE; - rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0); + rost_unescape_string(yytext + 1, yyleng - 2, tmp_0); #ifndef NDEBUG /* Pour rendre plus lisibles les impressions de débogage */ @@ -535,38 +608,25 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* <condition>[mM][bB] { return MB; } <condition>[gG][bB] { return GB; } -<condition>"\"" { - *used = 0; - PUSH_STATE(strlit); - } +<condition>\"{str_not_escaped}*\" { + yylval->sized_cstring.data = yytext + 1; + yylval->sized_cstring.len = yyleng - 2; -<strlit>"\"" { - POP_STATE; - yylval->sized_cstring.data = *buf; - yylval->sized_cstring.len = *used; - return STRING; - } + return PLAIN_TEXT; + } -<strlit>"\\\"" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '"'; } -<strlit>"\\t" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\t'; } -<strlit>"\\r" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\r'; } -<strlit>"\\n" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\n'; } -<strlit>"\\\\" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\\'; } - -<strlit>\\x[0-9a-fA-F]{2} { - char __ch; - __ch = strtol(yytext + 2, NULL, 16); - EXTEND_BUFFER_IF_NEEDED(1); - (*buf)[(*used)++] = __ch; - } +<condition>\"{str_mixed}*\" { + rost_unescape_string(yytext + 1, yyleng - 2, tmp_0); -<strlit>[^\\\"]+ { - size_t __len; - __len = strlen(yytext); - EXTEND_BUFFER_IF_NEEDED(__len); - strcpy(&(*buf)[*used], yytext); - *used += __len; - } +#ifndef NDEBUG + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; +#endif + + yylval->tmp_cstring = tmp_0; + + return ESCAPED_TEXT; + } %{ /* Définitions communes pour la section "bytes:" */ %} @@ -592,7 +652,7 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* <bytes_value>\"{str_mixed}+\" { POP_STATE; - rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0); + rost_unescape_string(yytext + 1, yyleng - 2, tmp_0); #ifndef NDEBUG /* Pour rendre plus lisibles les impressions de débogage */ @@ -605,173 +665,203 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* } -%{ /* Définition de motif en hexadécimal */ %} - <bytes_value>"{" { - POP_STATE; - PUSH_STATE(bytes_hex); - } - - <bytes_hex>"}" { POP_STATE; } +<bytes>[A-Za-z_][A-Za-z0-9_]* { + yylval->sized_cstring.data = yytext; + yylval->sized_cstring.len = yyleng; + return NAME; + } - <bytes_hex>"[" { - PUSH_STATE(bytes_hex_range); - return HOOK_O; - } - <bytes_hex_range>"-" { return MINUS; } + <bytes>"((" { return MOD_GROUP_O; } - <bytes_hex_range>"]" { - POP_STATE; - return HOOK_C; - } + <bytes>"))" { return MOD_GROUP_C; } - <bytes_hex>"(" { return PAREN_O; } + <bytes>"(" { return PAREN_O; } - <bytes_hex>")" { return PAREN_C; } + <bytes>")" { return PAREN_C; } - <bytes_hex>"|" { return PIPE; } + <bytes>"," { return COMMA; } - <bytes_hex>"~" { return TILDE; } - <bytes_hex>{hbyte}([ ]*{hbyte})* { - bool even; - size_t i; - bin_t byte; - bin_t value; +<bytes>\"{str_not_escaped}+\" { + yylval->sized_cstring.data = yytext + 1; + yylval->sized_cstring.len = yyleng - 2; - tmp_0->len = 0; + return PLAIN_TEXT; + } - even = true; - for (i = 0; i < yyleng; i++) - { - byte = yytext[i]; - switch (byte) - { - case ' ': - continue; - break; - case '0' ... '9': - value = (byte - '0'); - break; - case 'A' ... 'F': - value = 0xa + (byte - 'A'); - break; +%{ /* Définition de motif en hexadécimal */ %} - case 'a' ... 'f': - value = 0xa + (byte - 'a'); - break; + <bytes_value>"{" { + POP_STATE; + PUSH_STATE(bytes_hex); + } - } + <bytes_hex>"}" { POP_STATE; } - if (even) - tmp_0->data[tmp_0->len] = (value << 4); - else - tmp_0->data[tmp_0->len++] |= value; + <bytes_hex>"[" { + PUSH_STATE(bytes_hex_range); + return HOOK_O; + } - even = !even; + <bytes_hex_range>"-" { return MINUS; } + <bytes_hex_range>"]" { + POP_STATE; + return HOOK_C; } - assert(even); + <bytes_hex>"(" { return PAREN_O; } -#ifndef NDEBUG - /* Pour rendre plus lisibles les impressions de débogage */ - tmp_0->data[tmp_0->len] = '\0'; -#endif + <bytes_hex>")" { return PAREN_C; } - yylval->tmp_cstring = tmp_0; - return HEX_BYTES; + <bytes_hex>"|" { return PIPE; } - } + <bytes_hex>"~" { return TILDE; } - <bytes_hex>[\?]{2}([ ]*[\?]{2})* { - unsigned long long counter; - size_t i; + <bytes_hex>{hbyte}([ ]*{hbyte})*[ ]* { + bool even; + size_t i; + bin_t byte; + bin_t value; - counter = 0; + tmp_0->len = 0; - for (i = 0; i < yyleng; i++) - if (yytext[i] == '?') - counter++; + even = true; - assert(counter % 2 == 0); + for (i = 0; i < yyleng; i++) + { + byte = yytext[i]; - yylval->unsigned_integer = counter / 2; - return FULL_MASK; + switch (byte) + { + case ' ': + continue; + break; - } + case '0' ... '9': + value = (byte - '0'); + break; - <bytes_hex>{mbyte}([ ]*{mbyte})* { - bool even; - size_t i; - bin_t byte; - bin_t value; + case 'A' ... 'F': + value = 0xa + (byte - 'A'); + break; - tmp_0->len = 0; - tmp_1->len = 0; + case 'a' ... 'f': + value = 0xa + (byte - 'a'); + break; - even = true; + } - for (i = 0; i < yyleng; i++) - { - byte = yytext[i]; + if (even) + tmp_0->data[tmp_0->len] = (value << 4); + else + tmp_0->data[tmp_0->len++] |= value; - switch (byte) - { - case ' ': - continue; - break; + even = !even; - case '?': - even = !even; - continue; - break; + } - case '0' ... '9': - value = (byte - '0'); - break; + assert(even); - case 'A' ... 'F': - value = 0xa + (byte - 'A'); - break; +#ifndef NDEBUG + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; +#endif - case 'a' ... 'f': - value = 0xa + (byte - 'a'); - break; + yylval->tmp_cstring = tmp_0; + return HEX_BYTES; - } + } - if (even) - { - tmp_0->data[tmp_0->len++] = (value << 4); - tmp_1->data[tmp_1->len++] = 0xf0; - } - else - { - tmp_0->data[tmp_0->len++] = value; - tmp_1->data[tmp_1->len++] = 0x0f; - } + <bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]* { + unsigned long long counter; + size_t i; + + counter = 0; + + for (i = 0; i < yyleng; i++) + if (yytext[i] == '?') + counter++; + + assert(counter % 2 == 0); - even = !even; + yylval->unsigned_integer = counter / 2; + return FULL_MASK; } + <bytes_hex>{mbyte}([ ]*{mbyte})*[ ]* { + bool even; + size_t i; + bin_t byte; + bin_t value; + + tmp_0->len = 0; + tmp_1->len = 0; + + even = true; + + for (i = 0; i < yyleng; i++) + { + byte = yytext[i]; + + switch (byte) + { + case ' ': + continue; + break; + + case '?': + even = !even; + continue; + break; + + case '0' ... '9': + value = (byte - '0'); + break; + + case 'A' ... 'F': + value = 0xa + (byte - 'A'); + break; + + case 'a' ... 'f': + value = 0xa + (byte - 'a'); + break; + + } + + if (even) + { + tmp_0->data[tmp_0->len++] = (value << 4); + tmp_1->data[tmp_1->len++] = 0xf0; + } + else + { + tmp_0->data[tmp_0->len++] = value; + tmp_1->data[tmp_1->len++] = 0x0f; + } + + even = !even; + + } + #ifndef NDEBUG - /* Pour rendre plus lisibles les impressions de débogage */ - tmp_0->data[tmp_0->len] = '\0'; - tmp_1->data[tmp_1->len] = '\0'; + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; + tmp_1->data[tmp_1->len] = '\0'; #endif - yylval->masked.tmp_values = tmp_0; - yylval->masked.tmp_masks = tmp_1; - return SEMI_MASK; + yylval->masked.tmp_values = tmp_0; + yylval->masked.tmp_masks = tmp_1; + return SEMI_MASK; - } + } %{ /* Définition d'expressions régulières */ %} @@ -787,7 +877,7 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* <bytes_regex>"." { return DOT; } <bytes_regex>({regular_chars})+ { - rost_unescape_bytes(yytext, yyleng, tmp_0); + rost_unescape_regex(yytext, yyleng, tmp_0); printf(" regular: '%s'\n", yytext); @@ -844,14 +934,14 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* <bytes_regex>"{" { PUSH_STATE(bytes_regex_quantifier); - return BRACKET_O; + return BRACE_IN; } <bytes_regex_quantifier>"," { return COMMA; } <bytes_regex_quantifier>"}" { POP_STATE; - return BRACKET_C; + return BRACE_OUT; } @@ -883,9 +973,9 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* <condition>"/" { return DIV; } <condition>"%" { return MOD; } -<bytes,condition>"(" { return PAREN_O; } -<bytes,condition>")" { return PAREN_C; } -<bytes,condition>"," { return COMMA; } +<condition>"(" { return PAREN_O; } +<condition>")" { return PAREN_C; } +<condition>"," { return COMMA; } <condition>"[" { return HOOK_O; } @@ -921,30 +1011,54 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* return BYTES_ID_COUNTER; } + <condition>#{bytes_fuzzy_id} { + yylval->sized_cstring.data = yytext + 1; + yylval->sized_cstring.len = yyleng - 1; + return BYTES_FUZZY_ID_COUNTER; + } + <condition>@{bytes_id} { yylval->sized_cstring.data = yytext + 1; yylval->sized_cstring.len = yyleng - 1; return BYTES_ID_START; } + <condition>@{bytes_fuzzy_id} { + yylval->sized_cstring.data = yytext + 1; + yylval->sized_cstring.len = yyleng - 1; + return BYTES_FUZZY_ID_START; + } + <condition>!{bytes_id} { yylval->sized_cstring.data = yytext + 1; yylval->sized_cstring.len = yyleng - 1; return BYTES_ID_LENGTH; } + <condition>!{bytes_fuzzy_id} { + yylval->sized_cstring.data = yytext + 1; + yylval->sized_cstring.len = yyleng - 1; + return BYTES_FUZZY_ID_LENGTH; + } + <condition>~{bytes_id} { yylval->sized_cstring.data = yytext + 1; yylval->sized_cstring.len = yyleng - 1; return BYTES_ID_END; } + <condition>~{bytes_fuzzy_id} { + yylval->sized_cstring.data = yytext + 1; + yylval->sized_cstring.len = yyleng - 1; + return BYTES_FUZZY_ID_END; + } + -<bytes,condition>[A-Za-z_][A-Za-z0-9_]* { +<condition>[A-Za-z_][A-Za-z0-9_]* { yylval->sized_cstring.data = yytext; yylval->sized_cstring.len = yyleng; return NAME; @@ -953,41 +1067,150 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* -<bytes_value>"\"" { - POP_STATE; - // *built_pattern = g_bytes_pattern_new(); - PUSH_STATE(bytes_value_raw); - } -<bytes_value_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return 11111/*MASKED_STRING*/; } -<bytes_value_raw>"\\\"" { }//g_bytes_pattern_append_data(*built_pattern, '"', 0xff); } -<bytes_value_raw>"\\t" { }//g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); } -<bytes_value_raw>"\\r" { }//g_bytes_pattern_append_data(*built_pattern, '\r', 0xff); } -<bytes_value_raw>"\\n" { }//g_bytes_pattern_append_data(*built_pattern, '\n', 0xff); } -<bytes_value_raw>"\\\\" { }//g_bytes_pattern_append_data(*built_pattern, '\\', 0xff); } -<bytes_value_raw>\\x[0-9a-fA-F]{2} { - uint8_t __ch; - __ch = strtol(yytext + 2, NULL, 16); - printf("__ch: %hhx\n", __ch); - //g_bytes_pattern_append_data(*built_pattern, __ch, 0xff); - } +%{ /* Commentaires */ %} + +<*>"/*" { PUSH_STATE(comment); } +<comment>"*/" { POP_STATE; } +<comment>(.|\n) { } -<bytes_value_raw>. { }//g_bytes_pattern_append_data(*built_pattern, *yytext, 0xff); } +<*>"//"[^\n]* { } +%{ /* Suppression du besoin de sauvegardes pour retours en arrière */ %} +"i" { HANDLE_UNCOMPLETED_TOKEN; } +"in" { HANDLE_UNCOMPLETED_TOKEN; } +"inc" { HANDLE_UNCOMPLETED_TOKEN; } +"incl" { HANDLE_UNCOMPLETED_TOKEN; } +"inclu" { HANDLE_UNCOMPLETED_TOKEN; } +"includ" { HANDLE_UNCOMPLETED_TOKEN; } +<inc_path>\" { HANDLE_UNCOMPLETED_TOKEN; } +<inc_path>\"{str_not_escaped}+ { HANDLE_UNCOMPLETED_TOKEN; } +<inc_path>\"\\ { HANDLE_UNCOMPLETED_TOKEN; } +<inc_path>\"\\x { HANDLE_UNCOMPLETED_TOKEN; } +<inc_path>\"\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +<inc_path>\"{str_mixed}+ { HANDLE_UNCOMPLETED_TOKEN; } +<inc_path>\"{str_mixed}+\\ { HANDLE_UNCOMPLETED_TOKEN; } +<inc_path>\"{str_mixed}+\\x { HANDLE_UNCOMPLETED_TOKEN; } +<inc_path>\"{str_mixed}+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } -%{ /* Commentaires */ %} +"g" { HANDLE_UNCOMPLETED_TOKEN; } +"gl" { HANDLE_UNCOMPLETED_TOKEN; } +"glo" { HANDLE_UNCOMPLETED_TOKEN; } +"glob" { HANDLE_UNCOMPLETED_TOKEN; } +"globa" { HANDLE_UNCOMPLETED_TOKEN; } -<*>"/*" { PUSH_STATE(comment); } -<comment>"*/" { POP_STATE; } -<comment>(.|\n) { } +"p" { HANDLE_UNCOMPLETED_TOKEN; } +"pr" { HANDLE_UNCOMPLETED_TOKEN; } +"pri" { HANDLE_UNCOMPLETED_TOKEN; } +"priv" { HANDLE_UNCOMPLETED_TOKEN; } +"priva" { HANDLE_UNCOMPLETED_TOKEN; } +"privat" { HANDLE_UNCOMPLETED_TOKEN; } -<*>"//"[^\n]* { } +"r" { HANDLE_UNCOMPLETED_TOKEN; } +"ru" { HANDLE_UNCOMPLETED_TOKEN; } +"rul" { HANDLE_UNCOMPLETED_TOKEN; } + +<raw_block>"m" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block>"me" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block>"met" { HANDLE_UNCOMPLETED_TOKEN; } + +<raw_block,meta>"b" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta>"by" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta>"byt" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta>"byte" { HANDLE_UNCOMPLETED_TOKEN; } + +<raw_block,meta,bytes>"c" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta,bytes>"co" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta,bytes>"con" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta,bytes>"cond" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta,bytes>"condi" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta,bytes>"condit" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta,bytes>"conditi" { HANDLE_UNCOMPLETED_TOKEN; } +<raw_block,meta,bytes>"conditio" { HANDLE_UNCOMPLETED_TOKEN; } + + +<meta_value>"t" { HANDLE_UNCOMPLETED_TOKEN; } +<meta_value>"tr" { HANDLE_UNCOMPLETED_TOKEN; } +<meta_value>"tru" { HANDLE_UNCOMPLETED_TOKEN; } + +<meta_value>"f" { HANDLE_UNCOMPLETED_TOKEN; } +<meta_value>"fa" { HANDLE_UNCOMPLETED_TOKEN; } +<meta_value>"fal" { HANDLE_UNCOMPLETED_TOKEN; } +<meta_value>"fals" { HANDLE_UNCOMPLETED_TOKEN; } + +<meta_value>-0x { HANDLE_UNCOMPLETED_TOKEN; } + +<meta_value>0x { HANDLE_UNCOMPLETED_TOKEN; } + +<meta_value>\"{str_mixed}* { HANDLE_UNCOMPLETED_TOKEN; } +<meta_value>\"{str_mixed}*\\ { HANDLE_UNCOMPLETED_TOKEN; } +<meta_value>\"{str_mixed}*\\x { HANDLE_UNCOMPLETED_TOKEN; } +<meta_value>\"{str_mixed}*\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } + + +<condition>-0x { HANDLE_UNCOMPLETED_TOKEN; } + + +<bytes_hex_range,bytes_regex_quantifier,condition>0x { HANDLE_UNCOMPLETED_TOKEN; } + + +<condition>\"{str_not_escaped}* { HANDLE_UNCOMPLETED_TOKEN; } + +<condition>\" { HANDLE_UNCOMPLETED_TOKEN; } +<condition>\"\\ { HANDLE_UNCOMPLETED_TOKEN; } +<condition>\"\\x { HANDLE_UNCOMPLETED_TOKEN; } +<condition>\"\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +<condition>\"{str_mixed}+ { HANDLE_UNCOMPLETED_TOKEN; } +<condition>\"{str_mixed}+\\ { HANDLE_UNCOMPLETED_TOKEN; } +<condition>\"{str_mixed}+\\x { HANDLE_UNCOMPLETED_TOKEN; } +<condition>\"{str_mixed}+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } + + +<bytes_value>\" { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_value>\"\\ { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_value>\"\\x { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_value>\"\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_value>\"{str_mixed}+ { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_value>\"{str_mixed}+\\ { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_value>\"{str_mixed}+\\x { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_value>\"{str_mixed}+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } + + +<bytes>\"{str_not_escaped}+ { HANDLE_UNCOMPLETED_TOKEN; } + + +<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F]/[^?] { HANDLE_UNCOMPLETED_TOKEN; } + + +<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]*[\?]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } + + +<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*\?/[^?] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } + + +<bytes_regex>\\ { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_regex>\\x { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_regex>\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_regex>({regular_chars})+\\ { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_regex>({regular_chars})+\\x { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_regex>({regular_chars})+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } + + +<bytes_regex>({reg_classes})+\\ + + +<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\ { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\x { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_regex_range>\\x { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_regex_range>\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } %{ /* Actions par défaut */ %} |
