summaryrefslogtreecommitdiff
path: root/src/analysis/scan/tokens.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/analysis/scan/tokens.l')
-rw-r--r--src/analysis/scan/tokens.l1236
1 files changed, 1236 insertions, 0 deletions
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l
new file mode 100644
index 0000000..e075cee
--- /dev/null
+++ b/src/analysis/scan/tokens.l
@@ -0,0 +1,1236 @@
+
+%top {
+
+#include "grammar.h"
+
+}
+
+
+%{
+
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+
+/* Tête de lecture pour conversions */
+typedef union _read_ptr_t
+{
+ const uint8_t *byte_pos; /* Lecture par blocs de 8 bits */
+ const uint16_t *hword_pos; /* Lecture par blocs de 16 bits*/
+
+} read_ptr_t;
+
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+
+# define MAKE_HWORD(ch1, ch2) ((uint16_t)(ch2 << 8 | ch1))
+
+#elif __BYTE_ORDER == __BIG_ENDIAN
+
+# define MAKE_HWORD(ch1, ch2) ((uint16_t)(ch1 << 8 | ch2))
+
+#else
+
+ /* __PDP_ENDIAN et Cie... */
+# error "Congratulations! Your byte order is not supported!"
+
+#endif
+
+
+
+/******************************************************************************
+* *
+* Paramètres : src = liste d'octets à traiter. *
+* len = taille de cette liste. *
+* out = série d'octets bruts obtenue. [OUT] *
+* *
+* Description : Transcrit une série d'octets en en remplaçant certains. *
+* *
+* Retour : - *
+* *
+* Remarques : - *
+* *
+******************************************************************************/
+
+static void rost_unescape_string(const char *src, size_t len, sized_string_t *out)
+{
+ read_ptr_t reader; /* Tête de lecture */
+ const bin_t *max; /* Fin du parcours */
+ uint16_t half; /* Moitié de mot */
+ bin_t byte; /* Octet à analyser */
+ bin_t *writer; /* Tête d'écriture */
+
+ reader.byte_pos = (const uint8_t *)src;
+ max = reader.byte_pos + len;
+
+ writer = out->bin_data;
+
+ while (reader.byte_pos < max)
+ {
+ /**
+ * La lecture par groupes de deux octets n'est pas forcément toujours
+ * logique : pour "\nabc", la dernière lecture va considérer 'c"',
+ * incluant ainsi le caractère '"' qui a été écarté pour l'appel.
+ *
+ * Le code est cependant suffisamment souple pour ignore le superflu.
+ */
+ switch (*reader.hword_pos)
+ {
+ case MAKE_HWORD('\\', 'a'):
+ reader.hword_pos++;
+ *writer++ = '\a';
+ break;
+
+ case MAKE_HWORD('\\', 'b'):
+ reader.hword_pos++;
+ *writer++ = '\b';
+ break;
+
+ case MAKE_HWORD('\\', 't'):
+ reader.hword_pos++;
+ *writer++ = '\t';
+ break;
+
+ case MAKE_HWORD('\\', 'n'):
+ reader.hword_pos++;
+ *writer++ = '\n';
+ break;
+
+ case MAKE_HWORD('\\', 'v'):
+ reader.hword_pos++;
+ *writer++ = '\v';
+ break;
+
+ case MAKE_HWORD('\\', 'f'):
+ reader.hword_pos++;
+ *writer++ = '\f';
+ break;
+
+ case MAKE_HWORD('\\', 'r'):
+ reader.hword_pos++;
+ *writer++ = '\r';
+ break;
+
+ case MAKE_HWORD('\\', 'e'):
+ reader.hword_pos++;
+ *writer++ = '\e';
+ break;
+
+ case MAKE_HWORD('\\', '"'):
+ reader.hword_pos++;
+ *writer++ = '\"';
+ break;
+
+ case MAKE_HWORD('\\', '\\'):
+ reader.hword_pos++;
+ *writer++ = '\\';
+ break;
+
+ case MAKE_HWORD('\\', 'x'):
+ reader.hword_pos++;
+
+ /**
+ * Le jeu des expressions régulières qui amène à l'appel de
+ * cette fonction limite les caractères possibles à trois
+ * ensembles : chiffres et lettres en majuscules et minuscules.
+ *
+ * La bascule des lettres en minuscules ramène les possibles
+ * à deux ensembles uniquement, simplifiant ainsi les règles
+ * de filtrage : aucun switch case n'est ainsi requis !
+ */
+
+ half = *reader.hword_pos++;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ byte = (half & 0xff);
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ byte = (half >> 8);
+#endif
+
+ /* '0' ... '9' */
+ if (byte <= '9')
+ *writer = (byte - '0');
+
+ /* 'A' ... 'F' || 'a' ... 'f' */
+ else
+ {
+ byte |= 0x20;
+ *writer = 0xa + (byte - 'a');
+ }
+
+ *writer <<= 4;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ byte = (half >> 8);
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ byte = (half & 0xff);
+#endif
+
+ /* '0' ... '9' */
+ if (byte <= '9')
+ *writer++ |= (byte - '0');
+
+ /* 'A' ... 'F' || 'a' ... 'f' */
+ else
+ {
+ byte |= 0x20;
+ *writer++ |= 0xa + (byte - 'a');
+ }
+
+ break;
+
+ default:
+ *writer++ = *reader.byte_pos++;
+ break;
+
+ }
+
+ }
+
+ out->len = writer - out->bin_data;
+
+}
+
+
+/******************************************************************************
+* *
+* Paramètres : src = liste d'octets à traiter. *
+* len = taille de cette liste. *
+* out = série d'octets bruts obtenue. [OUT] *
+* *
+* Description : Transcrit une série d'octets en en remplaçant certains. *
+* *
+* Retour : - *
+* *
+* Remarques : - *
+* *
+******************************************************************************/
+
+static void rost_unescape_regex(const char *src, size_t len, sized_string_t *out)
+{
+ read_ptr_t reader; /* Tête de lecture */
+ const bin_t *max; /* Fin du parcours */
+ uint16_t half; /* Moitié de mot */
+ bin_t byte; /* Octet à analyser */
+ bin_t *writer; /* Tête d'écriture */
+
+ reader.byte_pos = (const uint8_t *)src;
+ max = reader.byte_pos + len;
+
+ writer = out->bin_data;
+
+ while (reader.byte_pos < max)
+ {
+ /**
+ * La lecture par groupes de deux octets n'est pas forcément toujours
+ * logique : pour "\nabc", la dernière lecture va considérer 'c"',
+ * incluant ainsi le caractère '"' qui a été écarté pour l'appel.
+ *
+ * Le code est cependant suffisamment souple pour ignore le superflu.
+ */
+ switch (*reader.hword_pos)
+ {
+ case MAKE_HWORD('\\', 'a'):
+ reader.hword_pos++;
+ *writer++ = '\a';
+ break;
+
+ case MAKE_HWORD('\\', 'b'):
+ reader.hword_pos++;
+ *writer++ = '\b';
+ break;
+
+ case MAKE_HWORD('\\', 't'):
+ reader.hword_pos++;
+ *writer++ = '\t';
+ break;
+
+ case MAKE_HWORD('\\', 'n'):
+ reader.hword_pos++;
+ *writer++ = '\n';
+ break;
+
+ case MAKE_HWORD('\\', 'v'):
+ reader.hword_pos++;
+ *writer++ = '\v';
+ break;
+
+ case MAKE_HWORD('\\', 'f'):
+ reader.hword_pos++;
+ *writer++ = '\f';
+ break;
+
+ case MAKE_HWORD('\\', 'r'):
+ reader.hword_pos++;
+ *writer++ = '\r';
+ break;
+
+ case MAKE_HWORD('\\', 'e'):
+ reader.hword_pos++;
+ *writer++ = '\e';
+ break;
+
+ case MAKE_HWORD('\\', '"'):
+ reader.hword_pos++;
+ *writer++ = '\"';
+ break;
+
+ case MAKE_HWORD('\\', '\\'):
+ reader.hword_pos++;
+ *writer++ = '\\';
+ break;
+
+ case MAKE_HWORD('\\', 'x'):
+ reader.hword_pos++;
+
+ /**
+ * Le jeu des expressions régulières qui amène à l'appel de
+ * cette fonction limite les caractères possibles à trois
+ * ensembles : chiffres et lettres en majuscules et minuscules.
+ *
+ * La bascule des lettres en minuscules ramène les possibles
+ * à deux ensembles uniquement, simplifiant ainsi les règles
+ * de filtrage : aucun switch case n'est ainsi requis !
+ */
+
+ half = *reader.hword_pos++;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ byte = (half & 0xff);
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ byte = (half >> 8);
+#endif
+
+ /* '0' ... '9' */
+ if (byte <= '9')
+ *writer = (byte - '0');
+
+ /* 'A' ... 'F' || 'a' ... 'f' */
+ else
+ {
+ byte |= 0x20;
+ *writer = 0xa + (byte - 'a');
+ }
+
+ *writer <<= 4;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ byte = (half >> 8);
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ byte = (half & 0xff);
+#endif
+
+ /* '0' ... '9' */
+ if (byte <= '9')
+ *writer++ |= (byte - '0');
+
+ /* 'A' ... 'F' || 'a' ... 'f' */
+ else
+ {
+ byte |= 0x20;
+ *writer++ |= 0xa + (byte - 'a');
+ }
+
+ break;
+
+ case MAKE_HWORD('\\', '{'):
+ reader.hword_pos++;
+ *writer++ = '{';
+ break;
+
+ case MAKE_HWORD('\\', '}'):
+ reader.hword_pos++;
+ *writer++ = '}';
+ break;
+
+ default:
+ *writer++ = *reader.byte_pos++;
+ break;
+
+ }
+
+ }
+
+ out->len = writer - out->bin_data;
+
+}
+
+
+#define PUSH_STATE(s) yy_push_state(s, yyscanner)
+#define POP_STATE yy_pop_state(yyscanner)
+
+
+#define STOP_LEXER(msg, fbmsg) \
+ do \
+ { \
+ char *__text; \
+ int __ret; \
+ __ret = asprintf(&__text, "%s: '%s'", msg, yytext); \
+ if (__ret == -1) \
+ YY_FATAL_ERROR(fbmsg); \
+ else \
+ { \
+ YY_FATAL_ERROR(__text); \
+ free(__text); \
+ } \
+ } \
+ while (0)
+
+#define HANDLE_UNCOMPLETED_TOKEN \
+ STOP_LEXER("Uncompleted token in rule definition", "Undisclosed uncompleted token in rule definition")
+
+
+%}
+
+
+%option bison-bridge reentrant
+%option stack
+%option nounput
+%option noinput
+%option noyywrap
+%option noyy_top_state
+%option yylineno
+%option never-interactive
+
+%x inc_path
+
+%x rule_intro
+%x raw_block
+
+%x meta
+%x meta_value
+
+%x bytes
+%x bytes_value
+%x bytes_value_raw
+
+%x bytes_hex
+%x bytes_hex_range
+
+%x bytes_regex
+%x bytes_regex_quantifier
+%x bytes_regex_range
+
+%x condition
+
+%x wait_for_colon
+
+
+%x comment
+
+
+str_not_escaped [^\"\\]
+str_escaped \\a|\\b|\\t|\\n|\\v|\\f|\\r|\\e|\\\"|\\\\|\\x{hbyte}
+str_mixed ({str_not_escaped}|{str_escaped})
+
+hbyte [0-9a-fA-F]{2}
+mbyte (\?[0-9a-fA-F]|[0-9a-fA-F]\?)
+
+reg_allowed [^^$.|/{}()\[\]*+?\\]
+reg_allowed_escaped \\^|\\$|\\\.|\\\||\\\/|\\\{|\\\}|\\\(|\\\)|\\\[|\\\]|\\\*|\\\+|\\\?|\\\\
+reg_escaped \\a|\\t|\\n|\\v|\\f|\\r
+reg_byte \\x[0-9a-fA-F]{2}
+
+regular_chars {reg_allowed}|{reg_allowed_escaped}|{reg_escaped}|{reg_byte}
+
+reg_classes \\w|\\W|\\s|\\S|\\d|\\D|\\b|\\B
+
+
+bytes_id [A-Za-z_][A-Za-z0-9_]*
+bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
+
+
+%%
+
+
+"include" { PUSH_STATE(inc_path); return INCLUDE; }
+
+<inc_path>\"{str_not_escaped}+\" {
+ POP_STATE;
+
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 2;
+
+ return PLAIN_TEXT;
+ }
+
+<inc_path>\"{str_mixed}+\" {
+ POP_STATE;
+
+ rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);
+
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+
+ return ESCAPED_TEXT;
+ }
+
+
+%{ /* Définition locale d'une règle */ %}
+
+ "global" { return GLOBAL; }
+ "private" { return PRIVATE; }
+
+ "rule" {
+ PUSH_STATE(rule_intro);
+ return RAW_RULE;
+ }
+
+ <rule_intro>{bytes_id} {
+ yylval->sized_cstring.data = yytext;
+ yylval->sized_cstring.len = yyleng;
+ return RULE_IDENTIFIER;
+ }
+
+ <rule_intro>":" { return COLON; }
+
+ <rule_intro>[ \t]* { }
+
+ <rule_intro>"{" {
+ POP_STATE;
+ PUSH_STATE(raw_block);
+ return BRACE_IN;
+ }
+
+ <raw_block>"meta" {
+ POP_STATE;
+ PUSH_STATE(meta);
+ PUSH_STATE(wait_for_colon);
+ return META;
+ }
+
+ <raw_block,meta>"bytes" {
+ POP_STATE;
+ PUSH_STATE(bytes);
+ PUSH_STATE(wait_for_colon);
+ return BYTES;
+ }
+
+ <raw_block,meta,bytes>"condition" {
+ POP_STATE;
+ PUSH_STATE(condition);
+ PUSH_STATE(wait_for_colon);
+ return CONDITION;
+ }
+
+ <wait_for_colon>":" {
+ POP_STATE;
+ return COLON;
+ }
+
+<raw_block,meta,bytes,condition>"}" {
+ POP_STATE;
+ return BRACE_OUT;
+ }
+
+
+%{ /* Définitions communes pour la section "meta:" */ %}
+
+ <meta>{bytes_id} {
+ yylval->sized_cstring.data = yytext;
+ yylval->sized_cstring.len = yyleng;
+ return INFO_KEY;
+ }
+
+ <meta>"=" { PUSH_STATE(meta_value); return ASSIGN; }
+
+ <meta_value>"true" { POP_STATE; return TRUE_; }
+ <meta_value>"false" { POP_STATE; return FALSE_; }
+
+ <meta_value>-(0|[1-9][0-9]*) {
+ POP_STATE;
+ yylval->signed_integer = strtoll(yytext, NULL, 10);
+ return SIGNED_INTEGER;
+ }
+
+ <meta_value>-0x[0-9a-f]+ {
+ POP_STATE;
+ yylval->signed_integer = strtoll(yytext, NULL, 16);
+ return SIGNED_INTEGER;
+ }
+
+ <meta_value>(0|[1-9][0-9]*) {
+ POP_STATE;
+ yylval->unsigned_integer = strtoull(yytext, NULL, 10);
+ return UNSIGNED_INTEGER;
+ }
+
+ <meta_value>0x[0-9a-f]+ {
+ POP_STATE;
+ yylval->unsigned_integer = strtoull(yytext, NULL, 16);
+ return UNSIGNED_INTEGER;
+ }
+
+ <meta_value>\"{str_not_escaped}*\" {
+ POP_STATE;
+
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 2;
+
+ return PLAIN_TEXT;
+ }
+
+ <meta_value>\"{str_mixed}*\" {
+ POP_STATE;
+
+ rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);
+
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+
+ return ESCAPED_TEXT;
+ }
+
+
+%{ /* A déplacer... */ %}
+
+
+<condition>"true" { return TRUE_; }
+<condition>"false" { return FALSE_; }
+
+<condition>-(0|[1-9][0-9]*) { yylval->signed_integer = strtoll(yytext, NULL, 10); return SIGNED_INTEGER; }
+<condition>-0x[0-9a-f]+ { yylval->signed_integer = strtoll(yytext, NULL, 16); return SIGNED_INTEGER; }
+
+<bytes_hex_range,bytes_regex_quantifier,condition>(0|[1-9][0-9]*) { yylval->unsigned_integer = strtoull(yytext, NULL, 10); return UNSIGNED_INTEGER; }
+<bytes_hex_range,bytes_regex_quantifier,condition>0x[0-9a-f]+ { yylval->unsigned_integer = strtoull(yytext, NULL, 16); return UNSIGNED_INTEGER; }
+
+<condition>[kK][bB] { return KB; }
+<condition>[mM][bB] { return MB; }
+<condition>[gG][bB] { return GB; }
+
+<condition>\"{str_not_escaped}*\" {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 2;
+
+ return PLAIN_TEXT;
+ }
+
+<condition>\"{str_mixed}*\" {
+ rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);
+
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+
+ return ESCAPED_TEXT;
+ }
+
+
+%{ /* Définitions communes pour la section "bytes:" */ %}
+
+ <bytes>"fullword" { return FULLWORD; }
+ <bytes>"nocase" { return NOCASE; }
+ <bytes>"private" { return PRIVATE; }
+
+ <bytes>"=" { PUSH_STATE(bytes_value); return ASSIGN; }
+
+
+%{ /* Définition de motif en texte brut */ %}
+
+<bytes_value>\"{str_not_escaped}+\" {
+ POP_STATE;
+
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 2;
+
+ return PLAIN_TEXT;
+ }
+
+<bytes_value>\"{str_mixed}+\" {
+ POP_STATE;
+
+ rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);
+
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+
+ return ESCAPED_TEXT;
+ }
+
+
+
+<bytes>[A-Za-z_][A-Za-z0-9_]* {
+ yylval->sized_cstring.data = yytext;
+ yylval->sized_cstring.len = yyleng;
+ return NAME;
+ }
+
+
+ <bytes>"((" { return MOD_GROUP_O; }
+
+ <bytes>"))" { return MOD_GROUP_C; }
+
+ <bytes>"(" { return PAREN_O; }
+
+ <bytes>")" { return PAREN_C; }
+
+ <bytes>"," { return COMMA; }
+
+
+<bytes>\"{str_not_escaped}+\" {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 2;
+
+ return PLAIN_TEXT;
+ }
+
+
+
+
+
+%{ /* Définition de motif en hexadécimal */ %}
+
+ <bytes_value>"{" {
+ POP_STATE;
+ PUSH_STATE(bytes_hex);
+ }
+
+ <bytes_hex>"}" { POP_STATE; }
+
+ <bytes_hex>"[" {
+ PUSH_STATE(bytes_hex_range);
+ return HOOK_O;
+ }
+
+ <bytes_hex_range>"-" { return MINUS; }
+
+ <bytes_hex_range>"]" {
+ POP_STATE;
+ return HOOK_C;
+ }
+
+ <bytes_hex>"(" { return PAREN_O; }
+
+ <bytes_hex>")" { return PAREN_C; }
+
+ <bytes_hex>"|" { return PIPE; }
+
+ <bytes_hex>"~" { return TILDE; }
+
+ <bytes_hex>{hbyte}([ ]*{hbyte})*[ ]* {
+ bool even;
+ size_t i;
+ bin_t byte;
+ bin_t value;
+
+ tmp_0->len = 0;
+
+ even = true;
+
+ for (i = 0; i < yyleng; i++)
+ {
+ byte = yytext[i];
+
+ switch (byte)
+ {
+ case ' ':
+ continue;
+ break;
+
+ case '0' ... '9':
+ value = (byte - '0');
+ break;
+
+ case 'A' ... 'F':
+ value = 0xa + (byte - 'A');
+ break;
+
+ case 'a' ... 'f':
+ value = 0xa + (byte - 'a');
+ break;
+
+ }
+
+ if (even)
+ tmp_0->data[tmp_0->len] = (value << 4);
+ else
+ tmp_0->data[tmp_0->len++] |= value;
+
+ even = !even;
+
+ }
+
+ assert(even);
+
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+ return HEX_BYTES;
+
+ }
+
+ <bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]* {
+ unsigned long long counter;
+ size_t i;
+
+ counter = 0;
+
+ for (i = 0; i < yyleng; i++)
+ if (yytext[i] == '?')
+ counter++;
+
+ assert(counter % 2 == 0);
+
+ yylval->unsigned_integer = counter / 2;
+ return FULL_MASK;
+
+ }
+
+ <bytes_hex>{mbyte}([ ]*{mbyte})*[ ]* {
+ bool even;
+ size_t i;
+ bin_t byte;
+ bin_t value;
+
+ tmp_0->len = 0;
+ tmp_1->len = 0;
+
+ even = true;
+
+ for (i = 0; i < yyleng; i++)
+ {
+ byte = yytext[i];
+
+ switch (byte)
+ {
+ case ' ':
+ continue;
+ break;
+
+ case '?':
+ even = !even;
+ continue;
+ break;
+
+ case '0' ... '9':
+ value = (byte - '0');
+ break;
+
+ case 'A' ... 'F':
+ value = 0xa + (byte - 'A');
+ break;
+
+ case 'a' ... 'f':
+ value = 0xa + (byte - 'a');
+ break;
+
+ }
+
+ if (even)
+ {
+ tmp_0->data[tmp_0->len++] = (value << 4);
+ tmp_1->data[tmp_1->len++] = 0xf0;
+ }
+ else
+ {
+ tmp_0->data[tmp_0->len++] = value;
+ tmp_1->data[tmp_1->len++] = 0x0f;
+ }
+
+ even = !even;
+
+ }
+
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+ tmp_1->data[tmp_1->len] = '\0';
+#endif
+
+ yylval->masked.tmp_values = tmp_0;
+ yylval->masked.tmp_masks = tmp_1;
+ return SEMI_MASK;
+
+ }
+
+
+%{ /* Définition d'expressions régulières */ %}
+
+ <bytes_value>"/" {
+ POP_STATE;
+ printf(" -- regex\n");
+ PUSH_STATE(bytes_regex);
+ }
+
+ <bytes_regex>"/" { printf("exit regex\n"); POP_STATE; }
+
+ <bytes_regex>"." { return DOT; }
+
+ <bytes_regex>({regular_chars})+ {
+ rost_unescape_regex(yytext, yyleng, tmp_0);
+
+ printf(" regular: '%s'\n", yytext);
+
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+ return REGEX_BYTES;
+
+ }
+
+ <bytes_regex>({reg_classes})+ {
+
+ return REGEX_CLASSES;
+
+ }
+
+%{ /* <bytes_regex>\[({regular_chars}|({regular_chars})-z|{reg_classes})+\] { */ %}
+
+
+ <bytes_regex>"[" {
+ PUSH_STATE(bytes_regex_range);
+ printf(" !! entering range\n");
+ return HOOK_O;
+ }
+
+ <bytes_regex_range>"]" {
+ POP_STATE;
+ printf(" !! exiting range\n");
+ return HOOK_C;
+ }
+
+
+
+
+<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+ {
+
+ printf("range: '%s'\n", yytext);
+ return REGEX_RANGE;
+
+ }
+
+ <bytes_regex>"(" { return PAREN_O; }
+
+ <bytes_regex>")" { return PAREN_C; }
+
+ <bytes_regex>"|" { return PIPE; }
+
+ <bytes_regex>"*" { return MUL; }
+ <bytes_regex>"+" { return PLUS; }
+ <bytes_regex>"?" { return QUESTION; }
+
+ <bytes_regex>"{" {
+ PUSH_STATE(bytes_regex_quantifier);
+ return BRACE_IN;
+ }
+
+ <bytes_regex_quantifier>"," { return COMMA; }
+
+ <bytes_regex_quantifier>"}" {
+ POP_STATE;
+ return BRACE_OUT;
+ }
+
+
+%{ /* Condition de correspondance */ %}
+
+<condition>"and" { return AND; }
+<condition>"or" { return OR; }
+<condition>"not" { return NOT; }
+
+<condition>"<" { return LT; }
+<condition>"<=" { return LE; }
+<condition>"==" { return EQ; }
+<condition>"!=" { return NE; }
+<condition>">" { return GT; }
+<condition>">=" { return GE; }
+
+<condition>"contains" { return CONTAINS; }
+<condition>"startswith" { return STARTSWITH; }
+<condition>"endswith" { return ENDSWITH; }
+<condition>"matches" { return MATCHES; }
+<condition>"icontains" { return ICONTAINS; }
+<condition>"istartswith" { return ISTARTSWITH; }
+<condition>"iendswith" { return IENDSWITH; }
+<condition>"iequals" { return IEQUALS; }
+
+<condition>"+" { return PLUS; }
+<condition>"-" { return MINUS; }
+<condition>"*" { return MUL; }
+<condition>"/" { return DIV; }
+<condition>"%" { return MOD; }
+
+<condition>"(" { return PAREN_O; }
+<condition>")" { return PAREN_C; }
+<condition>"," { return COMMA; }
+
+
+<condition>"[" { return HOOK_O; }
+<condition>"]" { return HOOK_C; }
+
+
+<condition>"." { return DOT; }
+<bytes>"|" { return PIPE; }
+
+<condition>"none" { return NONE; }
+<condition>"any" { return ANY; }
+<condition>"all" { return ALL; }
+<condition>"of" { return OF; }
+<condition>"them" { return THEM; }
+<condition>"in" { return IN; }
+
+
+ <bytes,condition>${bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID;
+ }
+
+ <condition>${bytes_fuzzy_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_FUZZY_ID;
+ }
+
+ <condition>#{bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID_COUNTER;
+ }
+
+ <condition>#{bytes_fuzzy_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_FUZZY_ID_COUNTER;
+ }
+
+ <condition>@{bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID_START;
+ }
+
+ <condition>@{bytes_fuzzy_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_FUZZY_ID_START;
+ }
+
+ <condition>!{bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID_LENGTH;
+ }
+
+ <condition>!{bytes_fuzzy_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_FUZZY_ID_LENGTH;
+ }
+
+ <condition>~{bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID_END;
+ }
+
+ <condition>~{bytes_fuzzy_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_FUZZY_ID_END;
+ }
+
+
+
+
+
+
+<condition>[A-Za-z_][A-Za-z0-9_]* {
+ yylval->sized_cstring.data = yytext;
+ yylval->sized_cstring.len = yyleng;
+ return NAME;
+ }
+
+
+
+
+
+
+
+%{ /* Commentaires */ %}
+
+<*>"/*" { PUSH_STATE(comment); }
+<comment>"*/" { POP_STATE; }
+<comment>(.|\n) { }
+
+<*>"//"[^\n]* { }
+
+
+%{ /* Suppression du besoin de sauvegardes pour retours en arrière */ %}
+
+"i" { HANDLE_UNCOMPLETED_TOKEN; }
+"in" { HANDLE_UNCOMPLETED_TOKEN; }
+"inc" { HANDLE_UNCOMPLETED_TOKEN; }
+"incl" { HANDLE_UNCOMPLETED_TOKEN; }
+"inclu" { HANDLE_UNCOMPLETED_TOKEN; }
+"includ" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<inc_path>\" { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_not_escaped}+ { HANDLE_UNCOMPLETED_TOKEN; }
+
+<inc_path>\"\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_mixed}+ { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_mixed}+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_mixed}+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_mixed}+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+"g" { HANDLE_UNCOMPLETED_TOKEN; }
+"gl" { HANDLE_UNCOMPLETED_TOKEN; }
+"glo" { HANDLE_UNCOMPLETED_TOKEN; }
+"glob" { HANDLE_UNCOMPLETED_TOKEN; }
+"globa" { HANDLE_UNCOMPLETED_TOKEN; }
+
+"p" { HANDLE_UNCOMPLETED_TOKEN; }
+"pr" { HANDLE_UNCOMPLETED_TOKEN; }
+"pri" { HANDLE_UNCOMPLETED_TOKEN; }
+"priv" { HANDLE_UNCOMPLETED_TOKEN; }
+"priva" { HANDLE_UNCOMPLETED_TOKEN; }
+"privat" { HANDLE_UNCOMPLETED_TOKEN; }
+
+"r" { HANDLE_UNCOMPLETED_TOKEN; }
+"ru" { HANDLE_UNCOMPLETED_TOKEN; }
+"rul" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<raw_block>"m" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block>"me" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block>"met" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<raw_block,meta>"b" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta>"by" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta>"byt" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta>"byte" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<raw_block,meta,bytes>"c" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"co" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"con" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"cond" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"condi" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"condit" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"conditi" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"conditio" { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<meta_value>"t" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"tr" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"tru" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<meta_value>"f" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"fa" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"fal" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"fals" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<meta_value>-0x { HANDLE_UNCOMPLETED_TOKEN; }
+
+<meta_value>0x { HANDLE_UNCOMPLETED_TOKEN; }
+
+<meta_value>\"{str_mixed}* { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>\"{str_mixed}*\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>\"{str_mixed}*\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>\"{str_mixed}*\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<condition>-0x { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_hex_range,bytes_regex_quantifier,condition>0x { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<condition>\"{str_not_escaped}* { HANDLE_UNCOMPLETED_TOKEN; }
+
+<condition>\" { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"{str_mixed}+ { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"{str_mixed}+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"{str_mixed}+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"{str_mixed}+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_value>\" { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"{str_mixed}+ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"{str_mixed}+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"{str_mixed}+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"{str_mixed}+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes>\"{str_not_escaped}+ { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F]/[^?] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]*[\?]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*\?/[^?] { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_regex>\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>({regular_chars})+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>({regular_chars})+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>({regular_chars})+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_regex>({reg_classes})+\\
+
+
+<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex_range>\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex_range>\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+%{ /* Actions par défaut */ %}
+
+<*>[ \t]+ { }
+
+<*>[\n] { static int ln = 1; if (0) printf("----------- %%< -------------- %%< ---- %d\n", ln++); }
+
+<*>. {
+ char *msg;
+ int ret;
+ ret = asprintf(&msg, "Unhandled token in rule definition: '%s'", yytext);
+ if (ret == -1)
+ YY_FATAL_ERROR("Unhandled token in undisclosed rule definition");
+ else
+ {
+ YY_FATAL_ERROR(msg);
+ free(msg);
+ }
+ }
+
+
+%%