summaryrefslogtreecommitdiff
path: root/src/analysis/scan/tokens.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/analysis/scan/tokens.l')
-rw-r--r--src/analysis/scan/tokens.l961
1 files changed, 592 insertions, 369 deletions
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l
index 1a17344..e075cee 100644
--- a/src/analysis/scan/tokens.l
+++ b/src/analysis/scan/tokens.l
@@ -8,13 +8,35 @@
%{
-//#include "manual.h"
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
+/* Read head for conversions: one position viewed either as 8-bit or as 16-bit units */
+typedef union _read_ptr_t
+{
+ const uint8_t *byte_pos; /* Reading by blocks of 8 bits */
+ const uint16_t *hword_pos; /* Reading by blocks of 16 bits */
+
+} read_ptr_t;
+
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+
+# define MAKE_HWORD(ch1, ch2) ((uint16_t)(ch2 << 8 | ch1)) /* 16-bit value stored in memory as ch1 then ch2 */
+
+#elif __BYTE_ORDER == __BIG_ENDIAN
+
+# define MAKE_HWORD(ch1, ch2) ((uint16_t)(ch1 << 8 | ch2)) /* 16-bit value stored in memory as ch1 then ch2 */
+
+#else
+
+ /* __PDP_ENDIAN and the like... */
+# error "Congratulations! Your byte order is not supported!"
+
+#endif
@@ -32,124 +54,143 @@
* *
******************************************************************************/
-static void rost_unescape_string_bytes(const char *src, size_t len, sized_string_t *out)
+static void rost_unescape_string(const char *src, size_t len, sized_string_t *out)
{
- size_t i; /* Loop index */
+ read_ptr_t reader; /* Read head */
+ const bin_t *max; /* End of the walk */
+ uint16_t half; /* Half word holding two hex digits */
bin_t byte; /* Byte to analyze */
- bin_t next; /* Next byte */
+ bin_t *writer; /* Write head */
- out->len = 0;
+ reader.byte_pos = (const uint8_t *)src;
+ max = reader.byte_pos + len;
- for (i = 0; i < len; i++)
- {
- byte = src[i];
+ writer = out->bin_data; /* NOTE(review): no capacity check is visible; each iteration writes one byte and consumes at least one */
- switch (byte)
+ while (reader.byte_pos < max)
+ {
+ /**
+ * Each iteration examines two bytes at once through the 16-bit view
+ * of the read head. Reading pairs is not always meaningful: for the
+ * literal \nabc placed between double quotes, the last read covers
+ * the letter c plus the closing double quote, which was excluded
+ * from the call.
+ *
+ * The code is however flexible enough to ignore the surplus: a pair
+ * which is not a known escape sequence reaches the default case,
+ * which copies a single byte only.
+ *
+ * NOTE(review): the 16-bit load happens at arbitrary byte offsets and
+ * may cover one byte past max; nothing visible here guarantees the
+ * alignment or that the extra byte is readable -- confirm with the
+ * callers and the target platforms.
+ */
+ switch (*reader.hword_pos)
{
- case '\\':
-
- next = src[i + 1];
-
- switch (next)
- {
- case 'a':
- out->data[out->len++] = '\a';
- break;
-
- case 'b':
- out->data[out->len++] = '\b';
- break;
-
- case 't':
- out->data[out->len++] = '\t';
- break;
-
- case 'n':
- out->data[out->len++] = '\n';
- break;
-
- case 'v':
- out->data[out->len++] = '\v';
- break;
-
- case 'f':
- out->data[out->len++] = '\f';
- break;
-
- case 'r':
- out->data[out->len++] = '\r';
- break;
-
- case 'e':
- out->data[out->len++] = '\e';
- break;
-
- case '"':
- out->data[out->len++] = '\"';
- break;
+ case MAKE_HWORD('\\', 'a'):
+ reader.hword_pos++;
+ *writer++ = '\a';
+ break;
- case '\\':
- out->data[out->len++] = '\\';
- break;
+ case MAKE_HWORD('\\', 'b'):
+ reader.hword_pos++;
+ *writer++ = '\b';
+ break;
- case 'x':
+ case MAKE_HWORD('\\', 't'):
+ reader.hword_pos++;
+ *writer++ = '\t';
+ break;
- next = src[i + 2];
+ case MAKE_HWORD('\\', 'n'):
+ reader.hword_pos++;
+ *writer++ = '\n';
+ break;
- switch (next)
- {
- case '0' ... '9':
- out->data[out->len] = (next - '0');
- break;
+ case MAKE_HWORD('\\', 'v'):
+ reader.hword_pos++;
+ *writer++ = '\v';
+ break;
- case 'A' ... 'F':
- out->data[out->len] = 0xa + (next - 'A');
- break;
+ case MAKE_HWORD('\\', 'f'):
+ reader.hword_pos++;
+ *writer++ = '\f';
+ break;
- case 'a' ... 'f':
- out->data[out->len] = 0xa + (next - 'a');
- break;
+ case MAKE_HWORD('\\', 'r'):
+ reader.hword_pos++;
+ *writer++ = '\r';
+ break;
- }
+ case MAKE_HWORD('\\', 'e'):
+ reader.hword_pos++;
+ *writer++ = '\e';
+ break;
- out->data[out->len] <<= 4;
+ case MAKE_HWORD('\\', '"'):
+ reader.hword_pos++;
+ *writer++ = '\"';
+ break;
- next = src[i + 3];
+ case MAKE_HWORD('\\', '\\'):
+ reader.hword_pos++;
+ *writer++ = '\\';
+ break;
- switch (next)
- {
- case '0' ... '9':
- out->data[out->len] |= (next - '0');
- break;
+ case MAKE_HWORD('\\', 'x'):
+ reader.hword_pos++;
+
+ /**
+ * The set of regular expressions leading to the call of this
+ * function limits the possible characters to three sets:
+ * digits, uppercase letters and lowercase letters.
+ *
+ * Switching the letters to lowercase reduces the possibilities
+ * to two sets only, which simplifies the filtering rules: no
+ * switch case is required at all!
+ */
+
+ half = *reader.hword_pos++; /* Both hex digits are fetched with a single 16-bit read */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ byte = (half & 0xff); /* First digit: byte at the lower address */
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ byte = (half >> 8); /* First digit: byte at the lower address */
+#endif
- case 'A' ... 'F':
- out->data[out->len] |= 0xa + (next - 'A');
- break;
+ /* '0' ... '9' */
+ if (byte <= '9')
+ *writer = (byte - '0');
- case 'a' ... 'f':
- out->data[out->len] |= 0xa + (next - 'a');
- break;
+ /* 'A' ... 'F' || 'a' ... 'f' */
+ else
+ {
+ byte |= 0x20; /* Force lowercase */
+ *writer = 0xa + (byte - 'a');
+ }
- }
+ *writer <<= 4; /* The first digit is the high nibble */
- out->len++;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ byte = (half >> 8); /* Second digit: byte at the higher address */
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ byte = (half & 0xff); /* Second digit: byte at the higher address */
+#endif
- i += 2;
- break;
+ /* '0' ... '9' */
+ if (byte <= '9')
+ *writer++ |= (byte - '0');
+ /* 'A' ... 'F' || 'a' ... 'f' */
+ else
+ {
+ byte |= 0x20; /* Force lowercase */
+ *writer++ |= 0xa + (byte - 'a');
}
- i++;
break;
default:
- out->data[out->len++] = byte;
+ *writer++ = *reader.byte_pos++; /* Not an escape sequence: copy one byte as is */
break;
}
}
+ out->len = writer - out->bin_data; /* Number of bytes produced */
+
}
@@ -167,147 +208,178 @@ static void rost_unescape_string_bytes(const char *src, size_t len, sized_string
* *
******************************************************************************/
-static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out)
+static void rost_unescape_regex(const char *src, size_t len, sized_string_t *out)
{
- size_t i; /* Loop index */
+ read_ptr_t reader; /* Read head */
+ const bin_t *max; /* End of the walk */
+ uint16_t half; /* Half word holding two hex digits */
bin_t byte; /* Byte to analyze */
- bin_t next; /* Next byte */
+ bin_t *writer; /* Write head */
- out->len = 0;
+ reader.byte_pos = (const uint8_t *)src;
+ max = reader.byte_pos + len;
- for (i = 0; i < len; i++)
- {
- byte = src[i];
+ writer = out->bin_data; /* NOTE(review): no capacity check is visible; each iteration writes one byte and consumes at least one */
- switch (byte)
+ while (reader.byte_pos < max)
+ {
+ /**
+ * Each iteration examines two bytes at once through the 16-bit view
+ * of the read head. Reading pairs is not always meaningful: for the
+ * literal \nabc placed between double quotes, the last read covers
+ * the letter c plus the closing double quote, which was excluded
+ * from the call.
+ *
+ * The code is however flexible enough to ignore the surplus: a pair
+ * which is not a known escape sequence reaches the default case,
+ * which copies a single byte only.
+ *
+ * NOTE(review): the 16-bit load happens at arbitrary byte offsets and
+ * may cover one byte past max; nothing visible here guarantees the
+ * alignment or that the extra byte is readable -- confirm with the
+ * callers and the target platforms.
+ */
+ switch (*reader.hword_pos)
{
- case '\\':
-
- next = src[i + 1];
-
- switch (next)
- {
- case 'a':
- out->data[out->len++] = '\a';
- break;
-
- case 'b':
- out->data[out->len++] = '\b';
- break;
-
- case 't':
- out->data[out->len++] = '\t';
- break;
-
- case 'n':
- out->data[out->len++] = '\n';
- break;
-
- case 'v':
- out->data[out->len++] = '\v';
- break;
-
- case 'f':
- out->data[out->len++] = '\f';
- break;
-
- case 'r':
- out->data[out->len++] = '\r';
- break;
-
- case 'e':
- out->data[out->len++] = '\e';
- break;
-
- case '"':
- out->data[out->len++] = '\"';
- break;
+ case MAKE_HWORD('\\', 'a'):
+ reader.hword_pos++;
+ *writer++ = '\a';
+ break;
- case '\\':
- out->data[out->len++] = '\\';
- break;
+ case MAKE_HWORD('\\', 'b'):
+ reader.hword_pos++;
+ *writer++ = '\b';
+ break;
- case 'x':
+ case MAKE_HWORD('\\', 't'):
+ reader.hword_pos++;
+ *writer++ = '\t';
+ break;
- next = src[i + 2];
+ case MAKE_HWORD('\\', 'n'):
+ reader.hword_pos++;
+ *writer++ = '\n';
+ break;
- switch (next)
- {
- case '0' ... '9':
- out->data[out->len] = (next - '0');
- break;
+ case MAKE_HWORD('\\', 'v'):
+ reader.hword_pos++;
+ *writer++ = '\v';
+ break;
- case 'A' ... 'F':
- out->data[out->len] = 0xa + (next - 'A');
- break;
+ case MAKE_HWORD('\\', 'f'):
+ reader.hword_pos++;
+ *writer++ = '\f';
+ break;
- case 'a' ... 'f':
- out->data[out->len] = 0xa + (next - 'a');
- break;
+ case MAKE_HWORD('\\', 'r'):
+ reader.hword_pos++;
+ *writer++ = '\r';
+ break;
- }
+ case MAKE_HWORD('\\', 'e'):
+ reader.hword_pos++;
+ *writer++ = '\e';
+ break;
- out->data[out->len] <<= 4;
+ case MAKE_HWORD('\\', '"'):
+ reader.hword_pos++;
+ *writer++ = '\"';
+ break;
- next = src[i + 3];
+ case MAKE_HWORD('\\', '\\'):
+ reader.hword_pos++;
+ *writer++ = '\\';
+ break;
- switch (next)
- {
- case '0' ... '9':
- out->data[out->len] |= (next - '0');
- break;
+ case MAKE_HWORD('\\', 'x'):
+ reader.hword_pos++;
+
+ /**
+ * The set of regular expressions leading to the call of this
+ * function limits the possible characters to three sets:
+ * digits, uppercase letters and lowercase letters.
+ *
+ * Switching the letters to lowercase reduces the possibilities
+ * to two sets only, which simplifies the filtering rules: no
+ * switch case is required at all!
+ */
+
+ half = *reader.hword_pos++; /* Both hex digits are fetched with a single 16-bit read */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ byte = (half & 0xff); /* First digit: byte at the lower address */
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ byte = (half >> 8); /* First digit: byte at the lower address */
+#endif
- case 'A' ... 'F':
- out->data[out->len] |= 0xa + (next - 'A');
- break;
+ /* '0' ... '9' */
+ if (byte <= '9')
+ *writer = (byte - '0');
- case 'a' ... 'f':
- out->data[out->len] |= 0xa + (next - 'a');
- break;
+ /* 'A' ... 'F' || 'a' ... 'f' */
+ else
+ {
+ byte |= 0x20; /* Force lowercase */
+ *writer = 0xa + (byte - 'a');
+ }
- }
+ *writer <<= 4; /* The first digit is the high nibble */
- out->len++;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ byte = (half >> 8); /* Second digit: byte at the higher address */
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ byte = (half & 0xff); /* Second digit: byte at the higher address */
+#endif
- i += 2;
- break;
+ /* '0' ... '9' */
+ if (byte <= '9')
+ *writer++ |= (byte - '0');
- case '{':
- out->data[out->len++] = '{';
- break;
+ /* 'A' ... 'F' || 'a' ... 'f' */
+ else
+ {
+ byte |= 0x20; /* Force lowercase */
+ *writer++ |= 0xa + (byte - 'a');
+ }
- case '}':
- out->data[out->len++] = '}';
- break;
+ break;
- }
+ case MAKE_HWORD('\\', '{'):
+ reader.hword_pos++;
+ *writer++ = '{';
+ break;
- i++;
+ case MAKE_HWORD('\\', '}'):
+ reader.hword_pos++;
+ *writer++ = '}';
break;
default:
- out->data[out->len++] = byte;
+ *writer++ = *reader.byte_pos++; /* Not an escape sequence: copy one byte as is */
break;
}
}
-}
+ out->len = writer - out->bin_data; /* Number of bytes produced */
+}
#define PUSH_STATE(s) yy_push_state(s, yyscanner) /* Push start condition s on the scanner state stack */
#define POP_STATE yy_pop_state(yyscanner) /* Pop the scanner state stack */
+#define STOP_LEXER(msg, fbmsg) /* Fatal lexer error built from msg and the current yytext; fbmsg is used when asprintf() fails */ \
+ do \
+ { \
+ char *__text; \
+ int __ret; \
+ __ret = asprintf(&__text, "%s: '%s'", msg, yytext); \
+ if (__ret == -1) \
+ YY_FATAL_ERROR(fbmsg); \
+ else \
+ { \
+ YY_FATAL_ERROR(__text); \
+ free(__text); /* NOTE(review): only reached if YY_FATAL_ERROR returns -- confirm its definition */ \
+ } \
+ } \
+ while (0)
-#define EXTEND_BUFFER_IF_NEEDED(extra) \
- if ((*used + extra) > *allocated) \
- { \
- *allocated *= 2; \
- *buf = realloc(*buf, *allocated); \
- }
+#define HANDLE_UNCOMPLETED_TOKEN /* Shared action of the rules matching truncated tokens */ \
+ STOP_LEXER("Uncompleted token in rule definition", "Undisclosed uncompleted token in rule definition")
%}
@@ -342,7 +414,6 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out
%x bytes_regex_range
%x condition
-%x strlit
%x wait_for_colon
@@ -388,7 +459,7 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
<inc_path>\"{str_mixed}+\" {
POP_STATE;
- rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0);
+ rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);
#ifndef NDEBUG
/* Pour rendre plus lisibles les impressions de débogage */
@@ -411,12 +482,14 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
return RAW_RULE;
}
- <rule_intro>[A-Za-z0-9_]+ {
+ <rule_intro>{bytes_id} {
yylval->sized_cstring.data = yytext;
yylval->sized_cstring.len = yyleng;
- return RULE_NAME;
+ return RULE_IDENTIFIER;
}
+ <rule_intro>":" { return COLON; }
+
<rule_intro>[ \t]* { }
<rule_intro>"{" {
@@ -494,7 +567,7 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
return UNSIGNED_INTEGER;
}
- <meta_value>\"{str_not_escaped}+\" {
+ <meta_value>\"{str_not_escaped}*\" {
POP_STATE;
yylval->sized_cstring.data = yytext + 1;
@@ -503,10 +576,10 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
return PLAIN_TEXT;
}
- <meta_value>\"{str_mixed}+\" {
+ <meta_value>\"{str_mixed}*\" {
POP_STATE;
- rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0);
+ rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);
#ifndef NDEBUG
/* Pour rendre plus lisibles les impressions de débogage */
@@ -535,38 +608,25 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
<condition>[mM][bB] { return MB; }
<condition>[gG][bB] { return GB; }
-<condition>"\"" {
- *used = 0;
- PUSH_STATE(strlit);
- }
+<condition>\"{str_not_escaped}*\" {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 2;
-<strlit>"\"" {
- POP_STATE;
- yylval->sized_cstring.data = *buf;
- yylval->sized_cstring.len = *used;
- return STRING;
- }
+ return PLAIN_TEXT;
+ }
-<strlit>"\\\"" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '"'; }
-<strlit>"\\t" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\t'; }
-<strlit>"\\r" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\r'; }
-<strlit>"\\n" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\n'; }
-<strlit>"\\\\" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\\'; }
-
-<strlit>\\x[0-9a-fA-F]{2} {
- char __ch;
- __ch = strtol(yytext + 2, NULL, 16);
- EXTEND_BUFFER_IF_NEEDED(1);
- (*buf)[(*used)++] = __ch;
- }
+<condition>\"{str_mixed}*\" {
+ rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);
-<strlit>[^\\\"]+ {
- size_t __len;
- __len = strlen(yytext);
- EXTEND_BUFFER_IF_NEEDED(__len);
- strcpy(&(*buf)[*used], yytext);
- *used += __len;
- }
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+
+ return ESCAPED_TEXT;
+ }
%{ /* Définitions communes pour la section "bytes:" */ %}
@@ -592,7 +652,7 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
<bytes_value>\"{str_mixed}+\" {
POP_STATE;
- rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0);
+ rost_unescape_string(yytext + 1, yyleng - 2, tmp_0);
#ifndef NDEBUG
/* Pour rendre plus lisibles les impressions de débogage */
@@ -605,173 +665,203 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
}
-%{ /* Définition de motif en hexadécimal */ %}
- <bytes_value>"{" {
- POP_STATE;
- PUSH_STATE(bytes_hex);
- }
-
- <bytes_hex>"}" { POP_STATE; }
+<bytes>[A-Za-z_][A-Za-z0-9_]* {
+ yylval->sized_cstring.data = yytext;
+ yylval->sized_cstring.len = yyleng;
+ return NAME;
+ }
- <bytes_hex>"[" {
- PUSH_STATE(bytes_hex_range);
- return HOOK_O;
- }
- <bytes_hex_range>"-" { return MINUS; }
+ <bytes>"((" { return MOD_GROUP_O; }
- <bytes_hex_range>"]" {
- POP_STATE;
- return HOOK_C;
- }
+ <bytes>"))" { return MOD_GROUP_C; }
- <bytes_hex>"(" { return PAREN_O; }
+ <bytes>"(" { return PAREN_O; }
- <bytes_hex>")" { return PAREN_C; }
+ <bytes>")" { return PAREN_C; }
- <bytes_hex>"|" { return PIPE; }
+ <bytes>"," { return COMMA; }
- <bytes_hex>"~" { return TILDE; }
- <bytes_hex>{hbyte}([ ]*{hbyte})* {
- bool even;
- size_t i;
- bin_t byte;
- bin_t value;
+<bytes>\"{str_not_escaped}+\" {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 2;
- tmp_0->len = 0;
+ return PLAIN_TEXT;
+ }
- even = true;
- for (i = 0; i < yyleng; i++)
- {
- byte = yytext[i];
- switch (byte)
- {
- case ' ':
- continue;
- break;
- case '0' ... '9':
- value = (byte - '0');
- break;
- case 'A' ... 'F':
- value = 0xa + (byte - 'A');
- break;
+%{ /* Définition de motif en hexadécimal */ %}
- case 'a' ... 'f':
- value = 0xa + (byte - 'a');
- break;
+ <bytes_value>"{" {
+ POP_STATE;
+ PUSH_STATE(bytes_hex);
+ }
- }
+ <bytes_hex>"}" { POP_STATE; }
- if (even)
- tmp_0->data[tmp_0->len] = (value << 4);
- else
- tmp_0->data[tmp_0->len++] |= value;
+ <bytes_hex>"[" {
+ PUSH_STATE(bytes_hex_range);
+ return HOOK_O;
+ }
- even = !even;
+ <bytes_hex_range>"-" { return MINUS; }
+ <bytes_hex_range>"]" {
+ POP_STATE;
+ return HOOK_C;
}
- assert(even);
+ <bytes_hex>"(" { return PAREN_O; }
-#ifndef NDEBUG
- /* Pour rendre plus lisibles les impressions de débogage */
- tmp_0->data[tmp_0->len] = '\0';
-#endif
+ <bytes_hex>")" { return PAREN_C; }
- yylval->tmp_cstring = tmp_0;
- return HEX_BYTES;
+ <bytes_hex>"|" { return PIPE; }
- }
+ <bytes_hex>"~" { return TILDE; }
- <bytes_hex>[\?]{2}([ ]*[\?]{2})* {
- unsigned long long counter;
- size_t i;
+ <bytes_hex>{hbyte}([ ]*{hbyte})*[ ]* {
+ bool even;
+ size_t i;
+ bin_t byte;
+ bin_t value;
- counter = 0;
+ tmp_0->len = 0;
- for (i = 0; i < yyleng; i++)
- if (yytext[i] == '?')
- counter++;
+ even = true;
- assert(counter % 2 == 0);
+ for (i = 0; i < yyleng; i++)
+ {
+ byte = yytext[i];
- yylval->unsigned_integer = counter / 2;
- return FULL_MASK;
+ switch (byte)
+ {
+ case ' ':
+ continue;
+ break;
- }
+ case '0' ... '9':
+ value = (byte - '0');
+ break;
- <bytes_hex>{mbyte}([ ]*{mbyte})* {
- bool even;
- size_t i;
- bin_t byte;
- bin_t value;
+ case 'A' ... 'F':
+ value = 0xa + (byte - 'A');
+ break;
- tmp_0->len = 0;
- tmp_1->len = 0;
+ case 'a' ... 'f':
+ value = 0xa + (byte - 'a');
+ break;
- even = true;
+ }
- for (i = 0; i < yyleng; i++)
- {
- byte = yytext[i];
+ if (even)
+ tmp_0->data[tmp_0->len] = (value << 4);
+ else
+ tmp_0->data[tmp_0->len++] |= value;
- switch (byte)
- {
- case ' ':
- continue;
- break;
+ even = !even;
- case '?':
- even = !even;
- continue;
- break;
+ }
- case '0' ... '9':
- value = (byte - '0');
- break;
+ assert(even);
- case 'A' ... 'F':
- value = 0xa + (byte - 'A');
- break;
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
- case 'a' ... 'f':
- value = 0xa + (byte - 'a');
- break;
+ yylval->tmp_cstring = tmp_0;
+ return HEX_BYTES;
- }
+ }
- if (even)
- {
- tmp_0->data[tmp_0->len++] = (value << 4);
- tmp_1->data[tmp_1->len++] = 0xf0;
- }
- else
- {
- tmp_0->data[tmp_0->len++] = value;
- tmp_1->data[tmp_1->len++] = 0x0f;
- }
+ <bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]* {
+ unsigned long long counter;
+ size_t i;
+
+ counter = 0;
+
+ for (i = 0; i < yyleng; i++)
+ if (yytext[i] == '?')
+ counter++;
+
+ assert(counter % 2 == 0);
- even = !even;
+ yylval->unsigned_integer = counter / 2;
+ return FULL_MASK;
}
+ <bytes_hex>{mbyte}([ ]*{mbyte})*[ ]* {
+ bool even;
+ size_t i;
+ bin_t byte;
+ bin_t value;
+
+ tmp_0->len = 0;
+ tmp_1->len = 0;
+
+ even = true;
+
+ for (i = 0; i < yyleng; i++)
+ {
+ byte = yytext[i];
+
+ switch (byte)
+ {
+ case ' ':
+ continue;
+ break;
+
+ case '?':
+ even = !even;
+ continue;
+ break;
+
+ case '0' ... '9':
+ value = (byte - '0');
+ break;
+
+ case 'A' ... 'F':
+ value = 0xa + (byte - 'A');
+ break;
+
+ case 'a' ... 'f':
+ value = 0xa + (byte - 'a');
+ break;
+
+ }
+
+ if (even)
+ {
+ tmp_0->data[tmp_0->len++] = (value << 4);
+ tmp_1->data[tmp_1->len++] = 0xf0;
+ }
+ else
+ {
+ tmp_0->data[tmp_0->len++] = value;
+ tmp_1->data[tmp_1->len++] = 0x0f;
+ }
+
+ even = !even;
+
+ }
+
#ifndef NDEBUG
- /* Pour rendre plus lisibles les impressions de débogage */
- tmp_0->data[tmp_0->len] = '\0';
- tmp_1->data[tmp_1->len] = '\0';
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+ tmp_1->data[tmp_1->len] = '\0';
#endif
- yylval->masked.tmp_values = tmp_0;
- yylval->masked.tmp_masks = tmp_1;
- return SEMI_MASK;
+ yylval->masked.tmp_values = tmp_0;
+ yylval->masked.tmp_masks = tmp_1;
+ return SEMI_MASK;
- }
+ }
%{ /* Définition d'expressions régulières */ %}
@@ -787,7 +877,7 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
<bytes_regex>"." { return DOT; }
<bytes_regex>({regular_chars})+ {
- rost_unescape_bytes(yytext, yyleng, tmp_0);
+ rost_unescape_regex(yytext, yyleng, tmp_0);
printf(" regular: '%s'\n", yytext);
@@ -844,14 +934,14 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
<bytes_regex>"{" {
PUSH_STATE(bytes_regex_quantifier);
- return BRACKET_O;
+ return BRACE_IN;
}
<bytes_regex_quantifier>"," { return COMMA; }
<bytes_regex_quantifier>"}" {
POP_STATE;
- return BRACKET_C;
+ return BRACE_OUT;
}
@@ -883,9 +973,9 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
<condition>"/" { return DIV; }
<condition>"%" { return MOD; }
-<bytes,condition>"(" { return PAREN_O; }
-<bytes,condition>")" { return PAREN_C; }
-<bytes,condition>"," { return COMMA; }
+<condition>"(" { return PAREN_O; }
+<condition>")" { return PAREN_C; }
+<condition>"," { return COMMA; }
<condition>"[" { return HOOK_O; }
@@ -921,30 +1011,54 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
return BYTES_ID_COUNTER;
}
+ <condition>#{bytes_fuzzy_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_FUZZY_ID_COUNTER;
+ }
+
<condition>@{bytes_id} {
yylval->sized_cstring.data = yytext + 1;
yylval->sized_cstring.len = yyleng - 1;
return BYTES_ID_START;
}
+ <condition>@{bytes_fuzzy_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_FUZZY_ID_START;
+ }
+
<condition>!{bytes_id} {
yylval->sized_cstring.data = yytext + 1;
yylval->sized_cstring.len = yyleng - 1;
return BYTES_ID_LENGTH;
}
+ <condition>!{bytes_fuzzy_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_FUZZY_ID_LENGTH;
+ }
+
<condition>~{bytes_id} {
yylval->sized_cstring.data = yytext + 1;
yylval->sized_cstring.len = yyleng - 1;
return BYTES_ID_END;
}
+ <condition>~{bytes_fuzzy_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_FUZZY_ID_END;
+ }
+
-<bytes,condition>[A-Za-z_][A-Za-z0-9_]* {
+<condition>[A-Za-z_][A-Za-z0-9_]* {
yylval->sized_cstring.data = yytext;
yylval->sized_cstring.len = yyleng;
return NAME;
@@ -953,41 +1067,150 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
-<bytes_value>"\"" {
- POP_STATE;
- // *built_pattern = g_bytes_pattern_new();
- PUSH_STATE(bytes_value_raw);
- }
-<bytes_value_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return 11111/*MASKED_STRING*/; }
-<bytes_value_raw>"\\\"" { }//g_bytes_pattern_append_data(*built_pattern, '"', 0xff); }
-<bytes_value_raw>"\\t" { }//g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); }
-<bytes_value_raw>"\\r" { }//g_bytes_pattern_append_data(*built_pattern, '\r', 0xff); }
-<bytes_value_raw>"\\n" { }//g_bytes_pattern_append_data(*built_pattern, '\n', 0xff); }
-<bytes_value_raw>"\\\\" { }//g_bytes_pattern_append_data(*built_pattern, '\\', 0xff); }
-<bytes_value_raw>\\x[0-9a-fA-F]{2} {
- uint8_t __ch;
- __ch = strtol(yytext + 2, NULL, 16);
- printf("__ch: %hhx\n", __ch);
- //g_bytes_pattern_append_data(*built_pattern, __ch, 0xff);
- }
+%{ /* Commentaires */ %}
+
+<*>"/*" { PUSH_STATE(comment); }
+<comment>"*/" { POP_STATE; }
+<comment>(.|\n) { }
-<bytes_value_raw>. { }//g_bytes_pattern_append_data(*built_pattern, *yytext, 0xff); }
+<*>"//"[^\n]* { }
+%{ /* Suppression du besoin de sauvegardes pour retours en arrière */ %}
+"i" { HANDLE_UNCOMPLETED_TOKEN; }
+"in" { HANDLE_UNCOMPLETED_TOKEN; }
+"inc" { HANDLE_UNCOMPLETED_TOKEN; }
+"incl" { HANDLE_UNCOMPLETED_TOKEN; }
+"inclu" { HANDLE_UNCOMPLETED_TOKEN; }
+"includ" { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\" { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_not_escaped}+ { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_mixed}+ { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_mixed}+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_mixed}+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<inc_path>\"{str_mixed}+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
-%{ /* Commentaires */ %}
+"g" { HANDLE_UNCOMPLETED_TOKEN; }
+"gl" { HANDLE_UNCOMPLETED_TOKEN; }
+"glo" { HANDLE_UNCOMPLETED_TOKEN; }
+"glob" { HANDLE_UNCOMPLETED_TOKEN; }
+"globa" { HANDLE_UNCOMPLETED_TOKEN; }
-<*>"/*" { PUSH_STATE(comment); }
-<comment>"*/" { POP_STATE; }
-<comment>(.|\n) { }
+"p" { HANDLE_UNCOMPLETED_TOKEN; }
+"pr" { HANDLE_UNCOMPLETED_TOKEN; }
+"pri" { HANDLE_UNCOMPLETED_TOKEN; }
+"priv" { HANDLE_UNCOMPLETED_TOKEN; }
+"priva" { HANDLE_UNCOMPLETED_TOKEN; }
+"privat" { HANDLE_UNCOMPLETED_TOKEN; }
-<*>"//"[^\n]* { }
+"r" { HANDLE_UNCOMPLETED_TOKEN; }
+"ru" { HANDLE_UNCOMPLETED_TOKEN; }
+"rul" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<raw_block>"m" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block>"me" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block>"met" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<raw_block,meta>"b" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta>"by" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta>"byt" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta>"byte" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<raw_block,meta,bytes>"c" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"co" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"con" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"cond" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"condi" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"condit" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"conditi" { HANDLE_UNCOMPLETED_TOKEN; }
+<raw_block,meta,bytes>"conditio" { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<meta_value>"t" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"tr" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"tru" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<meta_value>"f" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"fa" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"fal" { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>"fals" { HANDLE_UNCOMPLETED_TOKEN; }
+
+<meta_value>-0x { HANDLE_UNCOMPLETED_TOKEN; }
+
+<meta_value>0x { HANDLE_UNCOMPLETED_TOKEN; }
+
+<meta_value>\"{str_mixed}* { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>\"{str_mixed}*\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>\"{str_mixed}*\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<meta_value>\"{str_mixed}*\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<condition>-0x { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_hex_range,bytes_regex_quantifier,condition>0x { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<condition>\"{str_not_escaped}* { HANDLE_UNCOMPLETED_TOKEN; }
+
+<condition>\" { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"{str_mixed}+ { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"{str_mixed}+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"{str_mixed}+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<condition>\"{str_mixed}+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_value>\" { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"{str_mixed}+ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"{str_mixed}+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"{str_mixed}+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_value>\"{str_mixed}+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes>\"{str_not_escaped}+ { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F]/[^?] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]*[\?]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*\?/[^?] { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_regex>\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>({regular_chars})+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>({regular_chars})+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex>({regular_chars})+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_regex>({reg_classes})+\\ { /* truncated escape after character classes: report it like the sibling rules instead of silently discarding the text */ HANDLE_UNCOMPLETED_TOKEN; }
+
+
+<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\ { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex_range>\\x { HANDLE_UNCOMPLETED_TOKEN; }
+<bytes_regex_range>\\x[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }
%{ /* Actions par défaut */ %}