diff options
Diffstat (limited to 'src/analysis/scan/tokens.l')
-rw-r--r-- | src/analysis/scan/tokens.l | 334 |
1 files changed, 296 insertions, 38 deletions
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l index 18594c4..594d4d9 100644 --- a/src/analysis/scan/tokens.l +++ b/src/analysis/scan/tokens.l @@ -32,7 +32,7 @@ * * ******************************************************************************/ -static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out) +static void rost_unescape_string_bytes(const char *src, size_t len, sized_string_t *out) { size_t i; /* Boucle de parcours */ bin_t byte; /* Octet à analyser */ @@ -52,30 +52,177 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out switch (next) { - case '\a': + case 'a': + out->data[out->len++] = '\a'; + break; + + case 'b': + out->data[out->len++] = '\b'; + break; + + case 't': + out->data[out->len++] = '\t'; + break; + + case 'n': + out->data[out->len++] = '\n'; + break; + + case 'v': + out->data[out->len++] = '\v'; + break; + + case 'f': + out->data[out->len++] = '\f'; + break; + + case 'r': + out->data[out->len++] = '\r'; + break; + + case 'e': + out->data[out->len++] = '\e'; + break; + + case '"': + out->data[out->len++] = '\"'; + break; + + case '\\': out->data[out->len++] = '\\'; break; - case '\t': + case 'x': + + next = src[i + 2]; + + switch (next) + { + case '0' ... '9': + out->data[out->len] = (next - '0'); + break; + + case 'A' ... 'F': + out->data[out->len] = 0xa + (next - 'A'); + break; + + case 'a' ... 'f': + out->data[out->len] = 0xa + (next - 'a'); + break; + + } + + out->data[out->len] <<= 4; + + next = src[i + 3]; + + switch (next) + { + case '0' ... '9': + out->data[out->len] |= (next - '0'); + break; + + case 'A' ... 'F': + out->data[out->len] |= 0xa + (next - 'A'); + break; + + case 'a' ... 'f': + out->data[out->len] |= 0xa + (next - 'a'); + break; + + } + + out->len++; + + i += 2; + break; + + } + + i++; + break; + + default: + out->data[out->len++] = byte; + break; + + } + + } + +} + + +/****************************************************************************** +* * +* Paramètres : src = liste d'octets à traiter. * +* len = taille de cette liste. * +* out = série d'octets bruts obtenue. [OUT] * +* * +* Description : Transcrit une série d'octets en en remplaçant certains. * +* * +* Retour : - * +* * +* Remarques : - * +* * +******************************************************************************/ + +static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out) +{ + size_t i; /* Boucle de parcours */ + bin_t byte; /* Octet à analyser */ + bin_t next; /* Octet suivant */ + + out->len = 0; + + for (i = 0; i < len; i++) + { + byte = src[i]; + + switch (byte) + { + case '\\': + + next = src[i + 1]; + + switch (next) + { + case 'a': + out->data[out->len++] = '\a'; + break; + + case 'b': + out->data[out->len++] = '\b'; + break; + + case 't': out->data[out->len++] = '\t'; break; - case '\n': + case 'n': out->data[out->len++] = '\n'; break; - case '\v': + case 'v': out->data[out->len++] = '\v'; break; - case '\f': + case 'f': out->data[out->len++] = '\f'; break; - case '\r': + case 'r': out->data[out->len++] = '\r'; break; + case 'e': + out->data[out->len++] = '\e'; + break; + + case '"': + out->data[out->len++] = '\"'; + break; + case '\\': out->data[out->len++] = '\\'; break; @@ -91,11 +238,11 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out break; case 'A' ... 'F': - out->data[out->len] = 0x10 + (next - 'A'); + out->data[out->len] = 0xa + (next - 'A'); break; case 'a' ... 'f': - out->data[out->len] = 0x10 + (next - 'a'); + out->data[out->len] = 0xa + (next - 'a'); break; } @@ -111,11 +258,11 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out break; case 'A' ... 'F': - out->data[out->len] |= 0x10 + (next - 'A'); + out->data[out->len] |= 0xa + (next - 'A'); break; case 'a' ... 'f': - out->data[out->len] |= 0x10 + (next - 'a'); + out->data[out->len] |= 0xa + (next - 'a'); break; } @@ -175,7 +322,7 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out %option yylineno %option never-interactive -%x include_path +%x inc_path %x rule_intro %x raw_block @@ -200,8 +347,12 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out %x comment +str_not_escaped [^\"\\] +str_escaped \\a|\\b|\\t|\\n|\\v|\\f|\\r|\\e|\\\"|\\\\|\\x{hbyte} +str_mixed ({str_not_escaped}|{str_escaped}) hbyte [0-9a-fA-F]{2} +mbyte (\?[0-9a-fA-F]|[0-9a-fA-F]\?) reg_allowed [^^$.|/{}()\[\]*+?\\] reg_allowed_escaped \\^|\\$|\\\.|\\\||\\\/|\\\{|\\\}|\\\(|\\\)|\\\[|\\\]|\\\*|\\\+|\\\?|\\\\ @@ -219,15 +370,35 @@ bytes_id [A-Za-z_][A-Za-z0-9_]* %% +"include" { PUSH_STATE(inc_path); return INCLUDE; } +<inc_path>\"{str_not_escaped}+\" { + POP_STATE; -"include" { PUSH_STATE(include_path); return INCLUDE; } + yylval->sized_cstring.data = yytext + 1; + yylval->sized_cstring.len = yyleng - 2; + + return PLAIN_TEXT; + } + +<inc_path>\"{str_mixed}+\" { + POP_STATE; + + rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0); + +#ifndef NDEBUG + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; +#endif + + yylval->tmp_cstring = tmp_0; + + return ESCAPED_TEXT; + } + + +%{ /* Définition locale d'une règle */ %} -<include_path>"\"" { - POP_STATE; - *used = 0; - PUSH_STATE(strlit); - } "rule" { PUSH_STATE(rule_intro); return RAW_RULE; } @@ -295,6 +466,41 @@ bytes_id [A-Za-z_][A-Za-z0-9_]* } +%{ /* Définitions communes pour la section "bytes:" */ %} + +<strings>"fullword" { return FULLWORD; } +<strings>"nocase" { return NOCASE; } +<strings>"private" { return PRIVATE; } + +<strings>"=" { PUSH_STATE(bytes_value); return ASSIGN; } + + +%{ /* Définition de motif en texte brut */ %} + +<bytes_value>\"{str_not_escaped}+\" { + POP_STATE; + + yylval->sized_cstring.data = yytext + 1; + yylval->sized_cstring.len = yyleng - 2; + + return PLAIN_TEXT; + } + +<bytes_value>\"{str_mixed}+\" { + POP_STATE; + + rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0); + +#ifndef NDEBUG + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; +#endif + + yylval->tmp_cstring = tmp_0; + + return ESCAPED_TEXT; + } + %{ /* Définition de motif en hexadécimal */ %} @@ -350,26 +556,21 @@ bytes_id [A-Za-z_][A-Za-z0-9_]* break; case 'A' ... 'F': - value = 0x10 + (byte - 'A'); + value = 0xa + (byte - 'A'); break; case 'a' ... 'f': - value = 0x10 + (byte - 'a'); + value = 0xa + (byte - 'a'); break; } if (even) - { tmp_0->data[tmp_0->len] = (value << 4); - even = false; - } - else - { tmp_0->data[tmp_0->len++] |= value; - even = true; - } + + even = !even; } @@ -402,6 +603,73 @@ bytes_id [A-Za-z_][A-Za-z0-9_]* } + <bytes_hex>{mbyte}([ ]*{mbyte})* { + bool even; + size_t i; + bin_t byte; + bin_t value; + + tmp_0->len = 0; + tmp_1->len = 0; + + even = true; + + for (i = 0; i < yyleng; i++) + { + byte = yytext[i]; + + switch (byte) + { + case ' ': + continue; + break; + + case '?': + even = !even; + continue; + break; + + case '0' ... '9': + value = (byte - '0'); + break; + + case 'A' ... 'F': + value = 0xa + (byte - 'A'); + break; + + case 'a' ... 'f': + value = 0xa + (byte - 'a'); + break; + + } + + if (even) + { + tmp_0->data[tmp_0->len++] = (value << 4); + tmp_1->data[tmp_1->len++] = 0xf0; + } + else + { + tmp_0->data[tmp_0->len++] = value; + tmp_1->data[tmp_1->len++] = 0x0f; + } + + even = !even; + + } + +#ifndef NDEBUG + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; + tmp_1->data[tmp_1->len] = '\0'; +#endif + + yylval->masked.tmp_values = tmp_0; + yylval->masked.tmp_masks = tmp_1; + return SEMI_MASK; + + } + %{ /* Définition d'expressions régulières */ %} @@ -573,16 +841,6 @@ bytes_id [A-Za-z_][A-Za-z0-9_]* return NAME; } -<strings>"=" { PUSH_STATE(bytes_value); return ASSIGN; } - - -<bytes_value>\"[^\"\\]+\" { - POP_STATE; - yylval->sized_cstring.data = yytext + 1; - yylval->sized_cstring.len = yyleng - 2; - return PLAIN_STRING; - } - @@ -592,7 +850,7 @@ bytes_id [A-Za-z_][A-Za-z0-9_]* PUSH_STATE(bytes_value_raw); } -<bytes_value_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return MASKED_STRING; } +<bytes_value_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return 11111/*MASKED_STRING*/; } <bytes_value_raw>"\\\"" { }//g_bytes_pattern_append_data(*built_pattern, '"', 0xff); } <bytes_value_raw>"\\t" { }//g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); } |