diff options
Diffstat (limited to 'src/analysis/scan/tokens.l')
-rw-r--r-- | src/analysis/scan/tokens.l | 495 |
1 files changed, 404 insertions, 91 deletions
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l
index f3dbc79..18594c4 100644
--- a/src/analysis/scan/tokens.l
+++ b/src/analysis/scan/tokens.l
@@ -15,47 +15,140 @@
 #include <stdlib.h>
 
 
-#define read_block(tmp) \
-    ({ \
-        unsigned int __depth; \
-        bool __is_string; \
-        char *__iter; \
-        \
-        __depth = 1; \
-        __is_string = false; \
-        \
-        for (__iter = temp; __depth > 0; __iter += (__depth > 0 ? 1 : 0)) \
-        { \
-            *__iter = input(); \
-            \
-            switch (*__iter) \
-            { \
-                case '"': \
-                    __is_string = !__is_string; \
-                    break; \
-                \
-                case '{': \
-                    if (!__is_string) __depth++; \
-                    break; \
-                \
-                case '}': \
-                    if (!__is_string) \
-                    { \
-                        __depth--; \
-                        if (__depth == 0) unput('}'); \
-                    } \
-                    break; \
-                \
-            } \
-            \
-        } \
-        \
-        *__iter = '\0'; \
-        \
-    })
+/******************************************************************************
+*                                                                             *
+*  Parameters  : ch = character to convert.                                   *
+*                                                                             *
+*  Description : Gives the numeric value of one hexadecimal digit.            *
+*                                                                             *
+*  Return      : Value from 0 to 15, or -1 if ch is not a hex digit.          *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static int rost_hex_digit_value(char ch)
+{
+    int result;                             /* Value to return             */
+
+    if (ch >= '0' && ch <= '9')
+    {
+        result = ch - '0';
+    }
+    else if (ch >= 'A' && ch <= 'F')
+    {
+        result = 0xa + (ch - 'A');
+    }
+    else if (ch >= 'a' && ch <= 'f')
+    {
+        result = 0xa + (ch - 'a');
+    }
+    else
+    {
+        result = -1;
+    }
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : src = escaped text to convert (not modified).                *
+*                len = number of bytes to read from src.                      *
+*                out = receives the raw bytes; out->len is reset. [OUT]       *
+*                                                                             *
+*  Description : Copies a byte sequence, decoding backslash escapes.          *
+*                                                                             *
+*  Return      : -                                                            *
+*                                                                             *
+*  Remarks     : Decodes \a \t \n \v \f \r and \xNN; any other escaped        *
+*                character (\\, \{, \., \/, ...) is copied without its        *
+*                backslash. A backslash ending the input, or a \x not         *
+*                followed by two hex digits, is kept literally. At most len   *
+*                bytes are written to out->data; its capacity is not checked  *
+*                here.                                                        *
+*                                                                             *
+******************************************************************************/
+
+static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out)
+{
+    size_t i;                               /* Read position in src        */
+    char next;                              /* Character after a backslash */
+    int high;                               /* High nibble of a \xNN byte  */
+    int low;                                /* Low nibble of a \xNN byte   */
+
+    out->len = 0;
+
+    for (i = 0; i < len; i++)
+    {
+        /* Ordinary byte, or a backslash with nothing after it: keep as is */
+        if (src[i] != '\\' || (i + 1) >= len)
+        {
+            out->data[out->len++] = src[i];
+            continue;
+        }
+
+        /* The input holds a backslash then a *letter*, not a control code */
+        next = src[++i];
+
+        switch (next)
+        {
+            case 'a':
+                out->data[out->len++] = '\a';
+                break;
+
+            case 't':
+                out->data[out->len++] = '\t';
+                break;
+
+            case 'n':
+                out->data[out->len++] = '\n';
+                break;
+
+            case 'v':
+                out->data[out->len++] = '\v';
+                break;
+
+            case 'f':
+                out->data[out->len++] = '\f';
+                break;
+
+            case 'r':
+                out->data[out->len++] = '\r';
+                break;
+
+            case 'x':
+
+                high = -1;
+                low = -1;
+
+                if ((i + 2) < len)
+                {
+                    high = rost_hex_digit_value(src[i + 1]);
+                    low = rost_hex_digit_value(src[i + 2]);
+                }
+
+                if (high >= 0 && low >= 0)
+                {
+                    out->data[out->len++] = (high << 4) | low;
+                    i += 2;
+                }
+                else
+                {
+                    /* Malformed \x sequence: fall back to a literal 'x' */
+                    out->data[out->len++] = 'x';
+                }
+
+                break;
+
+            default:
+                /* Escaped literal: emit the character without the backslash */
+                out->data[out->len++] = next;
+                break;
+
+        }
+
+    }
+
+}
+
+
 
 #define PUSH_STATE(s) yy_push_state(s, yyscanner)
 #define POP_STATE yy_pop_state(yyscanner)
@@ -88,9 +181,15 @@
 %x raw_block
 
 %x strings
-%x strval
-%x strval_raw
-%x strval_hex
+%x bytes_value
+%x bytes_value_raw
+
+%x bytes_hex
+%x bytes_hex_range
+
+%x bytes_regex
+%x bytes_regex_quantifier
+%x bytes_regex_range
 
 %x condition
 %x strlit
@@ -101,6 +200,22 @@
 %x comment
 
 
+
+hbyte [0-9a-fA-F]{2}
+
+reg_allowed [^^$.|/{}()\[\]*+?\\]
+reg_allowed_escaped \\^|\\$|\\\.|\\\||\\\/|\\\{|\\\}|\\\(|\\\)|\\\[|\\\]|\\\*|\\\+|\\\?|\\\\
+reg_escaped \\a|\\t|\\n|\\v|\\f|\\r
+reg_byte \\x[0-9a-fA-F]{2}
+
+regular_chars {reg_allowed}|{reg_allowed_escaped}|{reg_escaped}|{reg_byte}
+
+reg_classes \\w|\\W|\\s|\\S|\\d|\\D|\\b|\\B
+
+
+bytes_id [A-Za-z_][A-Za-z0-9_]*
+
+
 %%
 
 
@@ -139,8 +254,8 @@
 <condition>-(0|[1-9][0-9]*) { yylval->signed_integer = strtoll(yytext, NULL, 10); return SIGNED_INTEGER; }
 <condition>-0x[0-9a-f]+ { yylval->signed_integer = strtoll(yytext, NULL, 16); return SIGNED_INTEGER; }
 
-<condition>(0|[1-9][0-9]*) { yylval->unsigned_integer = strtoull(yytext, NULL, 10); return UNSIGNED_INTEGER; }
-<condition>0x[0-9a-f]+ { yylval->unsigned_integer = strtoull(yytext, NULL, 16); return UNSIGNED_INTEGER; }
+<bytes_hex_range,bytes_regex_quantifier,condition>(0|[1-9][0-9]*) { yylval->unsigned_integer = strtoull(yytext, NULL, 10); return UNSIGNED_INTEGER; }
+<bytes_hex_range,bytes_regex_quantifier,condition>0x[0-9a-f]+ { yylval->unsigned_integer = strtoull(yytext, NULL, 16); return UNSIGNED_INTEGER; }
 
 <condition>[kK][bB] { return KB; }
 <condition>[mM][bB] { return MB; }
@@ -181,6 +296,195 @@
 
 
 
+%{ /* Hexadecimal pattern definition */ %}
+
+ <bytes_value>"{" {
+        POP_STATE;
+        PUSH_STATE(bytes_hex);
+ }
+
+ <bytes_hex>"}" { POP_STATE; }
+
+ <bytes_hex>"[" {
+        PUSH_STATE(bytes_hex_range);
+        return HOOK_O;
+ }
+
+ <bytes_hex_range>"-" { return MINUS; }
+
+ <bytes_hex_range>"]" {
+        POP_STATE;
+        return HOOK_C;
+ }
+
+ <bytes_hex>"(" { return PAREN_O; }
+
+ <bytes_hex>")" { return PAREN_C; }
+
+ <bytes_hex>"|" { return PIPE; }
+
+ <bytes_hex>"~" { return TILDE; }
+
+ <bytes_hex>{hbyte}([ ]*{hbyte})* {
+        bool even;                          /* Next digit is a high nibble */
+        size_t i;                           /* Read position in yytext     */
+        int value;                          /* Value of the current digit  */
+
+        tmp_0->len = 0;
+
+        even = true;
+
+        for (i = 0; i < (size_t)yyleng; i++)
+        {
+            value = rost_hex_digit_value(yytext[i]);
+
+            /* Given the pattern, only separating blanks fail to convert */
+            if (value < 0)
+                continue;
+
+            if (even)
+                tmp_0->data[tmp_0->len] = (value << 4);
+            else
+                tmp_0->data[tmp_0->len++] |= value;
+
+            even = !even;
+
+        }
+
+        /* Digits come in pairs, so a byte cannot be left half-built */
+        assert(even);
+
+#ifndef NDEBUG
+        /* Makes debug printouts easier to read */
+        tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+        yylval->tmp_cstring = tmp_0;
+        return HEX_BYTES;
+
+ }
+
+ <bytes_hex>[\?]{2}([ ]*[\?]{2})* {
+        unsigned long long counter;         /* Number of '?' found         */
+        size_t i;                           /* Read position in yytext     */
+
+        counter = 0;
+
+        for (i = 0; i < (size_t)yyleng; i++)
+            if (yytext[i] == '?')
+                counter++;
+
+        assert(counter % 2 == 0);
+
+        /* Two '?' stand for one fully masked byte */
+        yylval->unsigned_integer = counter / 2;
+        return FULL_MASK;
+
+ }
+
+
+%{ /* Regular expression definition */ %}
+
+ <bytes_value>"/" {
+        /* NOTE(review): the printf() traces in the regex rules write to stdout; presumably temporary debugging - confirm before release */
+        POP_STATE;
+        printf(" -- regex\n");
+        PUSH_STATE(bytes_regex);
+ }
+
+ <bytes_regex>"/" { printf("exit regex\n"); POP_STATE; }
+
+ <bytes_regex>"." { return DOT; }
+
+ <bytes_regex>({regular_chars})+ {
+        rost_unescape_bytes(yytext, yyleng, tmp_0);
+
+        printf(" regular: '%s'\n", yytext);
+
+#ifndef NDEBUG
+        /* Makes debug printouts easier to read */
+        tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+        yylval->tmp_cstring = tmp_0;
+        return REGEX_BYTES;
+
+ }
+
+ <bytes_regex>({reg_classes})+ {
+
+        return REGEX_CLASSES;
+
+ }
+
+%{ /* <bytes_regex>\[({regular_chars}|({regular_chars})-z|{reg_classes})+\] { */ %}
+
+
+ <bytes_regex>"[" {
+        PUSH_STATE(bytes_regex_range);
+        printf(" !! entering range\n");
+        return HOOK_O;
+ }
+
+ <bytes_regex_range>"]" {
+        POP_STATE;
+        printf(" !! exiting range\n");
+        return HOOK_C;
+ }
+
+
+
+
+<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+ {
+
+        printf("range: '%s'\n", yytext);
+        return REGEX_RANGE;
+
+ }
+
+ <bytes_regex>"(" { return PAREN_O; }
+
+ <bytes_regex>")" { return PAREN_C; }
+
+ <bytes_regex>"|" { return PIPE; }
+
+ <bytes_regex>"*" { return MUL; }
+ <bytes_regex>"+" { return PLUS; }
+ <bytes_regex>"?" { return QUESTION; }
+
+ <bytes_regex>"{" {
+        PUSH_STATE(bytes_regex_quantifier);
+        return BRACKET_O;
+ }
+
+ <bytes_regex_quantifier>"," { return COMMA; }
+
+ <bytes_regex_quantifier>"}" {
+        POP_STATE;
+        return BRACKET_C;
+ }
+
+
+%{ /* Matching condition */ %}
 
 <condition>"and" { return AND; }
 <condition>"or" { return OR; }
@@ -208,10 +512,17 @@
 <condition>"/" { return DIV; }
 <condition>"%" { return MOD; }
 
-<condition>"(" { return PAREN_O; }
-<condition>")" { return PAREN_C; }
-<condition>"," { return COMMA; }
+<strings,condition>"(" { return PAREN_O; }
+<strings,condition>")" { return PAREN_C; }
+<strings,condition>"," { return COMMA; }
+
+
+<condition>"[" { return HOOK_O; }
+<condition>"]" { return HOOK_C; }
+
+
 <condition>"." { return DOT; }
+<strings>"|" { return PIPE; }
 
 <condition>"none" { return NONE; }
 <condition>"any" { return ANY; }
@@ -221,36 +532,51 @@
 <condition>"in" { return IN; }
 
 
-<strings,condition>$[A-Za-z0-9_]* {
-        yylval->sized_cstring.data = yytext + 1;
-        yylval->sized_cstring.len = yyleng - 1;
-        return IDENTIFIER;
-    }
+ <strings,condition>${bytes_id} {
+        /* Skip the one-character sigil; the token points into yytext */
+        yylval->sized_cstring.data = yytext + 1;
+        yylval->sized_cstring.len = yyleng - 1;
+        return BYTES_ID;
+ }
+
+ <condition>#{bytes_id} {
+        yylval->sized_cstring.data = yytext + 1;
+        yylval->sized_cstring.len = yyleng - 1;
+        return BYTES_ID_COUNTER;
+ }
+
+ <condition>@{bytes_id} {
+        yylval->sized_cstring.data = yytext + 1;
+        yylval->sized_cstring.len = yyleng - 1;
+        return BYTES_ID_START;
+ }
+
+ <condition>!{bytes_id} {
+        yylval->sized_cstring.data = yytext + 1;
+        yylval->sized_cstring.len = yyleng - 1;
+        return BYTES_ID_LENGTH;
+ }
+
+ <condition>~{bytes_id} {
+        yylval->sized_cstring.data = yytext + 1;
+        yylval->sized_cstring.len = yyleng - 1;
+        return BYTES_ID_END;
+ }
 
-<condition>$[A-Za-z_][A-Za-z0-9_]* {
-        yylval->sized_cstring.data = yytext + 1;
-        yylval->sized_cstring.len = yyleng - 1;
-        return BYTES_ID;
-    }
 
-<condition>#[A-Za-z_][A-Za-z0-9_]* {
-        yylval->sized_cstring.data = yytext + 1;
-        yylval->sized_cstring.len = yyleng - 1;
-        return BYTES_ID_COUNTER;
-    }
 
-<condition>[A-Za-z_][A-Za-z0-9_]* {
+
+<strings,condition>[A-Za-z_][A-Za-z0-9_]* {
         yylval->sized_cstring.data = yytext;
         yylval->sized_cstring.len = yyleng;
         return NAME;
     }
 
-<strings>"=" { PUSH_STATE(strval); return ASSIGN; }
+<strings>"=" { PUSH_STATE(bytes_value); return ASSIGN; }
 
-<strval>\"[^\"\\]+\" {
+<bytes_value>\"[^\"\\]+\" {
         POP_STATE;
         yylval->sized_cstring.data = yytext + 1;
         yylval->sized_cstring.len = yyleng - 2;
@@ -260,43 +586,28 @@
 
 
 
-<strval>"\"" {
+<bytes_value>"\"" {
         POP_STATE;
         // *built_pattern = g_bytes_pattern_new();
-        PUSH_STATE(strval_raw);
-    }
-<strval>"{" {
-        POP_STATE;
-        // *built_pattern = g_bytes_pattern_new();
-        PUSH_STATE(strval_hex);
+        PUSH_STATE(bytes_value_raw);
     }
 
-<strval_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return MASKED_STRING; }
+<bytes_value_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return MASKED_STRING; }
 
-<strval_raw>"\\\"" { }//g_bytes_pattern_append_data(*built_pattern, '"', 0xff); }
-<strval_raw>"\\t" { }//g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); }
-<strval_raw>"\\r" { }//g_bytes_pattern_append_data(*built_pattern, '\r', 0xff); }
-<strval_raw>"\\n" { }//g_bytes_pattern_append_data(*built_pattern, '\n', 0xff); }
-<strval_raw>"\\\\" { }//g_bytes_pattern_append_data(*built_pattern, '\\', 0xff); }
+<bytes_value_raw>"\\\"" { }//g_bytes_pattern_append_data(*built_pattern, '"', 0xff); }
+<bytes_value_raw>"\\t" { }//g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); }
+<bytes_value_raw>"\\r" { }//g_bytes_pattern_append_data(*built_pattern, '\r', 0xff); }
+<bytes_value_raw>"\\n" { }//g_bytes_pattern_append_data(*built_pattern, '\n', 0xff); }
+<bytes_value_raw>"\\\\" { }//g_bytes_pattern_append_data(*built_pattern, '\\', 0xff); }
 
-<strval_raw>\\x[0-9a-fA-F]{2} {
+<bytes_value_raw>\\x[0-9a-fA-F]{2} {
         uint8_t __ch;
         __ch = strtol(yytext + 2, NULL, 16);
+        printf("__ch: %hhx\n", __ch);
         //g_bytes_pattern_append_data(*built_pattern, __ch, 0xff);
     }
 
-<strval_raw>. { }//g_bytes_pattern_append_data(*built_pattern, *yytext, 0xff); }
-
-<strval_hex>"}" { POP_STATE; /*yylval->pattern = *built_pattern;*/ return MASKED_STRING; }
-
-<strval_hex>[0-9a-fA-F]{2} {
-        uint8_t __ch;
-        __ch = strtol(yytext, NULL, 16);
-        //g_bytes_pattern_append_data(*built_pattern, __ch, 0xff);
-    }
-
-<strval_hex>"??" { /*g_bytes_pattern_insert_space(*built_pattern, 1, 1);*/ }
-
+<bytes_value_raw>. { }//g_bytes_pattern_append_data(*built_pattern, *yytext, 0xff); }
 
 
 
@@ -318,7 +629,9 @@
 %{ /* Actions par défaut */ %}
 
 
-<*>[ \t\n]+ { }
+<*>[ \t]+ { }
+
+<*>[\n] { static int ln = 1; if (0) printf("----------- %%< -------------- %%< ---- %d\n", ln++); }
 
 <*>. {
         char *msg;