summaryrefslogtreecommitdiff
path: root/src/analysis/scan/tokens.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/analysis/scan/tokens.l')
-rw-r--r--src/analysis/scan/tokens.l495
1 files changed, 404 insertions, 91 deletions
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l
index f3dbc79..18594c4 100644
--- a/src/analysis/scan/tokens.l
+++ b/src/analysis/scan/tokens.l
@@ -15,47 +15,140 @@
#include <stdlib.h>
-#define read_block(tmp) \
- ({ \
- unsigned int __depth; \
- bool __is_string; \
- char *__iter; \
- \
- __depth = 1; \
- __is_string = false; \
- \
- for (__iter = temp; __depth > 0; __iter += (__depth > 0 ? 1 : 0)) \
- { \
- *__iter = input(); \
- \
- switch (*__iter) \
- { \
- case '"': \
- __is_string = !__is_string; \
- break; \
- \
- case '{': \
- if (!__is_string) __depth++; \
- break; \
- \
- case '}': \
- if (!__is_string) \
- { \
- __depth--; \
- if (__depth == 0) unput('}'); \
- } \
- break; \
- \
- } \
- \
- } \
- \
- *__iter = '\0'; \
- \
- })
+/******************************************************************************
+*                                                                             *
+*  Parameters  : ch = character to interpret as a hexadecimal digit.          *
+*                                                                             *
+*  Description : Converts one hexadecimal digit into its numeric value.       *
+*                                                                             *
+*  Return      : Value in [0, 15], or -1 if ch is not a hexadecimal digit.    *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static int rost_hex_digit_value(char ch)
+{
+    int result;                             /* Value to return             */
+
+    if (ch >= '0' && ch <= '9')
+        result = ch - '0';
+
+    else if (ch >= 'A' && ch <= 'F')
+        result = 10 + (ch - 'A');
+
+    else if (ch >= 'a' && ch <= 'f')
+        result = 10 + (ch - 'a');
+
+    else
+        result = -1;
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : src = list of bytes to process.                              *
+*                len = size of that list.                                     *
+*                out = resulting series of raw bytes. [OUT]                   *
+*                                                                             *
+*  Description : Transcribes a series of bytes, decoding escape sequences.    *
+*                                                                             *
+*  Return      : -                                                            *
+*                                                                             *
+*  Remarks     : Each input byte yields at most one output byte, and no       *
+*                bounds check is done on out->data: the caller has to         *
+*                provide room for at least len bytes.                         *
+*                                                                             *
+*                Recognised sequences: \a \t \n \v \f \r and \xHH (two        *
+*                hexadecimal digits). Any other escaped character is          *
+*                copied literally without its backslash, which covers         *
+*                \\, \{, \} and the other escaped regex metacharacters.       *
+*                                                                             *
+******************************************************************************/
+
+static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out)
+{
+    size_t i;                               /* Loop index                  */
+    char next;                              /* Character after backslash   */
+    int high;                               /* High nibble of a \xHH byte  */
+    int low;                                /* Low nibble of a \xHH byte   */
+
+    out->len = 0;
+
+    for (i = 0; i < len; i++)
+    {
+        /* Ordinary byte, or lone backslash closing the input: copy as is */
+        if (src[i] != '\\' || (i + 1) >= len)
+        {
+            out->data[out->len++] = src[i];
+            continue;
+        }
+
+        /* Consume the backslash; next is the escaped character */
+        next = src[++i];
+
+        switch (next)
+        {
+            case 'a':
+                out->data[out->len++] = '\a';
+                break;
+
+            case 't':
+                out->data[out->len++] = '\t';
+                break;
+
+            case 'n':
+                out->data[out->len++] = '\n';
+                break;
+
+            case 'v':
+                out->data[out->len++] = '\v';
+                break;
+
+            case 'f':
+                out->data[out->len++] = '\f';
+                break;
+
+            case 'r':
+                out->data[out->len++] = '\r';
+                break;
+
+            case 'x':
+
+                /* Never read past the end of the provided input */
+                high = (i + 1) < len ? rost_hex_digit_value(src[i + 1]) : -1;
+                low = (i + 2) < len ? rost_hex_digit_value(src[i + 2]) : -1;
+
+                if (high >= 0 && low >= 0)
+                {
+                    out->data[out->len++] = (high << 4) | low;
+                    i += 2;
+                }
+
+                /* Malformed sequence: keep the 'x' as a literal character */
+                else
+                    out->data[out->len++] = next;
+
+                break;
+
+            default:
+                /* Escaped metacharacter (\\, \{, \}, \., \/, ...): literal */
+                out->data[out->len++] = next;
+                break;
+
+        }
+
+    }
+
+}
+
+
+
+
#define PUSH_STATE(s) yy_push_state(s, yyscanner)
#define POP_STATE yy_pop_state(yyscanner)
@@ -88,9 +181,15 @@
%x raw_block
%x strings
-%x strval
-%x strval_raw
-%x strval_hex
+%x bytes_value
+%x bytes_value_raw
+
+%x bytes_hex
+%x bytes_hex_range
+
+%x bytes_regex
+%x bytes_regex_quantifier
+%x bytes_regex_range
%x condition
%x strlit
@@ -101,6 +200,22 @@
%x comment
+
+hbyte [0-9a-fA-F]{2}
+
+reg_allowed [^^$.|/{}()\[\]*+?\\]
+reg_allowed_escaped \\^|\\$|\\\.|\\\||\\\/|\\\{|\\\}|\\\(|\\\)|\\\[|\\\]|\\\*|\\\+|\\\?|\\\\
+reg_escaped \\a|\\t|\\n|\\v|\\f|\\r
+reg_byte \\x[0-9a-fA-F]{2}
+
+regular_chars {reg_allowed}|{reg_allowed_escaped}|{reg_escaped}|{reg_byte}
+
+reg_classes \\w|\\W|\\s|\\S|\\d|\\D|\\b|\\B
+
+
+bytes_id [A-Za-z_][A-Za-z0-9_]*
+
+
%%
@@ -139,8 +254,8 @@
<condition>-(0|[1-9][0-9]*) { yylval->signed_integer = strtoll(yytext, NULL, 10); return SIGNED_INTEGER; }
<condition>-0x[0-9a-f]+ { yylval->signed_integer = strtoll(yytext, NULL, 16); return SIGNED_INTEGER; }
-<condition>(0|[1-9][0-9]*) { yylval->unsigned_integer = strtoull(yytext, NULL, 10); return UNSIGNED_INTEGER; }
-<condition>0x[0-9a-f]+ { yylval->unsigned_integer = strtoull(yytext, NULL, 16); return UNSIGNED_INTEGER; }
+<bytes_hex_range,bytes_regex_quantifier,condition>(0|[1-9][0-9]*) { yylval->unsigned_integer = strtoull(yytext, NULL, 10); return UNSIGNED_INTEGER; }
+<bytes_hex_range,bytes_regex_quantifier,condition>0x[0-9a-f]+ { yylval->unsigned_integer = strtoull(yytext, NULL, 16); return UNSIGNED_INTEGER; }
<condition>[kK][bB] { return KB; }
<condition>[mM][bB] { return MB; }
@@ -181,6 +296,195 @@
+%{ /* Définition de motif en hexadécimal */ %}
+
+ <bytes_value>"{" {
+ POP_STATE;
+ PUSH_STATE(bytes_hex);
+ }
+
+ <bytes_hex>"}" { POP_STATE; }
+
+ <bytes_hex>"[" {
+ PUSH_STATE(bytes_hex_range);
+ return HOOK_O;
+ }
+
+ <bytes_hex_range>"-" { return MINUS; }
+
+ <bytes_hex_range>"]" {
+ POP_STATE;
+ return HOOK_C;
+ }
+
+ <bytes_hex>"(" { return PAREN_O; }
+
+ <bytes_hex>")" { return PAREN_C; }
+
+ <bytes_hex>"|" { return PIPE; }
+
+ <bytes_hex>"~" { return TILDE; }
+
+ <bytes_hex>{hbyte}([ ]*{hbyte})* {
+     /**
+      * Converts a run of hexadecimal digit pairs, optionally separated by
+      * spaces, into raw bytes stored in tmp_0, then hands them to the parser.
+      *
+      * NOTE(review): no bounds check is performed on tmp_0->data; its
+      * capacity is not visible here -- confirm it can hold the whole match.
+      */
+     bool even;                              /* Next digit is a high nibble */
+     size_t i;                               /* Loop index                  */
+     bin_t byte;                             /* Current input character     */
+     bin_t value;                            /* Numeric value of the digit  */
+
+     tmp_0->len = 0;
+
+     even = true;
+
+     for (i = 0; i < yyleng; i++)
+     {
+         byte = yytext[i];
+
+         switch (byte)
+         {
+             /* Separators between byte pairs carry no data */
+             case ' ':
+                 continue;
+
+             case '0' ... '9':
+                 value = (byte - '0');
+                 break;
+
+             /* Letters are worth 10..15: 0x10 here would corrupt both nibbles */
+             case 'A' ... 'F':
+                 value = 0xa + (byte - 'A');
+                 break;
+
+             case 'a' ... 'f':
+                 value = 0xa + (byte - 'a');
+                 break;
+
+             /* The pattern only admits hex digits and spaces */
+             default:
+                 assert(false);
+                 value = 0;
+                 break;
+
+         }
+
+         if (even)
+         {
+             tmp_0->data[tmp_0->len] = (value << 4);
+             even = false;
+         }
+
+         else
+         {
+             tmp_0->data[tmp_0->len++] |= value;
+             even = true;
+         }
+
+     }
+
+     /* Digits always come in pairs, so a byte can not be left half-built */
+     assert(even);
+
+#ifndef NDEBUG
+     /* To make debugging prints more readable */
+     tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+     yylval->tmp_cstring = tmp_0;
+     return HEX_BYTES;
+
+ }
+
+
+ <bytes_hex>[\?]{2}([ ]*[\?]{2})* {
+ /* Counts the "??" pairs of the match and returns that count as a FULL_MASK token. */
+ /* NOTE(review): presumably each "??" stands for one wildcard byte -- confirm with the grammar. */
+ unsigned long long counter;
+ size_t i;
+
+ counter = 0;
+
+ /* Only '?' characters are counted; the spaces allowed between pairs are ignored */
+ for (i = 0; i < yyleng; i++)
+ if (yytext[i] == '?')
+ counter++;
+
+ /* The pattern only matches '?' characters two at a time */
+ assert(counter % 2 == 0);
+
+ /* Two '?' characters make one pair */
+ yylval->unsigned_integer = counter / 2;
+ return FULL_MASK;
+
+ }
+
+
+%{ /* Définition d'expressions régulières */ %}
+
+ <bytes_value>"/" {
+ POP_STATE;
+ printf(" -- regex\n");
+ PUSH_STATE(bytes_regex);
+ }
+
+ <bytes_regex>"/" { printf("exit regex\n"); POP_STATE; }
+
+ <bytes_regex>"." { return DOT; }
+
+ <bytes_regex>({regular_chars})+ {
+ /* Decodes the matched run of regular characters into tmp_0 and returns it as REGEX_BYTES */
+ rost_unescape_bytes(yytext, yyleng, tmp_0);
+
+ /* Debug trace of the raw matched text (before unescaping), written to stdout */
+ printf(" regular: '%s'\n", yytext);
+
+#ifndef NDEBUG
+ /* To make debugging prints more readable */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+ return REGEX_BYTES;
+
+ }
+
+ <bytes_regex>({reg_classes})+ {
+
+ return REGEX_CLASSES;
+
+ }
+
+%{ /* <bytes_regex>\[({regular_chars}|({regular_chars})-z|{reg_classes})+\] { */ %}
+
+
+ <bytes_regex>"[" {
+ PUSH_STATE(bytes_regex_range);
+ printf(" !! entering range\n");
+ return HOOK_O;
+ }
+
+ <bytes_regex_range>"]" {
+ POP_STATE;
+ printf(" !! exiting range\n");
+ return HOOK_C;
+ }
+
+
+
+
+<bytes_regex_range>({regular_chars}|({regular_chars}-{regular_chars})|{reg_classes})+ {
+
+ printf("range: '%s'\n", yytext);
+ return REGEX_RANGE;
+
+ }
+
+ <bytes_regex>"(" { return PAREN_O; }
+
+ <bytes_regex>")" { return PAREN_C; }
+
+ <bytes_regex>"|" { return PIPE; }
+
+ <bytes_regex>"*" { return MUL; }
+ <bytes_regex>"+" { return PLUS; }
+ <bytes_regex>"?" { return QUESTION; }
+
+ <bytes_regex>"{" {
+ PUSH_STATE(bytes_regex_quantifier);
+ return BRACKET_O;
+ }
+
+ <bytes_regex_quantifier>"," { return COMMA; }
+
+ <bytes_regex_quantifier>"}" {
+ POP_STATE;
+ return BRACKET_C;
+ }
+
+
+%{ /* Condition de correspondance */ %}
<condition>"and" { return AND; }
<condition>"or" { return OR; }
@@ -208,10 +512,17 @@
<condition>"/" { return DIV; }
<condition>"%" { return MOD; }
-<condition>"(" { return PAREN_O; }
-<condition>")" { return PAREN_C; }
-<condition>"," { return COMMA; }
+<strings,condition>"(" { return PAREN_O; }
+<strings,condition>")" { return PAREN_C; }
+<strings,condition>"," { return COMMA; }
+
+
+<condition>"[" { return HOOK_O; }
+<condition>"]" { return HOOK_C; }
+
+
<condition>"." { return DOT; }
+<strings>"|" { return PIPE; }
<condition>"none" { return NONE; }
<condition>"any" { return ANY; }
@@ -221,36 +532,51 @@
<condition>"in" { return IN; }
-<strings,condition>$[A-Za-z0-9_]* {
- yylval->sized_cstring.data = yytext + 1;
- yylval->sized_cstring.len = yyleng - 1;
- return IDENTIFIER;
- }
+ <strings,condition>${bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID;
+ }
+
+ <condition>#{bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID_COUNTER;
+ }
+
+ <condition>@{bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID_START;
+ }
+
+ <condition>!{bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID_LENGTH;
+ }
+
+ <condition>~{bytes_id} {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 1;
+ return BYTES_ID_END;
+ }
-<condition>$[A-Za-z_][A-Za-z0-9_]* {
- yylval->sized_cstring.data = yytext + 1;
- yylval->sized_cstring.len = yyleng - 1;
- return BYTES_ID;
- }
-<condition>#[A-Za-z_][A-Za-z0-9_]* {
- yylval->sized_cstring.data = yytext + 1;
- yylval->sized_cstring.len = yyleng - 1;
- return BYTES_ID_COUNTER;
- }
-<condition>[A-Za-z_][A-Za-z0-9_]* {
+
+<strings,condition>[A-Za-z_][A-Za-z0-9_]* {
yylval->sized_cstring.data = yytext;
yylval->sized_cstring.len = yyleng;
return NAME;
}
-<strings>"=" { PUSH_STATE(strval); return ASSIGN; }
+<strings>"=" { PUSH_STATE(bytes_value); return ASSIGN; }
-<strval>\"[^\"\\]+\" {
+<bytes_value>\"[^\"\\]+\" {
POP_STATE;
yylval->sized_cstring.data = yytext + 1;
yylval->sized_cstring.len = yyleng - 2;
@@ -260,43 +586,28 @@
-<strval>"\"" {
+<bytes_value>"\"" {
POP_STATE;
// *built_pattern = g_bytes_pattern_new();
- PUSH_STATE(strval_raw);
- }
-<strval>"{" {
- POP_STATE;
- // *built_pattern = g_bytes_pattern_new();
- PUSH_STATE(strval_hex);
+ PUSH_STATE(bytes_value_raw);
}
-<strval_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return MASKED_STRING; }
+<bytes_value_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return MASKED_STRING; }
-<strval_raw>"\\\"" { }//g_bytes_pattern_append_data(*built_pattern, '"', 0xff); }
-<strval_raw>"\\t" { }//g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); }
-<strval_raw>"\\r" { }//g_bytes_pattern_append_data(*built_pattern, '\r', 0xff); }
-<strval_raw>"\\n" { }//g_bytes_pattern_append_data(*built_pattern, '\n', 0xff); }
-<strval_raw>"\\\\" { }//g_bytes_pattern_append_data(*built_pattern, '\\', 0xff); }
+<bytes_value_raw>"\\\"" { }//g_bytes_pattern_append_data(*built_pattern, '"', 0xff); }
+<bytes_value_raw>"\\t" { }//g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); }
+<bytes_value_raw>"\\r" { }//g_bytes_pattern_append_data(*built_pattern, '\r', 0xff); }
+<bytes_value_raw>"\\n" { }//g_bytes_pattern_append_data(*built_pattern, '\n', 0xff); }
+<bytes_value_raw>"\\\\" { }//g_bytes_pattern_append_data(*built_pattern, '\\', 0xff); }
-<strval_raw>\\x[0-9a-fA-F]{2} {
+<bytes_value_raw>\\x[0-9a-fA-F]{2} {
uint8_t __ch;
__ch = strtol(yytext + 2, NULL, 16);
+ printf("__ch: %hhx\n", __ch);
//g_bytes_pattern_append_data(*built_pattern, __ch, 0xff);
}
-<strval_raw>. { }//g_bytes_pattern_append_data(*built_pattern, *yytext, 0xff); }
-
-<strval_hex>"}" { POP_STATE; /*yylval->pattern = *built_pattern;*/ return MASKED_STRING; }
-
-<strval_hex>[0-9a-fA-F]{2} {
- uint8_t __ch;
- __ch = strtol(yytext, NULL, 16);
- //g_bytes_pattern_append_data(*built_pattern, __ch, 0xff);
- }
-
-<strval_hex>"??" { /*g_bytes_pattern_insert_space(*built_pattern, 1, 1);*/ }
-
+<bytes_value_raw>. { }//g_bytes_pattern_append_data(*built_pattern, *yytext, 0xff); }
@@ -318,7 +629,9 @@
%{ /* Actions par défaut */ %}
-<*>[ \t\n]+ { }
+<*>[ \t]+ { }
+
+<*>[\n] { static int ln = 1; if (0) printf("----------- %%< -------------- %%< ---- %d\n", ln++); }
<*>. {
char *msg;