summary | refs | log | tree | commit | diff
path: root/src/analysis/scan/tokens.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/analysis/scan/tokens.l')
-rw-r--r-- src/analysis/scan/tokens.l | 334
1 files changed, 296 insertions, 38 deletions
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l
index 18594c4..594d4d9 100644
--- a/src/analysis/scan/tokens.l
+++ b/src/analysis/scan/tokens.l
@@ -32,7 +32,7 @@
* *
******************************************************************************/
-static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out)
+static void rost_unescape_string_bytes(const char *src, size_t len, sized_string_t *out)
{
size_t i; /* Boucle de parcours */
bin_t byte; /* Octet à analyser */
@@ -52,30 +52,177 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out
switch (next)
{
- case '\a':
+ case 'a':
+ out->data[out->len++] = '\a';
+ break;
+
+ case 'b':
+ out->data[out->len++] = '\b';
+ break;
+
+ case 't':
+ out->data[out->len++] = '\t';
+ break;
+
+ case 'n':
+ out->data[out->len++] = '\n';
+ break;
+
+ case 'v':
+ out->data[out->len++] = '\v';
+ break;
+
+ case 'f':
+ out->data[out->len++] = '\f';
+ break;
+
+ case 'r':
+ out->data[out->len++] = '\r';
+ break;
+
+ case 'e':
+ out->data[out->len++] = '\e';
+ break;
+
+ case '"':
+ out->data[out->len++] = '\"';
+ break;
+
+ case '\\':
out->data[out->len++] = '\\';
break;
- case '\t':
+ case 'x':
+
+ next = src[i + 2];
+
+ switch (next)
+ {
+ case '0' ... '9':
+ out->data[out->len] = (next - '0');
+ break;
+
+ case 'A' ... 'F':
+ out->data[out->len] = 0xa + (next - 'A');
+ break;
+
+ case 'a' ... 'f':
+ out->data[out->len] = 0xa + (next - 'a');
+ break;
+
+ }
+
+ out->data[out->len] <<= 4;
+
+ next = src[i + 3];
+
+ switch (next)
+ {
+ case '0' ... '9':
+ out->data[out->len] |= (next - '0');
+ break;
+
+ case 'A' ... 'F':
+ out->data[out->len] |= 0xa + (next - 'A');
+ break;
+
+ case 'a' ... 'f':
+ out->data[out->len] |= 0xa + (next - 'a');
+ break;
+
+ }
+
+ out->len++;
+
+ i += 2;
+ break;
+
+ }
+
+ i++;
+ break;
+
+ default:
+ out->data[out->len++] = byte;
+ break;
+
+ }
+
+ }
+
+}
+
+
+/******************************************************************************
+* *
+* Paramètres : src = liste d'octets à traiter. *
+* len = taille de cette liste. *
+* out = série d'octets bruts obtenue. [OUT] *
+* *
+* Description : Transcrit une série d'octets en en remplaçant certains. *
+* *
+* Retour : - *
+* *
+* Remarques : - *
+* *
+******************************************************************************/
+
+static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out)
+{
+ size_t i; /* Boucle de parcours */
+ bin_t byte; /* Octet à analyser */
+ bin_t next; /* Octet suivant */
+
+ out->len = 0;
+
+ for (i = 0; i < len; i++)
+ {
+ byte = src[i];
+
+ switch (byte)
+ {
+ case '\\':
+
+ next = src[i + 1];
+
+ switch (next)
+ {
+ case 'a':
+ out->data[out->len++] = '\a';
+ break;
+
+ case 'b':
+ out->data[out->len++] = '\b';
+ break;
+
+ case 't':
out->data[out->len++] = '\t';
break;
- case '\n':
+ case 'n':
out->data[out->len++] = '\n';
break;
- case '\v':
+ case 'v':
out->data[out->len++] = '\v';
break;
- case '\f':
+ case 'f':
out->data[out->len++] = '\f';
break;
- case '\r':
+ case 'r':
out->data[out->len++] = '\r';
break;
+ case 'e':
+ out->data[out->len++] = '\e';
+ break;
+
+ case '"':
+ out->data[out->len++] = '\"';
+ break;
+
case '\\':
out->data[out->len++] = '\\';
break;
@@ -91,11 +238,11 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out
break;
case 'A' ... 'F':
- out->data[out->len] = 0x10 + (next - 'A');
+ out->data[out->len] = 0xa + (next - 'A');
break;
case 'a' ... 'f':
- out->data[out->len] = 0x10 + (next - 'a');
+ out->data[out->len] = 0xa + (next - 'a');
break;
}
@@ -111,11 +258,11 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out
break;
case 'A' ... 'F':
- out->data[out->len] |= 0x10 + (next - 'A');
+ out->data[out->len] |= 0xa + (next - 'A');
break;
case 'a' ... 'f':
- out->data[out->len] |= 0x10 + (next - 'a');
+ out->data[out->len] |= 0xa + (next - 'a');
break;
}
@@ -175,7 +322,7 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out
%option yylineno
%option never-interactive
-%x include_path
+%x inc_path
%x rule_intro
%x raw_block
@@ -200,8 +347,12 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out
%x comment
+str_not_escaped [^\"\\]
+str_escaped \\a|\\b|\\t|\\n|\\v|\\f|\\r|\\e|\\\"|\\\\|\\x{hbyte}
+str_mixed ({str_not_escaped}|{str_escaped})
hbyte [0-9a-fA-F]{2}
+mbyte (\?[0-9a-fA-F]|[0-9a-fA-F]\?)
reg_allowed [^^$.|/{}()\[\]*+?\\]
reg_allowed_escaped \\^|\\$|\\\.|\\\||\\\/|\\\{|\\\}|\\\(|\\\)|\\\[|\\\]|\\\*|\\\+|\\\?|\\\\
@@ -219,15 +370,35 @@ bytes_id [A-Za-z_][A-Za-z0-9_]*
%%
+"include" { PUSH_STATE(inc_path); return INCLUDE; }
+<inc_path>\"{str_not_escaped}+\" {
+ POP_STATE; /* Quoted text without any backslash. POP_STATE is defined outside this view; presumably it restores the start condition saved by PUSH_STATE. */
-"include" { PUSH_STATE(include_path); return INCLUDE; }
+ yylval->sized_cstring.data = yytext + 1; /* Points into yytext just past the opening quote; nothing is copied */
+ yylval->sized_cstring.len = yyleng - 2; /* Length without the two surrounding quotes */
+
+ return PLAIN_TEXT;
+ }
+
+<inc_path>\"{str_mixed}+\" {
+ POP_STATE; /* Reached only when an escape is present: escape-free strings tie with the PLAIN_TEXT rule above and flex keeps the earlier rule. */
+
+ rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0); /* Decode the text between the quotes into tmp_0 */
+
+#ifndef NDEBUG
+ /* Terminate the buffer so debug printouts are easier to read. NOTE(review): assumes tmp_0->data has room for one extra byte; capacity is not visible here. */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+
+ return ESCAPED_TEXT;
+ }
+
+
+%{ /* Définition locale d'une règle */ %}
-<include_path>"\"" {
- POP_STATE;
- *used = 0;
- PUSH_STATE(strlit);
- }
"rule" { PUSH_STATE(rule_intro); return RAW_RULE; }
@@ -295,6 +466,41 @@ bytes_id [A-Za-z_][A-Za-z0-9_]*
}
+%{ /* Shared definitions for the "bytes:" section */ %}
+
+<strings>"fullword" { return FULLWORD; }
+<strings>"nocase" { return NOCASE; }
+<strings>"private" { return PRIVATE; }
+
+<strings>"=" { PUSH_STATE(bytes_value); return ASSIGN; } /* The bytes_value rules then scan what follows the assignment */
+
+
+%{ /* Pattern given as plain text: a double-quoted string, with or without escapes */ %}
+
+<bytes_value>\"{str_not_escaped}+\" {
+ POP_STATE; /* Quoted text without any backslash. POP_STATE is defined outside this view; presumably it restores the start condition saved by PUSH_STATE. */
+
+ yylval->sized_cstring.data = yytext + 1; /* Points into yytext just past the opening quote; nothing is copied */
+ yylval->sized_cstring.len = yyleng - 2; /* Length without the two surrounding quotes */
+
+ return PLAIN_TEXT;
+ }
+
+<bytes_value>\"{str_mixed}+\" {
+ POP_STATE; /* Reached only when an escape is present: escape-free strings tie with the PLAIN_TEXT rule above and flex keeps the earlier rule. */
+
+ rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0); /* Decode the text between the quotes into tmp_0 */
+
+#ifndef NDEBUG
+ /* Terminate the buffer so debug printouts are easier to read. NOTE(review): assumes tmp_0->data has room for one extra byte; capacity is not visible here. */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+
+ return ESCAPED_TEXT;
+ }
+
%{ /* Définition de motif en hexadécimal */ %}
@@ -350,26 +556,21 @@ bytes_id [A-Za-z_][A-Za-z0-9_]*
break;
case 'A' ... 'F':
- value = 0x10 + (byte - 'A');
+ value = 0xa + (byte - 'A');
break;
case 'a' ... 'f':
- value = 0x10 + (byte - 'a');
+ value = 0xa + (byte - 'a');
break;
}
if (even)
- {
tmp_0->data[tmp_0->len] = (value << 4);
- even = false;
- }
-
else
- {
tmp_0->data[tmp_0->len++] |= value;
- even = true;
- }
+
+ even = !even;
}
@@ -402,6 +603,73 @@ bytes_id [A-Za-z_][A-Za-z0-9_]*
}
+ <bytes_hex>{mbyte}([ ]*{mbyte})* { /* Half-masked hex bytes: each mbyte pairs one hex digit with one wildcard and yields one value byte in tmp_0 plus one mask byte in tmp_1 */
+ bool even; /* True when the next character sits in the high nibble of its byte */
+ size_t i; /* Index into yytext */
+ bin_t byte; /* Current input character */
+ bin_t value; /* Numeric value of the current hex digit */
+
+ tmp_0->len = 0;
+ tmp_1->len = 0;
+
+ even = true;
+
+ for (i = 0; i < yyleng; i++)
+ {
+ byte = yytext[i];
+
+ switch (byte) /* The pattern only admits spaces, wildcards and hex digits, so value is always set when the switch falls through */
+ {
+ case ' ':
+ continue; /* Separators carry no data and do not change the nibble position */
+ break;
+
+ case '?':
+ even = !even; /* Wildcard nibble: nothing is stored for it, only the high/low position advances */
+ continue;
+ break;
+
+ case '0' ... '9': /* Case ranges are a GNU C extension */
+ value = (byte - '0');
+ break;
+
+ case 'A' ... 'F':
+ value = 0xa + (byte - 'A');
+ break;
+
+ case 'a' ... 'f':
+ value = 0xa + (byte - 'a');
+ break;
+
+ }
+
+ if (even)
+ {
+ tmp_0->data[tmp_0->len++] = (value << 4); /* Known digit is the high nibble */
+ tmp_1->data[tmp_1->len++] = 0xf0; /* Mask bits set over the known high nibble */
+ }
+ else
+ {
+ tmp_0->data[tmp_0->len++] = value; /* Known digit is the low nibble */
+ tmp_1->data[tmp_1->len++] = 0x0f; /* Mask bits set over the known low nibble */
+ }
+
+ even = !even;
+
+ }
+
+#ifndef NDEBUG
+ /* Terminate both buffers so debug printouts are easier to read. NOTE(review): assumes each data buffer has room for one extra byte; capacity is not visible here. */
+ tmp_0->data[tmp_0->len] = '\0';
+ tmp_1->data[tmp_1->len] = '\0';
+#endif
+
+ yylval->masked.tmp_values = tmp_0; /* One value byte per mbyte */
+ yylval->masked.tmp_masks = tmp_1; /* One mask byte per mbyte, same order as the values */
+ return SEMI_MASK;
+
+ }
+
%{ /* Définition d'expressions régulières */ %}
@@ -573,16 +841,6 @@ bytes_id [A-Za-z_][A-Za-z0-9_]*
return NAME;
}
-<strings>"=" { PUSH_STATE(bytes_value); return ASSIGN; }
-
-
-<bytes_value>\"[^\"\\]+\" {
- POP_STATE;
- yylval->sized_cstring.data = yytext + 1;
- yylval->sized_cstring.len = yyleng - 2;
- return PLAIN_STRING;
- }
-
@@ -592,7 +850,7 @@ bytes_id [A-Za-z_][A-Za-z0-9_]*
PUSH_STATE(bytes_value_raw);
}
-<bytes_value_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return MASKED_STRING; }
+<bytes_value_raw>"\"" { POP_STATE; /*yylval->pattern = *built_pattern*/; return 11111/*MASKED_STRING*/; }
<bytes_value_raw>"\\\"" { }//g_bytes_pattern_append_data(*built_pattern, '"', 0xff); }
<bytes_value_raw>"\\t" { }//g_bytes_pattern_append_data(*built_pattern, '\t', 0xff); }