From 28ef52f37784817c6590cdafc94aa9b356123802 Mon Sep 17 00:00:00 2001 From: Cyrille Bagard <nocbos@gmail.com> Date: Sun, 3 Mar 2024 12:29:53 +0100 Subject: Restore mixed hexadecimal pattern support. --- src/analysis/scan/tokens.l | 253 ++++++++++++++++++++--------------------- tests/analysis/scan/grammar.py | 202 ++++++++++++++++++++++++++++++++ 2 files changed, 327 insertions(+), 128 deletions(-) diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l index ab881c1..e075cee 100644 --- a/src/analysis/scan/tokens.l +++ b/src/analysis/scan/tokens.l @@ -697,171 +697,171 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* %{ /* Définition de motif en hexadécimal */ %} - <bytes_value>"{" { - POP_STATE; - PUSH_STATE(bytes_hex); - } - - <bytes_hex>"}" { POP_STATE; } + <bytes_value>"{" { + POP_STATE; + PUSH_STATE(bytes_hex); + } - <bytes_hex>"[" { - PUSH_STATE(bytes_hex_range); - return HOOK_O; - } + <bytes_hex>"}" { POP_STATE; } - <bytes_hex_range>"-" { return MINUS; } + <bytes_hex>"[" { + PUSH_STATE(bytes_hex_range); + return HOOK_O; + } - <bytes_hex_range>"]" { - POP_STATE; - return HOOK_C; - } + <bytes_hex_range>"-" { return MINUS; } - <bytes_hex>"(" { return PAREN_O; } + <bytes_hex_range>"]" { + POP_STATE; + return HOOK_C; + } - <bytes_hex>")" { return PAREN_C; } + <bytes_hex>"(" { return PAREN_O; } - <bytes_hex>"|" { return PIPE; } + <bytes_hex>")" { return PAREN_C; } - <bytes_hex>"~" { return TILDE; } + <bytes_hex>"|" { return PIPE; } - <bytes_hex>{hbyte}([ ]*{hbyte})* { - bool even; - size_t i; - bin_t byte; - bin_t value; + <bytes_hex>"~" { return TILDE; } - tmp_0->len = 0; + <bytes_hex>{hbyte}([ ]*{hbyte})*[ ]* { + bool even; + size_t i; + bin_t byte; + bin_t value; - even = true; + tmp_0->len = 0; - for (i = 0; i < yyleng; i++) - { - byte = yytext[i]; + even = true; - switch (byte) + for (i = 0; i < yyleng; i++) { - case ' ': - continue; - break; + byte = yytext[i]; - case '0' ... '9': - value = (byte - '0'); - break; + switch (byte) + { + case ' ': + continue; + break; - case 'A' ... 'F': - value = 0xa + (byte - 'A'); - break; + case '0' ... '9': + value = (byte - '0'); + break; - case 'a' ... 'f': - value = 0xa + (byte - 'a'); - break; + case 'A' ... 'F': + value = 0xa + (byte - 'A'); + break; - } + case 'a' ... 'f': + value = 0xa + (byte - 'a'); + break; - if (even) - tmp_0->data[tmp_0->len] = (value << 4); - else - tmp_0->data[tmp_0->len++] |= value; + } - even = !even; + if (even) + tmp_0->data[tmp_0->len] = (value << 4); + else + tmp_0->data[tmp_0->len++] |= value; - } + even = !even; + + } - assert(even); + assert(even); #ifndef NDEBUG - /* Pour rendre plus lisibles les impressions de débogage */ - tmp_0->data[tmp_0->len] = '\0'; + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; #endif - yylval->tmp_cstring = tmp_0; - return HEX_BYTES; - - } + yylval->tmp_cstring = tmp_0; + return HEX_BYTES; - <bytes_hex>[\?]{2}([ ]*[\?]{2})* { - unsigned long long counter; - size_t i; + } - counter = 0; + <bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]* { + unsigned long long counter; + size_t i; - for (i = 0; i < yyleng; i++) - if (yytext[i] == '?') - counter++; + counter = 0; - assert(counter % 2 == 0); + for (i = 0; i < yyleng; i++) + if (yytext[i] == '?') + counter++; - yylval->unsigned_integer = counter / 2; - return FULL_MASK; + assert(counter % 2 == 0); - } + yylval->unsigned_integer = counter / 2; + return FULL_MASK; - <bytes_hex>{mbyte}([ ]*{mbyte})* { - bool even; - size_t i; - bin_t byte; - bin_t value; + } - tmp_0->len = 0; - tmp_1->len = 0; + <bytes_hex>{mbyte}([ ]*{mbyte})*[ ]* { + bool even; + size_t i; + bin_t byte; + bin_t value; - even = true; + tmp_0->len = 0; + tmp_1->len = 0; - for (i = 0; i < yyleng; i++) - { - byte = yytext[i]; + even = true; - switch (byte) + for (i = 0; i < yyleng; i++) { - case ' ': - continue; - break; - - case '?': - even = !even; - continue; - break; - - case '0' ... '9': - value = (byte - '0'); - break; - - case 'A' ... 'F': - value = 0xa + (byte - 'A'); - break; - - case 'a' ... 'f': - value = 0xa + (byte - 'a'); - break; + byte = yytext[i]; + + switch (byte) + { + case ' ': + continue; + break; + + case '?': + even = !even; + continue; + break; + + case '0' ... '9': + value = (byte - '0'); + break; + + case 'A' ... 'F': + value = 0xa + (byte - 'A'); + break; + + case 'a' ... 'f': + value = 0xa + (byte - 'a'); + break; + + } + + if (even) + { + tmp_0->data[tmp_0->len++] = (value << 4); + tmp_1->data[tmp_1->len++] = 0xf0; + } + else + { + tmp_0->data[tmp_0->len++] = value; + tmp_1->data[tmp_1->len++] = 0x0f; + } + + even = !even; } - if (even) - { - tmp_0->data[tmp_0->len++] = (value << 4); - tmp_1->data[tmp_1->len++] = 0xf0; - } - else - { - tmp_0->data[tmp_0->len++] = value; - tmp_1->data[tmp_1->len++] = 0x0f; - } - - even = !even; - - } - #ifndef NDEBUG - /* Pour rendre plus lisibles les impressions de débogage */ - tmp_0->data[tmp_0->len] = '\0'; - tmp_1->data[tmp_1->len] = '\0'; + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; + tmp_1->data[tmp_1->len] = '\0'; #endif - yylval->masked.tmp_values = tmp_0; - yylval->masked.tmp_masks = tmp_1; - return SEMI_MASK; + yylval->masked.tmp_values = tmp_0; + yylval->masked.tmp_masks = tmp_1; + return SEMI_MASK; - } + } %{ /* Définition d'expressions régulières */ %} @@ -1185,17 +1185,14 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* <bytes>\"{str_not_escaped}+ { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]* { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F]/[^?] { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]* { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]*[\?] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]*[\?]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]* { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*\? { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*\?/[^?] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } <bytes_regex>\\ { HANDLE_UNCOMPLETED_TOKEN; } diff --git a/tests/analysis/scan/grammar.py b/tests/analysis/scan/grammar.py index 3a8196a..14f67fa 100644 --- a/tests/analysis/scan/grammar.py +++ b/tests/analysis/scan/grammar.py @@ -276,6 +276,208 @@ rule test { self.check_rule_success(rule, cnt) + def testBackingUpHandlers(self): + """Ensure handlers for backing up removals do not limit the grammar.""" + + cnt = MemoryContent(b'AB12') + + # Uncompleted token in rule definition: '?? ?? ' + + rule = ''' +rule test { + + bytes: + $a = { ?? ?? } + + condition: + #a == 3 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?? ' + + rule = ''' +rule test { + + bytes: + $a = { ?? 4? } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?? ?' + + rule = ''' +rule test { + + bytes: + $a = { ?? ?2 } + + condition: + #a == 2 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?? ' + + rule = ''' +rule test { + + bytes: + $a = { ?? 42 } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + + # Uncompleted token in rule definition: '?1 ?' + + rule = ''' +rule test { + + bytes: + $a = { ?1 ?? } + + condition: + #a == 2 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?1 4? ' + + rule = ''' +rule test { + + bytes: + $a = { ?1 4? } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?1 ?2 ' + + rule = ''' +rule test { + + bytes: + $a = { ?1 ?2 } + + condition: + #a == 2 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?1 4' + + rule = ''' +rule test { + + bytes: + $a = { ?1 42 } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + + # Uncompleted token in rule definition: '41 ' + + rule = ''' +rule test { + + bytes: + $a = { 41 ?? } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '41 4' + + rule = ''' +rule test { + + bytes: + $a = { 41 4? } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '41 ' + + rule = ''' +rule test { + + bytes: + $a = { 41 ?2 } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '41 42 ' + + rule = ''' +rule test { + + bytes: + $a = { 41 42 } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + + + # TODO : test <haystack> matches <regex> -- cgit v0.11.2-87-g4458