From 28ef52f37784817c6590cdafc94aa9b356123802 Mon Sep 17 00:00:00 2001 From: Cyrille Bagard Date: Sun, 3 Mar 2024 12:29:53 +0100 Subject: Restore mixed hexadecimal pattern support. --- src/analysis/scan/tokens.l | 253 ++++++++++++++++++++--------------------- tests/analysis/scan/grammar.py | 202 ++++++++++++++++++++++++++++++++ 2 files changed, 327 insertions(+), 128 deletions(-) diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l index ab881c1..e075cee 100644 --- a/src/analysis/scan/tokens.l +++ b/src/analysis/scan/tokens.l @@ -697,171 +697,171 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* %{ /* Définition de motif en hexadécimal */ %} - "{" { - POP_STATE; - PUSH_STATE(bytes_hex); - } - - "}" { POP_STATE; } + "{" { + POP_STATE; + PUSH_STATE(bytes_hex); + } - "[" { - PUSH_STATE(bytes_hex_range); - return HOOK_O; - } + "}" { POP_STATE; } - "-" { return MINUS; } + "[" { + PUSH_STATE(bytes_hex_range); + return HOOK_O; + } - "]" { - POP_STATE; - return HOOK_C; - } + "-" { return MINUS; } - "(" { return PAREN_O; } + "]" { + POP_STATE; + return HOOK_C; + } - ")" { return PAREN_C; } + "(" { return PAREN_O; } - "|" { return PIPE; } + ")" { return PAREN_C; } - "~" { return TILDE; } + "|" { return PIPE; } - {hbyte}([ ]*{hbyte})* { - bool even; - size_t i; - bin_t byte; - bin_t value; + "~" { return TILDE; } - tmp_0->len = 0; + {hbyte}([ ]*{hbyte})*[ ]* { + bool even; + size_t i; + bin_t byte; + bin_t value; - even = true; + tmp_0->len = 0; - for (i = 0; i < yyleng; i++) - { - byte = yytext[i]; + even = true; - switch (byte) + for (i = 0; i < yyleng; i++) { - case ' ': - continue; - break; + byte = yytext[i]; - case '0' ... '9': - value = (byte - '0'); - break; + switch (byte) + { + case ' ': + continue; + break; - case 'A' ... 'F': - value = 0xa + (byte - 'A'); - break; + case '0' ... '9': + value = (byte - '0'); + break; - case 'a' ... 'f': - value = 0xa + (byte - 'a'); - break; + case 'A' ... 'F': + value = 0xa + (byte - 'A'); + break; - } + case 'a' ... 
'f': + value = 0xa + (byte - 'a'); + break; - if (even) - tmp_0->data[tmp_0->len] = (value << 4); - else - tmp_0->data[tmp_0->len++] |= value; + } - even = !even; + if (even) + tmp_0->data[tmp_0->len] = (value << 4); + else + tmp_0->data[tmp_0->len++] |= value; - } + even = !even; + + } - assert(even); + assert(even); #ifndef NDEBUG - /* Pour rendre plus lisibles les impressions de débogage */ - tmp_0->data[tmp_0->len] = '\0'; + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; #endif - yylval->tmp_cstring = tmp_0; - return HEX_BYTES; - - } + yylval->tmp_cstring = tmp_0; + return HEX_BYTES; - [\?]{2}([ ]*[\?]{2})* { - unsigned long long counter; - size_t i; + } - counter = 0; + [\?]{2}([ ]*[\?]{2})*[ ]* { + unsigned long long counter; + size_t i; - for (i = 0; i < yyleng; i++) - if (yytext[i] == '?') - counter++; + counter = 0; - assert(counter % 2 == 0); + for (i = 0; i < yyleng; i++) + if (yytext[i] == '?') + counter++; - yylval->unsigned_integer = counter / 2; - return FULL_MASK; + assert(counter % 2 == 0); - } + yylval->unsigned_integer = counter / 2; + return FULL_MASK; - {mbyte}([ ]*{mbyte})* { - bool even; - size_t i; - bin_t byte; - bin_t value; + } - tmp_0->len = 0; - tmp_1->len = 0; + {mbyte}([ ]*{mbyte})*[ ]* { + bool even; + size_t i; + bin_t byte; + bin_t value; - even = true; + tmp_0->len = 0; + tmp_1->len = 0; - for (i = 0; i < yyleng; i++) - { - byte = yytext[i]; + even = true; - switch (byte) + for (i = 0; i < yyleng; i++) { - case ' ': - continue; - break; - - case '?': - even = !even; - continue; - break; - - case '0' ... '9': - value = (byte - '0'); - break; - - case 'A' ... 'F': - value = 0xa + (byte - 'A'); - break; - - case 'a' ... 'f': - value = 0xa + (byte - 'a'); - break; + byte = yytext[i]; + + switch (byte) + { + case ' ': + continue; + break; + + case '?': + even = !even; + continue; + break; + + case '0' ... '9': + value = (byte - '0'); + break; + + case 'A' ... 'F': + value = 0xa + (byte - 'A'); + break; + + case 'a' ... 'f': + value = 0xa + (byte - 'a'); + break; + + } + + if (even) + { + tmp_0->data[tmp_0->len++] = (value << 4); + tmp_1->data[tmp_1->len++] = 0xf0; + } + else + { + tmp_0->data[tmp_0->len++] = value; + tmp_1->data[tmp_1->len++] = 0x0f; + } + + even = !even; } - if (even) - { - tmp_0->data[tmp_0->len++] = (value << 4); - tmp_1->data[tmp_1->len++] = 0xf0; - } - else - { - tmp_0->data[tmp_0->len++] = value; - tmp_1->data[tmp_1->len++] = 0x0f; - } - - even = !even; - - } - #ifndef NDEBUG - /* Pour rendre plus lisibles les impressions de débogage */ - tmp_0->data[tmp_0->len] = '\0'; - tmp_1->data[tmp_1->len] = '\0'; + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; + tmp_1->data[tmp_1->len] = '\0'; #endif - yylval->masked.tmp_values = tmp_0; - yylval->masked.tmp_masks = tmp_1; - return SEMI_MASK; + yylval->masked.tmp_values = tmp_0; + yylval->masked.tmp_masks = tmp_1; + return SEMI_MASK; - } + } %{ /* Définition d'expressions régulières */ %} @@ -1185,17 +1185,14 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* \"{str_not_escaped}+ { HANDLE_UNCOMPLETED_TOKEN; } -{hbyte}([ ]*{hbyte})*[ ]* { HANDLE_UNCOMPLETED_TOKEN; } -{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F]/[^?] { HANDLE_UNCOMPLETED_TOKEN; } -[\?]{2}([ ]*[\?]{2})*[ ]* { HANDLE_UNCOMPLETED_TOKEN; } -[\?]{2}([ ]*[\?]{2})*[ ]*[\?] 
{ HANDLE_UNCOMPLETED_TOKEN; } +[\?]{2}([ ]*[\?]{2})*[ ]*[\?]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } -{mbyte}([ ]*{mbyte})*[ ]* { HANDLE_UNCOMPLETED_TOKEN; } -{mbyte}([ ]*{mbyte})*[ ]*\? { HANDLE_UNCOMPLETED_TOKEN; } -{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +{mbyte}([ ]*{mbyte})*[ ]*\?/[^?] { HANDLE_UNCOMPLETED_TOKEN; } +{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } \\ { HANDLE_UNCOMPLETED_TOKEN; } diff --git a/tests/analysis/scan/grammar.py b/tests/analysis/scan/grammar.py index 3a8196a..14f67fa 100644 --- a/tests/analysis/scan/grammar.py +++ b/tests/analysis/scan/grammar.py @@ -276,6 +276,208 @@ rule test { self.check_rule_success(rule, cnt) + def testBackingUpHandlers(self): + """Ensure handlers for backing up removals do not limit the grammar.""" + + cnt = MemoryContent(b'AB12') + + # Uncompleted token in rule definition: '?? ?? ' + + rule = ''' +rule test { + + bytes: + $a = { ?? ?? } + + condition: + #a == 3 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?? ' + + rule = ''' +rule test { + + bytes: + $a = { ?? 4? } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?? ?' + + rule = ''' +rule test { + + bytes: + $a = { ?? ?2 } + + condition: + #a == 2 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?? ' + + rule = ''' +rule test { + + bytes: + $a = { ?? 42 } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + + # Uncompleted token in rule definition: '?1 ?' + + rule = ''' +rule test { + + bytes: + $a = { ?1 ?? } + + condition: + #a == 2 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?1 4? ' + + rule = ''' +rule test { + + bytes: + $a = { ?1 4? } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?1 ?2 ' + + rule = ''' +rule test { + + bytes: + $a = { ?1 ?2 } + + condition: + #a == 2 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '?1 4' + + rule = ''' +rule test { + + bytes: + $a = { ?1 42 } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + + # Uncompleted token in rule definition: '41 ' + + rule = ''' +rule test { + + bytes: + $a = { 41 ?? } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '41 4' + + rule = ''' +rule test { + + bytes: + $a = { 41 4? } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '41 ' + + rule = ''' +rule test { + + bytes: + $a = { 41 ?2 } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + # Uncompleted token in rule definition: '41 42 ' + + rule = ''' +rule test { + + bytes: + $a = { 41 42 } + + condition: + #a == 1 + +} +''' + + self.check_rule_success(rule, content=cnt) + + + + # TODO : test matches -- cgit v0.11.2-87-g4458
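
Note on the restored syntax, for readers of this patch: inside a "{ ... }" bytes pattern, each '?' stands for an unknown nibble, so a mixed byte such as "?1" only constrains the low nibble and "4?" only the high nibble. The sketch below is not part of the patch and does not reuse any of the scanner's internal structures; parse_semi_mask and count_matches are made-up helper names, and the per-byte value/mask matching semantics ((byte & mask) == value) is an assumption read off the SEMI_MASK rule above. Under that assumption it reproduces the counts that the new grammar.py cases expect on the b'AB12' content.

# Illustrative sketch only (assumed semantics, hypothetical helper names).
# Decode a mixed hexadecimal pattern such as "?1 4?" into per-byte
# (value, mask) pairs, then count its occurrences in a buffer.

def parse_semi_mask(pattern: str):
    """Turn a pattern like '?1 4? 42' into a list of (value, mask) byte pairs."""
    nibbles = [c for c in pattern if c != ' ']
    assert len(nibbles) % 2 == 0, "patterns are byte-aligned: two nibbles per byte"
    pairs = []
    for high, low in zip(nibbles[0::2], nibbles[1::2]):
        value = mask = 0
        if high != '?':          # known high nibble
            value |= int(high, 16) << 4
            mask |= 0xf0
        if low != '?':           # known low nibble
            value |= int(low, 16)
            mask |= 0x0f
        pairs.append((value, mask))
    return pairs

def count_matches(content: bytes, pattern: str) -> int:
    """Count offsets where every pattern byte satisfies (byte & mask) == value."""
    pairs = parse_semi_mask(pattern)
    hits = 0
    for start in range(len(content) - len(pairs) + 1):
        if all((content[start + i] & mask) == value
               for i, (value, mask) in enumerate(pairs)):
            hits += 1
    return hits

# b'AB12' is the content used by the new test cases (bytes 0x41 0x42 0x31 0x32);
# the expected counts below mirror the '#a == ...' conditions added in
# tests/analysis/scan/grammar.py.
assert count_matches(b'AB12', '?? ??') == 3
assert count_matches(b'AB12', '?1 4?') == 1
assert count_matches(b'AB12', '41 ?2') == 1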