diff options
| author | Cyrille Bagard <nocbos@gmail.com> | 2024-03-03 11:29:53 (GMT) | 
|---|---|---|
| committer | Cyrille Bagard <nocbos@gmail.com> | 2024-03-03 11:29:53 (GMT) | 
| commit | 28ef52f37784817c6590cdafc94aa9b356123802 (patch) | |
| tree | c29c578524efff58ae4c9010098f2636c4a4ef18 | |
| parent | 35971d2bea4733d2f7631c22c61e22d07f7478af (diff) | |
Restore mixed hexadecimal pattern support.

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/analysis/scan/tokens.l | 253 |
| -rw-r--r-- | tests/analysis/scan/grammar.py | 202 |

2 files changed, 327 insertions, 128 deletions
| diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l index ab881c1..e075cee 100644 --- a/src/analysis/scan/tokens.l +++ b/src/analysis/scan/tokens.l @@ -697,171 +697,171 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*  %{ /* Définition de motif en hexadécimal */ %} -                   <bytes_value>"{" { -                                        POP_STATE; -                                        PUSH_STATE(bytes_hex); -                                    } - -                     <bytes_hex>"}" { POP_STATE; } +                       <bytes_value>"{" { +                                            POP_STATE; +                                            PUSH_STATE(bytes_hex); +                                        } -                     <bytes_hex>"[" { -                                        PUSH_STATE(bytes_hex_range); -                                        return HOOK_O; -                                    } +                         <bytes_hex>"}" { POP_STATE; } -               <bytes_hex_range>"-" { return MINUS; } +                         <bytes_hex>"[" { +                                            PUSH_STATE(bytes_hex_range); +                                            return HOOK_O; +                                        } -               <bytes_hex_range>"]" { -                                        POP_STATE; -                                        return HOOK_C; -                                    } +                   <bytes_hex_range>"-" { return MINUS; } -                     <bytes_hex>"(" { return PAREN_O; } +                   <bytes_hex_range>"]" { +                                            POP_STATE; +                                            return HOOK_C; +                                        } -                     <bytes_hex>")" { return PAREN_C; } +                         <bytes_hex>"(" { return PAREN_O; } -                     <bytes_hex>"|" { return PIPE; } +                         <bytes_hex>")" { return 
PAREN_C; } -                     <bytes_hex>"~" { return TILDE; } +                         <bytes_hex>"|" { return PIPE; } -   <bytes_hex>{hbyte}([ ]*{hbyte})* { -                                        bool even; -                                        size_t i; -                                        bin_t byte; -                                        bin_t value; +                         <bytes_hex>"~" { return TILDE; } -                                        tmp_0->len = 0; +   <bytes_hex>{hbyte}([ ]*{hbyte})*[ ]* { +                                            bool even; +                                            size_t i; +                                            bin_t byte; +                                            bin_t value; -                                        even = true; +                                            tmp_0->len = 0; -                                        for (i = 0; i < yyleng; i++) -                                        { -                                            byte = yytext[i]; +                                            even = true; -                                            switch (byte) +                                            for (i = 0; i < yyleng; i++)                                              { -                                                case ' ': -                                                    continue; -                                                    break; +                                                byte = yytext[i]; -                                                case '0' ... 
'9': -                                                    value = (byte - '0'); -                                                    break; +                                                switch (byte) +                                                { +                                                    case ' ': +                                                        continue; +                                                        break; -                                                case 'A' ... 'F': -                                                    value = 0xa + (byte - 'A'); -                                                    break; +                                                    case '0' ... '9': +                                                        value = (byte - '0'); +                                                        break; -                                                case 'a' ... 'f': -                                                    value = 0xa + (byte - 'a'); -                                                    break; +                                                    case 'A' ... 'F': +                                                        value = 0xa + (byte - 'A'); +                                                        break; -                                            } +                                                    case 'a' ... 
'f': +                                                        value = 0xa + (byte - 'a'); +                                                        break; -                                            if (even) -                                                tmp_0->data[tmp_0->len] = (value << 4); -                                            else -                                                tmp_0->data[tmp_0->len++] |= value; +                                                } -                                            even = !even; +                                                if (even) +                                                    tmp_0->data[tmp_0->len] = (value << 4); +                                                else +                                                    tmp_0->data[tmp_0->len++] |= value; -                                        } +                                                even = !even; + +                                            } -                                        assert(even); +                                            assert(even);  #ifndef NDEBUG -                                        /* Pour rendre plus lisibles les impressions de débogage */ -                                        tmp_0->data[tmp_0->len] = '\0'; +                                            /* Pour rendre plus lisibles les impressions de débogage */ +                                            tmp_0->data[tmp_0->len] = '\0';  #endif -                                        yylval->tmp_cstring = tmp_0; -                                        return HEX_BYTES; - -                                    } +                                            yylval->tmp_cstring = tmp_0; +                                            return HEX_BYTES; -   <bytes_hex>[\?]{2}([ ]*[\?]{2})* { -                                        unsigned long long counter; -                                        size_t i; +                                        } -   
                                     counter = 0; +   <bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]* { +                                            unsigned long long counter; +                                            size_t i; -                                        for (i = 0; i < yyleng; i++) -                                            if (yytext[i] == '?') -                                                counter++; +                                            counter = 0; -                                        assert(counter % 2 == 0); +                                            for (i = 0; i < yyleng; i++) +                                                if (yytext[i] == '?') +                                                    counter++; -                                        yylval->unsigned_integer = counter / 2; -                                        return FULL_MASK; +                                            assert(counter % 2 == 0); -                                    } +                                            yylval->unsigned_integer = counter / 2; +                                            return FULL_MASK; -   <bytes_hex>{mbyte}([ ]*{mbyte})* { -                                        bool even; -                                        size_t i; -                                        bin_t byte; -                                        bin_t value; +                                        } -                                        tmp_0->len = 0; -                                        tmp_1->len = 0; +   <bytes_hex>{mbyte}([ ]*{mbyte})*[ ]* { +                                            bool even; +                                            size_t i; +                                            bin_t byte; +                                            bin_t value; -                                        even = true; +                                            tmp_0->len = 0; +                                            tmp_1->len = 
0; -                                        for (i = 0; i < yyleng; i++) -                                        { -                                            byte = yytext[i]; +                                            even = true; -                                            switch (byte) +                                            for (i = 0; i < yyleng; i++)                                              { -                                                case ' ': -                                                    continue; -                                                    break; - -                                                case '?': -                                                    even = !even; -                                                    continue; -                                                    break; - -                                                case '0' ... '9': -                                                    value = (byte - '0'); -                                                    break; - -                                                case 'A' ... 'F': -                                                    value = 0xa + (byte - 'A'); -                                                    break; - -                                                case 'a' ... 
'f': -                                                    value = 0xa + (byte - 'a'); -                                                    break; +                                                byte = yytext[i]; + +                                                switch (byte) +                                                { +                                                    case ' ': +                                                        continue; +                                                        break; + +                                                    case '?': +                                                        even = !even; +                                                        continue; +                                                        break; + +                                                    case '0' ... '9': +                                                        value = (byte - '0'); +                                                        break; + +                                                    case 'A' ... 'F': +                                                        value = 0xa + (byte - 'A'); +                                                        break; + +                                                    case 'a' ... 
'f': +                                                        value = 0xa + (byte - 'a'); +                                                        break; + +                                                } + +                                                if (even) +                                                { +                                                    tmp_0->data[tmp_0->len++] = (value << 4); +                                                    tmp_1->data[tmp_1->len++] = 0xf0; +                                                } +                                                else +                                                { +                                                    tmp_0->data[tmp_0->len++] = value; +                                                    tmp_1->data[tmp_1->len++] = 0x0f; +                                                } + +                                                even = !even;                                              } -                                            if (even) -                                            { -                                                tmp_0->data[tmp_0->len++] = (value << 4); -                                                tmp_1->data[tmp_1->len++] = 0xf0; -                                            } -                                            else -                                            { -                                                tmp_0->data[tmp_0->len++] = value; -                                                tmp_1->data[tmp_1->len++] = 0x0f; -                                            } - -                                            even = !even; - -                                        } -  #ifndef NDEBUG -                                        /* Pour rendre plus lisibles les impressions de débogage */ -                                        tmp_0->data[tmp_0->len] = '\0'; -                                        tmp_1->data[tmp_1->len] = '\0'; 
+                                            /* Pour rendre plus lisibles les impressions de débogage */ +                                            tmp_0->data[tmp_0->len] = '\0'; +                                            tmp_1->data[tmp_1->len] = '\0';  #endif -                                        yylval->masked.tmp_values = tmp_0; -                                        yylval->masked.tmp_masks = tmp_1; -                                        return SEMI_MASK; +                                            yylval->masked.tmp_values = tmp_0; +                                            yylval->masked.tmp_masks = tmp_1; +                                            return SEMI_MASK; -                                    } +                                        }  %{ /* Définition d'expressions régulières */ %} @@ -1185,17 +1185,14 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*  <bytes>\"{str_not_escaped}+ { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]* { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_hex>{hbyte}([ ]*{hbyte})*[ ]*[0-9a-fA-F]/[^?] { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]* { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]*[\?] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_hex>[\?]{2}([ ]*[\?]{2})*[ ]*[\?]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]* { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*\? { HANDLE_UNCOMPLETED_TOKEN; } -<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; } +<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*\?/[^?] 
{ HANDLE_UNCOMPLETED_TOKEN; } +<bytes_hex>{mbyte}([ ]*{mbyte})*[ ]*[0-9a-fA-F]/[^0-9a-fA-F] { HANDLE_UNCOMPLETED_TOKEN; }  <bytes_regex>\\ { HANDLE_UNCOMPLETED_TOKEN; } diff --git a/tests/analysis/scan/grammar.py b/tests/analysis/scan/grammar.py index 3a8196a..14f67fa 100644 --- a/tests/analysis/scan/grammar.py +++ b/tests/analysis/scan/grammar.py @@ -276,6 +276,208 @@ rule test {          self.check_rule_success(rule, cnt) +    def testBackingUpHandlers(self): +        """Ensure handlers for backing up removals do not limit the grammar.""" + +        cnt = MemoryContent(b'AB12') + +        # Uncompleted token in rule definition: '?? ?? ' + +        rule = ''' +rule test { + +   bytes: +      $a = { ?? ?? } + +   condition: +      #a == 3 + +} +''' + +        self.check_rule_success(rule, content=cnt) + +        # Uncompleted token in rule definition: '?? ' + +        rule = ''' +rule test { + +   bytes: +      $a = { ?? 4? } + +   condition: +      #a == 1 + +} +''' + +        self.check_rule_success(rule, content=cnt) + +        # Uncompleted token in rule definition: '?? ?' + +        rule = ''' +rule test { + +   bytes: +      $a = { ?? ?2 } + +   condition: +      #a == 2 + +} +''' + +        self.check_rule_success(rule, content=cnt) + +        # Uncompleted token in rule definition: '?? ' + +        rule = ''' +rule test { + +   bytes: +      $a = { ?? 42 } + +   condition: +      #a == 1 + +} +''' + +        self.check_rule_success(rule, content=cnt) + + +        # Uncompleted token in rule definition: '?1 ?' + +        rule = ''' +rule test { + +   bytes: +      $a = { ?1 ?? } + +   condition: +      #a == 2 + +} +''' + +        self.check_rule_success(rule, content=cnt) + +        # Uncompleted token in rule definition: '?1 4? ' + +        rule = ''' +rule test { + +   bytes: +      $a = { ?1 4? 
} + +   condition: +      #a == 1 + +} +''' + +        self.check_rule_success(rule, content=cnt) + +        # Uncompleted token in rule definition: '?1 ?2 ' + +        rule = ''' +rule test { + +   bytes: +      $a = { ?1 ?2 } + +   condition: +      #a == 2 + +} +''' + +        self.check_rule_success(rule, content=cnt) + +        # Uncompleted token in rule definition: '?1 4' + +        rule = ''' +rule test { + +   bytes: +      $a = { ?1 42 } + +   condition: +      #a == 1 + +} +''' + +        self.check_rule_success(rule, content=cnt) + + +        # Uncompleted token in rule definition: '41 ' + +        rule = ''' +rule test { + +   bytes: +      $a = { 41 ?? } + +   condition: +      #a == 1 + +} +''' + +        self.check_rule_success(rule, content=cnt) + +        # Uncompleted token in rule definition: '41 4' + +        rule = ''' +rule test { + +   bytes: +      $a = { 41 4? } + +   condition: +      #a == 1 + +} +''' + +        self.check_rule_success(rule, content=cnt) + +        # Uncompleted token in rule definition: '41 ' + +        rule = ''' +rule test { + +   bytes: +      $a = { 41 ?2 } + +   condition: +      #a == 1 + +} +''' + +        self.check_rule_success(rule, content=cnt) + +        # Uncompleted token in rule definition: '41 42 ' + +        rule = ''' +rule test { + +   bytes: +      $a = { 41 42 } + +   condition: +      #a == 1 + +} +''' + +        self.check_rule_success(rule, content=cnt) + + + +  # TODO : test     <haystack> matches <regex> | 
