From aae46fab1f41df0cce9da9fc3c17eea776e487b1 Mon Sep 17 00:00:00 2001 From: Cyrille Bagard Date: Wed, 25 Oct 2023 00:13:29 +0200 Subject: Handle big allocations for strings in conditions with regular expressions. --- src/analysis/scan/grammar.y | 43 +++++++++++++++--------------- src/analysis/scan/tokens.l | 59 ++++++++++++------------------------------ tests/analysis/scan/fuzzing.py | 15 +++++++++++ 3 files changed, 53 insertions(+), 64 deletions(-) diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y index 02e5973..f31a5d1 100644 --- a/src/analysis/scan/grammar.y +++ b/src/analysis/scan/grammar.y @@ -6,10 +6,10 @@ /* Affiche un message d'erreur suite à l'analyse en échec. */ -static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *, sized_string_t *, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *); +static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *, sized_string_t *, void/*GBytesPattern*/ **, char *); #define raise_error(msg) \ - yyerror(scanner, yyscanner, built_rule, tmp_0, tmp_1, NULL, buf, allocated, used, msg) + yyerror(scanner, yyscanner, built_rule, tmp_0, tmp_1, NULL, msg) %} @@ -103,13 +103,13 @@ typedef void *yyscan_t; %define api.pure full -%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used } -%lex-param { yyscan_t yyscanner } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used } +%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void /*GBytesPattern*/ **built_pattern } +%lex-param { yyscan_t yyscanner } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void/*GBytesPattern*/ **built_pattern } %code 
provides { #define YY_DECL \ - int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used) + int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern) YY_DECL; @@ -172,7 +172,6 @@ YY_DECL; %token FALSE_ "false" %token SIGNED_INTEGER %token UNSIGNED_INTEGER -%token STRING %token KB MB GB @@ -244,7 +243,6 @@ YY_DECL; %type SIGNED_INTEGER %type UNSIGNED_INTEGER -%type STRING %type rule_flags %type rule_flag @@ -1033,11 +1031,23 @@ YY_DECL; __converted = $1 * 1073741824; $$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &__converted); } - | STRING + | PLAIN_TEXT { $$ = g_scan_literal_expression_new(LVT_STRING, &$1); } - | STRING "[" cexpression "]" + | PLAIN_TEXT "[" cexpression "]" + { + GScanExpression *__src; + __src = g_scan_literal_expression_new(LVT_STRING, &$1); + $$ = g_scan_set_item_new(__src, $3); + g_object_unref(G_OBJECT(__src)); + g_object_unref(G_OBJECT($3)); + } + | ESCAPED_TEXT + { + $$ = g_scan_literal_expression_new(LVT_STRING, &$1); + } + | ESCAPED_TEXT "[" cexpression "]" { GScanExpression *__src; __src = g_scan_literal_expression_new(LVT_STRING, &$1); @@ -1735,7 +1745,7 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio * * ******************************************************************************/ -static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg) +static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char *msg) { printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg); @@ -1765,9 +1775,6 @@ bool 
process_rules_definitions(GContentScanner *scanner, const char *text, size_ sized_string_t tmp_0; /* Zone tampon #1 */ sized_string_t tmp_1; /* Zone tampon #2 */ void /*GBytesPattern*/ *built_pattern; /* Motif en construction */ - char *buf; /* Zone de travail temporaire */ - size_t allocated; /* Taille de mémoire allouée */ - size_t used; /* Quantité utilisée */ yyscan_t lexstate; /* Gestion d'analyse lexicale */ YY_BUFFER_STATE state; /* Contexte d'analyse */ int status; /* Bilan d'une analyse */ @@ -1784,17 +1791,11 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_ built_pattern = NULL; - allocated = 256; - used = 0; - - buf = malloc(allocated * sizeof(char)); - buf[0] = '\0'; - rost_lex_init(&lexstate); state = rost__scan_bytes(text, length, lexstate); - status = yyparse(scanner, lexstate, &built_rule, &tmp_0, &tmp_1, &built_pattern, &buf, &allocated, &used); + status = yyparse(scanner, lexstate, &built_rule, &tmp_0, &tmp_1, &built_pattern); result = (status == EXIT_SUCCESS); @@ -1805,8 +1806,6 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_ exit_szstr(&tmp_0); exit_szstr(&tmp_1); - free(buf); - return result; } diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l index 1174ae7..11f5d9e 100644 --- a/src/analysis/scan/tokens.l +++ b/src/analysis/scan/tokens.l @@ -8,7 +8,6 @@ %{ -//#include "manual.h" #include #include @@ -16,8 +15,6 @@ - - /****************************************************************************** * * * Paramètres : src = liste d'octets à traiter. 
* @@ -296,20 +293,10 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out } - #define PUSH_STATE(s) yy_push_state(s, yyscanner) #define POP_STATE yy_pop_state(yyscanner) - -#define EXTEND_BUFFER_IF_NEEDED(extra) \ - if ((*used + extra) > *allocated) \ - { \ - *allocated *= 2; \ - *buf = realloc(*buf, *allocated); \ - } - - %} @@ -342,7 +329,6 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out %x bytes_regex_range %x condition -%x strlit %x wait_for_colon @@ -537,38 +523,27 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]* [mM][bB] { return MB; } [gG][bB] { return GB; } -"\"" { - *used = 0; - PUSH_STATE(strlit); - } +\"{str_not_escaped}+\" { + yylval->sized_cstring.data = yytext + 1; + yylval->sized_cstring.len = yyleng - 2; -"\"" { - POP_STATE; - yylval->sized_cstring.data = *buf; - yylval->sized_cstring.len = *used; - return STRING; - } + return PLAIN_TEXT; + } -"\\\"" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '"'; } -"\\t" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\t'; } -"\\r" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\r'; } -"\\n" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\n'; } -"\\\\" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\\'; } +\"{str_mixed}+\" { + POP_STATE; -\\x[0-9a-fA-F]{2} { - char __ch; - __ch = strtol(yytext + 2, NULL, 16); - EXTEND_BUFFER_IF_NEEDED(1); - (*buf)[(*used)++] = __ch; - } + rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0); -[^\\\"]+ { - size_t __len; - __len = strlen(yytext); - EXTEND_BUFFER_IF_NEEDED(__len); - strcpy(&(*buf)[*used], yytext); - *used += __len; - } +#ifndef NDEBUG + /* Pour rendre plus lisibles les impressions de débogage */ + tmp_0->data[tmp_0->len] = '\0'; +#endif + + yylval->tmp_cstring = tmp_0; + + return ESCAPED_TEXT; + } %{ /* Définitions communes pour la section "bytes:" */ %} diff --git a/tests/analysis/scan/fuzzing.py b/tests/analysis/scan/fuzzing.py index 044fe54..e26c496 100644 --- 
a/tests/analysis/scan/fuzzing.py +++ b/tests/analysis/scan/fuzzing.py @@ -177,3 +177,18 @@ rule test { ''' self.check_rule_failure(rule) + + + def testAllocations(self): + """Handle big allocations for strings in conditions with regular expressions.""" + + rule = ''' +rule test { + + condition: + "%s" == "%s" + +} +''' % ("0" * (256 * 2 + 8), "0" * (256 * 2 + 8)) + + self.check_rule_success(rule) -- cgit v0.11.2-87-g4458