summary | refs | log | tree | commit | diff
path: root/src/analysis
diff options
context:
space:
mode:
author: Cyrille Bagard <nocbos@gmail.com> 2023-10-24 22:13:29 (GMT)
committer: Cyrille Bagard <nocbos@gmail.com> 2023-10-24 22:13:29 (GMT)
commit: aae46fab1f41df0cce9da9fc3c17eea776e487b1 (patch)
tree: e6137127bc4e1263d9af9684b962b5fedd5bca58 /src/analysis
parent: f82e9975ea778a2ffa7692e864b9ed49dd651bad (diff)
Handle big allocations for strings in conditions with regular expressions.
Diffstat (limited to 'src/analysis')
-rw-r--r-- src/analysis/scan/grammar.y 43
-rw-r--r-- src/analysis/scan/tokens.l 59
2 files changed, 38 insertions, 64 deletions
diff --git a/src/analysis/scan/grammar.y b/src/analysis/scan/grammar.y
index 02e5973..f31a5d1 100644
--- a/src/analysis/scan/grammar.y
+++ b/src/analysis/scan/grammar.y
@@ -6,10 +6,10 @@
/* Affiche un message d'erreur suite à l'analyse en échec. */
-static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *, sized_string_t *, void/*GBytesPattern*/ **, char **, size_t *, size_t *, char *);
+static int yyerror(GContentScanner *, yyscan_t, GScanRule **, sized_string_t *, sized_string_t *, void/*GBytesPattern*/ **, char *);
#define raise_error(msg) \
- yyerror(scanner, yyscanner, built_rule, tmp_0, tmp_1, NULL, buf, allocated, used, msg)
+ yyerror(scanner, yyscanner, built_rule, tmp_0, tmp_1, NULL, msg)
%}
@@ -103,13 +103,13 @@ typedef void *yyscan_t;
%define api.pure full
-%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void /*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
-%lex-param { yyscan_t yyscanner } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void/*GBytesPattern*/ **built_pattern } { char **buf } { size_t *allocated } { size_t *used }
+%parse-param { GContentScanner *scanner } { yyscan_t yyscanner } { GScanRule **built_rule } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void /*GBytesPattern*/ **built_pattern }
+%lex-param { yyscan_t yyscanner } { sized_string_t *tmp_0} { sized_string_t *tmp_1} { void/*GBytesPattern*/ **built_pattern }
%code provides {
#define YY_DECL \
- int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used)
+ int rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern)
YY_DECL;
@@ -172,7 +172,6 @@ YY_DECL;
%token FALSE_ "false"
%token SIGNED_INTEGER
%token UNSIGNED_INTEGER
-%token STRING
%token KB MB GB
@@ -244,7 +243,6 @@ YY_DECL;
%type <signed_integer> SIGNED_INTEGER
%type <unsigned_integer> UNSIGNED_INTEGER
-%type <sized_cstring> STRING
%type <rule_flags> rule_flags
%type <rule_flags> rule_flag
@@ -1033,11 +1031,23 @@ YY_DECL;
__converted = $1 * 1073741824;
$$ = g_scan_literal_expression_new(LVT_UNSIGNED_INTEGER, &__converted);
}
- | STRING
+ | PLAIN_TEXT
{
$$ = g_scan_literal_expression_new(LVT_STRING, &$1);
}
- | STRING "[" cexpression "]"
+ | PLAIN_TEXT "[" cexpression "]"
+ {
+ GScanExpression *__src;
+ __src = g_scan_literal_expression_new(LVT_STRING, &$1);
+ $$ = g_scan_set_item_new(__src, $3);
+ g_object_unref(G_OBJECT(__src));
+ g_object_unref(G_OBJECT($3));
+ }
+ | ESCAPED_TEXT
+ {
+ $$ = g_scan_literal_expression_new(LVT_STRING, &$1);
+ }
+ | ESCAPED_TEXT "[" cexpression "]"
{
GScanExpression *__src;
__src = g_scan_literal_expression_new(LVT_STRING, &$1);
@@ -1735,7 +1745,7 @@ relational_expr : cexpression "<" cexpression { $$ = g_scan_relational_operatio
* *
******************************************************************************/
-static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char **buf, size_t *allocated, size_t *used, char *msg)
+static int yyerror(GContentScanner *scanner, yyscan_t yyscanner, GScanRule **built_rule, sized_string_t *tmp_0, sized_string_t *tmp_1, void/*GBytesPattern*/ **built_pattern, char *msg)
{
printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg);
@@ -1765,9 +1775,6 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_
sized_string_t tmp_0; /* Zone tampon #1 */
sized_string_t tmp_1; /* Zone tampon #2 */
void /*GBytesPattern*/ *built_pattern; /* Motif en construction */
- char *buf; /* Zone de travail temporaire */
- size_t allocated; /* Taille de mémoire allouée */
- size_t used; /* Quantité utilisée */
yyscan_t lexstate; /* Gestion d'analyse lexicale */
YY_BUFFER_STATE state; /* Contexte d'analyse */
int status; /* Bilan d'une analyse */
@@ -1784,17 +1791,11 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_
built_pattern = NULL;
- allocated = 256;
- used = 0;
-
- buf = malloc(allocated * sizeof(char));
- buf[0] = '\0';
-
rost_lex_init(&lexstate);
state = rost__scan_bytes(text, length, lexstate);
- status = yyparse(scanner, lexstate, &built_rule, &tmp_0, &tmp_1, &built_pattern, &buf, &allocated, &used);
+ status = yyparse(scanner, lexstate, &built_rule, &tmp_0, &tmp_1, &built_pattern);
result = (status == EXIT_SUCCESS);
@@ -1805,8 +1806,6 @@ bool process_rules_definitions(GContentScanner *scanner, const char *text, size_
exit_szstr(&tmp_0);
exit_szstr(&tmp_1);
- free(buf);
-
return result;
}
diff --git a/src/analysis/scan/tokens.l b/src/analysis/scan/tokens.l
index 1174ae7..11f5d9e 100644
--- a/src/analysis/scan/tokens.l
+++ b/src/analysis/scan/tokens.l
@@ -8,7 +8,6 @@
%{
-//#include "manual.h"
#include <assert.h>
#include <stdbool.h>
@@ -16,8 +15,6 @@
-
-
/******************************************************************************
* *
* Paramètres : src = liste d'octets à traiter. *
@@ -296,20 +293,10 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out
}
-
#define PUSH_STATE(s) yy_push_state(s, yyscanner)
#define POP_STATE yy_pop_state(yyscanner)
-
-#define EXTEND_BUFFER_IF_NEEDED(extra) \
- if ((*used + extra) > *allocated) \
- { \
- *allocated *= 2; \
- *buf = realloc(*buf, *allocated); \
- }
-
-
%}
@@ -342,7 +329,6 @@ static void rost_unescape_bytes(const char *src, size_t len, sized_string_t *out
%x bytes_regex_range
%x condition
-%x strlit
%x wait_for_colon
@@ -537,38 +523,27 @@ bytes_fuzzy_id [\*A-Za-z_][\*A-Za-z0-9_]*
<condition>[mM][bB] { return MB; }
<condition>[gG][bB] { return GB; }
-<condition>"\"" {
- *used = 0;
- PUSH_STATE(strlit);
- }
+<condition>\"{str_not_escaped}+\" {
+ yylval->sized_cstring.data = yytext + 1;
+ yylval->sized_cstring.len = yyleng - 2;
-<strlit>"\"" {
- POP_STATE;
- yylval->sized_cstring.data = *buf;
- yylval->sized_cstring.len = *used;
- return STRING;
- }
+ return PLAIN_TEXT;
+ }
-<strlit>"\\\"" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '"'; }
-<strlit>"\\t" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\t'; }
-<strlit>"\\r" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\r'; }
-<strlit>"\\n" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\n'; }
-<strlit>"\\\\" { EXTEND_BUFFER_IF_NEEDED(1); (*buf)[(*used)++] = '\\'; }
+<condition>\"{str_mixed}+\" {
+ POP_STATE;
-<strlit>\\x[0-9a-fA-F]{2} {
- char __ch;
- __ch = strtol(yytext + 2, NULL, 16);
- EXTEND_BUFFER_IF_NEEDED(1);
- (*buf)[(*used)++] = __ch;
- }
+ rost_unescape_string_bytes(yytext + 1, yyleng - 2, tmp_0);
-<strlit>[^\\\"]+ {
- size_t __len;
- __len = strlen(yytext);
- EXTEND_BUFFER_IF_NEEDED(__len);
- strcpy(&(*buf)[*used], yytext);
- *used += __len;
- }
+#ifndef NDEBUG
+ /* Pour rendre plus lisibles les impressions de débogage */
+ tmp_0->data[tmp_0->len] = '\0';
+#endif
+
+ yylval->tmp_cstring = tmp_0;
+
+ return ESCAPED_TEXT;
+ }
%{ /* Définitions communes pour la section "bytes:" */ %}