summary refs log tree commit diff
path: root/tools/yara2rost
diff options
context:
space:
mode:
author Cyrille Bagard <nocbos@gmail.com> 2023-09-18 23:26:47 (GMT)
committer Cyrille Bagard <nocbos@gmail.com> 2023-09-18 23:26:47 (GMT)
commit 5c31b8362235b7423dced633866a2d4507c63251 (patch)
tree ddc54cc5f6125d5fb43d383068f8929430a16b1d /tools/yara2rost
parent bec97699f21d4dadcdc1a9f26985c699a374655d (diff)
Introduce a new tool to convert rules: yara2rost.
Diffstat (limited to 'tools/yara2rost')
-rw-r--r-- tools/yara2rost/Makefile.am 36
-rw-r--r-- tools/yara2rost/decl.h 37
-rw-r--r-- tools/yara2rost/demo.yar 27
-rw-r--r-- tools/yara2rost/enums.h 47
-rw-r--r-- tools/yara2rost/grammar.y 1332
-rw-r--r-- tools/yara2rost/tokens.l 292
-rw-r--r-- tools/yara2rost/yara2rost.c 295
7 files changed, 2066 insertions, 0 deletions
diff --git a/tools/yara2rost/Makefile.am b/tools/yara2rost/Makefile.am
new file mode 100644
index 0000000..2830b03
--- /dev/null
+++ b/tools/yara2rost/Makefile.am
@@ -0,0 +1,36 @@
+
+BUILT_SOURCES = grammar.h
+
+
+# Avoid the custom variables of the *_la_[YL]FLAGS kind, in order to keep
+# simple file names, i.e. without the name of the output library as a
+# prefix.
+
+AM_YFLAGS = -v -d -p yara2rost_ -Wno-yacc #-Wcounterexamples
+
+AM_LFLAGS = -P yara2rost_ -o lex.yy.c --header-file=tokens.h \
+ -Dyyget_lineno=yara2rost_get_lineno \
+ -Dyy_scan_bytes=yara2rost__scan_bytes \
+ -Dyy_delete_buffer=yara2rost__delete_buffer
+
+AM_CFLAGS = $(DEBUG_CFLAGS) $(WARNING_FLAGS)
+
+
+bin_PROGRAMS = yara2rost
+
+.NOTPARALLEL: $(bin_PROGRAMS)
+
+yara2rost_SOURCES = \
+ decl.h \
+ enums.h \
+ tokens.l \
+ grammar.y \
+ yara2rost.c
+
+
+# Automake only does half of the job
+CLEANFILES = grammar.h grammar.c grammar.output tokens.c tokens.h
+
+# Same here: of all the generated files, only the Flex output gets dropped from distributions!
+# What is needed to generate the Makefiles is added as well.
+EXTRA_DIST = tokens.h
diff --git a/tools/yara2rost/decl.h b/tools/yara2rost/decl.h
new file mode 100644
index 0000000..05d63d4
--- /dev/null
+++ b/tools/yara2rost/decl.h
@@ -0,0 +1,37 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * decl.h - déclarations de prototypes utiles
+ *
+ * Copyright (C) 2023 Cyrille Bagard
+ *
+ * This file is part of Chrysalide.
+ *
+ * Chrysalide is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Chrysalide is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Chrysalide. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef _TOOLS_YARA2ROST_DECL_H
+#define _TOOLS_YARA2ROST_DECL_H
+
+
+#include <stdbool.h>
+#include <stddef.h>
+
+
+/* Walks through rule definitions (length bytes of text) in order to translate them. */
+bool process_rules_definitions(const char *text, size_t length);
+
+
+
+#endif /* _TOOLS_YARA2ROST_DECL_H */
diff --git a/tools/yara2rost/demo.yar b/tools/yara2rost/demo.yar
new file mode 100644
index 0000000..081973f
--- /dev/null
+++ b/tools/yara2rost/demo.yar
@@ -0,0 +1,27 @@
+
+include "demobis.yar"
+
+import "modname"
+
+
+private global rule Test : tag1 tag2 {
+
+ meta:
+ desc_0 = "abc"
+ desc_1 = 123
+ desc_2 = true
+ desc_3 = false
+ desc_z = ""
+
+ strings:
+ $text = "value"
+ $text_b = "value" wide ascii fullword private xor(0x12)
+ $re = /hash: [0-9a-fA-F]{32}/
+ $re_b = /hash: [0-9a-fA-F]{32}/ wide ascii nocase fullword private
+ $hex = { AA bb [2-4] 61 62 63 }
+ $hex_b = { AA bb [2-4] 61 62 63 } private
+
+ condition:
+ filesize == 123 and entrypoint == 456 and for all of ($text*) : ( @ > @hex_b ) and any of them
+
+}
diff --git a/tools/yara2rost/enums.h b/tools/yara2rost/enums.h
new file mode 100644
index 0000000..19fe49c
--- /dev/null
+++ b/tools/yara2rost/enums.h
@@ -0,0 +1,47 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * enums.h - Reprise des fanions de la syntaxe YARA
+ *
+ * Copyright (C) 2023 Cyrille Bagard
+ *
+ * This file is part of Chrysalide.
+ *
+ * Chrysalide is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Chrysalide is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Chrysalide. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef _TOOLS_YARA2ROST_ENUMS_H
+#define _TOOLS_YARA2ROST_ENUMS_H
+
+
+/* Modifiers which can be attached to a rule, combined as a bit mask. */
+typedef enum _RuleFlags
+{
+    RULE_FLAGS_NONE    = 0x0,               /* No modifier                 */
+    RULE_FLAGS_PRIVATE = 0x1,               /* "private" modifier          */
+    RULE_FLAGS_GLOBAL  = 0x2                /* "global" modifier           */
+
+} RuleFlags;
+
+/* Extra properties of a pattern, combined as a bit mask. */
+typedef enum _StringExtraFlags
+{
+    STRING_FLAGS_NONE      = 0x0,           /* No extra property           */
+    STRING_FLAGS_NO_CASE   = 0x1,           /* "nocase" modifier           */
+    STRING_FLAGS_FULL_WORD = 0x2,           /* "fullword" modifier         */
+    STRING_FLAGS_PRIVATE   = 0x4            /* "private" modifier          */
+
+} StringExtraFlags;
+
+
+
+#endif /* _TOOLS_YARA2ROST_ENUMS_H */
diff --git a/tools/yara2rost/grammar.y b/tools/yara2rost/grammar.y
new file mode 100644
index 0000000..0d756b1
--- /dev/null
+++ b/tools/yara2rost/grammar.y
@@ -0,0 +1,1332 @@
+
+%{
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+
+#include "decl.h"
+#include "tokens.h"
+
+
+
+/* Affiche un message d'erreur suite à l'analyse en échec. */
+static int yyerror(yyscan_t, const char *);
+
+/* Initialise une amorce de copie. */
+static void init_dump(sz_str_t *, const sz_cst_str_t *);
+
+#define init_dump_with_fixed(d, s) \
+ init_dump(d, (sz_cst_str_t []) { { .data = s, .len = sizeof(s) - 1 } })
+
+/* Complète une chaîne de caractères avec une autre. */
+static void add_to_dump(sz_str_t *, const sz_cst_str_t *);
+
+#define add_fixed_to_dump(d, s) \
+ add_to_dump(d, (sz_cst_str_t []) { { .data = s, .len = sizeof(s) - 1 } })
+
+#define add_dyn_to_dump(d, s) \
+ do \
+ { \
+ add_to_dump(d, (sz_cst_str_t *)s); \
+ free((s)->data); \
+ } \
+ while (0)
+
+/* Imprime une bribe de définition formant une règle ROST. */
+void dump_string(const char *, size_t);
+
+#define dump_fixed_string(s) \
+ dump_string(s, sizeof(s) - 1)
+
+
+%}
+
+
+%code requires {
+/* Types required by the %union declared below. */
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "enums.h"
+/* Opaque scanner handle, given to both the parser and the lexer. */
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void *yyscan_t;
+
+/* Sized string owning a heap buffer (see init_dump() and add_to_dump()). */
+typedef struct _sz_str_t
+{
+ char *data; /* Bytes, followed by a nul terminator */
+ size_t len; /* Number of bytes, terminator excluded */
+
+} sz_str_t;
+/* Sized string used as a read-only source by init_dump() and add_to_dump(). */
+typedef struct _sz_cst_str_t
+{
+ char *data; /* Bytes to consider */
+ size_t len; /* Number of bytes to consider */
+
+} sz_cst_str_t;
+
+}
+
+%union {
+
+ sz_str_t string; /* Dynamic string built by the actions */
+ sz_cst_str_t cstring; /* Text carried by a token */
+
+ RuleFlags rule_flags; /* Flags for a rule */
+ StringExtraFlags string_flags; /* Flags for a pattern */
+
+}
+
+
+%expect 1
+
+%define api.pure full
+%define parse.error verbose
+
+%parse-param { yyscan_t yyscanner }
+%lex-param { yyscan_t yyscanner }
+
+
+%code provides {
+/* Signature of yara2rost_lex(), kept in YY_DECL and declared right below. */
+#define YY_DECL \
+ int yara2rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner)
+
+YY_DECL;
+
+}
+
+%token COLON ":"
+%token CURLY_BRACKET_O "{"
+%token CURLY_BRACKET_C "}"
+%token EQUAL "="
+%token PAREN_O "("
+%token PAREN_C ")"
+%token DOT_DOT ".."
+%token COMMA ","
+%token BRACKET_O "["
+%token BRACKET_C "]"
+%token PERCENT "%"
+%token DOT "."
+
+%token ADD_OP "+"
+%token SUB_OP "-"
+%token MUL_OP "*"
+%token DIV_OP "\\"
+%token EOR_OP "^"
+%token AND_OP "&"
+%token OR_OP "|"
+%token INV_OP "~"
+%token SHIFT_LEFT_OP "<<"
+%token SHIFT_RIGHT_OP ">>"
+
+%token LT "<"
+%token GT ">"
+%token LE "<="
+%token GE ">="
+%token EQ "=="
+%token NEQ "!="
+
+%token ALL "all"
+%token AND "and"
+%token ANY "any"
+%token ASCII "ascii"
+%token AT "at"
+%token BASE64 "base64"
+%token BASE64WIDE "base64wide"
+%token CONDITION "condition"
+%token CONTAINS "contains"
+%token DEFINED "defined"
+%token ENDSWITH "endswith"
+%token ENTRYPOINT "entrypoint"
+%token FILESIZE "filesize"
+%token FOR "for"
+%token FULLWORD "fullword"
+%token GLOBAL "global"
+%token ICONTAINS "icontains"
+%token IENDSWITH "iendswith"
+%token IEQUALS "iequals"
+%token IMPORT "import"
+%token IN "in"
+%token INCLUDE "include"
+%token ISTARTSWITH "istartswith"
+%token MATCHES "matches"
+%token META "meta"
+%token NOCASE "nocase"
+%token NONE "none"
+%token NOT "not"
+%token OF "of"
+%token OR "or"
+%token PRIVATE "private"
+%token RULE "rule"
+%token STARTSWITH "startswith"
+%token STRINGS "strings"
+%token THEM "them"
+%token WIDE "wide"
+%token XOR "xor"
+
+%token _FALSE "false"
+%token _TRUE "true"
+
+%token STRING_IDENTIFIER_WITH_WILDCARD
+%token STRING_IDENTIFIER
+%token STRING_COUNT
+%token STRING_OFFSET
+%token STRING_LENGTH
+%token INTEGER_FUNCTION
+%token IDENTIFIER
+%token NUMBER
+%token DOUBLE
+%token TEXT_STRING
+%token REGEXP
+%token HEX_STRING
+
+%type <cstring> STRING_IDENTIFIER_WITH_WILDCARD
+%type <cstring> STRING_IDENTIFIER
+%type <cstring> STRING_COUNT
+%type <cstring> STRING_OFFSET
+%type <cstring> STRING_LENGTH
+%type <cstring> INTEGER_FUNCTION
+%type <cstring> IDENTIFIER
+%type <cstring> NUMBER
+%type <cstring> DOUBLE
+%type <cstring> TEXT_STRING
+%type <cstring> REGEXP
+%type <cstring> HEX_STRING
+
+%type <rule_flags> rule_modifiers
+%type <rule_flags> rule_modifier
+
+%type <string_flags> string_modifiers
+%type <string_flags> string_modifier
+%type <string_flags> regexp_modifiers
+%type <string_flags> regexp_modifier
+%type <string_flags> hex_modifiers
+%type <string_flags> hex_modifier
+
+%type <string> boolean_expression
+%type <string> identifier
+%type <string> arguments
+%type <string> arguments_list
+%type <string> expression
+%type <string> for_iteration
+%type <string> for_variables
+%type <string> iterator
+%type <string> set
+%type <string> range
+%type <string> enumeration
+%type <string> string_iterator
+%type <string> string_set
+%type <string> string_enumeration
+%type <string> string_enumeration_item
+%type <string> rule_set
+%type <string> rule_enumeration
+%type <string> rule_enumeration_item
+%type <string> for_expression
+%type <string> for_quantifier
+%type <string> primary_expression
+%type <string> regexp
+
+%left OR
+%left AND
+%right NOT DEFINED
+%left EQ NEQ CONTAINS ICONTAINS STARTSWITH ENDSWITH ISTARTSWITH IENDSWITH IEQUALS MATCHES
+%left LT LE GT GE
+%left OR_OP
+%left EOR_OP
+%left AND_OP
+%left SHIFT_LEFT_OP SHIFT_RIGHT_OP
+%left ADD_OP SUB_OP
+%left MUL_OP DIV_OP PERCENT
+%right INV_OP UNARY_MINUS
+
+
+%%
+
+ rules : /* empty */
+ | rules include
+ | rules import
+ | rules rule
+ ;
+
+
+ include : "include" TEXT_STRING
+ {
+ dump_fixed_string("include ");
+ dump_string($2.data, $2.len);
+ dump_fixed_string("\n");
+ }
+ ;
+
+ import : "import" TEXT_STRING
+ {
+ dump_fixed_string("/* import ");
+ dump_string($2.data, $2.len);
+ dump_fixed_string(" */\n");
+ }
+ ;
+
+
+ rule : rule_modifiers "rule" IDENTIFIER
+        {
+            /* Modifiers come first, each one followed by a separating space. */
+            if (($1 & RULE_FLAGS_PRIVATE) != 0)
+            {
+                dump_fixed_string("private");
+                dump_fixed_string(" ");
+            }
+
+            if (($1 & RULE_FLAGS_GLOBAL) != 0)
+            {
+                dump_fixed_string("global");
+                dump_fixed_string(" ");
+            }
+
+            /* Then the keyword and the name of the rule. */
+            dump_fixed_string("rule ");
+            dump_string($3.data, $3.len);
+        }
+        tags "{"
+        {
+            /* Opening of the rule body, once the optional tags are printed. */
+            dump_fixed_string(" {\n");
+        }
+        meta strings condition "}"
+        {
+            /* Closing of the rule body. */
+            dump_fixed_string("}\n");
+        }
+      ;
+
+
+ rule_modifiers : /* empty */
+ {
+ $$ = RULE_FLAGS_NONE;
+ }
+ | rule_modifiers rule_modifier
+ {
+ $$ = $1 | $2;
+ }
+ ;
+
+ rule_modifier : "private"
+ {
+ $$ = RULE_FLAGS_PRIVATE;
+ }
+ | "global"
+ {
+ $$ = RULE_FLAGS_GLOBAL;
+ }
+ ;
+
+
+ tags : /* empty */
+ | ":"
+ {
+ dump_fixed_string(" :");
+ }
+ tag_list
+ ;
+
+ tag_list : IDENTIFIER
+ {
+ dump_fixed_string(" ");
+ dump_string($1.data, $1.len);
+ }
+ | tag_list IDENTIFIER
+ {
+ dump_fixed_string(" ");
+ dump_string($2.data, $2.len);
+ }
+ ;
+
+
+/**
+ * Section "meta:"
+ */
+
+ meta : /* empty */
+ | "meta" ":"
+ {
+ dump_fixed_string("\n ");
+ dump_fixed_string("meta:\n");
+ }
+ meta_declarations
+ ;
+
+ meta_declarations : meta_declaration
+ {
+ dump_fixed_string("\n");
+ }
+ | meta_declarations meta_declaration
+ {
+ dump_fixed_string("\n");
+ }
+ ;
+
+ meta_declaration : IDENTIFIER "=" TEXT_STRING
+ {
+ dump_fixed_string(" ");
+ dump_string($1.data, $1.len);
+ dump_fixed_string(" = ");
+ dump_string($3.data, $3.len);
+ }
+ | IDENTIFIER "=" NUMBER
+ {
+ dump_fixed_string(" ");
+ dump_string($1.data, $1.len);
+ dump_fixed_string(" = ");
+ dump_string($3.data, $3.len);
+ }
+ | IDENTIFIER "=" "-" NUMBER
+ {
+ dump_fixed_string(" ");
+ dump_string($1.data, $1.len);
+ dump_fixed_string(" = -");
+ dump_string($4.data, $4.len);
+ }
+ | IDENTIFIER "=" "true"
+ {
+ dump_fixed_string(" ");
+ dump_string($1.data, $1.len);
+ dump_fixed_string(" = true");
+ }
+ | IDENTIFIER "=" "false"
+ {
+ dump_fixed_string(" ");
+ dump_string($1.data, $1.len);
+ dump_fixed_string(" = false");
+ }
+ ;
+
+
+/**
+ * Section "strings:"
+ */
+
+ strings : /* empty */
+ | "strings" ":"
+ {
+ dump_fixed_string("\n ");
+ dump_fixed_string("bytes:\n");
+ }
+ string_declarations
+ ;
+
+ string_declarations : string_declaration
+ {
+ dump_fixed_string("\n");
+ }
+ | string_declarations string_declaration
+ {
+ dump_fixed_string("\n");
+ }
+ ;
+
+ string_declaration : STRING_IDENTIFIER "="
+ {
+ dump_fixed_string(" ");
+ dump_string($1.data, $1.len);
+ dump_fixed_string(" = ");
+ }
+ TEXT_STRING
+ {
+ dump_string($4.data, $4.len);
+ }
+ string_modifiers
+ {
+ if ($6 & STRING_FLAGS_NO_CASE)
+ dump_fixed_string(" nocase");
+
+ if ($6 & STRING_FLAGS_FULL_WORD)
+ dump_fixed_string(" fullword");
+
+ if ($6 & STRING_FLAGS_PRIVATE)
+ dump_fixed_string(" private");
+
+ }
+ | STRING_IDENTIFIER "="
+ {
+ dump_fixed_string(" ");
+ dump_string($1.data, $1.len);
+ dump_fixed_string(" = ");
+ }
+ REGEXP
+ {
+ dump_fixed_string("/");
+ dump_string($4.data, $4.len);
+ }
+ regexp_modifiers
+ {
+ if ($6 & STRING_FLAGS_NO_CASE)
+ dump_fixed_string(" nocase");
+
+ if ($6 & STRING_FLAGS_FULL_WORD)
+ dump_fixed_string(" fullword");
+
+ if ($6 & STRING_FLAGS_PRIVATE)
+ dump_fixed_string(" private");
+
+ }
+ | STRING_IDENTIFIER "="
+ {
+ dump_fixed_string(" ");
+ dump_string($1.data, $1.len);
+ dump_fixed_string(" = ");
+ }
+ HEX_STRING
+ {
+ dump_string($4.data, $4.len);
+ }
+ hex_modifiers
+ {
+ if ($6 & STRING_FLAGS_NO_CASE)
+ dump_fixed_string(" nocase");
+
+ if ($6 & STRING_FLAGS_FULL_WORD)
+ dump_fixed_string(" fullword");
+
+ if ($6 & STRING_FLAGS_PRIVATE)
+ dump_fixed_string(" private");
+
+ }
+ ;
+
+
+ string_modifiers : /* empty */
+ {
+ $$ = STRING_FLAGS_NONE;
+ }
+ | string_modifiers string_modifier
+ {
+ $$ = $1 | $2;
+ }
+ ;
+
+ string_modifier : "wide"
+ {
+ dump_fixed_string(" wide");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "ascii"
+ {
+ dump_fixed_string(" plain");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "nocase"
+ {
+ $$ = STRING_FLAGS_NO_CASE;
+ }
+ | "fullword"
+ {
+ $$ = STRING_FLAGS_FULL_WORD;
+ }
+ | "private"
+ {
+ $$ = STRING_FLAGS_PRIVATE;
+ }
+ | "xor"
+ {
+ dump_fixed_string(" xor");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "xor" "(" NUMBER ")"
+ {
+ dump_fixed_string(" xor(");
+ dump_string($3.data, $3.len);
+ dump_fixed_string(")");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "xor" "(" NUMBER "-" NUMBER ")"
+ {
+ dump_fixed_string(" xor(");
+ dump_string($3.data, $3.len);
+ dump_fixed_string("-");
+ dump_string($5.data, $5.len);
+ dump_fixed_string(")");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "base64"
+ {
+ dump_fixed_string(" base64");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "base64" "(" TEXT_STRING ")"
+ {
+ dump_fixed_string(" base64(");
+ dump_string($3.data, $3.len);
+ dump_fixed_string(")");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "base64wide"
+ {
+ dump_fixed_string(" (base64 | wide)");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "base64wide" "(" TEXT_STRING ")"
+ {
+ dump_fixed_string(" (base64(");
+ dump_string($3.data, $3.len);
+ dump_fixed_string(") | wide)");
+ $$ = STRING_FLAGS_NONE;
+ }
+ ;
+
+ regexp_modifiers : /* empty */
+ {
+ $$ = STRING_FLAGS_NONE;
+ }
+ | regexp_modifiers regexp_modifier
+ {
+ $$ = $1 | $2;
+ }
+ ;
+
+ regexp_modifier : "wide"
+ {
+ dump_fixed_string(" wide");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "ascii"
+ {
+ dump_fixed_string(" plain");
+ $$ = STRING_FLAGS_NONE;
+ }
+ | "nocase"
+ {
+ $$ = STRING_FLAGS_NO_CASE;
+ }
+ | "fullword"
+ {
+ $$ = STRING_FLAGS_FULL_WORD;
+ }
+ | "private"
+ {
+ $$ = STRING_FLAGS_PRIVATE;
+ }
+ ;
+
+ hex_modifiers : /* empty */
+ {
+ $$ = STRING_FLAGS_NONE;
+ }
+ | hex_modifiers hex_modifier
+ {
+ $$ = $1 | $2;
+ }
+ ;
+
+ hex_modifier : "private"
+ {
+ $$ = STRING_FLAGS_PRIVATE;
+ }
+ ;
+
+
+/**
+ * Section "condition:"
+ */
+
+ condition : "condition" ":" boolean_expression
+ {
+ dump_fixed_string("\n ");
+ dump_fixed_string("condition:\n");
+ dump_fixed_string(" ");
+ dump_string($3.data, $3.len);
+ free($3.data);
+ dump_fixed_string("\n\n");
+ }
+ ;
+
+ boolean_expression : expression { $$ = $1; }
+ ;
+
+ identifier : IDENTIFIER
+ {
+ init_dump(&$$, &$1);
+ }
+ | identifier "." IDENTIFIER
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, ".");
+ add_to_dump(&$$, &$3);
+ }
+ | identifier "[" primary_expression "]"
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, "[");
+ add_dyn_to_dump(&$$, &$3);
+ add_fixed_to_dump(&$$, "]");
+ }
+ | identifier "(" arguments ")"
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, "(");
+ if ($3.len > 0)
+ add_dyn_to_dump(&$$, &$3);
+ add_fixed_to_dump(&$$, ")");
+ }
+ ;
+
+
+ arguments : /* empty */
+             {
+                 /* No argument at all: provide a fully defined empty string. */
+                 $$.data = NULL;
+                 $$.len = 0;
+             }
+           | arguments_list { $$ = $1; }
+           ;
+
+
+ arguments_list : expression
+ {
+ $$ = $1;
+ }
+ | arguments_list "," expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, ", ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ ;
+
+
+ expression : "true"
+ {
+ init_dump_with_fixed(&$$, "true");
+ }
+ | "false"
+ {
+ init_dump_with_fixed(&$$, "false");
+ }
+ | primary_expression "matches" regexp
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " matches ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "contains" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " contains ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "icontains" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " icontains ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "startswith" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " startswith ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "istartswith" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " istartswith ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "endswith" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " endswith ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "iendswith" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " iendswith ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "iequals" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " iequals ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | STRING_IDENTIFIER
+ {
+ init_dump(&$$, &$1);
+ }
+ | STRING_IDENTIFIER "at" primary_expression
+ {
+ init_dump(&$$, &$1);
+ add_fixed_to_dump(&$$, " at ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | STRING_IDENTIFIER "in" range
+ {
+ init_dump(&$$, &$1);
+ add_fixed_to_dump(&$$, " in ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | "for" for_expression for_iteration ":" "(" boolean_expression ")"
+   {
+       /* Rebuilds: for <quantifier> <iteration> : (<condition>) */
+       init_dump_with_fixed(&$$, "for ");
+       add_dyn_to_dump(&$$, &$2);
+
+       /**
+        * The iteration text starts with "of" or with a variable name:
+        * a separator is required after the quantifier.
+        */
+       add_fixed_to_dump(&$$, " ");
+       add_dyn_to_dump(&$$, &$3);
+
+       add_fixed_to_dump(&$$, " : (");
+       add_dyn_to_dump(&$$, &$6);
+       add_fixed_to_dump(&$$, ")");
+   }
+ | for_expression "of" string_set
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " of ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | for_expression "of" rule_set
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " of ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+
+ | primary_expression "%" "of" string_set
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, "% of ");
+ add_dyn_to_dump(&$$, &$4);
+ }
+ | primary_expression "%" "of" rule_set
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, "% of ");
+ add_dyn_to_dump(&$$, &$4);
+ }
+
+ | for_expression "of" string_set "in" range
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " of ");
+ add_dyn_to_dump(&$$, &$3);
+ add_fixed_to_dump(&$$, " in ");
+ add_dyn_to_dump(&$$, &$5);
+ }
+ | for_expression "of" string_set "at" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " of ");
+ add_dyn_to_dump(&$$, &$3);
+ add_fixed_to_dump(&$$, " at ");
+ add_dyn_to_dump(&$$, &$5);
+ }
+ | "not" boolean_expression
+ {
+ init_dump_with_fixed(&$$, "not ");
+ add_dyn_to_dump(&$$, &$2);
+ }
+ | "defined" boolean_expression
+ {
+ init_dump_with_fixed(&$$, "defined ");
+ add_dyn_to_dump(&$$, &$2);
+ }
+ | boolean_expression "and" boolean_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " and ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | boolean_expression "or" boolean_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " or ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "<" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " < ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression ">" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " > ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "<=" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " <= ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression ">=" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " >= ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "==" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " == ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "!=" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " != ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression
+ {
+ $$ = $1;
+ }
+ | "(" expression ")"
+ {
+ init_dump_with_fixed(&$$, "(");
+ add_dyn_to_dump(&$$, &$2);
+ add_fixed_to_dump(&$$, ")");
+ }
+ ;
+
+
+ for_iteration : for_variables "in" iterator
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " in ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | "of" string_iterator
+ {
+ init_dump_with_fixed(&$$, "of ");
+ add_dyn_to_dump(&$$, &$2);
+ }
+ ;
+
+ for_variables : IDENTIFIER
+ {
+ init_dump(&$$, &$1);
+ }
+ | for_variables "," IDENTIFIER
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, ", ");
+ add_to_dump(&$$, &$3);
+ }
+ ;
+
+
+ iterator : identifier { $$ = $1; }
+ | set { $$ = $1; }
+ ;
+
+
+ set : "(" enumeration ")"
+ {
+ init_dump_with_fixed(&$$, "(");
+ add_dyn_to_dump(&$$, &$2);
+ add_fixed_to_dump(&$$, ")");
+ }
+ | range { $$ = $1; }
+ ;
+
+
+ range : "(" primary_expression ".." primary_expression ")"
+ {
+ init_dump_with_fixed(&$$, "(");
+ add_dyn_to_dump(&$$, &$2);
+ add_fixed_to_dump(&$$, " .. ");
+ add_dyn_to_dump(&$$, &$4);
+ add_fixed_to_dump(&$$, ")");
+ }
+ ;
+
+
+ enumeration : primary_expression { $$ = $1; }
+ | enumeration "," primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, ", ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ ;
+
+
+ string_iterator : string_set { $$ = $1; }
+ ;
+
+ string_set : "(" string_enumeration ")"
+ {
+ init_dump_with_fixed(&$$, "(");
+ add_dyn_to_dump(&$$, &$2);
+ add_fixed_to_dump(&$$, ")");
+ }
+ | "them"
+ {
+ init_dump_with_fixed(&$$, "them");
+ }
+ ;
+
+ string_enumeration : string_enumeration_item
+ {
+ $$ = $1;
+ }
+ | string_enumeration "," string_enumeration_item
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, ", ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ ;
+
+string_enumeration_item : STRING_IDENTIFIER
+ {
+ init_dump(&$$, &$1);
+ }
+ | STRING_IDENTIFIER_WITH_WILDCARD
+ {
+ init_dump(&$$, &$1);
+ }
+ ;
+
+
+ rule_set : "(" rule_enumeration ")"
+ {
+ init_dump_with_fixed(&$$, "(");
+ add_dyn_to_dump(&$$, &$2);
+ add_fixed_to_dump(&$$, ")");
+ }
+ ;
+
+ rule_enumeration : rule_enumeration_item
+ {
+ $$ = $1;
+ }
+ | rule_enumeration "," rule_enumeration_item
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, ", ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ ;
+
+ rule_enumeration_item : IDENTIFIER
+ {
+ init_dump(&$$, &$1);
+ }
+ | IDENTIFIER "*"
+ {
+ init_dump(&$$, &$1);
+ add_fixed_to_dump(&$$, "*");
+ }
+ ;
+
+
+ for_expression : primary_expression { $$ = $1; }
+ | for_quantifier { $$ = $1; }
+ ;
+
+ for_quantifier : "all"
+ {
+ init_dump_with_fixed(&$$, "all");
+ }
+ | "any"
+ {
+ init_dump_with_fixed(&$$, "any");
+ }
+ | "none"
+ {
+ init_dump_with_fixed(&$$, "none");
+ }
+ ;
+
+
+ primary_expression : "(" primary_expression ")"
+ {
+ init_dump_with_fixed(&$$, "(");
+ add_dyn_to_dump(&$$, &$2);
+ add_fixed_to_dump(&$$, ")");
+ }
+ | "filesize"
+ {
+ init_dump_with_fixed(&$$, "datasize");
+ }
+ | "entrypoint"
+ {
+ init_dump_with_fixed(&$$, "/* entrypoint */ 0");
+ }
+ | INTEGER_FUNCTION "(" primary_expression ")"
+ {
+ init_dump(&$$, &$1);
+ add_fixed_to_dump(&$$, "(");
+ add_dyn_to_dump(&$$, &$3);
+ add_fixed_to_dump(&$$, ")");
+ }
+ | NUMBER
+ {
+ init_dump(&$$, &$1);
+ }
+ | DOUBLE
+ {
+ init_dump(&$$, &$1);
+ }
+ | TEXT_STRING
+ {
+ init_dump(&$$, &$1);
+ }
+ | STRING_COUNT "in" range
+ {
+ init_dump(&$$, &$1);
+ add_fixed_to_dump(&$$, " in ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | STRING_COUNT
+ {
+ init_dump(&$$, &$1);
+ }
+ | STRING_OFFSET "[" primary_expression "]"
+ {
+ init_dump(&$$, &$1);
+ add_fixed_to_dump(&$$, "[");
+ add_dyn_to_dump(&$$, &$3);
+ add_fixed_to_dump(&$$, "]");
+ }
+ | STRING_OFFSET
+ {
+ init_dump(&$$, &$1);
+ }
+ | STRING_LENGTH "[" primary_expression "]"
+ {
+ init_dump(&$$, &$1);
+ add_fixed_to_dump(&$$, "[");
+ add_dyn_to_dump(&$$, &$3);
+ add_fixed_to_dump(&$$, "]");
+ }
+ | STRING_LENGTH
+ {
+ init_dump(&$$, &$1);
+ }
+ | identifier
+ {
+ $$ = $1;
+ }
+ | "-" primary_expression %prec UNARY_MINUS
+ {
+ init_dump_with_fixed(&$$, "-");
+ add_dyn_to_dump(&$$, &$2);
+ }
+ | primary_expression "+" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " + ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "-" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " - ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "*" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " * ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "\\" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " \\ ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "%" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " % ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "^" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " ^ ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "&" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " & ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression "|" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " | ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | "~" primary_expression
+ {
+ init_dump_with_fixed(&$$, "~");
+ add_dyn_to_dump(&$$, &$2);
+ }
+ | primary_expression "<<" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " << ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | primary_expression ">>" primary_expression
+ {
+ $$ = $1;
+ add_fixed_to_dump(&$$, " >> ");
+ add_dyn_to_dump(&$$, &$3);
+ }
+ | regexp
+ ;
+
+
+ regexp : REGEXP
+ {
+ init_dump_with_fixed(&$$, "/");
+ add_to_dump(&$$, &$1);
+ }
+ ;
+
+
+%%
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : yyscanner = scanner involved in the parsing.                 *
+*                msg       = error message.                                   *
+*                                                                             *
+*  Description : Prints an error message after a failed parsing.              *
+*                                                                             *
+*  Return      : 0                                                            *
+*                                                                             *
+*  Remarks     : The message goes to stderr, as stdout holds the output.      *
+*                                                                             *
+******************************************************************************/
+
+static int yyerror(yyscan_t yyscanner, const char *msg)
+{
+    /* The translated rules are written to STDOUT_FILENO: keep diagnostics apart */
+    fprintf(stderr, "YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg);
+
+    return 0;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : dst = dynamic string to set up.                              *
+*                src = sized string to copy from.                             *
+*                                                                             *
+*  Description : Starts a dynamic string as a copy of another string.         *
+*                                                                             *
+*  Return      : -                                                            *
+*                                                                             *
+*  Remarks     : The process exits if the allocation fails.                   *
+*                                                                             *
+******************************************************************************/
+
+static void init_dump(sz_str_t *dst, const sz_cst_str_t *src)
+{
+    char *copy;                             /* Buffer receiving the copy   */
+
+    copy = malloc(src->len + 1);
+
+    if (copy == NULL)
+    {
+        perror("malloc");
+        exit(EXIT_FAILURE);
+    }
+
+    /* memcpy() must not be given a possibly invalid source, even for 0 byte */
+    if (src->len > 0)
+        memcpy(copy, src->data, src->len);
+
+    copy[src->len] = '\0';
+
+    dst->data = copy;
+    dst->len = src->len;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : dst = dynamic string to extend.                              *
+*                src = sized string to append.                                *
+*                                                                             *
+*  Description : Appends a string to a dynamic string.                        *
+*                                                                             *
+*  Return      : -                                                            *
+*                                                                             *
+*  Remarks     : The process exits if the allocation fails.                   *
+*                                                                             *
+******************************************************************************/
+
+static void add_to_dump(sz_str_t *dst, const sz_cst_str_t *src)
+{
+    char *grown;                            /* Resized buffer              */
+
+    /* Keep the previous pointer untouched until the resizing has succeeded */
+    grown = realloc(dst->data, dst->len + src->len + 1);
+
+    if (grown == NULL)
+    {
+        perror("realloc");
+        exit(EXIT_FAILURE);
+    }
+
+    if (src->len > 0)
+        memcpy(&grown[dst->len], src->data, src->len);
+
+    dst->data = grown;
+    dst->len += src->len;
+
+    dst->data[dst->len] = '\0';
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : string = text to copy to the standard output.                *
+*                length = length of this text, in bytes.                      *
+*                                                                             *
+*  Description : Prints a piece of definition building a ROST rule.           *
+*                                                                             *
+*  Return      : -                                                            *
+*                                                                             *
+*  Remarks     : Short writes are resumed until everything is written.        *
+*                                                                             *
+******************************************************************************/
+
+void dump_string(const char *string, size_t length)
+{
+    ssize_t ret;                            /* Outcome of one call         */
+
+    while (length > 0)
+    {
+        ret = write(STDOUT_FILENO, string, length);
+
+        if (ret < 0)
+        {
+            perror("write");
+            break;
+        }
+
+        /* No progress at all: give up instead of looping forever */
+        if (ret == 0)
+        {
+            fputs("write: nothing written\n", stderr);
+            break;
+        }
+
+        /* write() may accept only a part of the data: move to the remainder */
+        string += ret;
+        length -= (size_t)ret;
+
+    }
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : text   = definitions of the rules to load.                   *
+*                length = length of these definitions.                        *
+*                                                                             *
+*  Description : Walks through rule definitions in order to translate them.   *
+*                                                                             *
+*  Return      : true if the whole input has been parsed, false otherwise.    *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+bool process_rules_definitions(const char *text, size_t length)
+{
+    bool result;                            /* Status to return            */
+    yyscan_t lexstate;                      /* Lexical analysis state      */
+    YY_BUFFER_STATE state;                  /* Scanning context            */
+    int status;                             /* Outcome of the parsing      */
+
+    /* Without a valid scanner, nothing below can be run safely */
+    if (yara2rost_lex_init(&lexstate) != 0)
+    {
+        perror("yara2rost_lex_init");
+        return false;
+    }
+
+    state = yara2rost__scan_bytes(text, length, lexstate);
+
+    status = yyparse(lexstate);
+
+    /* yyparse() returns 0 once the whole input has been accepted */
+    result = (status == 0);
+
+    yy_delete_buffer(state, lexstate);
+
+    yara2rost_lex_destroy(lexstate);
+
+    return result;
+
+}
diff --git a/tools/yara2rost/tokens.l b/tools/yara2rost/tokens.l
new file mode 100644
index 0000000..34e61d0
--- /dev/null
+++ b/tools/yara2rost/tokens.l
@@ -0,0 +1,292 @@
+
+%top {
+
+/* Header expected to come from grammar.y: token codes returned by the rules. */
+#include "grammar.h"
+
+}
+
+
+%{
+
+#include "decl.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+
+/* Shortcuts to enter and to leave a start condition of the scanner. */
+#define PUSH_STATE(s) yy_push_state(s, yyscanner)
+#define POP_STATE yy_pop_state(yyscanner)
+
+%}
+
+
+/* Reentrant scanner handing its values to the parser through yylval. */
+%option bison-bridge reentrant
+/* Start conditions are stacked (see PUSH_STATE and POP_STATE). */
+%option stack
+%option nounput
+%option noinput
+%option noyywrap
+%option noyy_top_state
+%option yylineno
+%option never-interactive
+
+
+/* Exclusive start conditions: regular expression bodies and block comments. */
+%x regexp
+%x comment
+
+
+/* Content of a double-quoted string: plain characters or escape sequences. */
+str_not_escaped [^\"\\]
+str_escaped \\a|\\b|\\t|\\n|\\v|\\f|\\r|\\e|\\\"|\\\\|\\x{hbyte}
+str_mixed ({str_not_escaped}|{str_escaped})
+
+/* Two hexadecimal digits, as expected after the x escape above. */
+hbyte [0-9a-fA-F]{2}
+
+/* Character classes shared by the rules below. */
+digit [0-9]
+letter [a-zA-Z]
+hexdigit [a-fA-F0-9]
+octdigit [0-7]
+
+
+%%
+
+
+%{ /* Punctuation and delimiters */ %}
+
+":" { return COLON; }
+"{" { return CURLY_BRACKET_O; }
+"}" { return CURLY_BRACKET_C; }
+"=" { return EQUAL; }
+"(" { return PAREN_O; }
+")" { return PAREN_C; }
+".." { return DOT_DOT; }
+"," { return COMMA; }
+"[" { return BRACKET_O; }
+"]" { return BRACKET_C; }
+"%" { return PERCENT; }
+"." { return DOT; }
+
+%{ /* Arithmetic and bitwise operators; the division token is a single backslash */ %}
+
+"+" { return ADD_OP; }
+"-" { return SUB_OP; }
+"*" { return MUL_OP; }
+"\\" { return DIV_OP; }
+"^" { return EOR_OP; }
+"&" { return AND_OP; }
+"|" { return OR_OP; }
+"~" { return INV_OP; }
+"<<" { return SHIFT_LEFT_OP; }
+">>" { return SHIFT_RIGHT_OP; }
+
+%{ /* Comparison operators */ %}
+
+"<" { return LT; }
+">" { return GT; }
+"<=" { return LE; }
+">=" { return GE; }
+"==" { return EQ; }
+"!=" { return NEQ; }
+
+%{ /* Reserved words, listed before the generic identifier rule to win equal-length matches */ %}
+
+"all" { return ALL; }
+"and" { return AND; }
+"any" { return ANY; }
+"ascii" { return ASCII; }
+"at" { return AT; }
+"base64" { return BASE64; }
+"base64wide" { return BASE64WIDE; }
+"condition" { return CONDITION; }
+"contains" { return CONTAINS; }
+"defined" { return DEFINED; }
+"endswith" { return ENDSWITH; }
+"entrypoint" { return ENTRYPOINT; }
+"filesize" { return FILESIZE; }
+"for" { return FOR; }
+"fullword" { return FULLWORD; }
+"global" { return GLOBAL; }
+"icontains" { return ICONTAINS; }
+"iendswith" { return IENDSWITH; }
+"iequals" { return IEQUALS; }
+"import" { return IMPORT; }
+"in" { return IN; }
+"include" { return INCLUDE; }
+"istartswith" { return ISTARTSWITH; }
+"matches" { return MATCHES; }
+"meta" { return META; }
+"nocase" { return NOCASE; }
+"none" { return NONE; }
+"not" { return NOT; }
+"of" { return OF; }
+"or" { return OR; }
+"private" { return PRIVATE; }
+"rule" { return RULE; }
+"startswith" { return STARTSWITH; }
+"strings" { return STRINGS; }
+"them" { return THEM; }
+"wide" { return WIDE; }
+"xor" { return XOR; }
+
+%{ /* Boolean literals */ %}
+
+"false" { return _FALSE; }
+"true" { return _TRUE; }
+
+
+%{ /* Comments: block comments are skipped in a dedicated state, line comments in one match */ %}
+
+"/*" { PUSH_STATE(comment); }
+<comment>"*/" { POP_STATE; }
+<comment>(.|\n) { }
+
+"//"[^\n]* { }
+
+
+%{ /* Text blocks: the matched text is handed to the parser by reference, without any copy */ %}
+
+$({letter}|{digit}|_)*"*" {
+
+ /* String identifier ending with a star: dollar sign, optional name, star. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return STRING_IDENTIFIER_WITH_WILDCARD;
+
+}
+
+$({letter}|{digit}|_)* {
+
+ /* String identifier: dollar sign followed by an optional name. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return STRING_IDENTIFIER;
+
+}
+
+#({letter}|{digit}|_)* {
+
+ /* Same naming as above, introduced by a hash sign. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return STRING_COUNT;
+
+}
+
+@({letter}|{digit}|_)* {
+
+ /* Same naming as above, introduced by an at sign. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return STRING_OFFSET;
+
+}
+
+!({letter}|{digit}|_)* {
+
+ /* Same naming as above, introduced by an exclamation mark. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return STRING_LENGTH;
+
+}
+
+u?int(8|16|32)(be)? {
+
+ /* Names such as int8, uint16 or int32be; must stay before the identifier rule. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return INTEGER_FUNCTION;
+
+}
+
+({letter}|_)({letter}|{digit}|_)* {
+
+ /* Any other name starting with a letter or an underscore. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return IDENTIFIER;
+
+}
+
+{digit}+(MB|KB){0,1} {
+
+ /* Decimal integer with an optional KB or MB suffix, kept as raw text. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return NUMBER;
+
+}
+
+{digit}+"."{digit}+ {
+
+ /* Decimal number with a fractional part. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return DOUBLE;
+
+}
+
+0x{hexdigit}+ {
+
+ /* Hexadecimal integer, prefix included in the text handed over. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return NUMBER;
+
+}
+
+0o{octdigit}+ {
+
+ /* Octal integer, prefix included in the text handed over. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return NUMBER;
+
+}
+
+\"{str_mixed}*\" {
+
+ /* Double-quoted string; both quotes are part of the text handed over. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return TEXT_STRING;
+
+}
+
+"/" {
+
+ /* Opening slash: the body is matched by the rule of the regexp state. */
+ PUSH_STATE(regexp);
+
+}
+
+<regexp>(\\\/|\\.|[^/\n\\])+\/i?s? {
+
+ /* Body up to the closing slash plus the optional i and s flags; */
+ /* escaped characters, slashes included, do not end the body.    */
+ POP_STATE;
+
+ /* The opening slash was consumed by the previous rule and is not included. */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return REGEXP;
+
+}
+
+\{(({hexdigit}|[ \-|\~\?\[\]\(\)\n\r\t]|\/\*(\/|\**[^*/])*\*+\/)+|\/\/.*\n)+\} {
+
+ /* Braces enclosing hexadecimal digits, the punctuation and blanks of the */
+ /* class above, block comments or line comments; braces are included.     */
+ yylval->cstring.data = yytext;
+ yylval->cstring.len = yyleng;
+
+ return HEX_STRING;
+
+}
+
+
+%{ /* Default actions, active in every start condition */ %}
+
+<*>[ \t\r]+ { }
+
+<*>[\n]+ { }
+
+<*>. {
+ /* Any character left unmatched by the rules above is a fatal error. */
+ char *msg;
+ int ret;
+ ret = asprintf(&msg, "Unhandled token in rule definition: '%s '", yytext);
+ if (ret == -1)
+ YY_FATAL_ERROR("Unhandled token in undisclosed rule definition");
+ else
+ {
+ /* NOTE(review): the default YY_FATAL_ERROR presumably does not return, */
+ /* which would leave the free() below unreached; to be confirmed.       */
+ YY_FATAL_ERROR(msg);
+ free(msg);
+ }
+ }
+
+
+%%
diff --git a/tools/yara2rost/yara2rost.c b/tools/yara2rost/yara2rost.c
new file mode 100644
index 0000000..3206309
--- /dev/null
+++ b/tools/yara2rost/yara2rost.c
@@ -0,0 +1,295 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * yara2rost.c - traduction de règles YARA en règles ROST
+ *
+ * Copyright (C) 2023 Cyrille Bagard
+ *
+ * This file is part of Chrysalide.
+ *
+ * Chrysalide is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Chrysalide is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Chrysalide. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <malloc.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+
+#include "decl.h"
+
+
+
+/* Prints some help about the usage of the program. */
+static void show_usage(const char *);
+
+/* Loads the content to process from the standard input. */
+static void *get_input_data_from_stdin(size_t *);
+
+/* Loads the content to process from an external file. */
+static void *get_input_data_from_file(const char *, size_t *);
+
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : argv0 = name of the running program.                         *
+*                                                                             *
+*  Description : Prints some help about the usage of the program.             *
+*                                                                             *
+*  Returns     : -                                                            *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static void show_usage(const char *argv0)
+{
+    /* The whole help text is emitted with one call, one literal per line. */
+    printf("\n"
+           "Usage: %s [options] [<YARA file>]\n"
+           "\n"
+           "General options:\n"
+           "\n"
+           "\t-h | --help\t\tDisplay this message.\n"
+           "\n"
+           "If no YARA file is provided as argument, a rule definition is expected from the standard input.\n"
+           "\n",
+           argv0);
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : length = number of bytes loaded from the input. [OUT]        *
+*                                                                             *
+*  Description : Loads the content to process from the standard input.        *
+*                                                                             *
+*  Returns     : Valid address or NULL in case of failure.                    *
+*                                                                             *
+*  Remarks     : The returned buffer has to be released with free().          *
+*                                                                             *
+******************************************************************************/
+
+static void *get_input_data_from_stdin(size_t *length)
+{
+    char *result;                           /* Memory area to return       */
+    char *grown;                            /* Enlarged memory area        */
+    ssize_t got;                            /* Quantity of bytes read      */
+
+    result = NULL;
+    *length = 0;
+
+#define ALLOC_SIZE 2048
+
+    while (true)
+    {
+        /* Always keep room for one more full chunk after the loaded data. */
+        grown = realloc(result, (*length + ALLOC_SIZE) * sizeof(char));
+
+        /* The previous area is still owned here if realloc() fails. */
+        if (grown == NULL)
+        {
+            perror("realloc");
+            goto exit_with_error;
+        }
+
+        result = grown;
+
+        got = read(STDIN_FILENO, result + *length, ALLOC_SIZE);
+
+        if (got == -1)
+        {
+            perror("read");
+            goto exit_with_error;
+        }
+
+        /**
+         * Only a read of zero byte marks the end of the input: pipes and
+         * terminals may deliver less than requested while data remains.
+         */
+        if (got == 0)
+            break;
+
+        *length += (size_t)got;
+
+    }
+
+    return result;
+
+ exit_with_error:
+
+    free(result);
+
+    *length = 0;
+
+    return NULL;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : filename = path of the file to load into memory.             *
+*                length   = number of bytes loaded from the file. [OUT]       *
+*                                                                             *
+*  Description : Loads the content to process from an external file.          *
+*                                                                             *
+*  Returns     : Valid address or NULL in case of failure.                    *
+*                                                                             *
+*  Remarks     : The returned buffer has to be released with free().          *
+*                                                                             *
+******************************************************************************/
+
+static void *get_input_data_from_file(const char *filename, size_t *length)
+{
+    char *result;                           /* Memory area to return       */
+    int fd;                                 /* File descriptor             */
+    struct stat info;                       /* Information about the file  */
+    size_t expected;                        /* Size announced by fstat()   */
+    size_t loaded;                          /* Bytes read so far           */
+    ssize_t got;                            /* Quantity of bytes read      */
+
+    result = NULL;
+
+    /* The output size is defined on every path, failures included. */
+    *length = 0;
+
+    fd = open(filename, O_RDONLY);
+    if (fd == -1)
+    {
+        perror("open");
+        goto exit;
+    }
+
+    if (fstat(fd, &info) == -1)
+    {
+        perror("fstat");
+        goto exit_with_fd;
+    }
+
+    expected = (size_t)info.st_size;
+
+    /* At least one byte, so that an empty file still gets a valid address. */
+    result = malloc((expected > 0 ? expected : 1) * sizeof(char));
+    if (result == NULL)
+    {
+        perror("malloc");
+        goto exit_with_fd;
+    }
+
+    /* read() may return less than requested: carry on until the end. */
+    for (loaded = 0; loaded < expected; loaded += (size_t)got)
+    {
+        got = read(fd, result + loaded, expected - loaded);
+
+        if (got == -1)
+        {
+            perror("read");
+            break;
+        }
+
+        /* errno carries no information for a premature end of file. */
+        if (got == 0)
+        {
+            fprintf(stderr, "read: unexpected end of file\n");
+            break;
+        }
+
+    }
+
+    if (loaded == expected)
+        *length = expected;
+
+    else
+    {
+        free(result);
+        result = NULL;
+    }
+
+ exit_with_fd:
+
+    close(fd);
+
+ exit:
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : argc = number of arguments on the command line.              *
+*                argv = arguments of the command line.                        *
+*                                                                             *
+*  Description : Entry point of the program.                                  *
+*                                                                             *
+*  Returns     : EXIT_SUCCESS if the program ran without any trouble.         *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+int main(int argc, char **argv)
+{
+    int result;                             /* Status to return            */
+    bool need_help;                         /* Is the help requested?      */
+    bool bad_option;                        /* Unknown option provided?    */
+    int index;                              /* Index of the long option    */
+    int ret;                                /* Outcome of an option read   */
+    const char *source;                     /* Source of the definitions   */
+    void *content;                          /* Content to translate        */
+    size_t length;                          /* Size of this content        */
+
+    static struct option long_options[] = {
+
+        { "help", no_argument, NULL, 'h' },
+
+        { NULL, 0, NULL, 0 }
+
+    };
+
+    /* Collecting the commands */
+
+    need_help = false;
+    bad_option = false;
+
+    while (true)
+    {
+        ret = getopt_long(argc, argv, "h", long_options, &index);
+        if (ret == -1) break;
+
+        switch (ret)
+        {
+            case 'h':
+                need_help = true;
+                break;
+
+            default:
+                /* getopt_long() already printed its own diagnostic. */
+                bad_option = true;
+                break;
+
+        }
+
+    }
+
+    /* Extra checks: at most one positional argument, no unknown option */
+
+    if (need_help || bad_option || (optind != argc && (optind + 1) != argc))
+    {
+        show_usage(argv[0]);
+        result = (need_help ? EXIT_SUCCESS : EXIT_FAILURE);
+        goto exit;
+    }
+
+    /* Expected processing */
+
+    result = EXIT_FAILURE;
+
+    length = 0;
+
+    if (optind == argc)
+        content = get_input_data_from_stdin(&length);
+
+    else
+    {
+        source = argv[optind];
+
+        /* Both spellings explicitly select the standard input. */
+        if (strcmp(source, "-") == 0 || strcmp(source, "/dev/stdin") == 0)
+            content = get_input_data_from_stdin(&length);
+        else
+            content = get_input_data_from_file(source, &length);
+
+    }
+
+    if (content != NULL)
+    {
+        if (process_rules_definitions(content, length))
+            result = EXIT_SUCCESS;
+
+        free(content);
+
+    }
+
+ exit:
+
+    return result;
+
+}