diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/Makefile.am | 2 | ||||
-rw-r--r-- | tools/d2c/decl.h | 2 | ||||
-rw-r--r-- | tools/d2c/format/decl.h | 2 | ||||
-rw-r--r-- | tools/d2c/grammar.y | 4 | ||||
-rw-r--r-- | tools/d2c/id/decl.h | 2 | ||||
-rw-r--r-- | tools/fuzzing/rost/Makefile.am | 14 | ||||
-rw-r--r-- | tools/fuzzing/rost/convert.py | 452 | ||||
-rw-r--r-- | tools/fuzzing/rost/fast-rost.c | 283 | ||||
-rwxr-xr-x | tools/fuzzing/rost/gen-dict.sh | 81 | ||||
-rwxr-xr-x | tools/fuzzing/rost/minall.sh | 25 | ||||
-rwxr-xr-x | tools/fuzzing/rost/rerun.sh | 27 | ||||
-rw-r--r-- | tools/fuzzing/rost/test.rost | 12 | ||||
-rw-r--r-- | tools/maint/extra.supp | 777 | ||||
-rw-r--r-- | tools/yara2rost/Makefile.am | 36 | ||||
-rw-r--r-- | tools/yara2rost/decl.h | 37 | ||||
-rw-r--r-- | tools/yara2rost/demo.yar | 27 | ||||
-rw-r--r-- | tools/yara2rost/enums.h | 47 | ||||
-rw-r--r-- | tools/yara2rost/grammar.y | 1332 | ||||
-rw-r--r-- | tools/yara2rost/tokens.l | 292 | ||||
-rw-r--r-- | tools/yara2rost/yara2rost.c | 295 |
20 files changed, 3741 insertions, 8 deletions
diff --git a/tools/Makefile.am b/tools/Makefile.am index ed53403..8b8f38b 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,2 +1,2 @@ -SUBDIRS = d2c +SUBDIRS = d2c yara2rost diff --git a/tools/d2c/decl.h b/tools/d2c/decl.h index bcca0ff..1214d01 100644 --- a/tools/d2c/decl.h +++ b/tools/d2c/decl.h @@ -34,4 +34,4 @@ rented_coder *process_definition_file(const char *, pre_processor *); -#endif /* _TOOLS_D2C_BITS_DECL_H */ +#endif /* _TOOLS_D2C_DECL_H */ diff --git a/tools/d2c/format/decl.h b/tools/d2c/format/decl.h index e3e4c9c..b12f853 100644 --- a/tools/d2c/format/decl.h +++ b/tools/d2c/format/decl.h @@ -34,4 +34,4 @@ bool load_format_from_raw_line(operands_format *, const char *); -#endif /* _TOOLS_D2C_BITS_DECL_H */ +#endif /* _TOOLS_D2C_FORMAT_DECL_H */ diff --git a/tools/d2c/grammar.y b/tools/d2c/grammar.y index 4444299..14959cf 100644 --- a/tools/d2c/grammar.y +++ b/tools/d2c/grammar.y @@ -1,7 +1,6 @@ %{ -#include <getopt.h>////// #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -15,9 +14,6 @@ /* Affiche un message d'erreur suite à l'analyse en échec. */ static int yyerror(rented_coder *, char *, char *); -/* Affiche des indications sur l'utilisation du programme. */ -static void show_usage(const char *); - /* Prépare le traitement d'un contenu en l'affichant en mémoire. 
*/ static void *map_input_data(const char *, size_t *); diff --git a/tools/d2c/id/decl.h b/tools/d2c/id/decl.h index e494b9f..cd156bd 100644 --- a/tools/d2c/id/decl.h +++ b/tools/d2c/id/decl.h @@ -37,4 +37,4 @@ bool load_id_from_raw_line(instr_id *, const char *); -#endif /* _TOOLS_D2C_BITS_DECL_H */ +#endif /* _TOOLS_D2C_ID_DECL_H */ diff --git a/tools/fuzzing/rost/Makefile.am b/tools/fuzzing/rost/Makefile.am new file mode 100644 index 0000000..81e126f --- /dev/null +++ b/tools/fuzzing/rost/Makefile.am @@ -0,0 +1,14 @@ + +bin_PROGRAMS = fast-rost + + +AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src + +# EXTRA_rost_DEPENDENCIES = libchrysacore.la + +fast_rost_SOURCES = \ + fast-rost.c + +fast_rost_CFLAGS = $(TOOLKIT_CFLAGS) $(LIBXML_CFLAGS) + +fast_rost_LDFLAGS = $(LIBGOBJ_LIBS) -L$(top_srcdir)/src/.libs -lchrysacore diff --git a/tools/fuzzing/rost/convert.py b/tools/fuzzing/rost/convert.py new file mode 100644 index 0000000..b0ed90c --- /dev/null +++ b/tools/fuzzing/rost/convert.py @@ -0,0 +1,452 @@ + +import re +import sys + + +def define_PLAIN_TEXT(name, last): + """Create definition for the PLAIN_TEXT token.""" + + print(' "<%s>": [ ["\\\"", "<str_not_escaped>", "\\\""] ],' % name.lower()) + print(' "<str_not_escaped>": [ ["<char>"], ["<char>", "<char>"], ["<char>", "<char>", "<char>"] ],') + print(' "<char>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"], ["A"], ["B"], ["C"], ["D"], ["E"], ["F"] ]%s' % (',' if not(last) else '')) + + +def define_IDENTIFIER(name, last): + """Create definition for the RULE_IDENTIFIER token.""" + + print(' "<%s>": [ [ "<id>", "<id>", "<id>", "<idx>" ] ],' % name.lower()) + print(' "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],') + print(' "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else '')) + + +def define_SIGNED_INTEGER(name, last): + """Create definition for the SIGNED_INTEGER token.""" + + 
print(' "<%s>": [ ["-", "<unsigned_integer>"] ]%s' % (name.lower(), ',' if not(last) else '')) + + +def define_UNSIGNED_INTEGER(name, last): + """Create definition for the UNSIGNED_INTEGER token.""" + + print(' "<%s>": [ ["<fnumber>"], ["<number>", "<fnumber>"], ["<number>", "<fnumber>", "<fnumber>"] ],' % name.lower()) + print(' "<number>": [ ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ],') + print(' "<fnumber>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else '')) + + +def define_BYTES_ID(name, last): + """Create definition for the BYTES_ID token.""" + + print(' "<%s>": [ ["$"], ["$*"], [ "$", "<id>", "<idx>" ], [ "$", "<id>", "*" ] ],' % name.lower()) + print(' "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],') + print(' "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else '')) + + +def define_BYTES_ID_COUNTER(name, last): + """Create definition for the BYTES_ID_COUNTER token.""" + + print(' "<%s>": [ ["#"], ["#*"], [ "#", "<id>", "<idx>" ], [ "#", "<id>", "*" ] ],' % name.lower()) + print(' "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],') + print(' "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else '')) + + +def define_BYTES_ID_START(name, last): + """Create definition for the BYTES_ID_START token.""" + + print(' "<%s>": [ ["@"], ["@*"], [ "@", "<id>", "<idx>" ], [ "@", "<id>", "*" ] ],' % name.lower()) + print(' "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],') + print(' "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else '')) + + +def define_BYTES_ID_LENGTH(name, last): + """Create definition for the BYTES_ID_LENGTH token.""" + + print(' "<%s>": [ ["!"], ["!*"], [ "!", "<id>", 
"<idx>" ], [ "!", "<id>", "*" ] ],' % name.lower()) + print(' "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],') + print(' "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else '')) + + +def define_BYTES_ID_END(name, last): + """Create definition for the BYTES_ID_END token.""" + + print(' "<%s>": [ ["~"], ["~*"], [ "~", "<id>", "<idx>" ], [ "~", "<id>", "*" ] ],' % name.lower()) + print(' "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],') + print(' "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else '')) + + +def define_HEX_BYTES(name, last): + """Create definition for the HEX_BYTES token.""" + + print(' "<%s>": [ ["<hex>", "<hex>"] ],' % name.lower()) + print(' "<hex>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"], ["a"], ["b"], ["c"], ["d"], ["e"], ["f"] ]%s' % (',' if not(last) else '')) + + +def define_FULL_MASK(name, last): + """Create definition for the FULL_MASK token.""" + + print(' "<%s>": [ ["?", "?"] ]%s' % (name.lower(), ',' if not(last) else '')) + + +def define_SEMI_MASK(name, last): + """Create definition for the SEMI_MASK token.""" + + print(' "<%s>": [ ["?0"], ["1?"] ]%s' % (name.lower(), ',' if not(last) else '')) + + +def define_KB(name, last): + """Create definition for the KB token.""" + + print(' "<%s>": [ ["kb"], ["Kb"], ["kB"], ["KB"] ]%s' % (name.lower(), ',' if not(last) else '')) + + +def define_MB(name, last): + """Create definition for the MB token.""" + + print(' "<%s>": [ ["mb"], ["Mb"], ["mB"], ["MB"] ]%s' % (name.lower(), ',' if not(last) else '')) + + +def define_GB(name, last): + """Create definition for the GB token.""" + + print(' "<%s>": [ ["gb"], ["Gb"], ["gB"], ["GB"] ]%s' % (name.lower(), ',' if not(last) else '')) + + +__lexer_tokens = { + 'PLAIN_TEXT': define_PLAIN_TEXT, + 'ESCAPED_TEXT': 
define_PLAIN_TEXT, + 'RULE_IDENTIFIER': define_IDENTIFIER, + 'INFO_KEY': define_PLAIN_TEXT, + 'SIGNED_INTEGER': define_SIGNED_INTEGER, + 'UNSIGNED_INTEGER': define_UNSIGNED_INTEGER, + + 'BYTES_ID': define_BYTES_ID, + 'BYTES_FUZZY_ID': define_BYTES_ID, + 'BYTES_ID_COUNTER': define_BYTES_ID_COUNTER, + 'BYTES_FUZZY_ID_COUNTER': define_BYTES_ID_COUNTER, + 'BYTES_ID_START': define_BYTES_ID_START, + 'BYTES_FUZZY_ID_START': define_BYTES_ID_START, + 'BYTES_ID_LENGTH': define_BYTES_ID_LENGTH, + 'BYTES_FUZZY_ID_LENGTH': define_BYTES_ID_LENGTH, + 'BYTES_ID_END': define_BYTES_ID_END, + 'BYTES_FUZZY_ID_END': define_BYTES_ID_END, + + 'NAME': define_PLAIN_TEXT, + 'HEX_BYTES': define_HEX_BYTES, + 'FULL_MASK': define_FULL_MASK, + 'SEMI_MASK': define_SEMI_MASK, + 'REGEX_BYTES': define_PLAIN_TEXT, + 'REGEX_CLASSES': define_PLAIN_TEXT, + 'REGEX_RANGE': define_PLAIN_TEXT, + 'KB': define_KB, + 'MB': define_MB, + 'GB': define_GB, +} + + +def remove_grammar_comments(grammar): + """Delete all the C code comments.""" + + # Cf. 
https://stackoverflow.com/questions/241327/remove-c-and-c-comments-using-python/241506#241506 + + def replacer(match): + s = match.group(0) + if s.startswith('/'): + return ' ' # note: a space and not an empty string + else: + return s + + regex = re.compile( + r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', + re.DOTALL | re.MULTILINE + ) + + return regex.sub(replacer, grammar) + + +def remove_grammar_actions(grammar): + """Delete all the C code handling tokens.""" + + remaining = '' + + scope = 0 + string = False + + for ch in grammar: + + if ch == '{' and not(string): + scope += 1 + + elif ch == '}' and not(string): + assert(scope > 0) + scope -= 1 + + elif scope == 0: + remaining += ch + if ch == '"': + string = not(string) + + return remaining + + +def is_upper(text): + """State if a string is upper case.""" + + return text.upper() == text + + +def parse_rule_definition(grammar): + """Process the definition of one rule.""" + + result = [] + + regex = re.compile('(?<!")\|') + + definitions = regex.split(grammar) + + definitions = [ d.strip() for d in definitions ] + + for d in definitions: + + tokens = d.split() + + converted = [] + + for t in tokens: + + if not(t.startswith('"')) and is_upper(t): + + if not(t in __lexer_tokens.keys()): + print('Missing def:', t) + sys.exit() + + assert(t in __lexer_tokens.keys()) + + converted.append('"<%s>"' % t.lower()) + + else: + + if t.startswith('"'): + converted.append('%s' % t) + else: + converted.append('"<%s>"' % t) + + result.append(converted) + + return result + + +def parse_rules(grammar): + """Process all the rules contained in the grammar.""" + + tree = {} + + regex = re.compile('[\n\t ]*([^\n\t :]+)[\n\t ]*:([^;]+);') + + rules = regex.findall(grammar) + + first = True + + for r in rules: + + if first: + print(' "<START>": [ ["<%s>"] ],' % r[0]) + first = False + + definitions = parse_rule_definition(r[1]) + + tree[r[0]] = definitions + + return tree + + +def simplify_tree(tree): + """Remove nodes 
which only are links between two levels of nodes.""" + + """ + a = [ [b] ] + b = [ [c], [d] ] + + -> replace a by b + """ + + # Examples: cexpression, modifier_arg + + replaced = {} + + for k, v in tree.items(): + + if len(v) == 1 and len(v[0]) == 1: + + replaced['"<%s>"' % k] = v[0][0] + + new_tree = {} + + for k, v in tree.items(): + + name = '"<%s>"' % k + + if not(name in replaced.keys()): + + new_v = [] + + for vv in v: + + new_vv = vv + + for rk, rv in replaced.items(): + new_vv = list(map(lambda x: x.replace(rk, rv), new_vv)) + + new_v.append(new_vv) + + new_tree[k] = new_v + + return new_tree + + +def find_direct_parent_nodes(tree, name): + """Find all the rules containing a rule.""" + + rules = [] + + name = '"<%s>"' % name + + for k, v in tree.items(): + + for vv in v: + + if len(vv) == 1 and vv[0] == name and not(k in rules): + + rules.append(k) + + return rules + + +def remove_indirect_left_recursion(tree): + """Remove all nodes which implies indirect left recursion.""" + + """ + a = b + b = a + c + + -> a = a + c + """ + + # Examples: logical_expr, relational_expr, string_op, arithm_expr, intersection + + replaced = {} + + for k, v in tree.items(): + + parents = find_direct_parent_nodes(tree, k) + + if len(parents) != 1: + continue + + parent = parents[0] + + for vv in v: + + if vv[0] == '"<%s>"' % parent: + replaced[k] = v + break + + new_tree = {} + + for k, v in tree.items(): + + if not(k in replaced.keys()): + + new_v = [] + + for vv in v: + + if len(vv) != 1: + new_v.append(vv) + + else: + + modified = False + + for rk, rv in replaced.items(): + if '"<%s>"' % rk == vv[0]: + new_v += rv + modified = True + break + + if not(modified): + new_v.append(vv) + + new_tree[k] = new_v + + return new_tree + + +def output_rules(tree): + """Output a translated rule.""" + + for k, v in tree.items(): + + print(' "<%s>": [' % k, end='') + + first = True + + for d in v: + + if not(first): + print(',', end='') + + if len(d) == 0: + print(' []', end='') + + else: + 
+ print(' [', end='') + + sub_first = True + + for sub_d in d: + + if not(sub_first): + print(', ', end='') + + print('%s' % sub_d, end='') + + sub_first = False + + print(']', end='') + + first = False + + print(' ],') + + +if __name__ == '__main__': + """Script entrypoint.""" + + # Cf. https://github.com/AFLplusplus/Grammar-Mutator/blob/stable/doc/customizing-grammars.md + + with open(sys.argv[1], 'r') as fd: + grammar = fd.read() + + grammar = grammar.split('%%')[1] + + grammar = remove_grammar_comments(grammar) + + grammar = remove_grammar_actions(grammar) + + print('{') + + tree = parse_rules(grammar) + + tree = simplify_tree(tree) + + tree = remove_indirect_left_recursion(tree) + + output_rules(tree) + + count = len(__lexer_tokens.keys()) + + for name, cb in __lexer_tokens.items(): + cb(name, count == 1) + count -= 1 + + print('}') diff --git a/tools/fuzzing/rost/fast-rost.c b/tools/fuzzing/rost/fast-rost.c new file mode 100644 index 0000000..f161273 --- /dev/null +++ b/tools/fuzzing/rost/fast-rost.c @@ -0,0 +1,283 @@ + +/* Chrysalide - Outil d'analyse de fichiers binaires + * fast-rost.c - fichier d'entrée du centre de collecte, adapté pour un fuzzing optimal + * + * Copyright (C) 2023 Cyrille Bagard + * + * This file is part of Chrysalide. + * + * Chrysalide is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * Chrysalide is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include <assert.h> +#include <getopt.h> +#include <libgen.h> +#include <locale.h> +#include <malloc.h> +#include <stdlib.h> +#include <string.h> + + +#include <i18n.h> + + +#include <analysis/contents/file.h> +#include <analysis/scan/options.h> +#include <analysis/scan/scanner.h> +#include <analysis/scan/patterns/backends/bitap.h> +#include <analysis/scan/patterns/backends/acism.h> +#include <core/core.h> +#include <core/global.h> +#include <core/logs.h> +#include <core/paths.h> +#include <plugins/pglist.h> + + + +#ifndef __AFL_FUZZ_TESTCASE_LEN + +ssize_t fuzz_len; +unsigned char fuzz_buf[1024000]; + +# define __AFL_FUZZ_TESTCASE_LEN fuzz_len +# define __AFL_FUZZ_TESTCASE_BUF fuzz_buf +# define __AFL_FUZZ_INIT() void sync(void); +# define __AFL_LOOP(x) \ + ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? 1 : 0) +# define __AFL_INIT() sync() + +#endif + + +__AFL_FUZZ_INIT(); + + +/****************************************************************************** +* * +* Paramètres : argc = nombre d'arguments dans la ligne de commande. * +* argv = arguments de la ligne de commande. * +* * +* Description : Point d'entrée du programme. * +* * +* Retour : EXIT_SUCCESS si le prgm s'est déroulé sans encombres. 
* +* * +* Remarques : - * +* * +******************************************************************************/ + +int main(int argc, char **argv) +{ + int result; /* Bilan de l'exécution */ + bool check_only; /* Validation uniquement */ + LogMessageType verbosity; /* Niveau de filtre de message */ + GScanOptions *options; /* Options d'analyses */ + int index; /* Indice d'argument */ + int ret; /* Bilan d'un appel */ + char *edir; /* Répertoire de base effectif */ + char *target; /* Cible communiquée */ + unsigned char *afl_buf; /* Tampon de travail d'AFL */ + int afl_len; /* Taille de ce tampon */ + GContentScanner *scanner; /* Encadrement d'une recherche */ + GBinContent *content; /* Contenu à analyser */ + GScanContext *context; /* Contexte des trouvailles */ + sized_string_t padding; /* Bourrage pour le JSON */ + bool full; /* Détailler l'affichage ? */ + + static struct option long_options[] = { + { "algorithm", required_argument, NULL, 'A' }, + { "check-only", no_argument, NULL, 'C' }, + { "print-json", no_argument, NULL, 'j' }, + { "print-strings", no_argument, NULL, 's' }, + { "print-stats", no_argument, NULL, 'S' }, + { "print-tags", no_argument, NULL, 'g' }, + { "tag", required_argument, NULL, 't' }, + { "verbosity", required_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + result = EXIT_FAILURE; + + /* Décodage des options */ + + check_only = false; + verbosity = LMT_COUNT; + + options = g_scan_options_new(); + + g_scan_options_set_backend_for_data(options, G_TYPE_ACISM_BACKEND); + + while (true) + { + ret = getopt_long(argc, argv, "A:CjsSgt:V:", long_options, &index); + if (ret == -1) break; + + switch (ret) + { + case 'A': + if (strcmp(optarg, "bitmap") == 0) + g_scan_options_set_backend_for_data(options, G_TYPE_BITAP_BACKEND); + else if (strcmp(optarg, "acism") == 0) + g_scan_options_set_backend_for_data(options, G_TYPE_ACISM_BACKEND); + else + g_scan_options_set_backend_for_data(options, G_TYPE_INVALID); + break; + + case 'C': + check_only = 
true; + g_scan_options_set_check_only(options, true); + break; + + case 'j': + g_scan_options_set_print_json(options, true); + break; + + case 's': + g_scan_options_set_print_strings(options, true); + break; + + case 'S': + g_scan_options_set_print_stats(options, true); + break; + + case 'g': + g_scan_options_set_print_tags(options, true); + break; + + case 't': + g_scan_options_select_tag(options, optarg); + break; + + case 'V': + verbosity = strtoul(optarg, NULL, 10); + break; + + } + + } + + if ((check_only && (optind + 0) != argc && (optind + 1) != argc) + || (!check_only && (optind + 1) != argc && (optind + 2) != argc)) + { + goto done; + } + + /* Actions de base */ + + if (g_scan_options_get_backend_for_data(options) == G_TYPE_INVALID) + { + goto done; + } + + /* Lancement des choses sérieuses */ + + setlocale(LC_ALL, ""); + edir = get_effective_directory(LOCALE_DIR); + bindtextdomain(PACKAGE, edir); + free(edir); + textdomain(PACKAGE); + + /* Initialisation de GTK */ + g_set_prgname("ROST"); + //gtk_init(&argc, &argv); + + /* Initialisation du programme */ + + set_batch_mode(); + + set_log_verbosity(verbosity); + + if (!load_all_core_components(true)) + goto done; + + init_all_plugins(true); + + /* Traitement des recherches */ + + if ((optind + 1) == argc) + target = argv[optind]; + else + goto done; + + __AFL_INIT(); + + afl_buf = __AFL_FUZZ_TESTCASE_BUF; + + while (__AFL_LOOP(10000)) + { + afl_len = __AFL_FUZZ_TESTCASE_LEN; + + scanner = g_content_scanner_new_from_text((char *)afl_buf, afl_len); + +#if 0 + do + { + FILE *stream; + + stream = fopen("/dev/shm/ctrl.log", "a"); + fprintf(stream, "running %d bytes => %p\n", afl_len, scanner); + fclose(stream); + + } while (0); +#endif + + if (scanner != NULL) + result = EXIT_SUCCESS; + + if (scanner != NULL && !check_only) + { + content = g_file_content_new(target); + if (content == NULL) goto bad_file_content; + + context = g_content_scanner_analyze(scanner, options, content); + + if 
(g_scan_options_get_print_json(options)) + { + padding.data = " "; + padding.len = 3; + + g_content_scanner_output_to_json(scanner, context, &padding, 0, STDOUT_FILENO); + + } + else + { + full = g_scan_options_get_print_strings(options); + + g_content_scanner_output_to_text(scanner, context, full, STDOUT_FILENO); + + } + + g_object_unref(G_OBJECT(context)); + g_object_unref(G_OBJECT(content)); + + bad_file_content: + + g_object_unref(G_OBJECT(scanner)); + + } + + } + + g_object_unref(G_OBJECT(options)); + + /* Sortie */ + + unload_all_core_components(false); + + done: + + return result; + +} diff --git a/tools/fuzzing/rost/gen-dict.sh b/tools/fuzzing/rost/gen-dict.sh new file mode 100755 index 0000000..dfebc0a --- /dev/null +++ b/tools/fuzzing/rost/gen-dict.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +TOP_DIR="$SCRIPT_DIR/../../.." +OUTPUT="$SCRIPT_DIR/rost.dict" + + +echo > "$OUTPUT" + + +echo "# Syntax core keywords" >> "$OUTPUT" + +cat "$TOP_DIR/src/analysis/scan/grammar.y" | \ + grep '%token.*".*' | grep -o -E '"[^"]+"' | sort >> "$OUTPUT" + + +echo >> "$OUTPUT" +echo "# Modifiers" >> "$OUTPUT" + +"$TOP_DIR/src/rost" --dump-modifiers | sort | sed -e 's/^/"/' -e 's/$/"/' >> "$OUTPUT" + + +echo >> "$OUTPUT" +echo "# Namespace" >> "$OUTPUT" + +"$TOP_DIR/src/rost" --dump-namespaces | sort | sed -e 's/^/"/' -e 's/$/"/' >> "$OUTPUT" + + +echo >> "$OUTPUT" +echo "# Identifiers" >> "$OUTPUT" + +for t in "$" "#" "@" "!" 
"~" ; +do + echo "\"${t}a0\"" >> "$OUTPUT" + echo "\"${t}a1\"" >> "$OUTPUT" + echo "\"${t}b\"" >> "$OUTPUT" + echo "\"${t}c\"" >> "$OUTPUT" + echo "\"${t}a*\"" >> "$OUTPUT" + echo "\"${t}*\"" >> "$OUTPUT" + echo "\"${t}\"" >> "$OUTPUT" + +done + + +echo >> "$OUTPUT" +echo "# Numbers" >> "$OUTPUT" + +for i in $( seq 0 32 ); +do + echo -$(( 2 ** i - 1 )) ; + echo -$(( 2 ** i )) ; + echo -$(( 2 ** i + 1 )) ; + + echo $(( 2 ** i - 1 )) ; + echo $(( 2 ** i )) ; + echo $(( 2 ** i + 1 )) ; + +done | sort | uniq | sort -n >> "$OUTPUT" + + +echo >> "$OUTPUT" +echo "# Misc" >> "$OUTPUT" + +echo "\"kb\"" >> "$OUTPUT" +echo "\"mb\"" >> "$OUTPUT" +echo "\"gb\"" >> "$OUTPUT" + +echo "\"a0\"" >> "$OUTPUT" +echo "\"a1\"" >> "$OUTPUT" +echo "\"b\"" >> "$OUTPUT" +echo "\"c\"" >> "$OUTPUT" + +echo "\"\\\"abcdef\\\"\"" >> "$OUTPUT" +echo "\"\\\"azerty\\\"\"" >> "$OUTPUT" +echo "\"\\\"qwertyqwerty\\\"\"" >> "$OUTPUT" +echo "\"??\"" >> "$OUTPUT" +echo "\"0?\"" >> "$OUTPUT" +echo "\"?a\"" >> "$OUTPUT" + +echo >> "$OUTPUT" diff --git a/tools/fuzzing/rost/minall.sh b/tools/fuzzing/rost/minall.sh new file mode 100755 index 0000000..e32777d --- /dev/null +++ b/tools/fuzzing/rost/minall.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +if [ -z "$FUZ_OUT" ]; then + echo "$0 needs a \$FUZ_OUT environment variable!" 
+ exit 1 +fi + + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +MIN_DIR="$SCRIPT_DIR/min" + + +mkdir -p "$MIN_DIR" + +find "$FUZ_OUT/default/crashes/" -name 'id*' | while read f; +do + + id=$( echo $f | cut -d: -f2 | cut -d, -f1 ) + + h=$( sha256sum $f | cut -d " " -f1 ) + + afl-tmin -i "$f" -o "$MIN_DIR/$id-$h.rost" -- /dev/shm/fuzzing-sys/bin/fast-rost /bin/ls + +done diff --git a/tools/fuzzing/rost/rerun.sh b/tools/fuzzing/rost/rerun.sh new file mode 100755 index 0000000..3e75189 --- /dev/null +++ b/tools/fuzzing/rost/rerun.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +TOP_DIR="$SCRIPT_DIR/../../.." +MIN_DIR="$SCRIPT_DIR/min" + + +find "$MIN_DIR" -type f -name '*rost' | while read f; +do + echo "=========== $f" + + "$TOP_DIR/src/rost" $f /bin/ls + + status=$? + + if [ $status -le 2 ]; then + rm $f + fi + + sleep 1s + +done + + + + diff --git a/tools/fuzzing/rost/test.rost b/tools/fuzzing/rost/test.rost new file mode 100644 index 0000000..02daabe --- /dev/null +++ b/tools/fuzzing/rost/test.rost @@ -0,0 +1,12 @@ + +rule basic { + + bytes: + $a = "ABC" base64 + $b = "12" + $c = { 00 01 f0 ff ff [0-9] 23 } + + condition: + (#a == 123 or $b or $c) and console.log(maxcommon(modpath($a, $b))) + +} diff --git a/tools/maint/extra.supp b/tools/maint/extra.supp new file mode 100644 index 0000000..58cbd32 --- /dev/null +++ b/tools/maint/extra.supp @@ -0,0 +1,777 @@ + +# ==2020629== 64 bytes in 1 blocks are still reachable in loss record 516 of 1,020 +# ==2020629== at 0x48406C4: malloc (vg_replace_malloc.c:380) +# ==2020629== by 0x49044CD: g_realloc (gmem.c:201) +# ==2020629== by 0x48EBCDC: g_hash_table_realloc_key_or_value_array (ghash.c:382) +# ==2020629== by 0x48EBCDC: g_hash_table_setup_storage (ghash.c:591) +# ==2020629== by 0x48EC6C8: g_hash_table_new_full (ghash.c:1085) +# ==2020629== by 0x48EC6F0: g_hash_table_new (ghash.c:1036) +# ==2020629== by 0x48E84A4: 
g_error_init (gerror.c:525) +# ==2020629== by 0x48FA392: glib_init (glib-init.c:342) +# ==2020629== by 0x48FA3A0: glib_init_ctor (glib-init.c:455) +# ==2020629== by 0x4004ABD: call_init (dl-init.c:70) +# ==2020629== by 0x4004ABD: call_init (dl-init.c:26) +# ==2020629== by 0x4004BA3: _dl_init (dl-init.c:117) +# ==2020629== by 0x401AA5F: ??? (in /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) +# ==2020629== by 0x2: ??? +# ==2020629== by 0x1FFF0002B2: ??? +# ==2020629== by 0x1FFF0002C3: ??? +# ==2020629== by 0x1FFF0002CF: ??? +# ==2020629== + +{ + <insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:g_realloc + fun:g_hash_table_realloc_key_or_value_array + fun:g_hash_table_setup_storage + fun:g_hash_table_new_full + fun:g_hash_table_new + fun:g_error_init + fun:glib_init + fun:glib_init_ctor + fun:call_init + fun:call_init + fun:_dl_init + ... +} + + +# ==1996673== 32 bytes in 1 blocks are still reachable in loss record 415 of 1,026 +# ==1996673== at 0x48455EF: calloc (vg_replace_malloc.c:1328) +# ==1996673== by 0x490447A: g_malloc0 (gmem.c:163) +# ==1996673== by 0x490466E: g_malloc0_n (gmem.c:404) +# ==1996673== by 0x48EBCF1: g_hash_table_setup_storage (ghash.c:593) +# ==1996673== by 0x48EC6C8: g_hash_table_new_full (ghash.c:1085) +# ==1996673== by 0x48EC6F0: g_hash_table_new (ghash.c:1036) +# ==1996673== by 0x48E84A4: g_error_init (gerror.c:525) +# ==1996673== by 0x48FA392: glib_init (glib-init.c:342) +# ==1996673== by 0x48FA3A0: glib_init_ctor (glib-init.c:455) +# ==1996673== by 0x4004ABD: call_init (dl-init.c:70) +# ==1996673== by 0x4004ABD: call_init (dl-init.c:26) +# ==1996673== by 0x4004BA3: _dl_init (dl-init.c:117) +# ==1996673== by 0x401AA5F: ??? (in /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) +# ==1996673== by 0x2: ??? +# ==1996673== by 0x1FFF0002B2: ??? +# ==1996673== by 0x1FFF0002C3: ??? +# ==1996673== by 0x1FFF0002CF: ??? 
+# ==1996673== + +{ + <insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:g_malloc0 + fun:g_malloc0_n + fun:g_hash_table_setup_storage + fun:g_hash_table_new_full + fun:g_hash_table_new + fun:g_error_init + fun:glib_init + fun:glib_init_ctor + fun:call_init + fun:call_init + fun:_dl_init + ... +} + + +# ==1995462== 88 bytes in 1 blocks are still reachable in loss record 729 of 1,026 +# ==1995462== at 0x48815F4: type_node_any_new_W (gtype.c:457) +# ==1995462== by 0x4881770: type_node_fundamental_new_W (gtype.c:564) +# ==1995462== by 0x4883A1B: g_type_register_fundamental (gtype.c:2748) +# ==1995462== by 0x488B1E1: _g_value_types_init (gvaluetypes.c:529) +# ==1995462== by 0x4886E9B: gobject_init (gtype.c:4521) +# ==1995462== by 0x4886F31: gobject_init_ctor (gtype.c:4636) +# ==1995462== by 0x4004ABD: call_init (dl-init.c:70) +# ==1995462== by 0x4004ABD: call_init (dl-init.c:26) +# ==1995462== by 0x4004BA3: _dl_init (dl-init.c:117) +# ==1995462== by 0x401AA5F: ??? (in /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) +# ==1995462== by 0x2: ??? +# ==1995462== by 0x1FFF0002B2: ??? +# ==1995462== by 0x1FFF0002C3: ??? +# ==1995462== by 0x1FFF0002CF: ??? +# ==1995462== + +{ + Code in type_data_unref_U is disabled + Memcheck:Leak + match-leak-kinds: reachable + fun:type_node_any_new_W + fun:type_node_fundamental_new_W + fun:g_type_register_fundamental + fun:*_init + fun:gobject_init + fun:gobject_init_ctor + fun:call_init + fun:call_init + fun:_dl_init + ... 
+} + + +# ==1995681== 96 bytes in 1 blocks are still reachable in loss record 745 of 1,026 +# ==1995681== at 0x48407B4: malloc (vg_replace_malloc.c:381) +# ==1995681== by 0x4904422: g_malloc (gmem.c:130) +# ==1995681== by 0x491A9B1: g_slice_alloc (gslice.c:1074) +# ==1995681== by 0x48EC68A: g_hash_table_new_full (ghash.c:1073) +# ==1995681== by 0x48EC6F0: g_hash_table_new (ghash.c:1036) +# ==1995681== by 0x48E84A4: g_error_init (gerror.c:525) +# ==1995681== by 0x48FA392: glib_init (glib-init.c:342) +# ==1995681== by 0x48FA3A0: glib_init_ctor (glib-init.c:455) +# ==1995681== by 0x4004ABD: call_init (dl-init.c:70) +# ==1995681== by 0x4004ABD: call_init (dl-init.c:26) +# ==1995681== by 0x4004BA3: _dl_init (dl-init.c:117) +# ==1995681== by 0x401AA5F: ??? (in /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) +# ==1995681== by 0x2: ??? +# ==1995681== by 0x1FFF0002B2: ??? +# ==1995681== by 0x1FFF0002C3: ??? +# ==1995681== by 0x1FFF0002CF: ??? +# ==1995681== + +{ + Code in type_data_unref_U is disabled + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:g_malloc + fun:g_slice_alloc + fun:g_hash_table_new_full + fun:g_hash_table_new + fun:g_error_init + fun:glib_init + fun:glib_init_ctor + fun:call_init + fun:call_init + fun:_dl_init + ... +} + + +# ==1995732== 88 bytes in 1 blocks are still reachable in loss record 721 of 1,026 +# ==1995732== at 0x48815F4: type_node_any_new_W (gtype.c:457) +# ==1995732== by 0x4881770: type_node_fundamental_new_W (gtype.c:564) +# ==1995732== by 0x4886E35: gobject_init (gtype.c:4504) +# ==1995732== by 0x4886F31: gobject_init_ctor (gtype.c:4636) +# ==1995732== by 0x4004ABD: call_init (dl-init.c:70) +# ==1995732== by 0x4004ABD: call_init (dl-init.c:26) +# ==1995732== by 0x4004BA3: _dl_init (dl-init.c:117) +# ==1995732== by 0x401AA5F: ??? (in /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) +# ==1995732== by 0x2: ??? +# ==1995732== by 0x1FFF0002B2: ??? +# ==1995732== by 0x1FFF0002C3: ??? +# ==1995732== by 0x1FFF0002CF: ??? 
+# ==1995732== + +{ + Code in type_data_unref_U is disabled + Memcheck:Leak + match-leak-kinds: reachable + fun:type_node_any_new_W + fun:type_node_fundamental_new_W + fun:gobject_init + fun:gobject_init_ctor + fun:call_init + fun:call_init + fun:_dl_init + ... +} + + +###################################################### + + +# ==1994638== 88 bytes in 1 blocks are still reachable in loss record 740 of 1,026 +# ==1994638== at 0x48815F4: type_node_any_new_W (gtype.c:457) +# ==1994638== by 0x4881770: type_node_fundamental_new_W (gtype.c:564) +# ==1994638== by 0x4883A1B: g_type_register_fundamental (gtype.c:2748) +# ==1994638== by 0x486C204: _g_object_type_init (gobject.c:456) +# ==1994638== by 0x4886EAF: gobject_init (gtype.c:4537) +# ==1994638== by 0x4886F31: gobject_init_ctor (gtype.c:4636) +# ==1994638== by 0x4004ABD: call_init (dl-init.c:70) +# ==1994638== by 0x4004ABD: call_init (dl-init.c:26) +# ==1994638== by 0x4004BA3: _dl_init (dl-init.c:117) +# ==1994638== by 0x401AA5F: ??? (in /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) +# ==1994638== by 0x2: ??? +# ==1994638== by 0x1FFF0002B2: ??? +# ==1994638== by 0x1FFF0002C3: ??? +# ==1994638== by 0x1FFF0002CF: ??? +# ==1994638== + +# g_type_free_instance / g_type_class_unref / type_data_unref_U (cf. glib2.0-2.74.6/gobject/gtype.c) + +{ + Code in type_data_unref_U is disabled + Memcheck:Leak + match-leak-kinds: reachable + fun:type_node_any_new_W + fun:type_node_fundamental_new_W + fun:g_type_register_fundamental + fun:_g_object_type_init + fun:gobject_init + fun:gobject_init_ctor + fun:call_init + fun:call_init + fun:_dl_init + ... 
+} + + +# ==1994765== 8 bytes in 1 blocks are still reachable in loss record 14 of 1,026 +# ==1994765== at 0x485FAD8: freelist_alloc (gatomicarray.c:85) +# ==1994765== by 0x485FB28: _g_atomic_array_copy (gatomicarray.c:147) +# ==1994765== by 0x48810CE: iface_node_set_offset_L (gtype.c:1371) +# ==1994765== by 0x4881318: type_node_add_iface_entry_W (gtype.c:1456) +# ==1994765== by 0x4882417: type_add_interface_Wm (gtype.c:1501) +# ==1994765== by 0x4884B69: g_type_add_interface_static (gtype.c:2939) +# ==1994765== by 0x4A73709: g_arch_instruction_get_type_once (instruction.c:122) +# ==1994765== by 0x4A73662: g_arch_instruction_get_type (instruction.c:122) +# ==1994765== by 0x4A7DBED: g_raw_instruction_get_type_once (raw.c:105) +# ==1994765== by 0x4A7DBAA: g_raw_instruction_get_type (raw.c:105) +# ==1994765== by 0x4A8FED8: register_arch_gtypes (processors.c:80) +# ==1994765== by 0x4A8E4C4: load_all_core_components (core.c:123) +# ==1994765== by 0x10AAD1: main (rost.c:369) +# ==1994765== + +# g_type_free_instance / g_type_class_unref / type_data_unref_U (cf. glib2.0-2.74.6/gobject/gtype.c) + +{ + Code in type_data_unref_U is disabled + Memcheck:Leak + match-leak-kinds: reachable + fun:freelist_alloc + fun:_g_atomic_array_copy + fun:iface_node_set_offset_L + fun:type_node_add_iface_entry_W + fun:type_add_interface_Wm + fun:g_type_add_interface_static + ... 
+} + + +# ==1995318== 184 bytes in 1 blocks are still reachable in loss record 991 of 1,026 +# ==1995318== at 0x484582F: realloc (vg_replace_malloc.c:1437) +# ==1995318== by 0x49044CD: g_realloc (gmem.c:201) +# ==1995318== by 0x49046E4: g_realloc_n (gmem.c:433) +# ==1995318== by 0x488150E: type_node_any_new_W (gtype.c:516) +# ==1995318== by 0x488184A: type_node_new_W (gtype.c:582) +# ==1995318== by 0x48857D3: g_type_register_dynamic (gtype.c:2900) +# ==1995318== by 0x4AA8962: g_dynamic_types_register_type (dt.c:383) +# ==1995318== by 0x4AA8AC1: build_dynamic_type (dt.c:483) +# ==1995318== by 0x4AAABDA: g_plugin_module_new (plugin.c:506) +# ==1995318== by 0x4AA90C0: browse_directory_for_plugins (pglist.c:272) +# ==1995318== by 0x4AA90B2: browse_directory_for_plugins (pglist.c:268) +# ==1995318== by 0x4AA90B2: browse_directory_for_plugins (pglist.c:268) +# ==1995318== by 0x4AA8C33: init_all_plugins (pglist.c:86) +# ==1995318== by 0x10AAE6: main (rost.c:372) +# ==1995318== + +{ + Code in type_data_unref_U is disabled + Memcheck:Leak + match-leak-kinds: reachable + fun:realloc + fun:g_realloc + fun:g_realloc_n + fun:type_node_any_new_W + fun:type_node_new_W + fun:g_type_register_dynamic + ... 
+} + + +# ==1995789== 2,048 bytes in 1 blocks are still reachable in loss record 1,020 of 1,026 +# ==1995789== at 0x484582F: realloc (vg_replace_malloc.c:1437) +# ==1995789== by 0x49044CD: g_realloc (gmem.c:201) +# ==1995789== by 0x48EC1A6: g_hash_table_realloc_key_or_value_array (ghash.c:382) +# ==1995789== by 0x48EC1A6: realloc_arrays (ghash.c:724) +# ==1995789== by 0x48EC283: g_hash_table_resize (ghash.c:877) +# ==1995789== by 0x48EC308: g_hash_table_maybe_resize (ghash.c:917) +# ==1995789== by 0x48EC460: g_hash_table_insert_node (ghash.c:1370) +# ==1995789== by 0x48EC4B3: g_hash_table_insert_internal (ghash.c:1629) +# ==1995789== by 0x48ECD1D: g_hash_table_insert (ghash.c:1658) +# ==1995789== by 0x4881561: type_node_any_new_W (gtype.c:528) +# ==1995789== by 0x488184A: type_node_new_W (gtype.c:582) +# ==1995789== by 0x48857D3: g_type_register_dynamic (gtype.c:2900) +# ==1995789== by 0x4AA8962: g_dynamic_types_register_type (dt.c:383) +# ==1995789== by 0x4AA8AC1: build_dynamic_type (dt.c:483) +# ==1995789== by 0x4AAABDA: g_plugin_module_new (plugin.c:506) +# ==1995789== by 0x4AA90C0: browse_directory_for_plugins (pglist.c:272) +# ==1995789== by 0x4AA90B2: browse_directory_for_plugins (pglist.c:268) +# ==1995789== by 0x4AA90B2: browse_directory_for_plugins (pglist.c:268) +# ==1995789== by 0x4AA8C33: init_all_plugins (pglist.c:86) +# ==1995789== by 0x10AAE6: main (rost.c:372) +# ==1995789== + +{ + Code in type_data_unref_U is disabled + Memcheck:Leak + match-leak-kinds: reachable + fun:realloc + fun:g_realloc + fun:g_hash_table_realloc_key_or_value_array + fun:realloc_arrays + fun:g_hash_table_resize + fun:g_hash_table_maybe_resize + fun:g_hash_table_insert_node + fun:g_hash_table_insert_internal + fun:g_hash_table_insert + fun:type_node_any_new_W + fun:type_node_new_W + fun:g_type_register_dynamic + ... 
+} + + +# ==1995842== 56 bytes in 1 blocks are still reachable in loss record 518 of 1,026 +# ==1995842== at 0x485FAD8: freelist_alloc (gatomicarray.c:85) +# ==1995842== by 0x485FB57: _g_atomic_array_copy (gatomicarray.c:141) +# ==1995842== by 0x48816AB: type_node_any_new_W (gtype.c:500) +# ==1995842== by 0x488184A: type_node_new_W (gtype.c:582) +# ==1995842== by 0x4883C8A: g_type_register_static (gtype.c:2853) +# ==1995842== by 0x4883D4C: g_type_register_static_simple (gtype.c:2806) +# ==1995842== by 0x21266631: ??? +# ==1995842== by 0x212665C1: ??? +# ==1995842== by 0x20F18DB4: ??? +# ==1995842== by 0x20F18E0D: ??? +# ==1995842== by 0x20F1335A: ??? +# ==1995842== by 0x4AABB63: g_plugin_module_load (plugin.c:1115) +# ==1995842== by 0x4AA957C: load_remaning_plugins (pglist.c:469) +# ==1995842== by 0x4AA8CD2: init_all_plugins (pglist.c:103) +# ==1995842== by 0x10AAE6: main (rost.c:372) +# ==1995842== + +{ + <insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: reachable + fun:freelist_alloc + fun:_g_atomic_array_copy + fun:type_node_any_new_W + fun:type_node_new_W + fun:g_type_register_static + fun:g_type_register_static_simple + ... 
+} + + +# ==1995893== 8 bytes in 1 blocks are still reachable in loss record 36 of 1,026 +# ==1995893== at 0x48406C4: malloc (vg_replace_malloc.c:380) +# ==1995893== by 0x49044CD: g_realloc (gmem.c:201) +# ==1995893== by 0x49046E4: g_realloc_n (gmem.c:433) +# ==1995893== by 0x487FE9F: type_iface_add_prerequisite_W (gtype.c:1542) +# ==1995893== by 0x4884F28: g_type_interface_add_prerequisite (gtype.c:1632) +# ==1995893== by 0x4A9E5C5: g_comparable_item_get_type (comparison.c:40) +# ==1995893== by 0x4A42873: g_scan_expression_get_type_once (expr.c:73) +# ==1995893== by 0x4A427E2: g_scan_expression_get_type (expr.c:73) +# ==1995893== by 0x4A53253: g_scan_pattern_handler_get_type_once (handler.c:76) +# ==1995893== by 0x4A53210: g_scan_pattern_handler_get_type (handler.c:76) +# ==1995893== by 0x4A5347B: g_scan_pattern_handler_new (handler.c:197) +# ==1995893== by 0x4A467F1: rost_parse (grammar.y:1606) +# ==1995893== by 0x4A47785: process_rules_definitions (grammar.y:1895) +# ==1995893== by 0x4A4AE47: g_content_scanner_create_from_file (scanner.c:275) +# ==1995893== by 0x4A4AD6E: g_content_scanner_new_from_file (scanner.c:234) +# ==1995893== by 0x10AC85: main (rost.c:421) +# ==1995893== + +{ + <insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:g_realloc + fun:g_realloc_n + fun:type_iface_add_prerequisite_W + fun:g_type_interface_add_prerequisite + fun:*_get_type + fun:*_get_type_once + ... 
+} + + + + +###################################################### + + +# ==1995203== 1,024 bytes in 1 blocks are still reachable in loss record 1,015 of 1,026 +# ==1995203== at 0x484582F: realloc (vg_replace_malloc.c:1437) +# ==1995203== by 0x49044CD: g_realloc (gmem.c:201) +# ==1995203== by 0x49046E4: g_realloc_n (gmem.c:433) +# ==1995203== by 0x48EC186: realloc_arrays (ghash.c:723) +# ==1995203== by 0x48EC283: g_hash_table_resize (ghash.c:877) +# ==1995203== by 0x48EC308: g_hash_table_maybe_resize (ghash.c:917) +# ==1995203== by 0x48EC460: g_hash_table_insert_node (ghash.c:1370) +# ==1995203== by 0x48EC4B3: g_hash_table_insert_internal (ghash.c:1629) +# ==1995203== by 0x48ECD1D: g_hash_table_insert (ghash.c:1658) +# ==1995203== by 0x4881561: type_node_any_new_W (gtype.c:528) +# ==1995203== by 0x488184A: type_node_new_W (gtype.c:582) +# ==1995203== by 0x48857D3: g_type_register_dynamic (gtype.c:2900) +# ==1995203== by 0x4AA8962: g_dynamic_types_register_type (dt.c:383) +# ==1995203== by 0x4AA8AC1: build_dynamic_type (dt.c:483) +# ==1995203== by 0x4AAABDA: g_plugin_module_new (plugin.c:506) +# ==1995203== by 0x4AA90C0: browse_directory_for_plugins (pglist.c:272) +# ==1995203== by 0x4AA90B2: browse_directory_for_plugins (pglist.c:268) +# ==1995203== by 0x4AA90B2: browse_directory_for_plugins (pglist.c:268) +# ==1995203== by 0x4AA8C33: init_all_plugins (pglist.c:86) +# ==1995203== by 0x10AAE6: main (rost.c:372) +# ==1995203== + +{ + Lack of g_hash_table_unref() call with static_type_nodes_ht (cf. glib2.0-2.74.6/gobject/gtype.c) + Memcheck:Leak + match-leak-kinds: reachable + fun:realloc + fun:g_realloc + fun:g_realloc_n + fun:realloc_arrays + fun:g_hash_table_resize + fun:g_hash_table_maybe_resize + fun:g_hash_table_insert_node + fun:g_hash_table_insert_internal + fun:g_hash_table_insert + fun:type_node_any_new_W + fun:type_node_new_W + fun:g_type_register_dynamic + ... 
+} + + +###################################################### + + +# ==1996736== 40 bytes in 1 blocks are still reachable in loss record 485 of 1,026 +# ==1996736== at 0x48407B4: malloc (vg_replace_malloc.c:381) +# ==1996736== by 0x494C2C3: g_rec_mutex_impl_new (gthread-posix.c:286) +# ==1996736== by 0x494C36E: g_rec_mutex_get_impl (gthread-posix.c:312) +# ==1996736== by 0x494C6CB: g_rec_mutex_lock (gthread-posix.c:397) +# ==1996736== by 0x4D8CCBD: g_module_open_full (gmodule.c:515) +# ==1996736== by 0x4D8D1A3: g_module_open (gmodule.c:698) +# ==1996736== by 0x4AA9EB3: g_plugin_module_new (plugin.c:240) +# ==1996736== by 0x4AA90C0: browse_directory_for_plugins (pglist.c:272) +# ==1996736== by 0x4AA90B2: browse_directory_for_plugins (pglist.c:268) +# ==1996736== by 0x4AA90B2: browse_directory_for_plugins (pglist.c:268) +# ==1996736== by 0x4AA8C33: init_all_plugins (pglist.c:86) +# ==1996736== by 0x10AAE6: main (rost.c:372) +# ==1996736== + +{ + Lack of g_rec_mutex_clear() call with g_module_global_lock (cf. glib2.0-2.74.6/gmodule/gmodule.c) + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:g_rec_mutex_impl_new + fun:g_rec_mutex_get_impl + fun:g_rec_mutex_lock + fun:g_module_open_full + fun:g_module_open + ... 
+} + + + +###################################################### + + +# ==2015061== 64 bytes in 1 blocks are still reachable in loss record 532 of 1,026 +# ==2015061== at 0x484582F: realloc (vg_replace_malloc.c:1437) +# ==2015061== by 0x49044CD: g_realloc (gmem.c:201) +# ==2015061== by 0x48EC1A6: g_hash_table_realloc_key_or_value_array (ghash.c:382) +# ==2015061== by 0x48EC1A6: realloc_arrays (ghash.c:724) +# ==2015061== by 0x48EC2D7: g_hash_table_resize (ghash.c:895) +# ==2015061== by 0x48EC308: g_hash_table_maybe_resize (ghash.c:917) +# ==2015061== by 0x48EC530: g_hash_table_remove_internal (ghash.c:1775) +# ==2015061== by 0x48ECDBA: g_hash_table_remove (ghash.c:1802) +# ==2015061== by 0x487BF82: g_signal_handlers_destroy (gsignal.c:2841) +# ==2015061== by 0x486A853: g_object_real_dispose (gobject.c:1362) +# ==2015061== by 0x4A9E9D2: g_config_param_dispose (configuration.c:199) +# ==2015061== by 0x486BD35: g_object_unref (gobject.c:3867) +# ==2015061== by 0x48FADD2: g_list_foreach (glist.c:1092) +# ==2015061== by 0x48FADED: g_list_free_full (glist.c:246) +# ==2015061== by 0x4AA07A6: g_generic_config_dispose (configuration.c:1233) +# ==2015061== by 0x486BD35: g_object_unref (gobject.c:3867) +# ==2015061== by 0x4A8FC05: unload_main_config_parameters (params.c:146) +# ==2015061== by 0x4A8E552: unload_all_core_components (core.c:174) +# ==2015061== by 0x10ADFC: main (rost.c:470) +# ==2015061== + +{ + Need to fix all GObject memory leaks? + Memcheck:Leak + match-leak-kinds: reachable + fun:realloc + fun:g_realloc + fun:g_hash_table_realloc_key_or_value_array + fun:realloc_arrays + fun:g_hash_table_resize + fun:g_hash_table_maybe_resize + fun:g_hash_table_remove_internal + fun:g_hash_table_remove + fun:g_signal_handlers_destroy + fun:g_object_real_dispose + ... 
+} + + +# ==2017718== 32 bytes in 1 blocks are still reachable in loss record 474 of 1,026 +# ==2017718== at 0x484582F: realloc (vg_replace_malloc.c:1437) +# ==2017718== by 0x49044CD: g_realloc (gmem.c:201) +# ==2017718== by 0x49046E4: g_realloc_n (gmem.c:433) +# ==2017718== by 0x48EC186: realloc_arrays (ghash.c:723) +# ==2017718== by 0x48EC2D7: g_hash_table_resize (ghash.c:895) +# ==2017718== by 0x48EC308: g_hash_table_maybe_resize (ghash.c:917) +# ==2017718== by 0x48EC530: g_hash_table_remove_internal (ghash.c:1775) +# ==2017718== by 0x48ECDBA: g_hash_table_remove (ghash.c:1802) +# ==2017718== by 0x487BF28: g_signal_handlers_destroy (gsignal.c:2823) +# ==2017718== by 0x486A853: g_object_real_dispose (gobject.c:1362) +# ==2017718== by 0x4A9E9D2: g_config_param_dispose (configuration.c:199) +# ==2017718== by 0x486BD35: g_object_unref (gobject.c:3867) +# ==2017718== by 0x48FADD2: g_list_foreach (glist.c:1092) +# ==2017718== by 0x48FADED: g_list_free_full (glist.c:246) +# ==2017718== by 0x4AA07A6: g_generic_config_dispose (configuration.c:1233) +# ==2017718== by 0x486BD35: g_object_unref (gobject.c:3867) +# ==2017718== by 0x4A8FC05: unload_main_config_parameters (params.c:146) +# ==2017718== by 0x4A8E552: unload_all_core_components (core.c:174) +# ==2017718== by 0x10ADFC: main (rost.c:470) +# ==2017718== + + +{ + Need to fix all GObject memory leaks? + Memcheck:Leak + match-leak-kinds: reachable + fun:realloc + fun:g_realloc + fun:g_realloc_n + fun:realloc_arrays + fun:g_hash_table_resize + fun:g_hash_table_maybe_resize + fun:g_hash_table_remove_internal + fun:g_hash_table_remove + fun:g_signal_handlers_destroy + fun:g_object_real_dispose + ... 
+} + + +###################################################### + + +# ==2037130== 368 bytes in 23 blocks are still reachable in loss record 1,004 of 1,018 +# ==2037130== at 0x48455EF: calloc (vg_replace_malloc.c:1328) +# ==2037130== by 0x490447A: g_malloc0 (gmem.c:163) +# ==2037130== by 0x487FD25: type_set_qdata_W (gtype.c:3813) +# ==2037130== by 0x4880085: type_add_flags_W (gtype.c:3878) +# ==2037130== by 0x48857E1: g_type_register_dynamic (gtype.c:2901) +# ==2037130== by 0x4AA89FA: g_dynamic_types_register_type (dt.c:410) +# ==2037130== by 0x4AA8B59: build_dynamic_type (dt.c:510) +# ==2037130== by 0x4AAAC93: g_plugin_module_new (plugin.c:506) +# ==2037130== by 0x4AA9179: browse_directory_for_plugins (pglist.c:281) +# ==2037130== by 0x4AA916B: browse_directory_for_plugins (pglist.c:277) +# ==2037130== by 0x4AA916B: browse_directory_for_plugins (pglist.c:277) +# ==2037130== by 0x4AA8CE2: init_all_plugins (pglist.c:91) +# ==2037130== by 0x10AB0A: main (rost.c:372) +# ==2037130== + +{ + <insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:g_malloc0 + fun:type_set_qdata_W + fun:type_add_flags_W + fun:g_type_register_dynamic + ... 
+} + + +# ==2037232== 1,840 bytes in 23 blocks are still reachable in loss record 1,008 of 1,018 +# ==2037232== at 0x48455EF: calloc (vg_replace_malloc.c:1328) +# ==2037232== by 0x490447A: g_malloc0 (gmem.c:163) +# ==2037232== by 0x4881971: type_data_make_W (gtype.c:1141) +# ==2037232== by 0x4881F8C: type_data_ref_Wm (gtype.c:1272) +# ==2037232== by 0x4882ECF: g_type_class_ref (gtype.c:3034) +# ==2037232== by 0x486CE03: g_object_new_with_properties (gobject.c:2370) +# ==2037232== by 0x486D625: g_object_new (gobject.c:2037) +# ==2037232== by 0x4AAACB8: g_plugin_module_new (plugin.c:512) +# ==2037232== by 0x4AA9179: browse_directory_for_plugins (pglist.c:281) +# ==2037232== by 0x4AA916B: browse_directory_for_plugins (pglist.c:277) +# ==2037232== by 0x4AA916B: browse_directory_for_plugins (pglist.c:277) +# ==2037232== by 0x4AA8CE2: init_all_plugins (pglist.c:91) +# ==2037232== by 0x10AB0A: main (rost.c:372) +# ==2037232== + +{ + <insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + fun:g_malloc0 + fun:type_data_make_W + fun:type_data_ref_Wm + fun:g_type_class_ref + fun:g_object_new_with_properties + fun:g_object_new + ... 
+} + + +# ==2038514== 104 bytes in 1 blocks are still reachable in loss record 845 of 1,018 +# ==2038514== at 0x485FAD8: freelist_alloc (gatomicarray.c:85) +# ==2038514== by 0x485FB57: _g_atomic_array_copy (gatomicarray.c:141) +# ==2038514== by 0x4881259: type_node_add_iface_entry_W (gtype.c:1429) +# ==2038514== by 0x4882417: type_add_interface_Wm (gtype.c:1501) +# ==2038514== by 0x4884B69: g_type_add_interface_static (gtype.c:2939) +# ==2038514== by 0x4A7FA23: g_imm_operand_get_type_once (feeder.c:125) +# ==2038514== by 0x4A7F93F: g_imm_operand_get_type (feeder.c:125) +# ==2038514== by 0x4A8FEE1: register_arch_gtypes (processors.c:83) +# ==2038514== by 0x4A8E4D4: load_all_core_components (core.c:122) +# ==2038514== by 0x10AAF5: main (rost.c:369) +# ==2038514== + +{ + <insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: reachable + fun:freelist_alloc + fun:_g_atomic_array_copy + fun:type_node_add_iface_entry_W + fun:type_add_interface_Wm + fun:g_type_add_interface_static + fun:*_get_type_once + ... +} + + +# ==2038565== 80 bytes in 1 blocks are still reachable in loss record 651 of 1,018 +# ==2038565== at 0x485FAD8: freelist_alloc (gatomicarray.c:85) +# ==2038565== by 0x485FB57: _g_atomic_array_copy (gatomicarray.c:141) +# ==2038565== by 0x4881259: type_node_add_iface_entry_W (gtype.c:1429) +# ==2038565== by 0x4882417: type_add_interface_Wm (gtype.c:1501) +# ==2038565== by 0x4884B69: g_type_add_interface_static (gtype.c:2939) +# ==2038565== by 0x217225B5: ??? +# ==2038565== by 0x21722507: ??? +# ==2038565== by 0x2171E32A: ??? +# ==2038565== by 0x2171E360: ??? 
+# ==2038565== by 0x4AABC1C: g_plugin_module_load (plugin.c:1115) +# ==2038565== by 0x4AA9635: load_remaning_plugins (pglist.c:478) +# ==2038565== by 0x4AA8D81: init_all_plugins (pglist.c:108) +# ==2038565== by 0x10AB0A: main (rost.c:372) +# ==2038565== + +{ + GType interface entry added from unresolved plugin frames stays reachable + Memcheck:Leak + match-leak-kinds: reachable + fun:freelist_alloc + fun:_g_atomic_array_copy + fun:type_node_add_iface_entry_W + fun:type_add_interface_Wm + fun:g_type_add_interface_static + obj:* + obj:* + obj:* + obj:* + ... +} + + +###################################################### + + +# XML2 + +# ==2031163== 104 bytes in 1 blocks are still reachable in loss record 874 of 1,018 +# ==2031163== at 0x48407B4: malloc (vg_replace_malloc.c:381) +# ==2031163== by 0x4E5D79A: xmlNewRMutex (in /usr/lib/x86_64-linux-gnu/libxml2.so.2.9.14) +# ==2031163== by 0x4EC04AC: __xmlInitializeDict (in /usr/lib/x86_64-linux-gnu/libxml2.so.2.9.14) +# ==2031163== by 0x4EC053C: __xmlRandom (in /usr/lib/x86_64-linux-gnu/libxml2.so.2.9.14) +# ==2031163== by 0x4DFFBA5: xmlHashCreate (in /usr/lib/x86_64-linux-gnu/libxml2.so.2.9.14) +# ==2031163== by 0x4E495EF: xmlXPathNewContext (in /usr/lib/x86_64-linux-gnu/libxml2.so.2.9.14) +# ==2031163== by 0x4A8CAB0: create_new_xml_file (xml.c:70) +# ==2031163== by 0x4AA0BFF: g_generic_config_write (configuration.c:1485) +# ==2031163== by 0x4AA9EFA: g_plugin_module_dispose (plugin.c:173) +# ==2031163== by 0x486BD35: g_object_unref (gobject.c:3867) +# ==2031163== by 0x4AA94F0: on_plugin_ref_toggle (pglist.c:393) +# ==2031163== by 0x486B946: toggle_refs_notify (gobject.c:3599) +# ==2031163== by 0x486BCB7: g_object_unref (gobject.c:3806) +# ==2031163== by 0x4AA8F23: exit_all_plugins (pglist.c:162) +# ==2031163== by 0x10AE01: main (rost.c:476) +# ==2031163== + +{ + libxml2: xmlNewRMutex from __xmlInitializeDict stays reachable (via toggle_refs_notify) + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:xmlNewRMutex + fun:__xmlInitializeDict + fun:__xmlRandom + fun:xmlHashCreate + fun:xmlXPathNewContext + 
fun:create_new_xml_file + fun:g_generic_config_write + fun:g_plugin_module_dispose + fun:g_object_unref + fun:on_plugin_ref_toggle + fun:toggle_refs_notify + fun:g_object_unref + fun:exit_all_plugins + fun:main +} + +{ + libxml2: xmlNewRMutex from __xmlInitializeDict stays reachable (without toggle_refs_notify) + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:xmlNewRMutex + fun:__xmlInitializeDict + fun:__xmlRandom + fun:xmlHashCreate + fun:xmlXPathNewContext + fun:create_new_xml_file + fun:g_generic_config_write + fun:g_plugin_module_dispose + fun:g_object_unref + fun:on_plugin_ref_toggle + fun:exit_all_plugins + fun:main +} + + +# Python... + +{ + Python: possible leak below PyImport_Import (create_python_plugin) + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + ... + fun:PyImport_Import + fun:create_python_plugin + fun:load_python_plugins + fun:chrysalide_plugin_on_plugins_loaded + fun:load_remaning_plugins + fun:init_all_plugins + fun:main +} + +{ + Python: possible leak below PyImport_ImportModule (chrysalide_plugin_init) + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + ... + fun:PyImport_Import + fun:PyImport_ImportModule + fun:chrysalide_plugin_init + fun:g_plugin_module_load + fun:load_remaning_plugins + fun:init_all_plugins + fun:main +} diff --git a/tools/yara2rost/Makefile.am b/tools/yara2rost/Makefile.am new file mode 100644 index 0000000..2830b03 --- /dev/null +++ b/tools/yara2rost/Makefile.am @@ -0,0 +1,36 @@ + +BUILT_SOURCES = grammar.h + + +# On évite d'utiliser les variables personnalisées de type *_la_[YL]FLAGS +# afin de conserver des noms de fichiers simples, ie sans le nom de la +# bibliothèque de sortie en préfixe. 
+ +AM_YFLAGS = -v -d -p yara2rost_ -Wno-yacc #-Wcounterexamples + +AM_LFLAGS = -P yara2rost_ -o lex.yy.c --header-file=tokens.h \ + -Dyyget_lineno=yara2rost_get_lineno \ + -Dyy_scan_bytes=yara2rost__scan_bytes \ + -Dyy_delete_buffer=yara2rost__delete_buffer + +AM_CFLAGS = $(DEBUG_CFLAGS) $(WARNING_FLAGS) + + +bin_PROGRAMS = yara2rost + +.NOTPARALLEL: $(bin_PROGRAMS) + +yara2rost_SOURCES = \ + decl.h \ + enums.h \ + tokens.l \ + grammar.y \ + yara2rost.c + + +# Automake only does half of the job +CLEANFILES = grammar.h grammar.c grammar.output tokens.c tokens.h + +# Likewise: of all the generated files, only the Flex output is dropped from distributions! +# Also add what is needed to generate the Makefiles. +EXTRA_DIST = tokens.h diff --git a/tools/yara2rost/decl.h b/tools/yara2rost/decl.h new file mode 100644 index 0000000..05d63d4 --- /dev/null +++ b/tools/yara2rost/decl.h @@ -0,0 +1,38 @@ + +/* Chrysalide - Binary file analysis tool + * decl.h - declarations of useful prototypes + * + * Copyright (C) 2023 Cyrille Bagard + * + * This file is part of Chrysalide. + * + * Chrysalide is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * Chrysalide is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Chrysalide. If not, see <http://www.gnu.org/licenses/>. + */ + + +#ifndef _TOOLS_YARA2ROST_DECL_H +#define _TOOLS_YARA2ROST_DECL_H + + +#include <stdbool.h> +#include <stddef.h> /* size_t */ + + + +/* Processes rule definitions in order to translate them. 
*/ +bool process_rules_definitions(const char *, size_t); + + + +#endif /* _TOOLS_YARA2ROST_DECL_H */ diff --git a/tools/yara2rost/demo.yar b/tools/yara2rost/demo.yar new file mode 100644 index 0000000..081973f --- /dev/null +++ b/tools/yara2rost/demo.yar @@ -0,0 +1,27 @@ + +include "demobis.yar" + +import "modname" + + +private global rule Test : tag1 tag2 { + + meta: + desc_0 = "abc" + desc_1 = 123 + desc_2 = true + desc_3 = false + desc_z = "" + + strings: + $text = "value" + $text_b = "value" wide ascii fullword private xor(0x12) + $re = /hash: [0-9a-fA-F]{32}/ + $re_b = /hash: [0-9a-fA-F]{32}/ wide ascii nocase fullword private + $hex = { AA bb [2-4] 61 62 63 } + $hex_b = { AA bb [2-4] 61 62 63 } private + + condition: + filesize == 123 and entrypoint == 456 and for all of ($text*) : ( @ > @hex_b ) and any of them + +} diff --git a/tools/yara2rost/enums.h b/tools/yara2rost/enums.h new file mode 100644 index 0000000..19fe49c --- /dev/null +++ b/tools/yara2rost/enums.h @@ -0,0 +1,47 @@ + +/* Chrysalide - Outil d'analyse de fichiers binaires + * enums.h - Reprise des fanions de la syntaxe YARA + * + * Copyright (C) 2023 Cyrille Bagard + * + * This file is part of Chrysalide. + * + * Chrysalide is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * Chrysalide is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Chrysalide. If not, see <http://www.gnu.org/licenses/>. 
+ */ + + +#ifndef _TOOLS_YARA2ROST_ENUMS_H +#define _TOOLS_YARA2ROST_ENUMS_H + + +typedef enum _RuleFlags +{ + RULE_FLAGS_NONE = (0 << 0), + RULE_FLAGS_PRIVATE = (1 << 0), + RULE_FLAGS_GLOBAL = (1 << 1) + +} RuleFlags; + +typedef enum _StringExtraFlags +{ + STRING_FLAGS_NONE = (0 << 0), + STRING_FLAGS_NO_CASE = (1 << 0), + STRING_FLAGS_FULL_WORD = (1 << 1), + STRING_FLAGS_PRIVATE = (1 << 2) + +} StringExtraFlags; + + + +#endif /* _TOOLS_YARA2ROST_ENUMS_H */ diff --git a/tools/yara2rost/grammar.y b/tools/yara2rost/grammar.y new file mode 100644 index 0000000..0d756b1 --- /dev/null +++ b/tools/yara2rost/grammar.y @@ -0,0 +1,1333 @@ + +%{ + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> + + +#include "decl.h" +#include "tokens.h" + + + +/* Prints an error message after a failed parsing. */ +static int yyerror(yyscan_t, const char *); + +/* Initializes the beginning of a copy. */ +static void init_dump(sz_str_t *, const sz_cst_str_t *); + +#define init_dump_with_fixed(d, s) \ + init_dump(d, (sz_cst_str_t []) { { .data = s, .len = sizeof(s) - 1 } }) + +/* Appends one string to another. */ +static void add_to_dump(sz_str_t *, const sz_cst_str_t *); + +#define add_fixed_to_dump(d, s) \ + add_to_dump(d, (sz_cst_str_t []) { { .data = s, .len = sizeof(s) - 1 } }) + +/* Appends the string pointed to by s, then releases its buffer with free(); a typed temporary replaces the sz_str_t * to sz_cst_str_t * cast. */ +#define add_dyn_to_dump(d, s) \ + do \ + { \ + add_to_dump(d, (sz_cst_str_t []) { { .data = (s)->data, .len = (s)->len } }); \ + free((s)->data); \ + } \ + while (0) + +/* Prints a fragment of the definition forming a ROST rule. 
*/ +void dump_string(const char *, size_t); + +#define dump_fixed_string(s) \ + dump_string(s, sizeof(s) - 1) + + +%} + + +%code requires { + +#include <stdbool.h> +#include <sys/types.h> + +#include "enums.h" + +#define YY_TYPEDEF_YY_SCANNER_T +typedef void *yyscan_t; + + +typedef struct _sz_str_t +{ + char *data; + size_t len; + +} sz_str_t; + +typedef struct _sz_cst_str_t +{ + char *data; + size_t len; + +} sz_cst_str_t; + +} + +%union { + + sz_str_t string; /* Chaîne de caractères #1 */ + sz_cst_str_t cstring; /* Chaîne de caractères #2 */ + + RuleFlags rule_flags; /* Fanions pour règle */ + StringExtraFlags string_flags; /* Fanions pour motif */ + +} + + +%expect 1 + +%define api.pure full +%define parse.error verbose + +%parse-param { yyscan_t yyscanner } +%lex-param { yyscan_t yyscanner } + + +%code provides { + +#define YY_DECL \ + int yara2rost_lex(YYSTYPE *yylval_param, yyscan_t yyscanner) + +YY_DECL; + +} + +%token COLON ":" +%token CURLY_BRACKET_O "{" +%token CURLY_BRACKET_C "}" +%token EQUAL "=" +%token PAREN_O "(" +%token PAREN_C ")" +%token DOT_DOT ".." +%token COMMA "," +%token BRACKET_O "[" +%token BRACKET_C "]" +%token PERCENT "%" +%token DOT "." 
+ +%token ADD_OP "+" +%token SUB_OP "-" +%token MUL_OP "*" +%token DIV_OP "\\" +%token EOR_OP "^" +%token AND_OP "&" +%token OR_OP "|" +%token INV_OP "~" +%token SHIFT_LEFT_OP "<<" +%token SHIFT_RIGHT_OP ">>" + +%token LT "<" +%token GT ">" +%token LE "<=" +%token GE ">=" +%token EQ "==" +%token NEQ "!=" + +%token ALL "all" +%token AND "and" +%token ANY "any" +%token ASCII "ascii" +%token AT "at" +%token BASE64 "base64" +%token BASE64WIDE "base64wide" +%token CONDITION "condition" +%token CONTAINS "contains" +%token DEFINED "defined" +%token ENDSWITH "endswith" +%token ENTRYPOINT "entrypoint" +%token FILESIZE "filesize" +%token FOR "for" +%token FULLWORD "fullword" +%token GLOBAL "global" +%token ICONTAINS "icontains" +%token IENDSWITH "iendswith" +%token IEQUALS "iequals" +%token IMPORT "import" +%token IN "in" +%token INCLUDE "include" +%token ISTARTSWITH "istartswith" +%token MATCHES "matches" +%token META "meta" +%token NOCASE "nocase" +%token NONE "none" +%token NOT "not" +%token OF "of" +%token OR "or" +%token PRIVATE "private" +%token RULE "rule" +%token STARTSWITH "startswith" +%token STRINGS "strings" +%token THEM "them" +%token WIDE "wide" +%token XOR "xor" + +%token _FALSE "false" +%token _TRUE "true" + +%token STRING_IDENTIFIER_WITH_WILDCARD +%token STRING_IDENTIFIER +%token STRING_COUNT +%token STRING_OFFSET +%token STRING_LENGTH +%token INTEGER_FUNCTION +%token IDENTIFIER +%token NUMBER +%token DOUBLE +%token TEXT_STRING +%token REGEXP +%token HEX_STRING + +%type <cstring> STRING_IDENTIFIER_WITH_WILDCARD +%type <cstring> STRING_IDENTIFIER +%type <cstring> STRING_COUNT +%type <cstring> STRING_OFFSET +%type <cstring> STRING_LENGTH +%type <cstring> INTEGER_FUNCTION +%type <cstring> IDENTIFIER +%type <cstring> NUMBER +%type <cstring> DOUBLE +%type <cstring> TEXT_STRING +%type <cstring> REGEXP +%type <cstring> HEX_STRING + +%type <rule_flags> rule_modifiers +%type <rule_flags> rule_modifier + +%type <string_flags> string_modifiers +%type <string_flags> 
string_modifier
+%type <string_flags> regexp_modifiers
+%type <string_flags> regexp_modifier
+%type <string_flags> hex_modifiers
+%type <string_flags> hex_modifier
+
+%type <string> boolean_expression
+%type <string> identifier
+%type <string> arguments
+%type <string> arguments_list
+%type <string> expression
+%type <string> for_iteration
+%type <string> for_variables
+%type <string> iterator
+%type <string> set
+%type <string> range
+%type <string> enumeration
+%type <string> string_iterator
+%type <string> string_set
+%type <string> string_enumeration
+%type <string> string_enumeration_item
+%type <string> rule_set
+%type <string> rule_enumeration
+%type <string> rule_enumeration_item
+%type <string> for_expression
+%type <string> for_quantifier
+%type <string> primary_expression
+%type <string> regexp
+/* Operator precedence: each line binds tighter than the one before it. */
+%left OR
+%left AND
+%right NOT DEFINED
+%left EQ NEQ CONTAINS ICONTAINS STARTSWITH ENDSWITH ISTARTSWITH IENDSWITH IEQUALS MATCHES
+%left LT LE GT GE
+%left OR_OP
+%left EOR_OP
+%left AND_OP
+%left SHIFT_LEFT_OP SHIFT_RIGHT_OP
+%left ADD_OP SUB_OP
+%left MUL_OP DIV_OP PERCENT
+%right INV_OP UNARY_MINUS
+
+
+%%
+
+/* A file is any sequence of include directives, imports and rules. */
+    rules : /* empty */
+        | rules include
+        | rules import
+        | rules rule
+        ;
+
+/* An include directive is written back with the same keyword and string. */
+    include : "include" TEXT_STRING
+        {
+            dump_fixed_string("include ");
+            dump_string($2.data, $2.len);
+            dump_fixed_string("\n");
+        }
+        ;
+/* An import is not translated: it is only kept inside a comment. */
+    import : "import" TEXT_STRING
+        {
+            dump_fixed_string("/* import ");
+            dump_string($2.data, $2.len);
+            dump_fixed_string(" */\n");
+        }
+        ;
+
+/* Rule header: modifiers, name and tags are printed as soon as they are
+ * reduced, then the meta, strings and condition sections in that order. */
+    rule : rule_modifiers "rule" IDENTIFIER
+        {
+            if ($1 != RULE_FLAGS_NONE)
+            {
+                if ($1 & RULE_FLAGS_PRIVATE)
+                {
+                    dump_fixed_string("private");
+                    dump_fixed_string(" ");
+                }
+
+                if ($1 & RULE_FLAGS_GLOBAL)
+                {
+                    dump_fixed_string("global");
+                    dump_fixed_string(" ");
+                }
+
+            }
+
+            dump_fixed_string("rule ");
+            dump_string($3.data, $3.len);
+
+        }
+        tags "{"
+        {
+            dump_fixed_string(" {\n");
+        }
+        meta strings condition "}"
+        {
+            dump_fixed_string("}\n");
+        }
+        ;
+
+/* Modifiers are OR-ed into a flag set which the rule header prints. */
+    rule_modifiers : /* empty */
+        {
+            $$ = RULE_FLAGS_NONE;
+        }
+        | rule_modifiers rule_modifier
+        {
+            $$ = $1 | $2;
+        }
+        ;
+
+    rule_modifier : "private"
+        {
+            $$ = RULE_FLAGS_PRIVATE;
+        }
+        | "global"
+        {
+            $$ = RULE_FLAGS_GLOBAL;
+        }
+        ;
+
+/* Optional tag list, printed as " :" followed by each tag after a space. */
+    tags : /* empty */
+        | ":"
+        {
+            dump_fixed_string(" :");
+        }
+        tag_list
+        ;
+
+    tag_list : IDENTIFIER
+        {
+            dump_fixed_string(" ");
+            dump_string($1.data, $1.len);
+        }
+        | tag_list IDENTIFIER
+        {
+            dump_fixed_string(" ");
+            dump_string($2.data, $2.len);
+        }
+        ;
+
+
+/**
+ * Section "meta:" (each entry is printed as "name = value" on its own line)
+ */
+
+    meta : /* empty */
+        | "meta" ":"
+        {
+            dump_fixed_string("\n ");
+            dump_fixed_string("meta:\n");
+        }
+        meta_declarations
+        ;
+
+    meta_declarations : meta_declaration
+        {
+            dump_fixed_string("\n");
+        }
+        | meta_declarations meta_declaration
+        {
+            dump_fixed_string("\n");
+        }
+        ;
+
+    meta_declaration : IDENTIFIER "=" TEXT_STRING
+        {
+            dump_fixed_string(" ");
+            dump_string($1.data, $1.len);
+            dump_fixed_string(" = ");
+            dump_string($3.data, $3.len);
+        }
+        | IDENTIFIER "=" NUMBER
+        {
+            dump_fixed_string(" ");
+            dump_string($1.data, $1.len);
+            dump_fixed_string(" = ");
+            dump_string($3.data, $3.len);
+        }
+        | IDENTIFIER "=" "-" NUMBER
+        {
+            dump_fixed_string(" ");
+            dump_string($1.data, $1.len);
+            dump_fixed_string(" = -");
+            dump_string($4.data, $4.len);
+        }
+        | IDENTIFIER "=" "true"
+        {
+            dump_fixed_string(" ");
+            dump_string($1.data, $1.len);
+            dump_fixed_string(" = true");
+        }
+        | IDENTIFIER "=" "false"
+        {
+            dump_fixed_string(" ");
+            dump_string($1.data, $1.len);
+            dump_fixed_string(" = false");
+        }
+        ;
+
+
+/**
+ * Section "strings:" (written to the output under the name "bytes:")
+ */
+
+    strings : /* empty */
+        | "strings" ":"
+        {
+            dump_fixed_string("\n ");
+            dump_fixed_string("bytes:\n");
+        }
+        string_declarations
+        ;
+
+    string_declarations : string_declaration
+        {
+            dump_fixed_string("\n");
+        }
+        | string_declarations string_declaration
+        {
+            dump_fixed_string("\n");
+        }
+        ;
+/* One alternative per kind of pattern (text, regexp, hex). The nocase,
+ * fullword and private flags are printed last, after the other modifiers. */
+    string_declaration : STRING_IDENTIFIER "="
+        {
+            dump_fixed_string(" ");
+            dump_string($1.data, $1.len);
+            dump_fixed_string(" = ");
+        }
+        TEXT_STRING
+        {
+            dump_string($4.data, $4.len);
+        }
+        string_modifiers
+        {
+            if ($6 & STRING_FLAGS_NO_CASE)
+                dump_fixed_string(" nocase");
+
+            if ($6 & STRING_FLAGS_FULL_WORD)
+                dump_fixed_string(" fullword");
+
+            if ($6 & STRING_FLAGS_PRIVATE)
+                dump_fixed_string(" private");
+
+        }
+        | STRING_IDENTIFIER "="
+        {
+            dump_fixed_string(" ");
+            dump_string($1.data, $1.len);
+            dump_fixed_string(" = ");
+        }
+        REGEXP
+        {
+            dump_fixed_string("/");
+            dump_string($4.data, $4.len);
+        }
+        regexp_modifiers
+        {
+            if ($6 & STRING_FLAGS_NO_CASE)
+                dump_fixed_string(" nocase");
+
+            if ($6 & STRING_FLAGS_FULL_WORD)
+                dump_fixed_string(" fullword");
+
+            if ($6 & STRING_FLAGS_PRIVATE)
+                dump_fixed_string(" private");
+
+        }
+        | STRING_IDENTIFIER "="
+        {
+            dump_fixed_string(" ");
+            dump_string($1.data, $1.len);
+            dump_fixed_string(" = ");
+        }
+        HEX_STRING
+        {
+            dump_string($4.data, $4.len);
+        }
+        hex_modifiers
+        {
+            if ($6 & STRING_FLAGS_NO_CASE)
+                dump_fixed_string(" nocase");
+
+            if ($6 & STRING_FLAGS_FULL_WORD)
+                dump_fixed_string(" fullword");
+
+            if ($6 & STRING_FLAGS_PRIVATE)
+                dump_fixed_string(" private");
+
+        }
+        ;
+
+
+    string_modifiers : /* empty */
+        {
+            $$ = STRING_FLAGS_NONE;
+        }
+        | string_modifiers string_modifier
+        {
+            $$ = $1 | $2;
+        }
+        ;
+/* wide, xor and base64 variants are printed right away and contribute no
+ * flag; "ascii" is printed as "plain"; the three others only set a flag. */
+    string_modifier : "wide"
+        {
+            dump_fixed_string(" wide");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "ascii"
+        {
+            dump_fixed_string(" plain");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "nocase"
+        {
+            $$ = STRING_FLAGS_NO_CASE;
+        }
+        | "fullword"
+        {
+            $$ = STRING_FLAGS_FULL_WORD;
+        }
+        | "private"
+        {
+            $$ = STRING_FLAGS_PRIVATE;
+        }
+        | "xor"
+        {
+            dump_fixed_string(" xor");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "xor" "(" NUMBER ")"
+        {
+            dump_fixed_string(" xor(");
+            dump_string($3.data, $3.len);
+            dump_fixed_string(")");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "xor" "(" NUMBER "-" NUMBER ")"
+        {
+            dump_fixed_string(" xor(");
+            dump_string($3.data, $3.len);
+            dump_fixed_string("-");
+            dump_string($5.data, $5.len);
+            dump_fixed_string(")");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "base64"
+        {
+            dump_fixed_string(" base64");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "base64" "(" TEXT_STRING ")"
+        {
+            dump_fixed_string(" base64(");
+            dump_string($3.data, $3.len);
+            dump_fixed_string(")");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "base64wide"
+        {
+            dump_fixed_string(" (base64 | wide)");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "base64wide" "(" TEXT_STRING ")"
+        {
+            dump_fixed_string(" (base64(");
+            dump_string($3.data, $3.len);
+            dump_fixed_string(") | wide)");
+            $$ = STRING_FLAGS_NONE;
+        }
+        ;
+
+    regexp_modifiers : /* empty */
+        {
+            $$ = STRING_FLAGS_NONE;
+        }
+        | regexp_modifiers regexp_modifier
+        {
+            $$ = $1 | $2;
+        }
+        ;
+/* Regular expressions accept a subset of the text string modifiers. */
+    regexp_modifier : "wide"
+        {
+            dump_fixed_string(" wide");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "ascii"
+        {
+            dump_fixed_string(" plain");
+            $$ = STRING_FLAGS_NONE;
+        }
+        | "nocase"
+        {
+            $$ = STRING_FLAGS_NO_CASE;
+        }
+        | "fullword"
+        {
+            $$ = STRING_FLAGS_FULL_WORD;
+        }
+        | "private"
+        {
+            $$ = STRING_FLAGS_PRIVATE;
+        }
+        ;
+
+    hex_modifiers : /* empty */
+        {
+            $$ = STRING_FLAGS_NONE;
+        }
+        | hex_modifiers hex_modifier
+        {
+            $$ = $1 | $2;
+        }
+        ;
+/* Hexadecimal patterns only accept "private". */
+    hex_modifier : "private"
+        {
+            $$ = STRING_FLAGS_PRIVATE;
+        }
+        ;
+
+
+/**
+ * Section "condition:" (the expression is first assembled as one string in
+ * memory, then written out and released by the rule below)
+ */
+
+    condition : "condition" ":" boolean_expression
+        {
+            dump_fixed_string("\n ");
+            dump_fixed_string("condition:\n");
+            dump_fixed_string(" ");
+            dump_string($3.data, $3.len);
+            free($3.data);
+            dump_fixed_string("\n\n");
+        }
+        ;
+
+    boolean_expression : expression { $$ = $1; }
+        ;
+/* Identifier, possibly followed by member accesses, indexes and calls. */
+    identifier : IDENTIFIER
+        {
+            init_dump(&$$, &$1);
+        }
+        | identifier "." IDENTIFIER
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, ".");
+            add_to_dump(&$$, &$3);
+        }
+        | identifier "[" primary_expression "]"
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, "[");
+            add_dyn_to_dump(&$$, &$3);
+            add_fixed_to_dump(&$$, "]");
+        }
+        | identifier "(" arguments ")"
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, "(");
+            if ($3.len > 0)
+                add_dyn_to_dump(&$$, &$3);
+            add_fixed_to_dump(&$$, ")");
+        }
+        ;
+
+/* For an empty list only len is set (data is left untouched), which is why
+ * the call rule above tests len before using the value. */
+    arguments : { $$.len = 0; /* empty */ }
+        | arguments_list { $$ = $1; }
+        ;
+
+
+    arguments_list : expression
+        {
+            $$ = $1;
+        }
+        | arguments_list "," expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, ", ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        ;
+
+/* Boolean level: each alternative rebuilds the text of the construct, with
+ * the operator keyword surrounded by single spaces. */
+    expression : "true"
+        {
+            init_dump_with_fixed(&$$, "true");
+        }
+        | "false"
+        {
+            init_dump_with_fixed(&$$, "false");
+        }
+        | primary_expression "matches" regexp
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " matches ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "contains" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " contains ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "icontains" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " icontains ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "startswith" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " startswith ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "istartswith" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " istartswith ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "endswith" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " endswith ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "iendswith" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " iendswith ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "iequals" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " iequals ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | STRING_IDENTIFIER
+        {
+            init_dump(&$$, &$1);
+        }
+        | STRING_IDENTIFIER "at" primary_expression
+        {
+            init_dump(&$$, &$1);
+            add_fixed_to_dump(&$$, " at ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | STRING_IDENTIFIER "in" range
+        {
+            init_dump(&$$, &$1);
+            add_fixed_to_dump(&$$, " in ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | "for" for_expression for_iteration ":" "(" boolean_expression ")"
+        {
+            init_dump_with_fixed(&$$, "for ");
+            add_dyn_to_dump(&$$, &$2);
+            add_dyn_to_dump(&$$, &$3);
+            add_fixed_to_dump(&$$, " : (");
+            add_dyn_to_dump(&$$, &$6);
+            add_fixed_to_dump(&$$, ")");
+        }
+        | for_expression "of" string_set
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " of ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | for_expression "of" rule_set
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " of ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+
+        | primary_expression "%" "of" string_set
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, "% of ");
+            add_dyn_to_dump(&$$, &$4);
+        }
+        | primary_expression "%" "of" rule_set
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, "% of ");
+            add_dyn_to_dump(&$$, &$4);
+        }
+
+        | for_expression "of" string_set "in" range
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " of ");
+            add_dyn_to_dump(&$$, &$3);
+            add_fixed_to_dump(&$$, " in ");
+            add_dyn_to_dump(&$$, &$5);
+        }
+        | for_expression "of" string_set "at" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " of ");
+            add_dyn_to_dump(&$$, &$3);
+            add_fixed_to_dump(&$$, " at ");
+            add_dyn_to_dump(&$$, &$5);
+        }
+        | "not" boolean_expression
+        {
+            init_dump_with_fixed(&$$, "not ");
+            add_dyn_to_dump(&$$, &$2);
+        }
+        | "defined" boolean_expression
+        {
+            init_dump_with_fixed(&$$, "defined ");
+            add_dyn_to_dump(&$$, &$2);
+        }
+        | boolean_expression "and" boolean_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " and ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | boolean_expression "or" boolean_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " or ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "<" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " < ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression ">" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " > ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "<=" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " <= ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression ">=" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " >= ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "==" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " == ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "!=" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " != ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression
+        {
+            $$ = $1;
+        }
+        | "(" expression ")"
+        {
+            init_dump_with_fixed(&$$, "(");
+            add_dyn_to_dump(&$$, &$2);
+            add_fixed_to_dump(&$$, ")");
+        }
+        ;
+
+/* Part of a "for" loop located between the quantifier and the colon. */
+    for_iteration : for_variables "in" iterator
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " in ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | "of" string_iterator
+        {
+            init_dump_with_fixed(&$$, "of ");
+            add_dyn_to_dump(&$$, &$2);
+        }
+        ;
+
+    for_variables : IDENTIFIER
+        {
+            init_dump(&$$, &$1);
+        }
+        | for_variables "," IDENTIFIER
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, ", ");
+            add_to_dump(&$$, &$3);
+        }
+        ;
+
+
+    iterator : identifier { $$ = $1; }
+        | set { $$ = $1; }
+        ;
+
+
+    set : "(" enumeration ")"
+        {
+            init_dump_with_fixed(&$$, "(");
+            add_dyn_to_dump(&$$, &$2);
+            add_fixed_to_dump(&$$, ")");
+        }
+        | range { $$ = $1; }
+        ;
+
+/* Interval written as "(low .. high)". */
+    range : "(" primary_expression ".." primary_expression ")"
+        {
+            init_dump_with_fixed(&$$, "(");
+            add_dyn_to_dump(&$$, &$2);
+            add_fixed_to_dump(&$$, " .. ");
+            add_dyn_to_dump(&$$, &$4);
+            add_fixed_to_dump(&$$, ")");
+        }
+        ;
+
+
+    enumeration : primary_expression { $$ = $1; }
+        | enumeration "," primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, ", ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        ;
+
+
+    string_iterator : string_set { $$ = $1; }
+        ;
+
+    string_set : "(" string_enumeration ")"
+        {
+            init_dump_with_fixed(&$$, "(");
+            add_dyn_to_dump(&$$, &$2);
+            add_fixed_to_dump(&$$, ")");
+        }
+        | "them"
+        {
+            init_dump_with_fixed(&$$, "them");
+        }
+        ;
+
+    string_enumeration : string_enumeration_item
+        {
+            $$ = $1;
+        }
+        | string_enumeration "," string_enumeration_item
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, ", ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        ;
+
+string_enumeration_item : STRING_IDENTIFIER
+        {
+            init_dump(&$$, &$1);
+        }
+        | STRING_IDENTIFIER_WITH_WILDCARD
+        {
+            init_dump(&$$, &$1);
+        }
+        ;
+
+
+    rule_set : "(" rule_enumeration ")"
+        {
+            init_dump_with_fixed(&$$, "(");
+            add_dyn_to_dump(&$$, &$2);
+            add_fixed_to_dump(&$$, ")");
+        }
+        ;
+
+    rule_enumeration : rule_enumeration_item
+        {
+            $$ = $1;
+        }
+        | rule_enumeration "," rule_enumeration_item
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, ", ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        ;
+
+    rule_enumeration_item : IDENTIFIER
+        {
+            init_dump(&$$, &$1);
+        }
+        | IDENTIFIER "*"
+        {
+            init_dump(&$$, &$1);
+            add_fixed_to_dump(&$$, "*");
+        }
+        ;
+
+
+    for_expression : primary_expression { $$ = $1; }
+        | for_quantifier { $$ = $1; }
+        ;
+
+    for_quantifier : "all"
+        {
+            init_dump_with_fixed(&$$, "all");
+        }
+        | "any"
+        {
+            init_dump_with_fixed(&$$, "any");
+        }
+        | "none"
+        {
+            init_dump_with_fixed(&$$, "none");
+        }
+        ;
+
+/* Value level. Two keywords are rewritten here: "filesize" becomes
+ * "datasize" and "entrypoint" becomes the constant 0 behind a comment. */
+    primary_expression : "(" primary_expression ")"
+        {
+            init_dump_with_fixed(&$$, "(");
+            add_dyn_to_dump(&$$, &$2);
+            add_fixed_to_dump(&$$, ")");
+        }
+        | "filesize"
+        {
+            init_dump_with_fixed(&$$, "datasize");
+        }
+        | "entrypoint"
+        {
+            init_dump_with_fixed(&$$, "/* entrypoint */ 0");
+        }
+        | INTEGER_FUNCTION "(" primary_expression ")"
+        {
+            init_dump(&$$, &$1);
+            add_fixed_to_dump(&$$, "(");
+            add_dyn_to_dump(&$$, &$3);
+            add_fixed_to_dump(&$$, ")");
+        }
+        | NUMBER
+        {
+            init_dump(&$$, &$1);
+        }
+        | DOUBLE
+        {
+            init_dump(&$$, &$1);
+        }
+        | TEXT_STRING
+        {
+            init_dump(&$$, &$1);
+        }
+        | STRING_COUNT "in" range
+        {
+            init_dump(&$$, &$1);
+            add_fixed_to_dump(&$$, " in ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | STRING_COUNT
+        {
+            init_dump(&$$, &$1);
+        }
+        | STRING_OFFSET "[" primary_expression "]"
+        {
+            init_dump(&$$, &$1);
+            add_fixed_to_dump(&$$, "[");
+            add_dyn_to_dump(&$$, &$3);
+            add_fixed_to_dump(&$$, "]");
+        }
+        | STRING_OFFSET
+        {
+            init_dump(&$$, &$1);
+        }
+        | STRING_LENGTH "[" primary_expression "]"
+        {
+            init_dump(&$$, &$1);
+            add_fixed_to_dump(&$$, "[");
+            add_dyn_to_dump(&$$, &$3);
+            add_fixed_to_dump(&$$, "]");
+        }
+        | STRING_LENGTH
+        {
+            init_dump(&$$, &$1);
+        }
+        | identifier
+        {
+            $$ = $1;
+        }
+        | "-" primary_expression %prec UNARY_MINUS
+        {
+            init_dump_with_fixed(&$$, "-");
+            add_dyn_to_dump(&$$, &$2);
+        }
+        | primary_expression "+" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " + ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "-" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " - ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "*" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " * ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "\\" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " \\ ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "%" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " % ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "^" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " ^ ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "&" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " & ");
+            add_dyn_to_dump(&$$, &$3);
+        }
+        | primary_expression "|" primary_expression
+        {
+            $$ = $1;
+            add_fixed_to_dump(&$$, " | ");
+            add_dyn_to_dump(&$$, &$3);
+
} + | "~" primary_expression + { + init_dump_with_fixed(&$$, "~"); + add_dyn_to_dump(&$$, &$2); + } + | primary_expression "<<" primary_expression + { + $$ = $1; + add_fixed_to_dump(&$$, " << "); + add_dyn_to_dump(&$$, &$3); + } + | primary_expression ">>" primary_expression + { + $$ = $1; + add_fixed_to_dump(&$$, " >> "); + add_dyn_to_dump(&$$, &$3); + } + | regexp + ; + + + regexp : REGEXP + { + init_dump_with_fixed(&$$, "/"); + add_to_dump(&$$, &$1); + } + ; + + +%% + + +/****************************************************************************** +* * +* Paramètres : yyscanner = décodeur impliqué dans le processus. * +* msg = message d'erreur. * +* * +* Description : Affiche un message d'erreur suite à l'analyse en échec. * +* * +* Retour : 0 * +* * +* Remarques : - * +* * +******************************************************************************/ + +static int yyerror(yyscan_t yyscanner, const char *msg) +{ + printf("YYERROR line %d: %s\n", yyget_lineno(yyscanner), msg); + + return 0; + +} + + +/****************************************************************************** +* * +* Paramètres : dst = chaîne de caractères à créer. * +* src = chaîne de caractères à ajouter. * +* * +* Description : Initialise une amorce de copie. * +* * +* Retour : - * +* * +* Remarques : - * +* * +******************************************************************************/ + +static void init_dump(sz_str_t *dst, const sz_cst_str_t *src) +{ + dst->data = malloc((src->len + 1) * sizeof(char)); + dst->len = src->len; + + memcpy(dst->data, src->data, src->len); + + dst->data[dst->len] = '\0'; + +} + + +/****************************************************************************** +* * +* Paramètres : dst = chaîne de caractères à créer. * +* src = chaîne de caractères à ajouter. * +* * +* Description : Complète une chaîne de caractères avec une autre. 
*
+*                                                                             *
+*  Returns     : -                                                            *
+*                                                                             *
+*  Remarks     : The process exits if memory cannot be allocated.             *
+*                                                                             *
+******************************************************************************/
+
+static void add_to_dump(sz_str_t *dst, const sz_cst_str_t *src)
+{
+    void *grown;                            /* Resized buffer              */
+
+    /* The previous pointer is kept until realloc() is known to have worked */
+    grown = realloc(dst->data, (dst->len + src->len + 1) * sizeof(char));
+
+    if (grown == NULL)
+    {
+        perror("realloc");
+        exit(EXIT_FAILURE);
+    }
+
+    dst->data = grown;
+
+    memcpy(&dst->data[dst->len], src->data, src->len);
+
+    dst->len += src->len;
+
+    dst->data[dst->len] = '\0';
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : string = text to copy to the standard output.                *
+*                length = length of this text.                                *
+*                                                                             *
+*  Description : Prints a piece of definition making up a ROST rule.          *
+*                                                                             *
+*  Returns     : -                                                            *
+*                                                                             *
+*  Remarks     : Short writes are resumed until everything is written.        *
+*                                                                             *
+******************************************************************************/
+
+void dump_string(const char *string, size_t length)
+{
+    size_t done;                            /* Bytes already written       */
+    ssize_t ret;                            /* Result of one write() call  */
+
+    for (done = 0; done < length; done += (size_t)ret)
+    {
+        ret = write(STDOUT_FILENO, string + done, length - done);
+
+        if (ret <= 0)
+        {
+            /* errno is only meaningful when write() returned -1 */
+            if (ret == -1)
+                perror("write");
+
+            break;
+
+        }
+
+    }
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : text   = rule definitions to load.                           *
+*                length = length of these definitions.                        *
+*                                                                             *
+*  Description : Walks through rule definitions in order to translate them.   *
+*                                                                             *
+*  Returns     : true if the parsing succeeded, false otherwise.
* +* * +* Remarques : - * +* * +******************************************************************************/ + +bool process_rules_definitions(const char *text, size_t length) +{ + bool result; /* Bilan à renvoyer */ + yyscan_t lexstate; /* Gestion d'analyse lexicale */ + YY_BUFFER_STATE state; /* Contexte d'analyse */ + int status; /* Bilan d'une analyse */ + + result = false; + + yara2rost_lex_init(&lexstate); + + state = yara2rost__scan_bytes(text, length, lexstate); + + status = yyparse(lexstate); + + result = (status == EXIT_SUCCESS); + + yy_delete_buffer(state, lexstate); + + yara2rost_lex_destroy(lexstate); + + return result; + +} diff --git a/tools/yara2rost/tokens.l b/tools/yara2rost/tokens.l new file mode 100644 index 0000000..34e61d0 --- /dev/null +++ b/tools/yara2rost/tokens.l @@ -0,0 +1,292 @@ + +%top { + +#include "grammar.h" + +} + + +%{ + +#include "decl.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> + + +#define PUSH_STATE(s) yy_push_state(s, yyscanner) +#define POP_STATE yy_pop_state(yyscanner) + +%} + + +%option bison-bridge reentrant +%option stack +%option nounput +%option noinput +%option noyywrap +%option noyy_top_state +%option yylineno +%option never-interactive + + +%x regexp +%x comment + + +str_not_escaped [^\"\\] +str_escaped \\a|\\b|\\t|\\n|\\v|\\f|\\r|\\e|\\\"|\\\\|\\x{hbyte} +str_mixed ({str_not_escaped}|{str_escaped}) + +hbyte [0-9a-fA-F]{2} + +digit [0-9] +letter [a-zA-Z] +hexdigit [a-fA-F0-9] +octdigit [0-7] + + +%% + + +":" { return COLON; } +"{" { return CURLY_BRACKET_O; } +"}" { return CURLY_BRACKET_C; } +"=" { return EQUAL; } +"(" { return PAREN_O; } +")" { return PAREN_C; } +".." { return DOT_DOT; } +"," { return COMMA; } +"[" { return BRACKET_O; } +"]" { return BRACKET_C; } +"%" { return PERCENT; } +"." 
{ return DOT; }
+
+"+" { return ADD_OP; }
+"-" { return SUB_OP; }
+"*" { return MUL_OP; }
+"\\" { return DIV_OP; /* a single backslash */ }
+"^" { return EOR_OP; }
+"&" { return AND_OP; }
+"|" { return OR_OP; }
+"~" { return INV_OP; }
+"<<" { return SHIFT_LEFT_OP; }
+">>" { return SHIFT_RIGHT_OP; }
+
+"<" { return LT; }
+">" { return GT; }
+"<=" { return LE; }
+">=" { return GE; }
+"==" { return EQ; }
+"!=" { return NEQ; }
+
+"all" { return ALL; }
+"and" { return AND; }
+"any" { return ANY; }
+"ascii" { return ASCII; }
+"at" { return AT; }
+"base64" { return BASE64; }
+"base64wide" { return BASE64WIDE; }
+"condition" { return CONDITION; }
+"contains" { return CONTAINS; }
+"defined" { return DEFINED; }
+"endswith" { return ENDSWITH; }
+"entrypoint" { return ENTRYPOINT; }
+"filesize" { return FILESIZE; }
+"for" { return FOR; }
+"fullword" { return FULLWORD; }
+"global" { return GLOBAL; }
+"icontains" { return ICONTAINS; }
+"iendswith" { return IENDSWITH; }
+"iequals" { return IEQUALS; }
+"import" { return IMPORT; }
+"in" { return IN; }
+"include" { return INCLUDE; }
+"istartswith" { return ISTARTSWITH; }
+"matches" { return MATCHES; }
+"meta" { return META; }
+"nocase" { return NOCASE; }
+"none" { return NONE; }
+"not" { return NOT; }
+"of" { return OF; }
+"or" { return OR; }
+"private" { return PRIVATE; }
+"rule" { return RULE; }
+"startswith" { return STARTSWITH; }
+"strings" { return STRINGS; }
+"them" { return THEM; }
+"wide" { return WIDE; }
+"xor" { return XOR; }
+
+"false" { return _FALSE; }
+"true" { return _TRUE; }
+
+
+%{ /* Comments: block comments use a dedicated state, line comments are skipped */ %}
+
+"/*" { PUSH_STATE(comment); }
+<comment>"*/" { POP_STATE; }
+<comment>(.|\n) { }
+
+"//"[^\n]* { }
+
+
+%{ /* Text blocks: the token text is handed over as a (pointer, length) pair */ %}
+
+$({letter}|{digit}|_)*"*" {
+    /* The value points straight at yytext: nothing is copied here */
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return STRING_IDENTIFIER_WITH_WILDCARD;
+
+}
+
+$({letter}|{digit}|_)* {
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return STRING_IDENTIFIER;
+
+}
+
+#({letter}|{digit}|_)* {
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return STRING_COUNT;
+
+}
+
+@({letter}|{digit}|_)* {
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return STRING_OFFSET;
+
+}
+
+!({letter}|{digit}|_)* {
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return STRING_LENGTH;
+
+}
+
+u?int(8|16|32)(be)? {
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return INTEGER_FUNCTION;
+
+}
+
+({letter}|_)({letter}|{digit}|_)* {
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return IDENTIFIER;
+
+}
+
+{digit}+(MB|KB){0,1} {
+    /* Decimal number; an MB or KB suffix, if any, stays part of the text */
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return NUMBER;
+
+}
+
+{digit}+"."{digit}+ {
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return DOUBLE;
+
+}
+
+0x{hexdigit}+ {
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return NUMBER;
+
+}
+
+0o{octdigit}+ {
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return NUMBER;
+
+}
+
+\"{str_mixed}*\" {
+    /* The surrounding double quotes are part of the token text */
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return TEXT_STRING;
+
+}
+
+"/" {
+    /* Opening slash of a regular expression: no token is returned yet */
+    PUSH_STATE(regexp);
+
+}
+
+<regexp>(\\\/|\\.|[^/\n\\])+\/i?s? {
+    /* The text holds the pattern, its closing slash and the optional flags */
+    POP_STATE;
+
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return REGEXP;
+
+}
+
+\{(({hexdigit}|[ \-|\~\?\[\]\(\)\n\r\t]|\/\*(\/|\**[^*/])*\*+\/)+|\/\/.*\n)+\} {
+    /* Whole hexadecimal pattern, braces and embedded comments included */
+    yylval->cstring.data = yytext;
+    yylval->cstring.len = yyleng;
+
+    return HEX_STRING;
+
+}
+
+
+%{ /* Default actions: blanks are skipped in every state */ %}
+
+<*>[ \t\r]+ { }
+
+<*>[\n]+ { }
+
+<*>.
{ /* Any other character: stop the scan with a message quoting it */
+    char *msg;
+    int ret;
+    ret = asprintf(&msg, "Unhandled token in rule definition: '%s '", yytext);
+    if (ret == -1)
+        YY_FATAL_ERROR("Unhandled token in undisclosed rule definition");
+    else
+    {
+        YY_FATAL_ERROR(msg);
+        free(msg); /* NOTE(review): only reached if YY_FATAL_ERROR returns; flex's stock version exits - confirm it is not overridden */
+    }
+    }
+
+
+%%
diff --git a/tools/yara2rost/yara2rost.c b/tools/yara2rost/yara2rost.c
new file mode 100644
index 0000000..3206309
--- /dev/null
+++ b/tools/yara2rost/yara2rost.c
@@ -0,0 +1,295 @@
+
+/* Chrysalide - Binary file analysis tool
+ * yara2rost.c - translation of YARA rules into ROST rules
+ *
+ * Copyright (C) 2023 Cyrille Bagard
+ *
+ * This file is part of Chrysalide.
+ *
+ * Chrysalide is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Chrysalide is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Chrysalide. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <malloc.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+
+#include "decl.h"
+
+
+
+/* Displays some hints about the usage of the program. */
+static void show_usage(const char *);
+
+/* Retrieves the content to process from the standard input. */
+static void *get_input_data_from_stdin(size_t *);
+
+/* Retrieves the content to process from an external file.
*/ +static void *get_input_data_from_file(const char *, size_t *); + + + +/****************************************************************************** +* * +* Paramètres : argv0 = nombre du programme exécuté. * +* * +* Description : Affiche des indications sur l'utilisation du programme. * +* * +* Retour : - * +* * +* Remarques : - * +* * +******************************************************************************/ + +static void show_usage(const char *argv0) +{ + printf("\n"); + + printf("Usage: %s [options] [<YARA file>]\n", argv0); + + printf("\n"); + + printf("General options:\n"); + + printf("\n"); + + printf("\t-h | --help\t\tDisplay this messsage.\n"); + + printf("\n"); + + printf("If no YARA file is provided as argument, a rule definition is expected from the standard input.\n"); + + printf("\n"); + +} + + +/****************************************************************************** +* * +* Paramètres : length = taille de l'espace mémoire mis en place. [OUT] * +* * +* Description : Récupère un contenu à traiter depuis l'entrée standard. * +* * +* Retour : Adresse valide ou NULL en cas d'échec. * +* * +* Remarques : - * +* * +******************************************************************************/ + +static void *get_input_data_from_stdin(size_t *length) +{ + char *result; /* Espace mémoire à retourner */ + ssize_t got; /* Quantité d'octets lus */ + + result = NULL; + *length = 0; + +#define ALLOC_SIZE 2048 + + while (true) + { + result = realloc(result, (*length + ALLOC_SIZE) * sizeof(char)); + + got = read(STDIN_FILENO, result + *length, ALLOC_SIZE); + + if (got == -1) + { + perror("read"); + goto exit_with_error; + } + + *length += got; + + if (got < ALLOC_SIZE) + break; + + } + + return result; + + exit_with_error: + + free(result); + + *length = 0; + + return NULL; + +} + + +/****************************************************************************** +* * +* Paramètres : filename = chemin du fichier à charger en mémoire. 
*
+*                length   = size of the memory area set up. [OUT]             *
+*                                                                             *
+*  Description : Retrieves the content to process from an external file.      *
+*                                                                             *
+*  Returns     : Valid address, or NULL on failure.                           *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static void *get_input_data_from_file(const char *filename, size_t *length)
+{
+    char *result;                           /* Memory area to return       */
+    int fd;                                 /* File descriptor             */
+    struct stat info;                       /* Information about the file  */
+    int ret;                                /* Result of a call            */
+    size_t size;                            /* Expected amount of data     */
+    size_t done;                            /* Amount of data already read */
+    ssize_t got;                            /* Number of bytes read        */
+
+    result = NULL;
+    *length = 0;
+
+    fd = open(filename, O_RDONLY);
+    if (fd == -1)
+    {
+        perror("open");
+        goto exit;
+    }
+
+    ret = fstat(fd, &info);
+    if (ret == -1)
+    {
+        perror("fstat");
+        goto exit_with_fd;
+    }
+
+    size = (size_t)info.st_size;
+
+    /* Never request 0 byte: malloc(0) is allowed to return NULL */
+    result = malloc((size > 0 ? size : 1) * sizeof(char));
+    if (result == NULL)
+    {
+        perror("malloc");
+        goto exit_with_fd;
+    }
+
+    /* One read() may deliver less than requested: go on until done */
+    for (done = 0; done < size; done += (size_t)got)
+    {
+        got = read(fd, result + done, size - done);
+
+        if (got <= 0)
+        {
+            /* errno is only meaningful when read() returned -1 */
+            if (got == -1)
+                perror("read");
+            else
+                fprintf(stderr, "read: unexpected end of file\n");
+
+            free(result);
+            result = NULL;
+
+            goto exit_with_fd;
+
+        }
+
+    }
+
+    *length = size;
+
+ exit_with_fd:
+
+    close(fd);
+
+ exit:
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : argc = number of command line arguments.                     *
+*                argv = command line arguments.                               *
+*                                                                             *
+*  Description : Entry point of the program.                                  *
+*                                                                             *
+*  Returns     : EXIT_SUCCESS if the program ran without trouble.             *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+int main(int argc, char **argv)
+{
+    int result;                             /* Status to return            */
+    bool need_help;                         /* Was the help requested?     */
+    bool bad_option;                        /* Was an option rejected?     */
+    int index;                              /* Index of the long option    */
+    int ret;                                /* Result of an option lookup  */
+    const char *source;                     /* Origin of the definitions   */
+    void *content;                          /* Content to translate        */
+    size_t length;                          /* Size of this content        */
+
+    static struct option long_options[] = {
+
+        { "help", no_argument, NULL, 'h' },
+
+        { NULL, 0, NULL, 0 }
+
+    };
+
+    /* Collecting the options */
+
+    need_help = false;
+    bad_option = false;
+
+    while (true)
+    {
+        ret = getopt_long(argc, argv, "h", long_options, &index);
+        if (ret == -1) break;
+
+        switch (ret)
+        {
+            case 'h':
+                need_help = true;
+                break;
+
+            default:
+                /* Unknown option: getopt_long() has already reported it */
+                bad_option = true;
+                break;
+
+        }
+
+    }
+
+    /* Extra checks: at most one positional argument is accepted */
+
+    if (need_help || bad_option || (optind != argc && (optind + 1) != argc))
+    {
+        show_usage(argv[0]);
+        result = (need_help ? EXIT_SUCCESS : EXIT_FAILURE);
+        goto exit;
+    }
+
+    /* Expected processing */
+
+    result = EXIT_FAILURE;
+
+    if (optind == argc)
+        content = get_input_data_from_stdin(&length);
+
+    else
+    {
+        source = argv[optind];
+
+        if (strcmp(source, "-") == 0 || strcmp(source, "/dev/stdin") == 0)
+            content = get_input_data_from_stdin(&length);
+        else
+            content = get_input_data_from_file(source, &length);
+
+    }
+
+    if (content != NULL)
+    {
+        if (process_rules_definitions(content, length))
+            result = EXIT_SUCCESS;
+
+        free(content);
+
+    }
+
+ exit:
+
+    return result;
+
+} |