diff options
Diffstat (limited to 'tools/fuzzing/rost')
-rw-r--r-- | tools/fuzzing/rost/Makefile.am | 14 | ||||
-rw-r--r-- | tools/fuzzing/rost/convert.py | 403 | ||||
-rw-r--r-- | tools/fuzzing/rost/fast-rost.c | 283 | ||||
-rwxr-xr-x | tools/fuzzing/rost/gen-dict.sh | 81 | ||||
-rwxr-xr-x | tools/fuzzing/rost/minall.sh | 25 | ||||
-rwxr-xr-x | tools/fuzzing/rost/rerun.sh | 27 | ||||
-rw-r--r-- | tools/fuzzing/rost/test.rost | 12 |
7 files changed, 845 insertions, 0 deletions
# tools/fuzzing/rost/convert.py
#
# Companion build file added by the same change
# (tools/fuzzing/rost/Makefile.am), reproduced here for reference:
#
#     bin_PROGRAMS = fast-rost
#
#     AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src
#
#     # EXTRA_rost_DEPENDENCIES = libchrysacore.la
#
#     fast_rost_SOURCES = \
#         fast-rost.c
#
#     fast_rost_CFLAGS = $(TOOLKIT_CFLAGS) $(LIBXML_CFLAGS)
#
#     fast_rost_LDFLAGS = $(LIBGOBJ_LIBS) -L$(top_srcdir)/src/.libs -lchrysacore

"""Convert the rules section of a Bison grammar into a JSON grammar.

The script reads the grammar file given as first argument, keeps the part
located after the first '%%' marker, strips C comments and C actions, and
prints a JSON object mapping "<rule>" names to lists of alternatives, each
alternative being a list of quoted tokens.

Cf. https://github.com/AFLplusplus/Grammar-Mutator/blob/stable/doc/customizing-grammars.md
"""

import re
import sys


def _tail(last):
    """Return the separator ending a JSON member: nothing for the last one."""

    return '' if last else ','


def define_PLAIN_TEXT(name, last):
    """Create definition for the PLAIN_TEXT token."""

    print(' "<%s>": [ ["\\\"", "<str_not_escaped>", "\\\""] ],' % name.lower())
    # NOTE(review): a single alternative made of "a", "b", "c" always expands
    # to "abc"; confirm this is intended rather than three alternatives.
    print(' "<str_not_escaped>": [ ["a", "b", "c"] ]%s' % _tail(last))


def define_SIGNED_INTEGER(name, last):
    """Create definition for the SIGNED_INTEGER token."""

    print(' "<%s>": [ ["-", "<unsigned_integer>"] ]%s' % (name.lower(), _tail(last)))


def define_UNSIGNED_INTEGER(name, last):
    """Create definition for the UNSIGNED_INTEGER token."""

    print(' "<%s>": [ ["<number>"], ["<number>", "<number>"], ["<number>", "<number>", "<number>"] ],' % name.lower())

    # Every rule is a list of alternatives, each alternative being itself a
    # list of tokens: emit the digits in that same two-level shape.
    digits = ', '.join('["%d"]' % d for d in range(10))

    print(' "<number>": [ %s ]%s' % (digits, _tail(last)))


def define_BYTES_ID(name, last):
    """Create definition for the BYTES_ID token."""

    print(' "<%s>": [ ["$", "<id>"] ],' % name.lower())
    print(' "<id>": [ ["a", "b", "c"] ]%s' % _tail(last))


def define_HEX_BYTES(name, last):
    """Create definition for the HEX_BYTES token."""

    print(' "<%s>": [ ["00", "01"] ]%s' % (name.lower(), _tail(last)))


def define_FULL_MASK(name, last):
    """Create definition for the FULL_MASK token."""

    print(' "<%s>": [ ["?0", "1?"] ]%s' % (name.lower(), _tail(last)))


def define_KB(name, last):
    """Create definition for the KB token."""

    print(' "<%s>": [ ["kb"], ["Kb"], ["kB"], ["KB"] ]%s' % (name.lower(), _tail(last)))


def define_MB(name, last):
    """Create definition for the MB token."""

    print(' "<%s>": [ ["mb"], ["Mb"], ["mB"], ["MB"] ]%s' % (name.lower(), _tail(last)))


def define_GB(name, last):
    """Create definition for the GB token."""

    print(' "<%s>": [ ["gb"], ["Gb"], ["gB"], ["GB"] ]%s' % (name.lower(), _tail(last)))


# Lexer token name -> function printing the JSON definition(s) of that token.
__lexer_tokens = {
    'PLAIN_TEXT': define_PLAIN_TEXT,
    'ESCAPED_TEXT': define_PLAIN_TEXT,
    'RULE_IDENTIFIER': define_PLAIN_TEXT,
    'INFO_KEY': define_PLAIN_TEXT,
    'SIGNED_INTEGER': define_SIGNED_INTEGER,
    'UNSIGNED_INTEGER': define_UNSIGNED_INTEGER,

    'BYTES_ID': define_BYTES_ID,
    'BYTES_FUZZY_ID': define_BYTES_ID,
    'BYTES_ID_COUNTER': define_BYTES_ID,
    'BYTES_FUZZY_ID_COUNTER': define_BYTES_ID,
    'BYTES_ID_START': define_BYTES_ID,
    'BYTES_FUZZY_ID_START': define_BYTES_ID,
    'BYTES_ID_LENGTH': define_BYTES_ID,
    'BYTES_FUZZY_ID_LENGTH': define_BYTES_ID,
    'BYTES_ID_END': define_BYTES_ID,
    'BYTES_FUZZY_ID_END': define_BYTES_ID,

    'NAME': define_PLAIN_TEXT,
    'HEX_BYTES': define_HEX_BYTES,
    'FULL_MASK': define_FULL_MASK,
    'SEMI_MASK': define_FULL_MASK,
    'REGEX_BYTES': define_PLAIN_TEXT,
    'REGEX_CLASSES': define_PLAIN_TEXT,
    'REGEX_RANGE': define_PLAIN_TEXT,
    'KB': define_KB,
    'MB': define_MB,
    'GB': define_GB,
    'STRING': define_PLAIN_TEXT,
}


# Matches C/C++ comments as well as quoted literals, so that comment markers
# located inside a literal are left untouched.
# Cf. https://stackoverflow.com/questions/241327/remove-c-and-c-comments-using-python/241506#241506
_COMMENT_OR_LITERAL = re.compile(
    r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
    re.DOTALL | re.MULTILINE
)

# A '|' separates two alternatives unless it directly follows a double quote
# (which is how the "|" literal token appears in the grammar).
_ALTERNATIVE_SEPARATOR = re.compile(r'(?<!")\|')

# One rule: a name, a colon, and everything up to the next semicolon.
_RULE = re.compile(r'[\n\t ]*([^\n\t :]+)[\n\t ]*:([^;]+);')


def remove_grammar_comments(grammar):
    """Delete all the C code comments.

    Each comment is replaced by a single space; quoted literals are kept.
    """

    def replacer(match):
        text = match.group(0)
        # Comments start with '/'; anything else is a quoted literal.
        return ' ' if text.startswith('/') else text

    return _COMMENT_OR_LITERAL.sub(replacer, grammar)


def remove_grammar_actions(grammar):
    """Delete all the C code handling tokens.

    Everything enclosed in braces is dropped, nested braces included. Braces
    found inside a double-quoted string at top level are kept as text.

    Raises ValueError if a closing brace has no matching opening brace.
    """

    kept = []

    scope = 0
    in_string = False

    for ch in grammar:

        if ch == '{' and not in_string:
            scope += 1

        elif ch == '}' and not in_string:
            if scope == 0:
                raise ValueError('unbalanced "}" in grammar actions')
            scope -= 1

        elif scope == 0:
            kept.append(ch)
            # Strings are only tracked outside of actions.
            if ch == '"':
                in_string = not in_string

    return ''.join(kept)


def is_upper(text):
    """State if a string is upper case."""

    return text.upper() == text


def parse_rule_definition(grammar):
    """Process the definition of one rule.

    Return one list of converted tokens per alternative: quoted literals are
    kept as they are, upper case names become lower case "<token>" references
    and other names become "<rule>" references.

    Exit with an error message if an upper case name has no known definition.
    """

    result = []

    for definition in _ALTERNATIVE_SEPARATOR.split(grammar):

        converted = []

        for token in definition.split():

            if token.startswith('"'):
                converted.append(token)

            elif is_upper(token):

                if token not in __lexer_tokens:
                    # Report on stderr with a failure status: stdout carries
                    # the JSON being generated.
                    sys.exit('Missing def: %s' % token)

                converted.append('"<%s>"' % token.lower())

            else:
                converted.append('"<%s>"' % token)

        result.append(converted)

    return result


def parse_rules(grammar):
    """Process all the rules contained in the grammar.

    Print the "<START>" entry pointing to the first rule found and return a
    dictionary mapping each rule name to its list of alternatives.
    """

    tree = {}

    first = True

    for name, body in _RULE.findall(grammar):

        if first:
            print(' "<START>": [ ["<%s>"] ],' % name)
            first = False

        tree[name] = parse_rule_definition(body)

    return tree


def simplify_tree(tree):
    """Remove nodes which only are links between two levels of nodes.

    a = [ [b] ]
    b = [ [c], [d] ]

    -> replace a by b

    Examples: cexpression, modifier_arg
    """

    # Quoted name of each pure alias -> the single token it stands for.
    replaced = {
        '"<%s>"' % name: alternatives[0][0]
        for name, alternatives in tree.items()
        if len(alternatives) == 1 and len(alternatives[0]) == 1
    }

    new_tree = {}

    for name, alternatives in tree.items():

        if '"<%s>"' % name in replaced:
            continue

        new_alternatives = []

        for alternative in alternatives:

            for alias, target in replaced.items():
                alternative = [token.replace(alias, target) for token in alternative]

            new_alternatives.append(alternative)

        new_tree[name] = new_alternatives

    return new_tree


def find_direct_parent_nodes(tree, name):
    """Find all the rules having an alternative made only of a given rule."""

    quoted = '"<%s>"' % name

    return [
        parent
        for parent, alternatives in tree.items()
        if any(len(alt) == 1 and alt[0] == quoted for alt in alternatives)
    ]


def remove_indirect_left_recursion(tree):
    """Remove all nodes which imply indirect left recursion.

    a = b
    b = a + c

    -> a = a + c

    Examples: logical_expr, relational_expr, string_op, arithm_expr, intersection
    """

    replaced = {}

    for name, alternatives in tree.items():

        parents = find_direct_parent_nodes(tree, name)

        if len(parents) != 1:
            continue

        parent = '"<%s>"' % parents[0]

        # Empty alternatives are legal and cannot start with the parent.
        if any(alt and alt[0] == parent for alt in alternatives):
            replaced[name] = alternatives

    new_tree = {}

    for name, alternatives in tree.items():

        if name in replaced:
            continue

        new_alternatives = []

        for alternative in alternatives:

            inlined = None

            if len(alternative) == 1:
                for removed, content in replaced.items():
                    if '"<%s>"' % removed == alternative[0]:
                        inlined = content
                        break

            if inlined is None:
                new_alternatives.append(alternative)
            else:
                new_alternatives += inlined

        new_tree[name] = new_alternatives

    return new_tree


def output_rules(tree):
    """Output the translated rules, one JSON member per line."""

    for name, alternatives in tree.items():

        rendered = ','.join(
            ' [%s]' % ', '.join(alternative) if alternative else ' []'
            for alternative in alternatives
        )

        print(' "<%s>": [ %s ],' % (name, rendered))


def main(argv):
    """Script entrypoint: translate the grammar file named by argv[1]."""

    if len(argv) < 2:
        sys.exit('Usage: %s <grammar.y>' % argv[0])

    with open(argv[1], 'r') as fd:
        grammar = fd.read()

    sections = grammar.split('%%')

    if len(sections) < 2:
        sys.exit('No "%%%%" rules section found in %s' % argv[1])

    grammar = sections[1]

    grammar = remove_grammar_comments(grammar)

    grammar = remove_grammar_actions(grammar)

    print('{')

    tree = parse_rules(grammar)

    tree = simplify_tree(tree)

    tree = remove_indirect_left_recursion(tree)

    output_rules(tree)

    # Only the very last member must not be followed by a comma.
    remaining = len(__lexer_tokens)

    for name, define in __lexer_tokens.items():
        define(name, remaining == 1)
        remaining -= 1

    print('}')


if __name__ == '__main__':
    main(sys.argv)
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include <assert.h> +#include <getopt.h> +#include <libgen.h> +#include <locale.h> +#include <malloc.h> +#include <stdlib.h> +#include <string.h> + + +#include <i18n.h> + + +#include <analysis/contents/file.h> +#include <analysis/scan/options.h> +#include <analysis/scan/scanner.h> +#include <analysis/scan/patterns/backends/bitap.h> +#include <analysis/scan/patterns/backends/acism.h> +#include <core/core.h> +#include <core/global.h> +#include <core/logs.h> +#include <core/paths.h> +#include <plugins/pglist.h> + + + +#ifndef __AFL_FUZZ_TESTCASE_LEN + +ssize_t fuzz_len; +unsigned char fuzz_buf[1024000]; + +# define __AFL_FUZZ_TESTCASE_LEN fuzz_len +# define __AFL_FUZZ_TESTCASE_BUF fuzz_buf +# define __AFL_FUZZ_INIT() void sync(void); +# define __AFL_LOOP(x) \ + ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? 1 : 0) +# define __AFL_INIT() sync() + +#endif + + +__AFL_FUZZ_INIT(); + + +/****************************************************************************** +* * +* Paramètres : argc = nombre d'arguments dans la ligne de commande. * +* argv = arguments de la ligne de commande. * +* * +* Description : Point d'entrée du programme. * +* * +* Retour : EXIT_SUCCESS si le prgm s'est déroulé sans encombres. 
* +* * +* Remarques : - * +* * +******************************************************************************/ + +int main(int argc, char **argv) +{ + int result; /* Bilan de l'exécution */ + bool check_only; /* Validation uniquement */ + LogMessageType verbosity; /* Niveau de filtre de message */ + GScanOptions *options; /* Options d'analyses */ + int index; /* Indice d'argument */ + int ret; /* Bilan d'un appel */ + char *edir; /* Répertoire de base effectif */ + char *target; /* Cible communiquée */ + unsigned char *afl_buf; /* Tampon de travail d'AFL */ + int afl_len; /* Taille de ce tampon */ + GContentScanner *scanner; /* Encadrement d'une recherche */ + GBinContent *content; /* Contenu à analyser */ + GScanContext *context; /* Contexte des trouvailles */ + sized_string_t padding; /* Bourrage pour le JSON */ + bool full; /* Détailler l'affichage ? */ + + static struct option long_options[] = { + { "algorithm", required_argument, NULL, 'A' }, + { "check-only", no_argument, NULL, 'C' }, + { "print-json", no_argument, NULL, 'j' }, + { "print-strings", no_argument, NULL, 's' }, + { "print-stats", no_argument, NULL, 'S' }, + { "print-tags", no_argument, NULL, 'g' }, + { "tag", required_argument, NULL, 't' }, + { "verbosity", required_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + result = EXIT_FAILURE; + + /* Décodage des options */ + + check_only = false; + verbosity = LMT_COUNT; + + options = g_scan_options_new(); + + g_scan_options_set_backend_for_data(options, G_TYPE_ACISM_BACKEND); + + while (true) + { + ret = getopt_long(argc, argv, "A:CjsSgt:V:", long_options, &index); + if (ret == -1) break; + + switch (ret) + { + case 'A': + if (strcmp(optarg, "bitmap") == 0) + g_scan_options_set_backend_for_data(options, G_TYPE_BITAP_BACKEND); + else if (strcmp(optarg, "acism") == 0) + g_scan_options_set_backend_for_data(options, G_TYPE_ACISM_BACKEND); + else + g_scan_options_set_backend_for_data(options, G_TYPE_INVALID); + break; + + case 'C': + check_only = 
true; + g_scan_options_set_check_only(options, true); + break; + + case 'j': + g_scan_options_set_print_json(options, true); + break; + + case 's': + g_scan_options_set_print_strings(options, true); + break; + + case 'S': + g_scan_options_set_print_stats(options, true); + break; + + case 'g': + g_scan_options_set_print_tags(options, true); + break; + + case 't': + g_scan_options_select_tag(options, optarg); + break; + + case 'V': + verbosity = strtoul(optarg, NULL, 10); + break; + + } + + } + + if ((check_only && (optind + 0) != argc && (optind + 1) != argc) + || (!check_only && (optind + 1) != argc && (optind + 2) != argc)) + { + goto done; + } + + /* Actions de base */ + + if (g_scan_options_get_backend_for_data(options) == G_TYPE_INVALID) + { + goto done; + } + + /* Lancement des choses sérieuses */ + + setlocale(LC_ALL, ""); + edir = get_effective_directory(LOCALE_DIR); + bindtextdomain(PACKAGE, edir); + free(edir); + textdomain(PACKAGE); + + /* Initialisation de GTK */ + g_set_prgname("ROST"); + //gtk_init(&argc, &argv); + + /* Initialisation du programme */ + + set_batch_mode(); + + set_log_verbosity(verbosity); + + if (!load_all_core_components(true)) + goto done; + + init_all_plugins(true); + + /* Traitement des recherches */ + + if ((optind + 1) == argc) + target = argv[optind]; + else + goto done; + + __AFL_INIT(); + + afl_buf = __AFL_FUZZ_TESTCASE_BUF; + + while (__AFL_LOOP(10000)) + { + afl_len = __AFL_FUZZ_TESTCASE_LEN; + + scanner = g_content_scanner_new_from_text((char *)afl_buf, afl_len); + +#if 0 + do + { + FILE *stream; + + stream = fopen("/dev/shm/ctrl.log", "a"); + fprintf(stream, "running %d bytes => %p\n", afl_len, scanner); + fclose(stream); + + } while (0); +#endif + + if (scanner != NULL) + result = EXIT_SUCCESS; + + if (scanner != NULL && !check_only) + { + content = g_file_content_new(target); + if (content == NULL) goto bad_file_content; + + context = g_content_scanner_analyze(scanner, options, content); + + if 
(g_scan_options_get_print_json(options)) + { + padding.data = " "; + padding.len = 3; + + g_content_scanner_output_to_json(scanner, context, &padding, 0, STDOUT_FILENO); + + } + else + { + full = g_scan_options_get_print_strings(options); + + g_content_scanner_output_to_text(scanner, context, full, STDOUT_FILENO); + + } + + g_object_unref(G_OBJECT(context)); + g_object_unref(G_OBJECT(content)); + + bad_file_content: + + g_object_unref(G_OBJECT(scanner)); + + } + + } + + g_object_unref(G_OBJECT(options)); + + /* Sortie */ + + unload_all_core_components(false); + + done: + + return result; + +} diff --git a/tools/fuzzing/rost/gen-dict.sh b/tools/fuzzing/rost/gen-dict.sh new file mode 100755 index 0000000..dfebc0a --- /dev/null +++ b/tools/fuzzing/rost/gen-dict.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +TOP_DIR="$SCRIPT_DIR/../../.." +OUTPUT="$SCRIPT_DIR/rost.dict" + + +echo > "$OUTPUT" + + +echo "# Syntax core keywords" >> "$OUTPUT" + +cat "$TOP_DIR/src/analysis/scan/grammar.y" | \ + grep '%token.*".*' | grep -o -E '"[^"]+"' | sort >> "$OUTPUT" + + +echo >> "$OUTPUT" +echo "# Modifiers" >> "$OUTPUT" + +"$TOP_DIR/src/rost" --dump-modifiers | sort | sed -e 's/^/"/' -e 's/$/"/' >> "$OUTPUT" + + +echo >> "$OUTPUT" +echo "# Namespace" >> "$OUTPUT" + +"$TOP_DIR/src/rost" --dump-namespaces | sort | sed -e 's/^/"/' -e 's/$/"/' >> "$OUTPUT" + + +echo >> "$OUTPUT" +echo "# Identifiers" >> "$OUTPUT" + +for t in "$" "#" "@" "!" 
#!/bin/bash
#
# ===== tools/fuzzing/rost/gen-dict.sh =====
#
# Build the "rost.dict" fuzzing dictionary next to this script. Entries come
# from the grammar tokens, from the modifiers and namespaces dumped by the
# rost binary, and from a set of hand-picked identifiers, numbers and strings.
# Every entry is written as a double-quoted string, one per line.

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

TOP_DIR="$SCRIPT_DIR/../../.."
OUTPUT="$SCRIPT_DIR/rost.dict"


echo > "$OUTPUT"


echo "# Syntax core keywords" >> "$OUTPUT"

grep '%token.*".*' "$TOP_DIR/src/analysis/scan/grammar.y" | \
    grep -o -E '"[^"]+"' | sort >> "$OUTPUT"


echo >> "$OUTPUT"
echo "# Modifiers" >> "$OUTPUT"

"$TOP_DIR/src/rost" --dump-modifiers | sort | sed -e 's/^/"/' -e 's/$/"/' >> "$OUTPUT"


echo >> "$OUTPUT"
echo "# Namespace" >> "$OUTPUT"

"$TOP_DIR/src/rost" --dump-namespaces | sort | sed -e 's/^/"/' -e 's/$/"/' >> "$OUTPUT"


echo >> "$OUTPUT"
echo "# Identifiers" >> "$OUTPUT"

for t in "$" "#" "@" "!" "~" ;
do
    echo "\"${t}a0\"" >> "$OUTPUT"
    echo "\"${t}a1\"" >> "$OUTPUT"
    echo "\"${t}b\"" >> "$OUTPUT"
    echo "\"${t}c\"" >> "$OUTPUT"
    echo "\"${t}a*\"" >> "$OUTPUT"
    echo "\"${t}*\"" >> "$OUTPUT"
    echo "\"${t}\"" >> "$OUTPUT"

done


echo >> "$OUTPUT"
echo "# Numbers" >> "$OUTPUT"

# Values around each power of two, both signs. They are quoted like all the
# other entries: unquoted lines are not valid dictionary tokens.
for i in $( seq 0 32 );
do
    echo -$(( 2 ** i - 1 )) ;
    echo -$(( 2 ** i )) ;
    echo -$(( 2 ** i + 1 )) ;

    echo $(( 2 ** i - 1 )) ;
    echo $(( 2 ** i )) ;
    echo $(( 2 ** i + 1 )) ;

done | sort | uniq | sort -n | sed -e 's/^/"/' -e 's/$/"/' >> "$OUTPUT"


echo >> "$OUTPUT"
echo "# Misc" >> "$OUTPUT"

echo "\"kb\"" >> "$OUTPUT"
echo "\"mb\"" >> "$OUTPUT"
echo "\"gb\"" >> "$OUTPUT"

echo "\"a0\"" >> "$OUTPUT"
echo "\"a1\"" >> "$OUTPUT"
echo "\"b\"" >> "$OUTPUT"
echo "\"c\"" >> "$OUTPUT"

echo "\"\\\"abcdef\\\"\"" >> "$OUTPUT"
echo "\"\\\"azerty\\\"\"" >> "$OUTPUT"
echo "\"\\\"qwertyqwerty\\\"\"" >> "$OUTPUT"
echo "\"??\"" >> "$OUTPUT"
echo "\"0?\"" >> "$OUTPUT"
echo "\"?a\"" >> "$OUTPUT"

echo >> "$OUTPUT"


# ===== tools/fuzzing/rost/minall.sh =====
#!/bin/bash
#
# Minimize every crash found under $FUZ_OUT/default/crashes/ with afl-tmin
# and store the results as "<id>-<sha256>.rost" files in the "min" directory
# next to this script.
#
# Environment:
#   FUZ_OUT    output directory of the fuzzer (mandatory)
#   FAST_ROST  fast-rost binary to run (default: /dev/shm/fuzzing-sys/bin/fast-rost)

if [ -z "$FUZ_OUT" ]; then
    echo "$0 needs a \$FUZ_OUT environment variable!"
    exit 1
fi


SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

MIN_DIR="$SCRIPT_DIR/min"

FAST_ROST="${FAST_ROST:-/dev/shm/fuzzing-sys/bin/fast-rost}"


mkdir -p "$MIN_DIR"

find "$FUZ_OUT/default/crashes/" -name 'id*' | while IFS= read -r f;
do

    # The identifier is read from the file name only ("id:<id>,..."), so
    # that a ':' or ',' in the directory part cannot shift the fields.
    id=$( basename -- "$f" | cut -d: -f2 | cut -d, -f1 )

    h=$( sha256sum < "$f" | cut -d " " -f1 )

    afl-tmin -i "$f" -o "$MIN_DIR/$id-$h.rost" -- "$FAST_ROST" /bin/ls

done


# ===== tools/fuzzing/rost/rerun.sh =====
#!/bin/bash
#
# Replay every minimized test case of the "min" directory with the regular
# rost binary against /bin/ls, and delete the cases whose exit status is at
# most 2.

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

TOP_DIR="$SCRIPT_DIR/../../.."
MIN_DIR="$SCRIPT_DIR/min"


find "$MIN_DIR" -type f -name '*rost' | while IFS= read -r f;
do
    echo "=========== $f"

    "$TOP_DIR/src/rost" "$f" /bin/ls

    status=$?

    if [ $status -le 2 ]; then
        rm -- "$f"
    fi

    sleep 1s

done


# ===== tools/fuzzing/rost/test.rost (sample rule, not a shell script) =====
#
# rule basic {
#
#     bytes:
#         $a = "ABC" base64
#         $b = "12"
#         $c = { 00 01 f0 ff ff [0-9] 23 }
#
#     condition:
#         (#a == 123 or $b or $c) and console.log(maxcommon(modpath($a, $b)))
#
# }