From 50789d8c735178858411f67118bc73eb620f4e69 Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Tue, 14 Nov 2023 20:19:39 +0100
Subject: Improve the ROST grammar translation for AFL++.

---
 tools/fuzzing/rost/convert.py | 74 +++++++++++++++++++++++++++++++++----------
 1 file changed, 58 insertions(+), 16 deletions(-)

diff --git a/tools/fuzzing/rost/convert.py b/tools/fuzzing/rost/convert.py
index c0bdde8..116dee6 100644
--- a/tools/fuzzing/rost/convert.py
+++ b/tools/fuzzing/rost/convert.py
@@ -7,7 +7,8 @@ def define_PLAIN_TEXT(name, last):
     """Create definition for the PLAIN_TEXT token."""
 
     print('    "<%s>": [ ["\\\"", "<str_not_escaped>", "\\\""] ],' % name.lower())
-    print('    "<str_not_escaped>": [ ["a", "b", "c"] ]%s' % (',' if not(last) else ''))
+    print('    "<str_not_escaped>": [ ["<char>"], ["<char>", "<char>"], ["<char>", "<char>", "<char>"] ],')
+    print('    "<char>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"], ["A"], ["B"], ["C"], ["D"], ["E"], ["F"] ]%s' % (',' if not(last) else ''))
 
 
 def define_SIGNED_INTEGER(name, last):
@@ -19,27 +20,68 @@ def define_SIGNED_INTEGER(name, last):
 def define_UNSIGNED_INTEGER(name, last):
     """Create definition for the UNSIGNED_INTEGER token."""
 
-    print('    "<%s>": [ ["<number>"], ["<number>", "<number>"], ["<number>", "<number>", "<number>"] ],' % name.lower())
-    print('    "<number>": [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]%s' % (',' if not(last) else ''))
+    print('    "<%s>": [ ["<fnumber>"], ["<number>", "<fnumber>"], ["<number>", "<fnumber>", "<fnumber>"] ],' % name.lower())  # multi-symbol alternatives start with <number>
+    print('    "<number>": [ ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ],')  # digits 1-9 only, "0" is excluded
+    print('    "<fnumber>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else ''))  # digits 0-9; trailing comma unless last
 
 
 def define_BYTES_ID(name, last):
     """Create definition for the BYTES_ID token."""
 
-    print('    "<%s>": [ ["$", "<id>"] ],' % name.lower())
-    print('    "<id>": [ ["a", "b", "c"] ]%s' % (',' if not(last) else ''))
+    print('    "<%s>": [ ["$"], ["$*"], [ "$", "<id>", "<idx>" ], [ "$", "<id>", "*" ] ],' % name.lower())  # four alternatives, all starting with "$"
+    print('    "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],')  # one letter among a-l
+    print('    "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else ''))  # one digit 0-9; trailing comma unless last
+
+
+def define_BYTES_ID_COUNTER(name, last):
+    """Print the '#'-prefixed BYTES_ID_COUNTER rule keyed by name.lower(), plus the <id> and <idx> rules; no trailing comma if last."""
+
+    print('    "<%s>": [ ["#"], ["#*"], [ "#", "<id>", "<idx>" ], [ "#", "<id>", "*" ] ],' % name.lower())
+    print('    "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],')
+    print('    "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else ''))
+
+
+def define_BYTES_ID_START(name, last):
+    """Print the '@'-prefixed BYTES_ID_START rule keyed by name.lower(), plus the <id> and <idx> rules; no trailing comma if last."""
+
+    print('    "<%s>": [ ["@"], ["@*"], [ "@", "<id>", "<idx>" ], [ "@", "<id>", "*" ] ],' % name.lower())
+    print('    "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],')
+    print('    "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else ''))
+
+
+def define_BYTES_ID_LENGTH(name, last):
+    """Print the '!'-prefixed BYTES_ID_LENGTH rule keyed by name.lower(), plus the <id> and <idx> rules; no trailing comma if last."""
+
+    print('    "<%s>": [ ["!"], ["!*"], [ "!", "<id>", "<idx>" ], [ "!", "<id>", "*" ] ],' % name.lower())
+    print('    "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],')
+    print('    "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else ''))
+
+
+def define_BYTES_ID_END(name, last):
+    """Print the '~'-prefixed BYTES_ID_END rule keyed by name.lower(), plus the <id> and <idx> rules; no trailing comma if last."""
+
+    print('    "<%s>": [ ["~"], ["~*"], [ "~", "<id>", "<idx>" ], [ "~", "<id>", "*" ] ],' % name.lower())
+    print('    "<id>": [ ["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"], ["j"], ["k"], ["l"] ],')
+    print('    "<idx>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"] ]%s' % (',' if not(last) else ''))
 
 
 def define_HEX_BYTES(name, last):
     """Create definition for the HEX_BYTES token."""
 
-    print('    "<%s>": [ ["00", "01"] ]%s' % (name.lower(), ',' if not(last) else ''))
+    print('    "<%s>": [ ["<hex>", "<hex>"] ],' % name.lower())  # single alternative: two <hex> symbols
+    print('    "<hex>": [ ["0"], ["1"], ["2"], ["3"], ["4"], ["5"], ["6"], ["7"], ["8"], ["9"], ["a"], ["b"], ["c"], ["d"], ["e"], ["f"] ]%s' % (',' if not(last) else ''))  # one lowercase hex digit; trailing comma unless last
 
 
 def define_FULL_MASK(name, last):
     """Create definition for the FULL_MASK token."""
 
-    print('    "<%s>": [ ["?0", "1?"] ]%s' % (name.lower(), ',' if not(last) else ''))
+    print('    "<%s>": [ ["?", "?"] ]%s' % (name.lower(), ',' if not(last) else ''))  # single alternative: two "?" symbols; trailing comma unless last
+
+
+def define_SEMI_MASK(name, last):
+    """Print the SEMI_MASK rule keyed by name.lower() with the two alternatives '?0' and '1?'; no trailing comma if last."""
+
+    print('    "<%s>": [ ["?0"], ["1?"] ]%s' % (name.lower(), ',' if not(last) else ''))
 
 
 def define_KB(name, last):
@@ -70,19 +112,19 @@ __lexer_tokens = {
 
     'BYTES_ID': define_BYTES_ID,
     'BYTES_FUZZY_ID': define_BYTES_ID,
-    'BYTES_ID_COUNTER': define_BYTES_ID,
-    'BYTES_FUZZY_ID_COUNTER': define_BYTES_ID,
-    'BYTES_ID_START': define_BYTES_ID,
-    'BYTES_FUZZY_ID_START': define_BYTES_ID,
-    'BYTES_ID_LENGTH': define_BYTES_ID,
-    'BYTES_FUZZY_ID_LENGTH': define_BYTES_ID,
-    'BYTES_ID_END': define_BYTES_ID,
-    'BYTES_FUZZY_ID_END': define_BYTES_ID,
+    'BYTES_ID_COUNTER': define_BYTES_ID_COUNTER,
+    'BYTES_FUZZY_ID_COUNTER': define_BYTES_ID_COUNTER,
+    'BYTES_ID_START': define_BYTES_ID_START,
+    'BYTES_FUZZY_ID_START': define_BYTES_ID_START,
+    'BYTES_ID_LENGTH': define_BYTES_ID_LENGTH,
+    'BYTES_FUZZY_ID_LENGTH': define_BYTES_ID_LENGTH,
+    'BYTES_ID_END': define_BYTES_ID_END,
+    'BYTES_FUZZY_ID_END': define_BYTES_ID_END,
 
     'NAME': define_PLAIN_TEXT,
     'HEX_BYTES': define_HEX_BYTES,
     'FULL_MASK': define_FULL_MASK,
-    'SEMI_MASK': define_FULL_MASK,
+    'SEMI_MASK': define_SEMI_MASK,
     'REGEX_BYTES': define_PLAIN_TEXT,
     'REGEX_CLASSES': define_PLAIN_TEXT,
     'REGEX_RANGE': define_PLAIN_TEXT,
-- 
cgit v0.11.2-87-g4458