diff options
author | Cyrille Bagard <nocbos@gmail.com> | 2025-08-04 20:45:57 (GMT) |
---|---|---|
committer | Cyrille Bagard <nocbos@gmail.com> | 2025-08-04 20:45:57 (GMT) |
commit | 89ab00b2b4535612b69433041c5d6e6df95b3f3e (patch) | |
tree | 8606585177ea46caa96b8ae9d021dd4d3af58363 /gen-code.py | |
parent | 1c7c6cdeae20e824ad8447daec4dc12b3b286def (diff) |
Split the code and its configuration.
Diffstat (limited to 'gen-code.py')
-rw-r--r-- | gen-code.py | 139 |
1 files changed, 35 insertions, 104 deletions
diff --git a/gen-code.py b/gen-code.py index 4328d2b..3bd1cbc 100644 --- a/gen-code.py +++ b/gen-code.py @@ -1,4 +1,5 @@ +import importlib import json import re import subprocess @@ -17,39 +18,6 @@ __OPTIONS__ = dict( #'repeat_penalty': 1.2 ) - -__SYSTEM__PROMPT__ = ''' -You are an Android development engineer, writing low level code according to the instructions below. - -You are a powerful code editing assistant capable of writing code and creating artifacts in conversations with users, or modifying and updating existing artifacts as requested by users. - -An artifact refers to a runnable complete code snippet, you prefer to integrate and output such complete runnable code rather than breaking it down into several code blocks. - -''' -#Output only the smali-compatible code, without any additional descriptive text. - - -__SYSTEM__PROMPT__ = ''' -Act as an expert instructor with decades of experience in the Android ecosystem, low-level coding, software security, and reverse engineering. You can list all the Dalvik bytecode mnemonics. You know how to deal with all the Dalvik bytecode mnemonics and what to make the world know it. - -Your task is to write assembly code for the Dalvik Virtual Machine. The code has to be fully compatible with the smali assembler. - -# Coding and Response Practices - -- Use practical examples drawn from real-world open-source samples as inspiration to demonstrate techniques for coding complex structures. -- Encourage reasoning through original code writing before drawing conclusions and provide clear, concise explanations supported by annotated code and examples. -- After generation, please check the code execution again to ensure there are no errors in the output. - -# Output Format - -Avoid explanations and focus on code variety of Dalvik bytecode when producing code. Format the output in markdown for clarity, using only one code block for all code excerpts. - -All the generated code has to be included into one class only. -''' -# Include step-by-step reasoning where appropriate. - - - __PREFIX__ = 'code-' @@ -65,7 +33,7 @@ def send_message_to_ollama(messages, ins): msg = { 'role': 'user', - 'content': ins #.replace('\n', ' ') + 'content': ins } messages.append(msg) @@ -145,7 +113,7 @@ def extract_errors(data): found = [] - pat = re.compile("^" + __PREFIX__ + "\d+.smali\[(\d+),(\d+)] (.*)$") + pat = re.compile("^.*" + __PREFIX__ + "\d+.smali\[(\d+),(\d+)] (.*)$") lines = data.split('\n') @@ -164,7 +132,7 @@ def extract_errors(data): return found -def check_code(index): +def check_code(index, config): """Génère au besoin une nouvelle requête pour corriger le code fourni.""" filename = __PREFIX__ + '%04u.smali' % index @@ -173,105 +141,64 @@ def check_code(index): errors = extract_errors(stderr) - next_msg = [] + print('[i] Errors? %d' % len(errors)) - print('Errors? %d' % len(errors)) + next_msg = [] with open(filename, 'r') as fd: content = fd.read().split('\n') for e in errors: - if len(next_msg) == 0: - next_msg.append('The smali assembler encountered errors with the previously generated code.') - next_msg.append('') - next_msg.append('Here is the error list with location and bug origin:') - assert(e['line'] > 0) next_msg.append('- at line %d column %d: %s (erroneous line content : "%s" )' \ % (e['line'], e['col'], e['msg'], content[e['line'] - 1].lstrip())) - if len(next_msg) > 0: - next_msg.append('') - next_msg.append('Please fix your code and provide an updated version of smali assembly code!') - next_msg.append('') - next_msg.append('Focus on name suffixes and operands while solving errors. Check if used instructions actually exist according to the Dalvik bytecode specifications. For instance, there is no mul-int/lit16 mnemonic.') - next_msg.append('') - next_msg.append('Remember to use only plain hexdecimal integers for numbers. Break long high level statements into several Dalvik instructions as much as possible.') - next_msg.append('') - next_msg.append('For instance, call to System.out.println has to translate to instructions relying on sget-object and invoke-virtual mnemonics.') - next_msg.append('') - next_msg.append('Line counter starts at 1.') - next_msg.append('') - next_msg.append('If you do not know how to fix the generated bytecode, rewrite it completely or remove the relative line. Do not hesitate to remove an entire function if there are too much issues inside it.') + if len(next_msg) == 0: + next_msg = '' - return '\n'.join(next_msg) + else: + next_msg = \ + config.__ERROR_PROLOGUE__.lstrip('\n').rstrip('\n') \ + + '\n'.join(next_msg) \ + + config.__ERROR_EPILOGUE__.rstrip('\n') \ + + return next_msg if __name__ == '__main__': """Point d'entrée.""" - if len(sys.argv) == 1: - - messages = [ - { - 'role': 'system', - 'content': __SYSTEM__PROMPT__ #.replace('\n', ' ') - } - ] - - messages = [] - - instructions = ''' -Write me a bunch of valid Android smali bytecode. Your goal is to write a demonstration of the Dalvik bytecode features. - -Try to use one function per instruction set category. Pick one target from the list below: -- Data handling -- Arithmetic and logic operations -- Control flow operations -- Specific instructions for highest Dex files versions (038 or 039) - -As demonstration, include some function implementing well-known algorithms such as Fibonacci sequence, FNV1a or murmuhash3 hashes, aso. + argc = len(sys.argv) -Include all the created functions into one class only. + if argc < 2: + print('Usage: %s <config number> [--check]' % sys.argv[0]) + sys.exit(1) -Try to make sure that a lot of different Dalvik mnemonics are used. Mix as much different mnemonics as possible. + config = importlib.import_module('config%02u' % int(sys.argv[1])) -Ensure that each instruction is used according its proper format. + if argc == 2: -The result code HAS TO BE able to get assembled using the smali assembler without modification. -''' - instructions = ''' -Write me a bunch of valid Android smali bytecode. Your goal is to write a demonstration of the Dalvik bytecode features. - -Try to use one function per instruction set category. Pick one target from the list below: -- Data handling -- Arithmetic and logic operations -- Control flow operations -- Specific instructions for highest Dex files versions (038 or 039) - -Include all the created functions into one class only. - -Try to make sure that a lot of different Dalvik mnemonics are used. Mix as much different mnemonics as possible. - -Ensure that each instruction is used according its proper format. + messages = [] -Do not write explainations. Do not write comments in generated Dalvik code. Use only plain hexdecimal integers for numbers. Break long high level statements into several Dalvik instructions as much as possible. + if config.__SYSTEM__PROMPT__: -For instance, call to System.out.println has to translate to instructions relying on sget-object and invoke-virtual mnemonics. + msg = { + 'role': 'system', + 'content': config.__SYSTEM__PROMPT__ + } -The result code HAS TO BE able to get assembled using the smali assembler without modification. -''' + messages.append(msg) - response = send_message_to_ollama(messages, instructions) + response = send_message_to_ollama(messages, config.__FIRST_INSTRUCTIONS__) counter = 0 dump_smali_code(response, counter) while True: - instructions = check_code(counter) + instructions = check_code(counter, config) if len(instructions) == 0: break @@ -283,8 +210,12 @@ The result code HAS TO BE able to get assembled using the smali assembler withou else: - stderr = compile_file(sys.argv[1]) + filename = sys.argv[2] + + stderr = compile_file(filename) errors = extract_errors(stderr) + print('%s: %d' % (filename, len(errors))) + sys.exit(0 if len(errors) == 0 else 1) |