diff options
author | Cyrille Bagard <nocbos@gmail.com> | 2012-11-28 09:43:50 (GMT) |
---|---|---|
committer | Cyrille Bagard <nocbos@gmail.com> | 2012-11-28 09:43:50 (GMT) |
commit | f95598b68b98f6eda701f8f02bc09cb13f65fc72 (patch) | |
tree | eefee33963448a1ce53a7eb80dacabbcdce8fc21 /src/analysis | |
parent | fbb4b6f53d2189ba9f61c1fd149534d8aef82dcd (diff) |
Followed the execution flow to decompile instructions.
git-svn-id: svn://svn.gna.org/svn/chrysalide/trunk@293 abbe820e-26c8-41b2-8c08-b7b2b41f8b0a
Diffstat (limited to 'src/analysis')
-rwxr-xr-x | src/analysis/decomp/Makefile.am | 4 | ||||
-rw-r--r-- | src/analysis/decomp/decompiler.c | 33 | ||||
-rw-r--r-- | src/analysis/decomp/il.c | 382 | ||||
-rw-r--r-- | src/analysis/decomp/il.h | 38 | ||||
-rw-r--r-- | src/analysis/decomp/reduce.c | 53 | ||||
-rw-r--r-- | src/analysis/decomp/reduce.h | 30 | ||||
-rw-r--r-- | src/analysis/routine.c | 2 |
7 files changed, 537 insertions, 5 deletions
diff --git a/src/analysis/decomp/Makefile.am b/src/analysis/decomp/Makefile.am index 7b797bd..27388d6 100755 --- a/src/analysis/decomp/Makefile.am +++ b/src/analysis/decomp/Makefile.am @@ -2,7 +2,9 @@ noinst_LTLIBRARIES = libanalysisdecomp.la libanalysisdecomp_la_SOURCES = \ - decompiler.h decompiler.c + decompiler.h decompiler.c \ + il.h il.c \ + reduce.h reduce.c libanalysisdecomp_la_LDFLAGS = diff --git a/src/analysis/decomp/decompiler.c b/src/analysis/decomp/decompiler.c index 8e574c0..91a9163 100644 --- a/src/analysis/decomp/decompiler.c +++ b/src/analysis/decomp/decompiler.c @@ -2,7 +2,7 @@ /* OpenIDA - Outil d'analyse de fichiers binaires * decompiler.c - encadrement des phases de décompilation * - * Copyright (C) 2010 Cyrille Bagard + * Copyright (C) 2010-2012 Cyrille Bagard * * This file is part of OpenIDA. * @@ -32,7 +32,9 @@ #include <i18n.h> +#include "il.h" #include "../../decomp/output.h" +#include "../../decomp/expr/block.h" #include "../../decomp/lang/java.h" /* FIXME : remme ! */ #include "../../format/format.h" @@ -138,7 +140,11 @@ static void prepare_all_routines_for_decomp(const GLoadedBinary *binary, const c size_t i; GDecContext *context; /* Contexte pour la décompil. 
*/ - GDecInstruction *instr; + GDecInstruction *dinstrs; + + GArchInstruction *instrs; /* Instructions natives */ + + vmpa_t max; /* Première adresse à écarter */ format = g_loaded_binary_get_format(binary); proc = get_arch_processor_from_format(G_EXE_FORMAT(format)); @@ -152,7 +158,28 @@ static void prepare_all_routines_for_decomp(const GLoadedBinary *binary, const c { context = g_arch_processor_get_decomp_context(proc); - instr = g_binary_format_decompile_routine(G_BIN_FORMAT(format), routines[i], context); + g_object_set_data(G_OBJECT(context), "format", format); + g_object_set_data(G_OBJECT(context), "routine", routines[i]); + g_dec_context_set_max_address(context, max); + + instrs = g_binary_routine_get_instructions(routines[i]); + + max = g_binary_routine_get_address(routines[i]) + + g_binary_routine_get_size(routines[i]); + + printf("##### DECOMPILE '%s' #####\n", g_binary_routine_to_string(routines[i])); + + dinstrs = build_decompiled_block(instrs, + g_binary_routine_get_address(routines[i]), + max, VMPA_MAX, context); + + //instr = g_binary_format_decompile_routine(G_BIN_FORMAT(format), routines[i], context); + + + + g_binary_routine_set_decomp_instructions(routines[i], dinstrs); + + if (context != NULL) g_object_unref(context); diff --git a/src/analysis/decomp/il.c b/src/analysis/decomp/il.c new file mode 100644 index 0000000..d9b9588 --- /dev/null +++ b/src/analysis/decomp/il.c @@ -0,0 +1,382 @@ + +/* OpenIDA - Outil d'analyse de fichiers binaires + * il.h - mise en place d'un langage intermédiaire + * + * Copyright (C) 2012 Cyrille Bagard + * + * This file is part of OpenIDA. + * + * OpenIDA is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. 
+ * + * OpenIDA is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Foobar. If not, see <http://www.gnu.org/licenses/>. + */ + + +#include "il.h" + + +#include <malloc.h> +#include <stdlib.h> +#include <string.h> + + +#include "../../decomp/expr/block.h" +#include "../../decomp/instr/ite.h" + + + +/* Indications sur une branche */ +typedef struct _branch_info +{ + vmpa_t *jumps; /* Jalons de la branche */ + size_t count; /* Quantité de ces jalons */ + +} branch_info; + + +/* Indique si une adresse est retenue comme point de passage. */ +static bool is_addr_in_branch(const branch_info *, const vmpa_t *, bool); + +/* Identifie les différents points de passage d'une branche. */ +static void find_next_jumps(GArchInstruction *, vmpa_t, vmpa_t, branch_info *); + +/* Retrouve le point de ralliement entre deux branches. */ +static vmpa_t compute_first_common_addr(branch_info *, branch_info *); + + + + + + + + + +/****************************************************************************** +* * +* Paramètres : info = informations à consulter. * +* addr = adresse à rechercher. * +* fast = autorise une recherche rapide. * +* * +* Description : Indique si une adresse est retenue comme point de passage. * +* * +* Retour : true si le jalon est déjà dans la liste, false sinon. 
* +* * +* Remarques : - * +* * +******************************************************************************/ + +static bool is_addr_in_branch(const branch_info *info, const vmpa_t *addr, bool fast) +{ + bool result; /* Bilan à retourner */ + size_t i; /* Boucle de parcours */ + void *ptr; /* Résultat des recherches */ + + result = false; + + if (!fast) + for (i = 0; i < info->count && !result; i++) + result = (info->jumps[i] == *addr); + + else + { + ptr = bsearch(addr, info->jumps, info->count, sizeof(vmpa_t), (__compar_fn_t)compare_vmpa); + result = (ptr != NULL); + } + + return result; + +} + + +/****************************************************************************** +* * +* Paramètres : instrs = ensemble des instructions d'assemblage. * +* start = adresse de début du bloc. * +* end = adresse de fin du bloc (exclusive). * +* count = nombre de sauts détectés. [OUT] * +* * +* Description : Identifie les différents points de passage d'une branche. * +* * +* Retour : Jalons dans le flot d'exécution. * +* * +* Remarques : - * +* * +******************************************************************************/ + +static void find_next_jumps(GArchInstruction *instrs, vmpa_t start, vmpa_t end, branch_info *info) +{ + GArchInstruction *iter; /* Boucle de parcours #1 */ + GArchInstruction **dests; /* Instr. visée par une autre */ + InstructionLinkType *types; /* Type de lien entre lignes */ + size_t dcount; /* Nombre de liens de dest. */ + size_t i; /* Boucle de parcours #2 */ + vmpa_t addr; /* Adresse de la destination */ + + /* On évite de boucler... 
*/ + if (is_addr_in_branch(info, &start, false)) + return; + + info->jumps = (vmpa_t *)realloc(info->jumps, ++(info->count) * sizeof(vmpa_t)); + info->jumps[info->count - 1] = start; + + /* On suit le flot jusqu'à la prochaine bifurcation */ + for (iter = g_arch_instruction_find_by_address(instrs, start, true); + iter != NULL; + iter = g_arch_instruction_get_next_iter(instrs, iter, end)) + { + if (!g_arch_instruction_has_destinations(iter)) + continue; + + dcount = g_arch_instruction_get_destinations(iter, &dests, &types); + + for (i = 0; i < dcount; i++) + switch (types[i]) + { + case ILT_EXEC_FLOW: + case ILT_JUMP: + case ILT_JUMP_IF_TRUE: + case ILT_JUMP_IF_FALSE: + g_arch_instruction_get_location(dests[i], NULL, NULL, &addr); + find_next_jumps(instrs, addr, end, info); + break; + + default: + break; + + } + + break; + + } + + /* Si on termine... */ + if (iter != NULL && !is_addr_in_branch(info, &end, false)) + { + info->jumps = (vmpa_t *)realloc(info->jumps, ++(info->count) * sizeof(vmpa_t)); + info->jumps[info->count - 1] = end; + } + +} + + +/****************************************************************************** +* * +* Paramètres : a = premier ensemble de jalons à parcourir. * +* b = second ensemble de jalons à parcourir. * +* * +* Description : Retrouve le point de ralliement entre deux branches. * +* * +* Retour : Adresse commune à deux branches. 
* +* * +* Remarques : - * +* * +******************************************************************************/ + +static vmpa_t compute_first_common_addr(branch_info *a, branch_info *b) +{ + vmpa_t result; /* Adresse trouvée à retourner */ + size_t i; /* Boucle de parcours */ + + /* Valeur conceptuellement impossible à renvoyer */ + result = VMPA_MAX; + + //qsort(a->jumps, a->count, sizeof(vmpa_t), (__compar_fn_t)compare_vmpa); + //qsort(b->jumps, b->count, sizeof(vmpa_t), (__compar_fn_t)compare_vmpa); + + for (i = 0; i < a->count && result == VMPA_MAX; i++) + if (is_addr_in_branch(b, &a->jumps[i], false)) + result = a->jumps[i]; + + return result; + +} + + + +#include "../../arch/processor.h" + + +/****************************************************************************** +* * +* Paramètres : instrs = ensemble des instructions d'assemblage. * +* start = adresse de début du bloc. * +* end = adresse de fin du bloc (exclusive). * +* stop = adresse d'arrêt en cas de saut ou VMPA_MAX. * +* ctx = contexte de soutien à associer à l'opération. * +* * +* Description : Procède à la décompilation basique d'un bloc déterminé. * +* * +* Retour : Instructions créées et enregistrées, ou NULL si erreur. * +* * +* Remarques : - * +* * +******************************************************************************/ + +GDecInstruction *build_decompiled_block(GArchInstruction *instrs, vmpa_t start, vmpa_t end, vmpa_t stop, GDecContext *ctx) +{ + GDecInstruction *result; /* Instructions décompilées */ + GArchInstruction *iter; /* Boucle de parcours */ + + GDecInstruction *pite; /* IfThenElse potientiel... */ + + GArchInstruction **dests; /* Instr. visée par une autre */ + InstructionLinkType *types; /* Type de lien entre lignes */ + size_t dcount; /* Nombre de liens de dest. 
*/ + + size_t i; /* Boucle de parcours */ + vmpa_t addr; /* Adresse de la destination */ + + branch_info true_branch; /* Branche 'condition vraie' */ + branch_info false_branch; /* Branche 'condition fausse' */ + GDecInstruction *true_dinstr; /* Décompilation 'cond vraie' */ + GDecInstruction *false_dinstr; /* Décompilation 'cond fausse' */ + + vmpa_t next_addr; /* Prochaine instruction visée */ + + GDecInstruction *first; /* Première décompilation */ + GDecInstruction *dinstr; /* Nouvelle décompilation */ + + + GExeFormat *format; /* Format du binaire fourni */ + GArchProcessor *proc; /* Architecture du binaire */ + GDecContext *context; /* Contexte pour la décompil. */ + + + result = NULL; + + printf("[+] processing 0x%08llx -> 0x%08llx... stop @ 0x%08llx\n", start, end, stop); + + for (iter = g_arch_instruction_find_by_address(instrs, start, true); + iter != NULL; + ) + { + /* On s'arrêter si l'instruction est déjà décompilée */ + if (g_object_get_data(G_OBJECT(iter), "decomp_done") != NULL) break; + g_object_set_data(G_OBJECT(iter), "decomp_done", iter); + + pite = g_arch_instruction_decompile(iter, ctx); + + g_arch_instruction_get_location(iter, NULL, NULL, &addr); + printf(" --- decomp %p @ 0x%08llx\n", pite, addr); + + /* On n'approfondit que les chemins qui se séparent */ + if (!g_arch_instruction_has_destinations(iter)) + { + iter = g_arch_instruction_get_next_iter(instrs, iter, end); + continue; + } + + /* Adaptations en fonction du type de bifurcation */ + + dcount = g_arch_instruction_get_destinations(iter, &dests, &types); + + next_addr = 0; + memset(&true_branch, 0, sizeof(branch_info)); + memset(&false_branch, 0, sizeof(branch_info)); + + for (i = 0; i < dcount; i++) + switch (types[i]) + { + case ILT_EXEC_FLOW: + case ILT_JUMP: + g_arch_instruction_get_location(dests[i], NULL, NULL, &next_addr); + break; + + case ILT_JUMP_IF_TRUE: + g_arch_instruction_get_location(dests[i], NULL, NULL, &addr); + find_next_jumps(instrs, addr, end, &true_branch); + 
break; + + case ILT_JUMP_IF_FALSE: + g_arch_instruction_get_location(dests[i], NULL, NULL, &addr); + find_next_jumps(instrs, addr, end, &false_branch); + break; + + default: + next_addr = VMPA_MAX; + break; + + } + + if (next_addr == VMPA_MAX) + { + iter = g_arch_instruction_get_next_iter(instrs, iter, end); + continue; + } + + else if (true_branch.count > 0 || false_branch.count > 0) + { + next_addr = compute_first_common_addr(&true_branch, &false_branch); + next_addr = MIN(next_addr, end); + + format = g_object_get_data(G_OBJECT(ctx), "format"); + proc = get_arch_processor_from_format(G_EXE_FORMAT(format)); + + context = g_arch_processor_get_decomp_context(proc); + + g_object_set_data(G_OBJECT(context), "format", g_object_get_data(G_OBJECT(ctx), "format")); + g_object_set_data(G_OBJECT(context), "routine", g_object_get_data(G_OBJECT(ctx), "routine")); + g_dec_context_set_max_address(context, next_addr); + + true_dinstr = build_decompiled_block(instrs, true_branch.jumps[0], + end, next_addr, context); + + + context = g_arch_processor_get_decomp_context(proc); + + g_object_set_data(G_OBJECT(context), "format", g_object_get_data(G_OBJECT(ctx), "format")); + g_object_set_data(G_OBJECT(context), "routine", g_object_get_data(G_OBJECT(ctx), "routine")); + g_dec_context_set_max_address(context, next_addr); + + false_dinstr = build_decompiled_block(instrs, false_branch.jumps[0], + end, next_addr, context); + + + printf("{branch : %p (0x%08llx) | %p (0x%08llx)\n", + true_dinstr, true_branch.jumps[0], + false_dinstr, false_branch.jumps[0]); + + g_ite_instruction_set_branches(G_ITE_INSTRUCTION(pite), true_dinstr, false_dinstr); + + if (next_addr == end) break; + + } + + /* Détermination du prochain point de chute */ + + if (next_addr == stop) break; + + iter = g_arch_instruction_find_by_address(instrs, next_addr, true); + + } + + + + + first = g_dec_context_get_decomp_instrs(ctx); + + printf(" ... 
context instr : %p\n", first); + + for (dinstr = first; + dinstr != NULL; + dinstr = g_dec_instruction_get_next_iter(first, dinstr)) + { + if (result == NULL) result = g_expr_block_new(dinstr); + else g_expr_block_add_item(G_EXPR_BLOCK(result), dinstr); + + } + + printf(" ... return %p\n", result); + + return result; + +} diff --git a/src/analysis/decomp/il.h b/src/analysis/decomp/il.h new file mode 100644 index 0000000..4f38b4f --- /dev/null +++ b/src/analysis/decomp/il.h @@ -0,0 +1,38 @@ + +/* OpenIDA - Outil d'analyse de fichiers binaires + * il.h - prototypes pour la mise en place d'un langage intermédiaire + * + * Copyright (C) 2012 Cyrille Bagard + * + * This file is part of OpenIDA. + * + * OpenIDA is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * OpenIDA is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Foobar. If not, see <http://www.gnu.org/licenses/>. + */ + + +#ifndef _ANALYSIS_DECOMP_IL_H +#define _ANALYSIS_DECOMP_IL_H + + +#include "../../arch/instruction.h" +#include "../../decomp/instruction.h" + + + +/* Procède à la décompilation basique d'un bloc déterminé. 
*/ +GDecInstruction *build_decompiled_block(GArchInstruction *, vmpa_t, vmpa_t, vmpa_t, GDecContext *); + + + +#endif /* _ANALYSIS_DECOMP_IL_H */ diff --git a/src/analysis/decomp/reduce.c b/src/analysis/decomp/reduce.c new file mode 100644 index 0000000..926650d --- /dev/null +++ b/src/analysis/decomp/reduce.c @@ -0,0 +1,53 @@ + +/* OpenIDA - Outil d'analyse de fichiers binaires + * reduce.c - réduction de l'usage des [pseudo]-registres + * + * Copyright (C) 2012 Cyrille Bagard + * + * This file is part of OpenIDA. + * + * OpenIDA is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * OpenIDA is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Foobar. If not, see <http://www.gnu.org/licenses/>. 
+ */ + + +#include "reduce.h" + + +#if 0 + + + +/* Mémorisation de l'usage de chaque variable */ +typedef struct _var_usage +{ + GDecInstruction *var; /* Variable manipulée */ + + size_t ref_counter; /* Décompte des utilisations */ + GDecInstruction *creation; /* Emplacement de la création */ + GDecInstruction *usage; /* Lieu de dernère utilisation */ + +} var_usage; + +/* Réduction des usages */ +typedef struct _usage_reduc +{ + + + +} usage_reduc; + + +#endif + + diff --git a/src/analysis/decomp/reduce.h b/src/analysis/decomp/reduce.h new file mode 100644 index 0000000..aa13d7f --- /dev/null +++ b/src/analysis/decomp/reduce.h @@ -0,0 +1,30 @@ + +/* OpenIDA - Outil d'analyse de fichiers binaires + * reduce.h - prototypes pour la réduction de l'usage des [pseudo]-registres + * + * Copyright (C) 2012 Cyrille Bagard + * + * This file is part of OpenIDA. + * + * OpenIDA is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * OpenIDA is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Foobar. If not, see <http://www.gnu.org/licenses/>. 
+ */ + + +#ifndef _ANALYSIS_DECOMP_REDUCE_H +#define _ANALYSIS_DECOMP_REDUCE_H + + + + +#endif /* _ANALYSIS_DECOMP_REDUCE_H */ diff --git a/src/analysis/routine.c b/src/analysis/routine.c index a21b6e7..7a191c9 100644 --- a/src/analysis/routine.c +++ b/src/analysis/routine.c @@ -998,7 +998,7 @@ void g_binary_routine_print_code(const GBinRoutine *routine, GLangOutput *lang, g_lang_output_start_routine_body(lang, buffer, line); if (routine->dinstr != NULL) - g_dec_instruction_print(routine->dinstr, buffer, NULL, lang); + g_dec_instruction_print(routine->dinstr, buffer, line, lang); g_lang_output_end_routine_body(lang, buffer); |