From 20289d6a5d60d1bcf979ff7fdfc236486848d149 Mon Sep 17 00:00:00 2001 From: Cyrille Bagard <nocbos@gmail.com> Date: Mon, 14 Jan 2019 01:04:49 +0100 Subject: Handled irreducible loops without blocking. --- src/analysis/disass/dragon.c | 2 + src/analysis/disass/loop.c | 438 ++++++++++++++++++++++++++++++++---- src/analysis/disass/loop.h | 4 +- src/analysis/disass/routines.c | 4 +- tests/analysis/disass/Makefile | 5 +- tests/analysis/disass/block.py | 38 ++++ tests/analysis/disass/irreducible.c | 37 +++ 7 files changed, 476 insertions(+), 52 deletions(-) create mode 100644 tests/analysis/disass/irreducible.c diff --git a/src/analysis/disass/dragon.c b/src/analysis/disass/dragon.c index eb91ad1..89dc784 100644 --- a/src/analysis/disass/dragon.c +++ b/src/analysis/disass/dragon.c @@ -817,6 +817,8 @@ GBlockList *translate_dragon_knight(const dragon_knight *knight, GLoadedBinary * get_dragon_knight_content(knight, &nodes, &count); + compute_all_dominators(nodes, count); + result = g_block_list_new(count); for (i = 0; i < count; i++) diff --git a/src/analysis/disass/loop.c b/src/analysis/disass/loop.c index b41e6b2..9e964e5 100644 --- a/src/analysis/disass/loop.c +++ b/src/analysis/disass/loop.c @@ -24,18 +24,140 @@ #include "loop.h" +#include <assert.h> +#include <malloc.h> -/* Matérialise les liens de retour arrière en tant que boucles. */ -static void detect_back_edges(dragon_node *, size_t); + +#include "block.h" + + + +/** + * Adaptation de l'algorithme : "A New Algorithm for Identifying Loops in Decompilation". + */ + + + +/* Informations associées à un bloc */ +typedef struct _bblock_info_t +{ + bool traversed; /* Etat du traitement */ + unsigned int dfsp_pos; /* Indice dans un chemin */ + struct _bblock_info_t *iloop_header; /* Pointeur de bloc d'entête */ + + bool irreducible; /* Détection d'irreducible */ + +} bblock_info_t; + +/* Informations associées à un lien */ +typedef struct _bblock_link_t +{ + GCodeBlock *linked; /* Destination du bloc visé */ + InstructionLinkType type; /* Type de liaison */ + bblock_info_t *info; /* Informations sur ce bloc */ + +} bblock_link_t; + + +/* Détermine les liens vers des blocs successeurs d'un bloc. */ +static bblock_link_t *get_block_links(GCodeBlock *, bblock_info_t *, bool, size_t *); + +#define get_block_predecessors(b, i, c) \ + get_block_links(b, i, false, c) + +#define get_block_successors(b, i, c) \ + get_block_links(b, i, true, c) + +/* Libère de la mémoire les liens vers des blocs successeurs. */ +static void delete_block_links(bblock_link_t *, size_t); + +/* Marque un bloc comme étant un entête de boucle. */ +static void tag_loop_head(bblock_info_t *, bblock_info_t *); + +/* Parcourt une arborescence de blocs à la recherche de boucles. */ +static bblock_info_t *traverse_basic_blocks_dfs(bblock_info_t *, GBlockList *, bblock_info_t *, unsigned int); + +/* Définit les boucles entre un ensemble de blocs basiques. */ +static void define_basic_blocks_loops(GBlockList *list, bblock_info_t *); /****************************************************************************** * * -* Paramètres : nodes = liste de noeuds détectés dans une routine. * -* count = taille de cette liste de noeuds à traiter. * +* Paramètres : block = bloc de code, basique, à considérer. * +* info = informations complémentaires quant aux blocs. * +* succ = true si les successeurs sont visés, false sinon. * +* count = nombre de liens effectivement utiles. [OUT] * +* * +* Description : Détermine les liens vers des blocs successeurs d'un bloc. * +* * +* Retour : Liste de liens retrouvés ou NULL si vraiment aucun. * * * -* Description : Matérialise les liens de retour arrière en tant que boucles. * +* Remarques : - * +* * +******************************************************************************/ + +static bblock_link_t *get_block_links(GCodeBlock *block, bblock_info_t *info, bool succ, size_t *count) +{ + bblock_link_t *result; /* Liste à retourner */ + size_t allocated; /* Nombre d'éléments au maximum*/ + block_link_t *links; /* Liens associés au bloc */ + size_t i; /* Boucle de parcours */ + bblock_link_t *new; /* Mémorisation de ce lien */ + + if (succ) + links = g_code_block_get_destinations(block, &allocated); + else + links = g_code_block_get_sources(block, &allocated); + + result = malloc(allocated * sizeof(bblock_link_t)); + + *count = 0; + + for (i = 0; i < allocated; i++) + { + switch (links[i].type) + { + case ILT_EXEC_FLOW: + case ILT_JUMP: + case ILT_CASE_JUMP: + case ILT_JUMP_IF_TRUE: + case ILT_JUMP_IF_FALSE: + + new = &result[(*count)++]; + + new->linked = links[i].linked; + g_object_ref(G_OBJECT(new->linked)); + + new->type = links[i].type; + + new->info = info + g_code_block_get_index(new->linked); + + break; + + default: + break; + + } + + unref_block_link(&links[i]); + + } + + if (links != NULL) + free(links); + + return result; + +} + + +/****************************************************************************** +* * +* Paramètres : links = liste de liens retrouvés ou NULL si vraiment aucun. * +* count = nombre de liens effectivement utiles. * +* * +* Description : Libère de la mémoire les liens vers des blocs successeurs. * * * * Retour : - * * * @@ -43,61 +165,275 @@ static void detect_back_edges(dragon_node *, size_t); * * ******************************************************************************/ -static void detect_back_edges(dragon_node *nodes, size_t count) +static void delete_block_links(bblock_link_t *links, size_t count) { - size_t k; /* Boucle de parcours #1 */ - dragon_node *node; /* Noeud à traiter */ - const bitfield_t *dominators; /* Liste de dominateurs */ - GArchInstruction *last; /* Instruction finale de noeud */ - size_t dcount; /* Nombre de liens de dest. */ - size_t i; /* Boucle de parcours #2 */ - const instr_link_t *dest; /* Instr. visée par une autre */ - dragon_node *target; /* Noeud référence à tester */ - size_t id; /* Indice du bit associé */ - - for (k = 0; k < count; k++) + size_t i; /* Boucle de parcours */ + + for (i = 0; i < count; i++) + g_object_unref(G_OBJECT(links[i].linked)); + + if (links != NULL) + free(links); + +} + + +/****************************************************************************** +* * +* Paramètres : blk = bloc à mettre à jour. * +* hdr = bloc d'entête à mémoriser. * +* * +* Description : Marque un bloc comme étant un entête de boucle. * +* * +* Retour : - * +* * +* Remarques : - * +* * +******************************************************************************/ + +static void tag_loop_head(bblock_info_t *blk, bblock_info_t *hdr) +{ + bblock_info_t *cur1; /* Boucle de parcours #1 */ + bblock_info_t *cur2; /* Boucle de parcours #2 */ + bblock_info_t *ih; /* Boucle de parcours #3 */ + + if (blk == hdr || hdr == NULL) + goto done; + + cur1 = blk; + cur2 = hdr; + + while (cur1->iloop_header != NULL) { - node = get_dragon_node(nodes, k); + ih = cur1->iloop_header; + + if (ih == cur2) + goto done; + + if (ih->dfsp_pos < cur2->dfsp_pos) + { + cur1->iloop_header = cur2; + cur1 = cur2; + cur2 = ih; + } + + else + cur1 = ih; - dominators = get_domination_bits(node); + } - get_dragon_node_bounding_instructions(node, NULL, &last); + cur1->iloop_header = cur2; - g_arch_instruction_lock_dest(last); - dcount = g_arch_instruction_count_destinations(last); + done: - for (i = 0; i < dcount; i++) + ; + +} + + +/****************************************************************************** +* * +* Paramètres : root = élément racine à considérer. * +* list = liste de blocs de code à consulter. * +* info = informations complémentaires quant aux blocs. * +* pos = position dans le chemin courant. * +* * +* Description : Parcourt une arborescence de blocs à la recherche de boucles.* +* * +* Retour : Entête de boucle trouvée ou NULL. * +* * +* Remarques : - * +* * +******************************************************************************/ + +static bblock_info_t *traverse_basic_blocks_dfs(bblock_info_t *root, GBlockList *list, bblock_info_t *info, unsigned int pos) +{ + GCodeBlock *block; /* Bloc basique courant */ + bblock_link_t *links; /* Liste de successeurs */ + size_t count; /* Taille de cette liste */ + size_t i; /* Boucle de parcours */ + bblock_info_t *succ; /* Successeur à traiter */ + bblock_info_t *nh; /* Nouvel entête de boucle */ + bblock_info_t *h; /* Entête de boucle */ + + root->traversed = true; + root->dfsp_pos = pos; + + block = g_block_list_get_block(list, root - info); + + links = get_block_successors(block, info, &count); + + for (i = 0; i < count; i++) + { + succ = links[i].info; + + /* Cas A : bloc jamais traversé */ + if (!succ->traversed) { - dest = g_arch_instruction_get_destination(last, i); + nh = traverse_basic_blocks_dfs(succ, list, info, pos + 1); + tag_loop_head(root, nh); + } - switch (dest->type) + else + { + /* Le successeur est dans DFSP(root) */ + if (succ->dfsp_pos > 0) { - case ILT_EXEC_FLOW: - case ILT_JUMP: - case ILT_CASE_JUMP: - case ILT_JUMP_IF_TRUE: - case ILT_JUMP_IF_FALSE: + /* Cas B : on le marque en tant qu'entête de boucle */ + tag_loop_head(root, succ); + } + + /* Cas C : on ne fait rien */ + else if (succ->iloop_header == NULL) + { + + } - target = find_node_for_instruction(nodes, count, false, dest->linked); - if (target == NULL) break; + else + { + h = succ->iloop_header; + + /* h est dans DFSP(root) */ + if (h->dfsp_pos > 0) + { + /* Cas D */ + tag_loop_head(root, h); + } + + /* h n'est pas dans DFSP(root) */ + else + { + /* Cas E : réentrance */ - id = get_dragon_node_index(nodes, target); + succ->irreducible = true; - if (test_in_bit_field(dominators, id)) - g_arch_instruction_change_link(last, dest->linked, dest->type, ILT_LOOP); + while (h->iloop_header != NULL) + { + h = h->iloop_header; - break; + /* h est dans DFSP(root) */ + if (h->dfsp_pos > 0) + { + tag_loop_head(root, h); + break; + } - default: - break; + } + + } } - unref_instr_link(dest); + } + + } + + delete_block_links(links, count); + + g_object_unref(G_OBJECT(block)); + + /* Efface la position du bloc courant dans le chemin */ + root->dfsp_pos = 0; + + return root->iloop_header; + +} + + +/****************************************************************************** +* * +* Paramètres : list = liste de blocs de code à consulter. * +* info = informations complémentaires quant aux blocs. * +* * +* Description : Définit les boucles entre un ensemble de blocs basiques. * +* * +* Retour : - * +* * +* Remarques : - * +* * +******************************************************************************/ + +static void define_basic_blocks_loops(GBlockList *list, bblock_info_t *info) +{ + size_t available; /* Quantité de blocs présents */ + size_t i; /* Boucle de parcours #1 */ + bblock_info_t *iter; /* Boucle de parcours #2 */ + GCodeBlock *block; /* Bloc basique courant */ + bblock_link_t *links; /* Liste de successeurs */ + size_t count; /* Taille de cette liste */ + size_t k; /* Boucle de parcours #3 */ + GArchInstruction *first; /* Instruction initiale de bloc*/ + GArchInstruction *last; /* Instruction finale de bloc */ +#ifndef NDEBUG + bool status; /* Bilan du changement */ +#endif + + available = g_block_list_count_blocks(list); + + for (i = 0, iter = info; i < available; i++, iter++) + { + block = g_block_list_get_block(list, i); + + if (iter->irreducible) + { + links = get_block_predecessors(block, info, &count); + + + for (k = 0; k < count; k++) + if (links[k].info->iloop_header == iter->iloop_header) + { + g_basic_block_get_boundaries(G_BASIC_BLOCK(links[k].linked), NULL, &last); + g_basic_block_get_boundaries(G_BASIC_BLOCK(block), &first, NULL); + + g_arch_instruction_lock_dest(last); + +#ifndef NDEBUG + status = g_arch_instruction_change_link(last, first, links[k].type, ILT_LOOP); + assert(status); +#else + g_arch_instruction_change_link(last, first, links[k].type, ILT_LOOP); +#endif + + g_arch_instruction_unlock_dest(last); + + g_object_unref(G_OBJECT(first)); + g_object_unref(G_OBJECT(last)); + + } + + } + + else + { + links = get_block_successors(block, info, &count); + + for (k = 0; k < count; k++) + if (links[k].info == iter->iloop_header) + { + g_basic_block_get_boundaries(G_BASIC_BLOCK(block), NULL, &last); + g_basic_block_get_boundaries(G_BASIC_BLOCK(links[k].linked), &first, NULL); + + g_arch_instruction_lock_dest(last); + +#ifndef NDEBUG + status = g_arch_instruction_change_link(last, first, links[k].type, ILT_LOOP); + assert(status); +#else + g_arch_instruction_change_link(last, first, links[k].type, ILT_LOOP); +#endif + + g_arch_instruction_unlock_dest(last); + + g_object_unref(G_OBJECT(first)); + g_object_unref(G_OBJECT(last)); + + } } - g_arch_instruction_unlock_dest(last); + delete_block_links(links, count); + + g_object_unref(G_OBJECT(block)); } @@ -106,7 +442,7 @@ static void detect_back_edges(dragon_node *nodes, size_t count) /****************************************************************************** * * -* Paramètres : knight = informations quant à la complexité gérée du code. * +* Paramètres : list = liste de blocs de code à consulter. * * * * Description : Détecte les boucles dans du code machine. * * * @@ -116,15 +452,23 @@ static void detect_back_edges(dragon_node *nodes, size_t count) * * ******************************************************************************/ -void detect_loops_in_code(dragon_knight *knight) +void detect_loops_in_basic_blocks(GBlockList *list) { - dragon_node *nodes; /* Liste des noeuds détectés */ - size_t count; /* Taille de cette liste */ + size_t count; /* Quantité de blocs présents */ + bblock_info_t *info; /* Informations supplémentaires*/ - get_dragon_knight_content(knight, &nodes, &count); + count = g_block_list_count_blocks(list); - compute_all_dominators(nodes, count); + if (count > 1) + { + info = calloc(count, sizeof(bblock_info_t)); + + traverse_basic_blocks_dfs(&info[0], list, info, 1); + + define_basic_blocks_loops(list, info); - detect_back_edges(nodes, count); + free(info); + + } } diff --git a/src/analysis/disass/loop.h b/src/analysis/disass/loop.h index 4e8ccb0..c8ef0e8 100644 --- a/src/analysis/disass/loop.h +++ b/src/analysis/disass/loop.h @@ -25,12 +25,12 @@ #define _ANALYSIS_DISASS_LOOP_H -#include "dragon.h" +#include "../block.h" /* Détecte les boucles dans du code machine. */ -void detect_loops_in_code(dragon_knight *); +void detect_loops_in_basic_blocks(GBlockList *); diff --git a/src/analysis/disass/routines.c b/src/analysis/disass/routines.c index d260bad..d288fc0 100644 --- a/src/analysis/disass/routines.c +++ b/src/analysis/disass/routines.c @@ -400,12 +400,12 @@ void g_routines_study_handle_blocks(GRoutinesStudy *study, GBinRoutine *routine, /* Traitement par blocs */ - detect_loops_in_code(knight); - blocks = translate_dragon_knight(knight, study->binary); g_binary_routine_set_basic_blocks(routine, blocks); + detect_loops_in_basic_blocks(blocks); + rank_routine_blocks(routine); /* Nettoyage final */ diff --git a/tests/analysis/disass/Makefile b/tests/analysis/disass/Makefile index 8155642..030e868 100644 --- a/tests/analysis/disass/Makefile +++ b/tests/analysis/disass/Makefile @@ -1,5 +1,5 @@ -EXECUTABLES=hello endofname +EXECUTABLES=hello endofname irreducible all: $(EXECUTABLES) @@ -9,5 +9,8 @@ hello: hello.c endofname: endofname.c $(ARM_CROSS)gcc $< -o $@ +irreducible: irreducible.c + $(ARM_CROSS)gcc $< -o $@ + clean: rm -f $(EXECUTABLES) diff --git a/tests/analysis/disass/block.py b/tests/analysis/disass/block.py index 1663150..84fa4c3 100644 --- a/tests/analysis/disass/block.py +++ b/tests/analysis/disass/block.py @@ -8,6 +8,7 @@ from chrysacase import ChrysalideTestCase from pychrysalide.analysis.contents import FileContent from pychrysalide.analysis import LoadedBinary +from pychrysalide.arch import ArchInstruction from pychrysalide.format.elf import ElfFormat import os import sys @@ -28,6 +29,8 @@ class TestBasicBlocks(ChrysalideTestCase): os.system('make -C %s hello > /dev/null 2>&1' % dirpath) + os.system('make -C %s irreducible > /dev/null 2>&1' % dirpath) + @classmethod def tearDownClass(cls): @@ -72,3 +75,38 @@ class TestBasicBlocks(ChrysalideTestCase): self.assertEqual(found.index, 0) self.assertEqual(found.rank, 0) + + + def testIrreducible(self): + """Validate support for irreducible loops.""" + + fullname = sys.modules[self.__class__.__module__].__file__ + filename = os.path.basename(fullname) + + baselen = len(fullname) - len(filename) + + cnt = FileContent(fullname[:baselen] + 'irreducible') + self.assertIsNotNone(cnt) + + fmt = ElfFormat(cnt) + self.assertIsNotNone(fmt) + + binary = LoadedBinary(fmt) + self.assertIsNotNone(binary) + + binary.analyze_and_wait() + + sym = fmt.find_symbol_by_label('argstr') + self.assertIsNotNone(sym) + + found = sym.basic_blocks.find_by_starting_addr(sym.range.addr) + self.assertIsNotNone(found) + + loop_count = 0 + + for blk in sym.basic_blocks: + for _, dt in blk.destinations: + if dt == ArchInstruction.ILT_LOOP: + loop_count += 1 + + self.assertEqual(loop_count, 2) diff --git a/tests/analysis/disass/irreducible.c b/tests/analysis/disass/irreducible.c new file mode 100644 index 0000000..8edd592 --- /dev/null +++ b/tests/analysis/disass/irreducible.c @@ -0,0 +1,37 @@ + +static void argstr(char *p, int flags) +{ + if (flags) + { + tilde: + p++; + } + + for (;;) + { + switch (*p) + { + case '\0': + goto breakloop; + + case ':': + if (*--p == '~') + goto tilde; + continue; + } + + } + + breakloop: + + ; + +} + +int main(int argc, char **argv) +{ + argstr(argv[0], 0); + + return 0; + +} -- cgit v0.11.2-87-g4458