From 20289d6a5d60d1bcf979ff7fdfc236486848d149 Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Mon, 14 Jan 2019 01:04:49 +0100
Subject: Handled irreducible loops without blocking.

---
 src/analysis/disass/dragon.c        |   2 +
 src/analysis/disass/loop.c          | 438 ++++++++++++++++++++++++++++++++----
 src/analysis/disass/loop.h          |   4 +-
 src/analysis/disass/routines.c      |   4 +-
 tests/analysis/disass/Makefile      |   5 +-
 tests/analysis/disass/block.py      |  38 ++++
 tests/analysis/disass/irreducible.c |  37 +++
 7 files changed, 476 insertions(+), 52 deletions(-)
 create mode 100644 tests/analysis/disass/irreducible.c

diff --git a/src/analysis/disass/dragon.c b/src/analysis/disass/dragon.c
index eb91ad1..89dc784 100644
--- a/src/analysis/disass/dragon.c
+++ b/src/analysis/disass/dragon.c
@@ -817,6 +817,8 @@ GBlockList *translate_dragon_knight(const dragon_knight *knight, GLoadedBinary *
 
     get_dragon_knight_content(knight, &nodes, &count);
 
+    compute_all_dominators(nodes, count);
+
     result = g_block_list_new(count);
 
     for (i = 0; i < count; i++)
diff --git a/src/analysis/disass/loop.c b/src/analysis/disass/loop.c
index b41e6b2..9e964e5 100644
--- a/src/analysis/disass/loop.c
+++ b/src/analysis/disass/loop.c
@@ -24,18 +24,140 @@
 #include "loop.h"
 
 
+#include <assert.h>
+#include <malloc.h>
 
-/* Matérialise les liens de retour arrière en tant que boucles. */
-static void detect_back_edges(dragon_node *, size_t);
+
+#include "block.h"
+
+
+
+/**
+ * Adaptation de l'algorithme : "A New Algorithm for Identifying Loops in Decompilation".
+ */
+
+
+
+/* Informations associées à un bloc */
+typedef struct _bblock_info_t
+{
+    bool traversed;                         /* Etat du traitement          */
+    unsigned int dfsp_pos;                  /* Indice dans un chemin       */
+    struct _bblock_info_t *iloop_header;    /* Pointeur de bloc d'entête   */
+
+    bool irreducible;                       /* Détection d'irreducible     */
+
+} bblock_info_t;
+
+/* Informations associées à un lien */
+typedef struct _bblock_link_t
+{
+    GCodeBlock *linked;                     /* Destination du bloc visé    */
+    InstructionLinkType type;               /* Type de liaison             */
+    bblock_info_t *info;                    /* Informations sur ce bloc    */
+
+} bblock_link_t;
+
+
+/* Détermine les liens vers des blocs successeurs d'un bloc. */
+static bblock_link_t *get_block_links(GCodeBlock *, bblock_info_t *, bool, size_t *);
+
+#define get_block_predecessors(b, i, c) \
+    get_block_links(b, i, false, c)
+
+#define get_block_successors(b, i, c) \
+    get_block_links(b, i, true, c)
+
+/* Libère de la mémoire les liens vers des blocs successeurs. */
+static void delete_block_links(bblock_link_t *, size_t);
+
+/* Marque un bloc comme étant un entête de boucle. */
+static void tag_loop_head(bblock_info_t *, bblock_info_t *);
+
+/* Parcourt une arborescence de blocs à la recherche de boucles. */
+static bblock_info_t *traverse_basic_blocks_dfs(bblock_info_t *, GBlockList *, bblock_info_t *, unsigned int);
+
+/* Définit les boucles entre un ensemble de blocs basiques. */
+static void define_basic_blocks_loops(GBlockList *list, bblock_info_t *);
 
 
 
 /******************************************************************************
 *                                                                             *
-*  Paramètres  : nodes = liste de noeuds détectés dans une routine.           *
-*                count = taille de cette liste de noeuds à traiter.           *
+*  Paramètres  : block = bloc de code, basique, à considérer.                 *
+*                info  = informations complémentaires quant aux blocs.        *
+*                succ  = true si les successeurs sont visés, false sinon.     *
+*                count = nombre de liens effectivement utiles. [OUT]          *
+*                                                                             *
+*  Description : Détermine les liens vers des blocs successeurs d'un bloc.    *
+*                                                                             *
+*  Retour      : Liste de liens retrouvés ou NULL si vraiment aucun.          *
 *                                                                             *
-*  Description : Matérialise les liens de retour arrière en tant que boucles. *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static bblock_link_t *get_block_links(GCodeBlock *block, bblock_info_t *info, bool succ, size_t *count)
+{
+    bblock_link_t *result;                  /* Liste à retourner           */
+    size_t allocated;                       /* Nombre d'éléments au maximum*/
+    block_link_t *links;                    /* Liens associés au bloc      */
+    size_t i;                               /* Boucle de parcours          */
+    bblock_link_t *new;                     /* Mémorisation de ce lien     */
+
+    if (succ)
+        links = g_code_block_get_destinations(block, &allocated);
+    else
+        links = g_code_block_get_sources(block, &allocated);
+
+    result = malloc(allocated * sizeof(bblock_link_t));
+
+    *count = 0;
+
+    for (i = 0; i < allocated; i++)
+    {
+        switch (links[i].type)
+        {
+            case ILT_EXEC_FLOW:
+            case ILT_JUMP:
+            case ILT_CASE_JUMP:
+            case ILT_JUMP_IF_TRUE:
+            case ILT_JUMP_IF_FALSE:
+
+                new = &result[(*count)++];
+
+                new->linked = links[i].linked;
+                g_object_ref(G_OBJECT(new->linked));
+
+                new->type = links[i].type;
+
+                new->info = info + g_code_block_get_index(new->linked);
+
+                break;
+
+            default:
+                break;
+
+        }
+
+        unref_block_link(&links[i]);
+
+    }
+
+    if (links != NULL)
+        free(links);
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Paramètres  : links = liste de liens retrouvés ou NULL si vraiment aucun.  *
+*                count = nombre de liens effectivement utiles.                *
+*                                                                             *
+*  Description : Libère de la mémoire les liens vers des blocs successeurs.   *
 *                                                                             *
 *  Retour      : -                                                            *
 *                                                                             *
@@ -43,61 +165,275 @@ static void detect_back_edges(dragon_node *, size_t);
 *                                                                             *
 ******************************************************************************/
 
-static void detect_back_edges(dragon_node *nodes, size_t count)
+static void delete_block_links(bblock_link_t *links, size_t count)
 {
-    size_t k;                               /* Boucle de parcours #1       */
-    dragon_node *node;                      /* Noeud à traiter             */
-    const bitfield_t *dominators;           /* Liste de dominateurs        */
-    GArchInstruction *last;                 /* Instruction finale de noeud */
-    size_t dcount;                          /* Nombre de liens de dest.    */
-    size_t i;                               /* Boucle de parcours #2       */
-    const instr_link_t *dest;               /* Instr. visée par une autre  */
-    dragon_node *target;                    /* Noeud référence à tester    */
-    size_t id;                              /* Indice du bit associé       */
-
-    for (k = 0; k < count; k++)
+    size_t i;                               /* Boucle de parcours          */
+
+    for (i = 0; i < count; i++)
+        g_object_unref(G_OBJECT(links[i].linked));
+
+    if (links != NULL)
+        free(links);
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Paramètres  : blk = bloc à mettre à jour.                                  *
+*                hdr = bloc d'entête à mémoriser.                             *
+*                                                                             *
+*  Description : Marque un bloc comme étant un entête de boucle.              *
+*                                                                             *
+*  Retour      : -                                                            *
+*                                                                             *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static void tag_loop_head(bblock_info_t *blk, bblock_info_t *hdr)
+{
+    bblock_info_t *cur1;                    /* Boucle de parcours #1       */
+    bblock_info_t *cur2;                    /* Boucle de parcours #2       */
+    bblock_info_t *ih;                      /* Boucle de parcours #3       */
+
+    if (blk == hdr || hdr == NULL)
+        goto done;
+
+    cur1 = blk;
+    cur2 = hdr;
+
+    while (cur1->iloop_header != NULL)
     {
-        node = get_dragon_node(nodes, k);
+        ih = cur1->iloop_header;
+
+        if (ih == cur2)
+            goto done;
+
+        if (ih->dfsp_pos < cur2->dfsp_pos)
+        {
+            cur1->iloop_header = cur2;
+            cur1 = cur2;
+            cur2 = ih;
+        }
+
+        else
+            cur1 = ih;
 
-        dominators = get_domination_bits(node);
+    }
 
-        get_dragon_node_bounding_instructions(node, NULL, &last);
+    cur1->iloop_header = cur2;
 
-        g_arch_instruction_lock_dest(last);
-        dcount = g_arch_instruction_count_destinations(last);
+ done:
 
-        for (i = 0; i < dcount; i++)
+    ;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Paramètres  : root = élément racine à considérer.                          *
+*                list = liste de blocs de code à consulter.                   *
+*                info = informations complémentaires quant aux blocs.         *
+*                pos  = position dans le chemin courant.                      *
+*                                                                             *
+*  Description : Parcourt une arborescence de blocs à la recherche de boucles.*
+*                                                                             *
+*  Retour      : Entête de boucle trouvée ou NULL.                            *
+*                                                                             *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static bblock_info_t *traverse_basic_blocks_dfs(bblock_info_t *root, GBlockList *list, bblock_info_t *info, unsigned int pos)
+{
+    GCodeBlock *block;                      /* Bloc basique courant        */
+    bblock_link_t *links;                   /* Liste de successeurs        */
+    size_t count;                           /* Taille de cette liste       */
+    size_t i;                               /* Boucle de parcours          */
+    bblock_info_t *succ;                    /* Successeur à traiter        */
+    bblock_info_t *nh;                      /* Nouvel entête de boucle     */
+    bblock_info_t *h;                       /* Entête de boucle            */
+
+    root->traversed = true;
+    root->dfsp_pos = pos;
+
+    block = g_block_list_get_block(list, root - info);
+
+    links = get_block_successors(block, info, &count);
+
+    for (i = 0; i < count; i++)
+    {
+        succ = links[i].info;
+
+        /* Cas A : bloc jamais traversé */
+        if (!succ->traversed)
         {
-            dest = g_arch_instruction_get_destination(last, i);
+            nh = traverse_basic_blocks_dfs(succ, list, info, pos + 1);
+            tag_loop_head(root, nh);
+        }
 
-            switch (dest->type)
+        else
+        {
+            /* Le successeur est dans DFSP(root) */
+            if (succ->dfsp_pos > 0)
             {
-                case ILT_EXEC_FLOW:
-                case ILT_JUMP:
-                case ILT_CASE_JUMP:
-                case ILT_JUMP_IF_TRUE:
-                case ILT_JUMP_IF_FALSE:
+                /* Cas B : on le marque en tant qu'entête de boucle */
+                tag_loop_head(root, succ);
+            }
+
+            /* Cas C : on ne fait rien */
+            else if (succ->iloop_header == NULL)
+            {
+
+            }
 
-                    target = find_node_for_instruction(nodes, count, false, dest->linked);
-                    if (target == NULL) break;
+            else
+            {
+                h = succ->iloop_header;
+
+                /* h est dans DFSP(root) */
+                if (h->dfsp_pos > 0)
+                {
+                    /* Cas D */
+                    tag_loop_head(root, h);
+                }
+
+                /* h n'est pas dans DFSP(root) */
+                else
+                {
+                    /* Cas E : réentrance */
 
-                    id = get_dragon_node_index(nodes, target);
+                    succ->irreducible = true;
 
-                    if (test_in_bit_field(dominators, id))
-                        g_arch_instruction_change_link(last, dest->linked, dest->type, ILT_LOOP);
+                    while (h->iloop_header != NULL)
+                    {
+                        h = h->iloop_header;
 
-                    break;
+                        /* h est dans DFSP(root) */
+                        if (h->dfsp_pos > 0)
+                        {
+                            tag_loop_head(root, h);
+                            break;
+                        }
 
-                default:
-                    break;
+                    }
+
+                }
 
             }
 
-            unref_instr_link(dest);
+        }
+
+    }
+
+    delete_block_links(links, count);
+
+    g_object_unref(G_OBJECT(block));
+
+    /* Efface la position du bloc courant dans le chemin */
+    root->dfsp_pos = 0;
+
+    return root->iloop_header;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Paramètres  : list = liste de blocs de code à consulter.                   *
+*                info = informations complémentaires quant aux blocs.         *
+*                                                                             *
+*  Description : Définit les boucles entre un ensemble de blocs basiques.     *
+*                                                                             *
+*  Retour      : -                                                            *
+*                                                                             *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static void define_basic_blocks_loops(GBlockList *list, bblock_info_t *info)
+{
+    size_t available;                       /* Quantité de blocs présents  */
+    size_t i;                               /* Boucle de parcours #1       */
+    bblock_info_t *iter;                    /* Boucle de parcours #2       */
+    GCodeBlock *block;                      /* Bloc basique courant        */
+    bblock_link_t *links;                   /* Liste de successeurs        */
+    size_t count;                           /* Taille de cette liste       */
+    size_t k;                               /* Boucle de parcours #3       */
+    GArchInstruction *first;                /* Instruction initiale de bloc*/
+    GArchInstruction *last;                 /* Instruction finale de bloc  */
+#ifndef NDEBUG
+    bool status;                            /* Bilan du changement         */
+#endif
+
+    available = g_block_list_count_blocks(list);
+
+    for (i = 0, iter = info; i < available; i++, iter++)
+    {
+        block = g_block_list_get_block(list, i);
+
+        if (iter->irreducible)
+        {
+            links = get_block_predecessors(block, info, &count);
+
+
+            for (k = 0; k < count; k++)
+                if (links[k].info->iloop_header == iter->iloop_header)
+                {
+                    g_basic_block_get_boundaries(G_BASIC_BLOCK(links[k].linked), NULL, &last);
+                    g_basic_block_get_boundaries(G_BASIC_BLOCK(block), &first, NULL);
+
+                    g_arch_instruction_lock_dest(last);
+
+#ifndef NDEBUG
+                    status = g_arch_instruction_change_link(last, first, links[k].type, ILT_LOOP);
+                    assert(status);
+#else
+                    g_arch_instruction_change_link(last, first, links[k].type, ILT_LOOP);
+#endif
+
+                    g_arch_instruction_unlock_dest(last);
+
+                    g_object_unref(G_OBJECT(first));
+                    g_object_unref(G_OBJECT(last));
+
+                }
+
+        }
+
+        else
+        {
+            links = get_block_successors(block, info, &count);
+
+            for (k = 0; k < count; k++)
+                if (links[k].info == iter->iloop_header)
+                {
+                    g_basic_block_get_boundaries(G_BASIC_BLOCK(block), NULL, &last);
+                    g_basic_block_get_boundaries(G_BASIC_BLOCK(links[k].linked), &first, NULL);
+
+                    g_arch_instruction_lock_dest(last);
+
+#ifndef NDEBUG
+                    status = g_arch_instruction_change_link(last, first, links[k].type, ILT_LOOP);
+                    assert(status);
+#else
+                    g_arch_instruction_change_link(last, first, links[k].type, ILT_LOOP);
+#endif
+
+                    g_arch_instruction_unlock_dest(last);
+
+                    g_object_unref(G_OBJECT(first));
+                    g_object_unref(G_OBJECT(last));
+
+                }
 
         }
 
-        g_arch_instruction_unlock_dest(last);
+        delete_block_links(links, count);
+
+        g_object_unref(G_OBJECT(block));
 
     }
 
@@ -106,7 +442,7 @@ static void detect_back_edges(dragon_node *nodes, size_t count)
 
 /******************************************************************************
 *                                                                             *
-*  Paramètres  : knight = informations quant à la complexité gérée du code.   *
+*  Paramètres  : list = liste de blocs de code à consulter.                   *
 *                                                                             *
 *  Description : Détecte les boucles dans du code machine.                    *
 *                                                                             *
@@ -116,15 +452,23 @@ static void detect_back_edges(dragon_node *nodes, size_t count)
 *                                                                             *
 ******************************************************************************/
 
-void detect_loops_in_code(dragon_knight *knight)
+void detect_loops_in_basic_blocks(GBlockList *list)
 {
-    dragon_node *nodes;                     /* Liste des noeuds détectés   */
-    size_t count;                           /* Taille de cette liste       */
+    size_t count;                           /* Quantité de blocs présents  */
+    bblock_info_t *info;                    /* Informations supplémentaires*/
 
-    get_dragon_knight_content(knight, &nodes, &count);
+    count = g_block_list_count_blocks(list);
 
-    compute_all_dominators(nodes, count);
+    if (count > 1)
+    {
+        info = calloc(count, sizeof(bblock_info_t));
+
+        traverse_basic_blocks_dfs(&info[0], list, info, 1);
+
+        define_basic_blocks_loops(list, info);
 
-    detect_back_edges(nodes, count);
+        free(info);
+
+    }
 
 }
diff --git a/src/analysis/disass/loop.h b/src/analysis/disass/loop.h
index 4e8ccb0..c8ef0e8 100644
--- a/src/analysis/disass/loop.h
+++ b/src/analysis/disass/loop.h
@@ -25,12 +25,12 @@
 #define _ANALYSIS_DISASS_LOOP_H
 
 
-#include "dragon.h"
+#include "../block.h"
 
 
 
 /* Détecte les boucles dans du code machine. */
-void detect_loops_in_code(dragon_knight *);
+void detect_loops_in_basic_blocks(GBlockList *);
 
 
 
diff --git a/src/analysis/disass/routines.c b/src/analysis/disass/routines.c
index d260bad..d288fc0 100644
--- a/src/analysis/disass/routines.c
+++ b/src/analysis/disass/routines.c
@@ -400,12 +400,12 @@ void g_routines_study_handle_blocks(GRoutinesStudy *study, GBinRoutine *routine,
 
     /* Traitement par blocs */
 
-    detect_loops_in_code(knight);
-
     blocks = translate_dragon_knight(knight, study->binary);
 
     g_binary_routine_set_basic_blocks(routine, blocks);
 
+    detect_loops_in_basic_blocks(blocks);
+
     rank_routine_blocks(routine);
 
     /* Nettoyage final */
diff --git a/tests/analysis/disass/Makefile b/tests/analysis/disass/Makefile
index 8155642..030e868 100644
--- a/tests/analysis/disass/Makefile
+++ b/tests/analysis/disass/Makefile
@@ -1,5 +1,5 @@
 
-EXECUTABLES=hello endofname
+EXECUTABLES=hello endofname irreducible
 
 all: $(EXECUTABLES)
 
@@ -9,5 +9,8 @@ hello: hello.c
 endofname: endofname.c
 	$(ARM_CROSS)gcc $< -o $@
 
+irreducible: irreducible.c
+	$(ARM_CROSS)gcc $< -o $@
+
 clean:
 	rm -f $(EXECUTABLES)
diff --git a/tests/analysis/disass/block.py b/tests/analysis/disass/block.py
index 1663150..84fa4c3 100644
--- a/tests/analysis/disass/block.py
+++ b/tests/analysis/disass/block.py
@@ -8,6 +8,7 @@
 from chrysacase import ChrysalideTestCase
 from pychrysalide.analysis.contents import FileContent
 from pychrysalide.analysis import LoadedBinary
+from pychrysalide.arch import ArchInstruction
 from pychrysalide.format.elf import ElfFormat
 import os
 import sys
@@ -28,6 +29,8 @@ class TestBasicBlocks(ChrysalideTestCase):
 
         os.system('make -C %s hello > /dev/null 2>&1' % dirpath)
 
+        os.system('make -C %s irreducible > /dev/null 2>&1' % dirpath)
+
 
     @classmethod
     def tearDownClass(cls):
@@ -72,3 +75,38 @@ class TestBasicBlocks(ChrysalideTestCase):
         self.assertEqual(found.index, 0)
 
         self.assertEqual(found.rank, 0)
+
+
+    def testIrreducible(self):
+        """Validate support for irreducible loops."""
+
+        fullname = sys.modules[self.__class__.__module__].__file__
+        filename = os.path.basename(fullname)
+
+        baselen = len(fullname) - len(filename)
+
+        cnt = FileContent(fullname[:baselen] + 'irreducible')
+        self.assertIsNotNone(cnt)
+
+        fmt = ElfFormat(cnt)
+        self.assertIsNotNone(fmt)
+
+        binary = LoadedBinary(fmt)
+        self.assertIsNotNone(binary)
+
+        binary.analyze_and_wait()
+
+        sym = fmt.find_symbol_by_label('argstr')
+        self.assertIsNotNone(sym)
+
+        found = sym.basic_blocks.find_by_starting_addr(sym.range.addr)
+        self.assertIsNotNone(found)
+
+        loop_count = 0
+
+        for blk in sym.basic_blocks:
+            for _, dt in blk.destinations:
+                if dt == ArchInstruction.ILT_LOOP:
+                    loop_count += 1
+
+        self.assertEqual(loop_count, 2)
diff --git a/tests/analysis/disass/irreducible.c b/tests/analysis/disass/irreducible.c
new file mode 100644
index 0000000..8edd592
--- /dev/null
+++ b/tests/analysis/disass/irreducible.c
@@ -0,0 +1,37 @@
+
+static void argstr(char *p, int flags)
+{
+    if (flags)
+    {
+ tilde:
+        p++;
+    }
+
+    for (;;)
+    {
+        switch (*p)
+        {
+            case '\0':
+                goto breakloop;
+
+            case ':':
+                if (*--p == '~')
+                    goto tilde;
+                continue;
+        }
+
+    }
+
+ breakloop:
+
+    ;
+
+}
+
+int main(int argc, char **argv)
+{
+    argstr(argv[0], 0);
+
+    return 0;
+
+}
-- 
cgit v0.11.2-87-g4458