From 831f90f15a145636177c387fdd73d777a9e6d84f Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Tue, 15 Jan 2019 23:00:38 +0100
Subject: Improved complex loop detection.

---
 src/analysis/disass/loop.c          |  32 ++++-
 tests/analysis/disass/Makefile      |   5 +-
 tests/analysis/disass/block.py      |  37 ++++++
 tests/analysis/disass/evalcommand.c | 243 ++++++++++++++++++++++++++++++++++++
 4 files changed, 315 insertions(+), 2 deletions(-)
 create mode 100644 tests/analysis/disass/evalcommand.c

diff --git a/src/analysis/disass/loop.c b/src/analysis/disass/loop.c
index b82d58e..4a499fd 100644
--- a/src/analysis/disass/loop.c
+++ b/src/analysis/disass/loop.c
@@ -77,6 +77,9 @@ static void tag_loop_head(bblock_info_t *, bblock_info_t *);
 /* Parcourt une arborescence de blocs à la recherche de boucles. */
 static bblock_info_t *traverse_basic_blocks_dfs(bblock_info_t *, GBlockList *, bblock_info_t *, unsigned int);
 
+/* Indique si une boucle doit être définie. */
+static bool should_be_natural_loop_link(bblock_info_t *, bblock_info_t *);
+
 /* Définit les boucles entre un ensemble de blocs basiques. */
 static void define_basic_blocks_loops(GBlockList *list, bblock_info_t *);
 
@@ -342,6 +345,33 @@ static bblock_info_t *traverse_basic_blocks_dfs(bblock_info_t *root, GBlockList
 
 /******************************************************************************
 *                                                                             *
+*  Paramètres  : dest   = informations du bloc de destination.                *
+*                header = informations de l'entête de boucle.                 *
+*                                                                             *
+*  Description : Indique si une boucle doit être définie.                     *
+*                                                                             *
+*  Retour      : true si une boucle naturelle est bien présente.              *
+*                                                                             *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static bool should_be_natural_loop_link(bblock_info_t *dest, bblock_info_t *header)
+{
+    bool result;                            /* Conclusion à retourner      */
+
+    result = (dest == header);
+
+    if (!result && header != NULL)
+        result = should_be_natural_loop_link(dest, header->iloop_header);
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
 *  Paramètres  : list = liste de blocs de code à consulter.                   *
 *                info = informations complémentaires quant aux blocs.         *
 *                                                                             *
@@ -408,7 +438,7 @@ static void define_basic_blocks_loops(GBlockList *list, bblock_info_t *info)
             links = get_block_successors(block, info, &count);
 
             for (k = 0; k < count; k++)
-                if (links[k].info == iter->iloop_header
+                if (should_be_natural_loop_link(links[k].info, iter->iloop_header)
                     /**
                      * Il se peut qu'un bloc fasse référence à lui même !
                      *
diff --git a/tests/analysis/disass/Makefile b/tests/analysis/disass/Makefile
index ef70dec..17df230 100644
--- a/tests/analysis/disass/Makefile
+++ b/tests/analysis/disass/Makefile
@@ -1,5 +1,5 @@
 
-EXECUTABLES=hello endofname irreducible selfloop
+EXECUTABLES=hello endofname irreducible selfloop evalcommand
 
 all: $(EXECUTABLES)
 
@@ -15,5 +15,8 @@ irreducible: irreducible.c
 selfloop: selfloop.c
 	$(ARM_CROSS)gcc $< -o $@
 
+evalcommand: evalcommand.c
+	$(ARM_CROSS)gcc $< -o $@
+
 clean:
 	rm -f $(EXECUTABLES)
diff --git a/tests/analysis/disass/block.py b/tests/analysis/disass/block.py
index 0b4f3dd..f8e6fe9 100644
--- a/tests/analysis/disass/block.py
+++ b/tests/analysis/disass/block.py
@@ -33,6 +33,8 @@ class TestBasicBlocks(ChrysalideTestCase):
 
         os.system('make -C %s selfloop > /dev/null 2>&1' % dirpath)
 
+        os.system('make -C %s evalcommand > /dev/null 2>&1' % dirpath)
+
 
     @classmethod
     def tearDownClass(cls):
@@ -147,3 +149,38 @@ class TestBasicBlocks(ChrysalideTestCase):
                     loop_count += 1
 
         self.assertEqual(loop_count, 1)
+
+
+    def testComplexLoopBlock(self):
+        """Validate support for complex loop blocks."""
+
+        fullname = sys.modules[self.__class__.__module__].__file__
+        filename = os.path.basename(fullname)
+
+        baselen = len(fullname) - len(filename)
+
+        cnt = FileContent(fullname[:baselen] + 'evalcommand')
+        self.assertIsNotNone(cnt)
+
+        fmt = ElfFormat(cnt)
+        self.assertIsNotNone(fmt)
+
+        binary = LoadedBinary(fmt)
+        self.assertIsNotNone(binary)
+
+        binary.analyze_and_wait()
+
+        sym = fmt.find_symbol_by_label('evalcommand')
+        self.assertIsNotNone(sym)
+
+        found = sym.basic_blocks.find_by_addr(sym.range.addr)
+        self.assertIsNotNone(found)
+
+        loop_count = 0
+
+        for blk in sym.basic_blocks:
+            for _, dt in blk.destinations:
+                if dt == ArchInstruction.ILT_LOOP:
+                    loop_count += 1
+
+        self.assertEqual(loop_count, 3)
diff --git a/tests/analysis/disass/evalcommand.c b/tests/analysis/disass/evalcommand.c
new file mode 100644
index 0000000..f6effc3
--- /dev/null
+++ b/tests/analysis/disass/evalcommand.c
@@ -0,0 +1,243 @@
+
+
+/**
+ * On reproduit un code similaire à celui d'un fichier de Busybox (busybox-1.30.0/shell/ash.c).
+ *
+ * La commande de compilation est :
+ *
+ * arm-linux-gnueabi-gcc -Wp,-MD,shell/.ash.o.d   -std=gnu99 -Iinclude -Ilibbb  -include include/autoconf.h -D_GNU_SOURCE -DNDEBUG  -D"BB_VER=KBUILD_STR(1.30.0)"  -Wall -Wshadow -Wwrite-strings -Wundef -Wstrict-prototypes -Wunused -Wunused-parameter -Wunused-function -Wunused-value -Wmissing-prototypes -Wmissing-declarations -Wno-format-security -Wdeclaration-after-statement -Wold-style-definition -fno-builtin-strlen -finline-limit=0 -fomit-frame-pointer -ffunction-sections -fdata-sections -fno-guess-branch-probability -funsigned-char -static-libgcc -falign-functions=1 -falign-jumps=1 -falign-labels=1 -falign-loops=1 -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-builtin-printf -Os     -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ash)"  -D"KBUILD_MODNAME=KBUILD_STR(ash)" -c -o shell/ash.o shell/ash.c
+ *
+ */
+
+#if 0
+
+static int
+evalcommand(union node *cmd, int flags)
+{
+	static const struct builtincmd null_bltin = {
+		"\0\0", bltincmd /* why three NULs? */
+	};
+
+	union node *argp;
+	struct arglist arglist;
+	char **argv;
+	int argc;
+	const struct strlist *sp;
+	struct cmdentry cmdentry;
+	const char *path;
+	char **nargv;
+
+
+	/* First expand the arguments. */
+
+	cmdentry.cmdtype = CMDBUILTIN;
+	cmdentry.u.cmd = &null_bltin;
+	arglist.lastp = &arglist.list;
+	*arglist.lastp = NULL;
+
+	argc = 0;
+	if (cmd->ncmd.args) {
+		smallint pseudovarflag;
+
+		for (argp = cmd->ncmd.args; argp; argp = argp->narg.next) {
+			struct strlist **spp;
+
+			spp = arglist.lastp;
+			if (pseudovarflag)
+				expandarg(argp, &arglist, EXP_VARTILDE);
+
+			for (sp = *spp; sp; sp = sp->next)
+				argc++;
+		}
+	}
+
+	/* Reserve one extra spot at the front for shellexec. */
+	nargv = NULL;//stalloc(sizeof(char *) * (argc + 2));
+	argv = ++nargv;
+	*nargv = NULL;
+
+
+
+
+	path = vpath.var_text;
+
+	/* Now locate the command. */
+
+		for (;;) {
+			find_command(argv[0], &cmdentry, DO_ERR, path);
+
+#if ENABLE_ASH_CMDCMD
+			if (cmdentry.u.cmd == COMMANDCMD) {
+				nargv = parse_command_args(argv, &path);
+                break;
+
+			} else
+#endif
+				break;
+		}
+
+
+
+	return 0;
+
+}
+
+#endif
+
+
+static int __attribute__ ((naked)) evalcommand(void /*union node*/ *cmd, int flags)
+{
+
+    /*
+   138ac:       e92d401f        push    {r0, r1, r2, r3, r4, lr}
+   138b0:       e3a03002        mov     r3, #2
+   138b4:       e58d3008        str     r3, [sp, #8]
+   138b8:       e59f30b8        ldr     r3, [pc, #184]  ; 13978 <evalcommand+0xcc>
+   138bc:       e58dd004        str     sp, [sp, #4]
+   138c0:       e58d300c        str     r3, [sp, #12]
+   138c4:       e3a03000        mov     r3, #0
+   138c8:       e58d3000        str     r3, [sp]
+   138cc:       e590300c        ldr     r3, [r0, #12]
+   138d0:       e3530000        cmp     r3, #0
+   138d4:       1a00000d        bne     13910 <evalcommand+0x64>
+   138d8:       e3a00000        mov     r0, #0
+   138dc:       e59f3098        ldr     r3, [pc, #152]  ; 1397c <evalcommand+0xd0>
+   138e0:       e5800004        str     r0, [r0, #4]
+   138e4:       e5933000        ldr     r3, [r3]
+   138e8:       e3a02001        mov     r2, #1
+   138ec:       e59330f0        ldr     r3, [r3, #240]  ; 0xf0
+   138f0:       e28d1008        add     r1, sp, #8
+   138f4:       ebfffe71        bl      132c0 <find_command>
+   138f8:       e59d200c        ldr     r2, [sp, #12]
+   138fc:       e59f307c        ldr     r3, [pc, #124]  ; 13980 <evalcommand+0xd4>
+   13900:       e1520003        cmp     r2, r3
+   13904:       1a000018        bne     1396c <evalcommand+0xc0>
+   13908:       e3a01004        mov     r1, #4
+   1390c:       ea00000f        b       13950 <evalcommand+0xa4>
+   13910:       e5933004        ldr     r3, [r3, #4]
+   13914:       eaffffed        b       138d0 <evalcommand+0x24>
+   13918:       e5d23000        ldrb    r3, [r2]
+   1391c:       e353002d        cmp     r3, #45 ; 0x2d
+   13920:       1a000011        bne     1396c <evalcommand+0xc0>
+   13924:       e5d23001        ldrb    r3, [r2, #1]
+   13928:       e2820002        add     r0, r2, #2
+   1392c:       e3530000        cmp     r3, #0
+   13930:       0a00000d        beq     1396c <evalcommand+0xc0>
+   13934:       e353002d        cmp     r3, #45 ; 0x2d
+   13938:       0a000008        beq     13960 <evalcommand+0xb4>
+   1393c:       e3530070        cmp     r3, #112        ; 0x70
+   13940:       1a000009        bne     1396c <evalcommand+0xc0>
+   13944:       e4d03001        ldrb    r3, [r0], #1
+   13948:       e3530000        cmp     r3, #0
+   1394c:       1afffffa        bne     1393c <evalcommand+0x90>
+   13950:       e5b12004        ldr     r2, [r1, #4]!
+   13954:       e3520000        cmp     r2, #0
+   13958:       0a000003        beq     1396c <evalcommand+0xc0>
+   1395c:       eaffffed        b       13918 <evalcommand+0x6c>
+   13960:       e5d22002        ldrb    r2, [r2, #2]
+   13964:       e3520000        cmp     r2, #0
+   13968:       1afffff3        bne     1393c <evalcommand+0x90>
+   1396c:       e3a00000        mov     r0, #0
+   13970:       e28dd014        add     sp, sp, #20
+   13974:       e49df004        pop     {pc}            ; (ldr pc, [sp], #4)
+   13978:       00017fa4        .word   0x00017fa4
+   1397c:       000213f4        .word   0x000213f4
+   13980:       00017e60        .word   0x00017e60
+    */
+
+
+asm (
+
+     "push    {r0, r1, r2, r3, r4, lr}" "\n"
+     "mov     r3, #2"                   "\n"
+     "str     r3, [sp, #8]"             "\n"
+     "ldr     r3, [pc, #184]"           "\n"
+     "str     sp, [sp, #4]"             "\n"
+     "str     r3, [sp, #12]"            "\n"
+     "mov     r3, #0"                   "\n"
+     "str     r3, [sp]"                 "\n"
+     "ldr     r3, [r0, #12]"            "\n"
+
+     ".Lbl_138d0%=:"                    "\n"
+
+     "cmp     r3, #0"                   "\n"
+     "bne     .Lbl_13910%="             "\n"
+
+     "mov     r0, #0"                   "\n"
+     "ldr     r3, [pc, #152]"           "\n"
+     "str     r0, [r0, #4]"             "\n"
+     "ldr     r3, [r3]"                 "\n"
+     "mov     r2, #1"                   "\n"
+     "ldr     r3, [r3, #240]"           "\n"
+     "add     r1, sp, #8"               "\n"
+     "bl      .Lbl_find%="              "\n" /* <find_command> */
+     "ldr     r2, [sp, #12]"            "\n"
+     "ldr     r3, [pc, #124]"           "\n"
+     "cmp     r2, r3"                   "\n"
+     "bne     .Lbl_1396c%="             "\n"
+
+     "mov     r1, #4"                   "\n"
+     "b       .Lbl_13950%="             "\n"
+
+     ".Lbl_13910%=:"                    "\n"
+
+     "ldr     r3, [r3, #4]"             "\n"
+     "b       .Lbl_138d0%="             "\n"
+
+     ".Lbl_13918%=:"                    "\n"
+
+     "ldrb    r3, [r2]"                 "\n"
+     "cmp     r3, #45"                  "\n"
+     "bne     .Lbl_1396c%="             "\n"
+
+     "ldrb    r3, [r2, #1]"             "\n"
+     "add     r0, r2, #2"               "\n"
+     "cmp     r3, #0"                   "\n"
+     "beq     .Lbl_1396c%="             "\n"
+
+     "cmp     r3, #45"                  "\n"
+     "beq     .Lbl_13960%="             "\n"
+
+     ".Lbl_1393c%=:"                    "\n"
+
+     "cmp     r3, #112"                 "\n"
+     "bne     .Lbl_1396c%="             "\n"
+
+     "ldrb    r3, [r0], #1"             "\n"
+     "cmp     r3, #0"                   "\n"
+     "bne     .Lbl_1393c%="             "\n"
+
+     ".Lbl_13950%=:"                    "\n"
+
+     "ldr     r2, [r1, #4]!"            "\n"
+     "cmp     r2, #0"                   "\n"
+     "beq     .Lbl_1396c%="             "\n"
+     "b       .Lbl_13918%="             "\n"
+
+     ".Lbl_13960%=:"                    "\n"
+
+     "ldrb    r2, [r2, #2]"             "\n"
+     "cmp     r2, #0"                   "\n"
+     "bne     .Lbl_1393c%="             "\n"
+
+     ".Lbl_1396c%=:"                    "\n"
+
+     "mov     r0, #0"                   "\n"
+     "add     sp, sp, #20"              "\n"
+     "pop     {pc}"                     "\n"
+
+     ".word   0x00017fa4"               "\n"
+     ".word   0x000213f4"               "\n"
+     ".word   0x00017e60"               "\n"
+
+     ".Lbl_find%=:"                     "\n"
+
+     ::"r" (cmd, flags));
+
+}
+
+int main(int argc, char **argv)
+{
+    return evalcommand((void *)0, 0);
+
+}
-- 
cgit v0.11.2-87-g4458