From 2ed3c9274c8dafb660bc25743a1aac7c72c12965 Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Tue, 8 Jan 2019 23:29:47 +0100
Subject: Marked the ARMv7 branch instructions with the LR register as return
 points.

---
 plugins/arm/v7/link.c             | 12 ++++-
 plugins/arm/v7/opdefs/A8827_bx.d  |  4 +-
 plugins/arm/v7/opdefs/A8828_bxj.d | 14 ++++++
 tests/analysis/disass/Makefile    |  5 +-
 tests/analysis/disass/armv7.py    | 69 +++++++++++++++++++++++++++
 tests/analysis/disass/endofname.c | 99 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 200 insertions(+), 3 deletions(-)
 create mode 100644 tests/analysis/disass/armv7.py
 create mode 100644 tests/analysis/disass/endofname.c

diff --git a/plugins/arm/v7/link.c b/plugins/arm/v7/link.c
index 4a63890..e30b0c1 100644
--- a/plugins/arm/v7/link.c
+++ b/plugins/arm/v7/link.c
@@ -62,7 +62,17 @@ void handle_armv7_conditional_branch_from_register(GArchInstruction *instr, GArc
         g_arch_instruction_set_flag(instr, AIF_RETURN_POINT);
 
     else
-        g_arch_instruction_set_flag(instr, AIF_RETURN_POINT);   /* FIXME : jump inconnu ! */
+    {
+        /**
+         * A jump is performed, but its target is unknown!
+         *
+         * In any case, the execution flow does not naturally continue to
+         * the next instruction, so the branch is marked as being a
+         * return point.
+         */
+        g_arch_instruction_set_flag(instr, AIF_RETURN_POINT);
+
+    }
 
     g_object_unref(G_OBJECT(reg));
 
diff --git a/plugins/arm/v7/opdefs/A8827_bx.d b/plugins/arm/v7/opdefs/A8827_bx.d
index 64ad628..337728f 100644
--- a/plugins/arm/v7/opdefs/A8827_bx.d
+++ b/plugins/arm/v7/opdefs/A8827_bx.d
@@ -52,6 +52,7 @@
 	@hooks {
 
 		fetch = help_fetching_with_instruction_bx_from_thumb
+		link = handle_armv7_conditional_branch_from_register
 
 	}
 
@@ -83,7 +84,8 @@
 
 	@hooks {
 
-		fetch = help_fetching_with_instruction_bx_from_thumb
+		fetch = help_fetching_with_instruction_bx_from_arm
+		link = handle_armv7_conditional_branch_from_register
 
 	}
 
diff --git a/plugins/arm/v7/opdefs/A8828_bxj.d b/plugins/arm/v7/opdefs/A8828_bxj.d
index 7c6ddb5..f3a1bb7 100644
--- a/plugins/arm/v7/opdefs/A8828_bxj.d
+++ b/plugins/arm/v7/opdefs/A8828_bxj.d
@@ -49,6 +49,13 @@
 
 	}
 
+	@hooks {
+
+		fetch = help_fetching_with_instruction_bx_from_thumb
+		link = handle_armv7_conditional_branch_from_register
+
+	}
+
 }
 
 @encoding (A1) {
@@ -75,5 +82,12 @@
 
 	}
 
+	@hooks {
+
+		fetch = help_fetching_with_instruction_bx_from_arm
+		link = handle_armv7_conditional_branch_from_register
+
+	}
+
 }
 
diff --git a/tests/analysis/disass/Makefile b/tests/analysis/disass/Makefile
index 6f12036..8155642 100644
--- a/tests/analysis/disass/Makefile
+++ b/tests/analysis/disass/Makefile
@@ -1,10 +1,13 @@
 
-EXECUTABLES=hello
+EXECUTABLES=hello endofname
 
 all: $(EXECUTABLES)
 
 hello: hello.c
 	$(ARM_CROSS)gcc $< -o $@
 
+endofname: endofname.c
+	$(ARM_CROSS)gcc $< -o $@
+
 clean:
 	rm -f $(EXECUTABLES)
diff --git a/tests/analysis/disass/armv7.py b/tests/analysis/disass/armv7.py
new file mode 100644
index 0000000..4befdf6
--- /dev/null
+++ b/tests/analysis/disass/armv7.py
@@ -0,0 +1,69 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+
+# Checks that basic blocks are handled properly
+
+
+from chrysacase import ChrysalideTestCase
+from pychrysalide.analysis.contents import FileContent
+from pychrysalide.analysis import LoadedBinary
+from pychrysalide.format.elf import ElfFormat
+import os
+import sys
+
+
+class TestARMv7(ChrysalideTestCase):
+    """TestCase for ARMv7."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Build the "endofname" binary in the directory of this module."""
+        super(TestARMv7, cls).setUpClass()
+
+        cls.log('Compile binary "endofname" if needed...')
+        # Absolute path: 'make -C' must never receive an empty directory
+        fullname = sys.modules[cls.__module__].__file__
+        dirpath = os.path.dirname(os.path.abspath(fullname))
+
+        os.system('make -C %s endofname > /dev/null 2>&1' % dirpath)
+
+
+    @classmethod
+    def tearDownClass(cls):
+        """Run the "clean" target in the directory of this module."""
+        super(TestARMv7, cls).tearDownClass()
+
+        cls.log('Delete built binaries...')
+
+        fullname = sys.modules[cls.__module__].__file__
+        dirpath = os.path.dirname(os.path.abspath(fullname))
+
+        os.system('make -C %s clean > /dev/null 2>&1' % dirpath)
+
+
+    def testBranchLR(self):
+        """Ensure some bx instructions are marked as return points."""
+
+        fullname = sys.modules[self.__class__.__module__].__file__
+        dirpath = os.path.dirname(os.path.abspath(fullname))
+
+        # Same directory as the one handed to make in setUpClass()
+        cnt = FileContent(os.path.join(dirpath, 'endofname'))
+        self.assertIsNotNone(cnt)
+
+        fmt = ElfFormat(cnt)
+        self.assertIsNotNone(fmt)
+
+        binary = LoadedBinary(fmt)
+        self.assertIsNotNone(binary)
+
+        binary.analyze_and_wait()
+
+        sym = fmt.find_symbol_by_label('endofname')
+        self.assertIsNotNone(sym)
+
+        # No destination is expected from the second boundary of block #1
+        block = list(sym.basic_blocks)[1]
+
+        self.assertEqual(len(block.boundaries[1].destinations), 0)
diff --git a/tests/analysis/disass/endofname.c b/tests/analysis/disass/endofname.c
new file mode 100644
index 0000000..ebe473d
--- /dev/null
+++ b/tests/analysis/disass/endofname.c
@@ -0,0 +1,99 @@
+
+#include <stdio.h>
+
+
+/**
+ * Reproduces code similar to a Busybox source file (busybox-1.30.0/libbb/libbb/endofname.c).
+ *
+ * The compilation command is:
+ *
+ * arm-linux-gnueabi-gcc     -I../include -I../libbb  -include ../include/autoconf.h -D_GNU_SOURCE -DNDEBUG   -fno-builtin-strlen -finline-limit=0 -fomit-frame-pointer -ffunction-sections -fdata-sections -fno-guess-branch-probability -funsigned-char -static-libgcc -falign-functions=1 -falign-jumps=1 -falign-labels=1 -falign-loops=1 -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-builtin-printf -Os -E  -c -o a.out endofname.c
+ *
+ */
+
+
+/*
+static __attribute__ ((always_inline)) __inline__ int bb_ascii_isalnum(unsigned char a)
+{
+    unsigned char b = a - '0';
+    if (b <= 9)
+        return (b <= 9);
+    b = (a|0x20) - 'a';
+    return b <= 'z' - 'a';
+}
+
+const char *endofname(const char *name)
+{
+    if (!((*name) == '_' || ((unsigned char)((((unsigned char)(*name))|0x20) - 'a') <= ('z' - 'a'))))
+        return name;
+    while (*++name) {
+        if (!((*name) == '_' || bb_ascii_isalnum((unsigned char)(*name))))
+            break;
+    }
+    return name;
+}
+*/
+
+
+void __attribute__ ((naked)) endofname(const char *name)
+{
+    /*
+     * Replays the listing below. A naked function may only hold a basic asm
+     * statement, so no operand is bound to 'name': it is assumed to arrive
+     * in r0, from which the code reads it.
+     *
+      83bc:       e5d03000        ldrb    r3, [r0]
+      83c0:       e353005f        cmp     r3, #95 ; 0x5f
+      83c4:       0a000003        beq     83d8 <endofname+0x1c>
+      83c8:       e3833020        orr     r3, r3, #32
+      83cc:       e2433061        sub     r3, r3, #97     ; 0x61
+      83d0:       e3530019        cmp     r3, #25
+      83d4:       812fff1e        bxhi    lr
+      83d8:       e5f03001        ldrb    r3, [r0, #1]!
+      83dc:       e3530000        cmp     r3, #0
+      83e0:       0a000005        beq     83fc <endofname+0x40>
+      83e4:       e353005f        cmp     r3, #95 ; 0x5f
+      83e8:       0afffffa        beq     83d8 <endofname+0x1c>
+      83ec:       e2432030        sub     r2, r3, #48     ; 0x30
+      83f0:       e3520009        cmp     r2, #9
+      83f4:       9afffff7        bls     83d8 <endofname+0x1c>
+      83f8:       eafffff2        b       83c8 <endofname+0xc>
+      83fc:       e12fff1e        bx      lr
+    */
+asm (
+     "ldrb    r3, [r0]"         "\n"
+     "cmp     r3, #95"          "\n"
+     "beq     .Lbl_83d8"        "\n"
+
+     ".Lbl_83c8:"               "\n"
+
+     "orr     r3, r3, #32"      "\n"
+     "sub     r3, r3, #97"      "\n"
+     "cmp     r3, #25"          "\n"
+     "bxhi    lr"               "\n"
+
+     ".Lbl_83d8:"               "\n"
+
+     "ldrb    r3, [r0, #1]!"    "\n"
+     "cmp     r3, #0"           "\n"
+     "beq     .Lbl_83fc"        "\n"
+     "cmp     r3, #95"          "\n"
+     "beq     .Lbl_83d8"        "\n"
+     "sub     r2, r3, #48"      "\n"
+     "cmp     r2, #9"           "\n"
+     "bls     .Lbl_83d8"        "\n"
+     "b       .Lbl_83c8"        "\n"
+
+     ".Lbl_83fc:"               "\n"
+
+     "bx      lr"               "\n"
+     );
+}
+
+int main(int argc, char **argv)
+{
+    /* Call endofname() on the first argv entry; nothing is read back. */
+    endofname(*argv);
+
+    return 0;
+}
-- 
cgit v0.11.2-87-g4458