#!/usr/bin/python
# -*- coding: utf-8 -*-

"""Identify the loops of a function from its basic blocks.

Implements "a new algorithm for identifying loops in decompilation": a single
depth-first traversal of the basic blocks which tags every block with its
innermost loop header and flags the re-entry blocks of irreducible loops.

Usage: wmzc.py <binfile> <fname>
"""


import argparse
import sys

# from pychrysalide.features import *
from pychrysalide.analysis.contents import FileContent
from pychrysalide.analysis import StudyProject
from pychrysalide.arch import ArchInstruction
from pychrysalide.core import wait_for_all_global_works


def link_type_to_str(t):
    """Return the textual form of the link type *t*, minus its first 4 chars.

    *t* is looked up among the ``ILT_*`` attributes of ``ArchInstruction``;
    ``ValueError`` is raised (by ``list.index``) if it matches none of them.
    """

    links = [getattr(ArchInstruction, a) for a in dir(ArchInstruction) if a.startswith('ILT_')]

    # NOTE(review): the slice presumably strips an 'ILT_' prefix from the
    # constant's string form -- confirm against the pychrysalide constants.
    return str(links[links.index(t)])[4:]


def stringify_block(blk):
    """Build a one-line description of the basic block *blk*.

    The line starts with '*' when the first instruction of the block has no
    source, then gives the physical address of the block, the keywords of its
    first and last instructions and one '|-> addr (link)' item per destination.
    """

    first, last = blk.boundaries

    starting = '*' if len(first.sources) == 0 else ' '

    parts = ['%s Block @ 0x%x: %s - %s' % (starting, first.range.addr.phys, first.keyword, last.keyword)]

    parts.extend(
        ' |-> 0x%x (%s)' % (db.boundaries[0].range.addr.phys, link_type_to_str(dt))
        for db, dt in blk.destinations
    )

    return ''.join(parts)


def find_by_addr(grp, addr):
    """Return the first EnhancedBlock of *grp* starting at *addr*, else None.

    Blocks are matched on the physical address of their first instruction.
    """

    return next(
        (g for g in grp if g._bb.boundaries[0].range.addr.phys == addr.phys),
        None
    )


class EnhancedBlock:
    """Basic block wrapper carrying the state of the loop identification."""

    def __init__(self, bb, maxlen):
        """Wrap the basic block *bb*.

        *maxlen* is the width of the widest printed block address; it is only
        used to align the loop header column in __str__().
        """

        self._bb = bb
        self._maxlen = maxlen

        # Set once the DFS has entered the block.
        self._traversed = False
        # Position in the current DFS path; 0 means "not in the path".
        self._dfsp_pos = 0
        # Header of the innermost loop containing the block, if any.
        self._iloop_header = None

        # Set when the block is found to be a loop re-entry.
        self._irreducible = False


    def __str__(self):
        """Describe the block: irreducible flag, loop header, block details."""

        if self._iloop_header is None:
            loop_header = ' ' * self._maxlen
        else:
            first = self._iloop_header._bb.boundaries[0]
            # Pad to the same width as the empty case to keep columns aligned.
            loop_header = ('0x%x' % first.range.addr.phys).ljust(self._maxlen)

        flag = 'I' if self._irreducible else '-'

        return (' %s loop=%s ||' % (flag, loop_header)) + stringify_block(self._bb)


    def get_successors(self, grp):
        """List the blocks of *grp* reached by the destinations of this block.

        Destinations without a matching block in *grp* are skipped.
        """

        result = []

        for db, _ in self._bb.destinations:

            succ = find_by_addr(grp, db.boundaries[0].range.addr)

            if succ is not None:
                result.append(succ)

        return result


    def tag_lhead(self, h):
        """Record *h* as a loop header of this block.

        The chain of innermost loop headers starting at this block is walked
        and *h* is weaved into it according to the DFS path positions, so that
        each block of the chain keeps pointing to its innermost header.
        Nothing is done when *h* is None or is the block itself.
        """

        if h is None or self is h:
            return

        cur1 = self
        cur2 = h

        while cur1._iloop_header is not None:

            ih = cur1._iloop_header

            # Already known as header: nothing more to record.
            if ih is cur2:
                return

            if ih._dfsp_pos < cur2._dfsp_pos:

                # cur2 is deeper in the DFS path than ih, hence more inner:
                # insert it here and carry on with ih as the header to place.
                cur1._iloop_header = cur2
                cur1 = cur2
                cur2 = ih

            else:

                cur1 = ih

        cur1._iloop_header = cur2


    def trav_loops_DFS(self, grp, pos):
        """Traverse the blocks of *grp* depth-first from this block.

        *pos* is the position of this block in the DFS path; the caller starts
        with 1, as 0 is reserved for "not in the path".

        Returns the innermost loop header of this block, or None.

        NOTE: the traversal is recursive, one call level per block of the
        current path.
        """

        self._traversed = True
        self._dfsp_pos = pos

        for b in self.get_successors(grp):

            # Case A: new block
            if not b._traversed:

                nh = b.trav_loops_DFS(grp, pos + 1)
                self.tag_lhead(nh)

            # Case B: b is in DFSP(self), so b is a loop header
            elif b._dfsp_pos > 0:

                self.tag_lhead(b)

            # Case C: b belongs to no loop, do nothing
            elif b._iloop_header is None:

                pass

            else:

                h = b._iloop_header

                # Case D: h is in DFSP(self)
                if h._dfsp_pos > 0:

                    self.tag_lhead(h)

                # Case E: h is not in DFSP(self), b is a re-entry
                else:

                    # Mark b and (self, b) as re-entry;
                    # the loop of h is irreducible.
                    b._irreducible = True

                    # Look for the first enclosing header still in DFSP(self).
                    while h._iloop_header is not None:

                        h = h._iloop_header

                        if h._dfsp_pos > 0:
                            self.tag_lhead(h)
                            break

                        # Otherwise the loop of h is irreducible too.

        # Clear self's DFSP position
        self._dfsp_pos = 0

        return self._iloop_header


def main():
    """Analyze the binary given on the command line and print its loops.

    Returns the exit status of the process: 0 on success, 1 on error.
    """

    title = '%s - Implement "a new algorithm for identifying loops in decompilation".' % sys.argv[0]

    parser = argparse.ArgumentParser(description=title, add_help=False)

    # The 'help' action prints the help and exits (status 0) while parsing,
    # so '-h' works even if the required positional arguments are missing.
    parser.add_argument('-h', '--help', action='help',
                        help='Display the command line options understood by %s.' % sys.argv[0])

    parser.add_argument('binfile', type=str, help='The object file to be examined')
    parser.add_argument('fname', type=str, help='The analyzed function to process')

    args = parser.parse_args()

    prj = StudyProject()

    cnt = FileContent(args.binfile)

    prj.discover(cnt)

    wait_for_all_global_works()

    # NOTE(review): an empty content list presumably means the file could not
    # be loaded or recognized -- confirm against the StudyProject behavior.
    try:
        binary = prj.contents[0]
    except IndexError:
        print('No binary content loaded from "%s"!' % args.binfile)
        return 1

    sym = binary.format.find_symbol_by_label(args.fname)

    if not sym:
        print('Function "%s" not found!' % args.fname)
        return 1

    blocks = list(sym.basic_blocks)

    if not blocks:
        print('Function "%s" has no basic block!' % args.fname)
        return 1

    # Width of the widest block address, for the output alignment.
    maxlen = max(len('0x%x' % bb.boundaries[0].range.addr.phys) for bb in blocks)

    elist = [EnhancedBlock(bb, maxlen) for bb in blocks]

    elist[0].trav_loops_DFS(elist, 1)

    for e in elist:
        print(e)

    return 0


if __name__ == '__main__':
    sys.exit(main())