#!/usr/bin/python
# -*- coding: utf-8 -*-

"""Tag the loop headers of one routine's basic blocks and print the result.

The script implements the depth-first traversal described in "a new
algorithm for identifying loops in decompilation": every basic block of the
selected routine is wrapped into an EnhancedBlock, the traversal starts from
the first block, and each block is then printed with its innermost loop
header and its irreducible flag.
"""

import argparse
import sys

# from pychrysalide.features import *
from pychrysalide.analysis import BinRoutine
from pychrysalide.analysis import LoadedBinary
from pychrysalide.analysis import StudyProject
from pychrysalide.analysis.contents import FileContent
from pychrysalide.arch import ArchInstruction
from pychrysalide.arch import vmpa
from pychrysalide.core import wait_for_all_global_works
from pychrysalide.format import FlatFormat
from pychrysalide.glibext import BinPortion


def link_type_to_str(t):
    """Return the textual form of a link type, without its first 4 characters.

    The value is looked up among the ArchInstruction attributes whose name
    starts with 'ILT_'; a ValueError is raised if it matches none of them.
    """

    links = [
        getattr(ArchInstruction, name)
        for name in dir(ArchInstruction)
        if name.startswith('ILT_')
    ]

    # NOTE(review): the [4:] slice presumably strips an 'ILT_' prefix from
    # str(value) -- confirm against the pychrysalide constant type.
    return str(links[links.index(t)])[4:]


def stringify_block(blk):
    """Build a one-line description of a basic block and its destinations.

    The description starts with '*' when the first instruction of the block
    has no source, and lists the physical address and link type of each
    destination block.
    """

    first, last = blk.boundaries

    starting = '*' if len(first.sources) == 0 else ' '

    desc = '%s Block @ 0x%x: %s - %s' % (starting, first.range.addr.phys,
                                         first.keyword, last.keyword)

    for db, dt in blk.destinations:
        desc += ' |-> 0x%x (%s)' % (db.boundaries[0].range.addr.phys,
                                    link_type_to_str(dt))

    return desc


def find_by_addr(grp, addr):
    """Return the first enhanced block of grp starting at addr, or None.

    Blocks are compared using the physical address of their first
    instruction.
    """

    for candidate in grp:
        first = candidate._bb.boundaries[0]
        if first.range.addr.phys == addr.phys:
            return candidate

    return None


class EnhancedBlock:
    """Basic block extended with the state needed by the loop traversal."""

    def __init__(self, bb, maxlen):
        """Wrap the basic block bb.

        maxlen is the width used to pad the loop header column when the
        block has no loop header to print.
        """

        self._bb = bb
        self._maxlen = maxlen

        # True once the depth-first traversal has entered the block.
        self._traversed = False

        # Position in the current depth-first path; 0 when not on that path.
        self._dfsp_pos = 0

        # Innermost loop header found so far (an EnhancedBlock), or None.
        self._iloop_header = None

        # Set when the block is reached through a re-entry (case E).
        self._irreducible = False

    def __str__(self):
        """Describe the block: irreducible flag, loop header, then content."""

        if self._iloop_header is None:
            loop_header = ' ' * self._maxlen
        else:
            first = self._iloop_header._bb.boundaries[0]
            loop_header = '0x%x' % first.range.addr.phys

        desc = ' %s loop=%s ||' % ('I' if self._irreducible else '-',
                                   loop_header)

        desc += stringify_block(self._bb)

        return desc

    def get_successors(self, grp):
        """List the enhanced blocks of grp targeted by this block.

        Destinations without a matching block in grp are skipped.
        """

        result = []

        for db, _ in self._bb.destinations:
            succ = find_by_addr(grp, db.boundaries[0].range.addr)
            if succ is not None:
                result.append(succ)

        return result

    def tag_lhead(self, h):
        """Register h as a loop header for this block.

        The chain of loop headers starting at this block is walked and h is
        inserted at the place given by the depth-first path positions.
        Nothing is done when h is None or is the block itself.
        """

        if h is None or self is h:
            return

        cur1 = self
        cur2 = h

        while cur1._iloop_header is not None:

            ih = cur1._iloop_header

            # h is already part of the chain.
            if ih is cur2:
                return

            if ih._dfsp_pos < cur2._dfsp_pos:
                # cur2 sits deeper in the path than ih: insert it between
                # cur1 and ih, then go on placing ih above cur2.
                cur1._iloop_header = cur2
                cur1 = cur2
                cur2 = ih
            else:
                cur1 = ih

        cur1._iloop_header = cur2

    def trav_loops_DFS(self, grp, pos):
        """Traverse the blocks reachable from this one and tag loop headers.

        grp is the list of all enhanced blocks and pos the position of this
        block in the current depth-first path (the entry block uses 1).
        The innermost loop header of this block is returned, or None.

        The traversal is recursive: one Python frame is used per block of
        the current path.
        """

        self._traversed = True
        self._dfsp_pos = pos

        for b in self.get_successors(grp):

            # Case A: new block
            if not b._traversed:
                nh = b.trav_loops_DFS(grp, pos + 1)
                self.tag_lhead(nh)

            # Case B: b is in the current path, so b is a loop header
            elif b._dfsp_pos > 0:
                self.tag_lhead(b)

            # Case C: b is done and belongs to no loop, nothing to do
            elif b._iloop_header is None:
                pass

            else:

                h = b._iloop_header

                # Case D: the loop header of b is in the current path
                if h._dfsp_pos > 0:
                    self.tag_lhead(h)

                # Case E: re-entry, the loop header of b is not in the path
                else:

                    # Only b gets flagged here; marking the edge (self, b)
                    # and the loop of h as irreducible is not implemented.
                    b._irreducible = True

                    # Climb the loop headers until one is in the current path.
                    while h._iloop_header is not None:

                        h = h._iloop_header

                        if h._dfsp_pos > 0:
                            self.tag_lhead(h)
                            break

                        # Marking the loop of h as irreducible is not
                        # implemented here either.

        # Clear the position: the block leaves the current path.
        self._dfsp_pos = 0

        return self._iloop_header


class _HelpAndExit(argparse.Action):
    """Print the help message as soon as the option is met, then exit.

    A plain 'store_true' flag is only readable once the whole command line
    has been accepted, which never happens when the required positional
    arguments are missing. The exit status remains 1, as it was when the
    help got displayed after a complete parsing.
    """

    def __init__(self, option_strings, dest, **kwargs):
        super().__init__(option_strings, dest, nargs=0, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        parser.print_help()
        parser.exit(1)


def main():
    """Load the binary, run the loop detection on one routine and print it."""

    title = '%s - Implement "a new algorithm for identifying loops in decompilation".' % sys.argv[0]

    parser = argparse.ArgumentParser(description=title, add_help=False)

    parser.add_argument('-h', '--help', action=_HelpAndExit,
                        help='Display the command line options understood by %s.' % sys.argv[0])

    parser.add_argument('binfile', type=str,
                        help='The object file to be examined')
    parser.add_argument('target', type=str,
                        help='The analyzed function to process (name or address)')

    args = parser.parse_args()

    target = args.target

    cnt = FileContent(args.binfile)

    if target.startswith('0x'):

        # The target is an address: process the file as a flat armv7 binary
        # holding one routine named 'code'.

        addr = int(target, 16)

        fmt = FlatFormat(cnt)
        fmt.set_machine('armv7')

        # NOTE(review): bit 0 is cleared for the portion base but kept for
        # the code point -- presumably the Thumb mode bit, to be confirmed.
        base = vmpa(0, addr & ~0x1)

        p = BinPortion(BinPortion.BPC_CODE, base, cnt.size)
        p.rights = BinPortion.PAC_READ | BinPortion.PAC_EXEC

        print(p.range)

        fmt.register_user_portion(p)

        sym = BinRoutine()
        sym.name = 'code'
        sym.range = p.range

        fmt.add_symbol(sym)

        fmt.register_code_point(addr, True)

        binary = LoadedBinary(fmt)
        binary.analyze_and_wait()

        # The routine registered above is retrieved by its label below.
        target = sym.name

    else:

        # The target is a label: let a project discover and load the file.

        prj = StudyProject()
        prj.discover(cnt)

        wait_for_all_global_works()

        binary = prj.contents[0]

    sym = binary.format.find_symbol_by_label(target)

    if not sym:
        print('Function "%s" not found!' % target)
        sys.exit(1)

    blocks = list(sym.basic_blocks)

    # Without any block there is no entry point to start the traversal from.
    if not blocks:
        print('Function "%s" has no basic block!' % target)
        sys.exit(1)

    # Width of the widest block address, used to align the output columns.
    maxlen = max(len('0x%x' % bb.boundaries[0].range.addr.phys)
                 for bb in blocks)

    elist = [EnhancedBlock(bb, maxlen) for bb in blocks]

    elist[0].trav_loops_DFS(elist, 1)

    for e in elist:
        print(e)


if __name__ == '__main__':
    main()