summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cyrille Bagard <nocbos@gmail.com> 2019-01-18 21:04:47 (GMT)
committer Cyrille Bagard <nocbos@gmail.com> 2019-01-18 21:04:47 (GMT)
commit 5b17d8077f608406d0a1a936bdb4704efc40bf21 (patch)
tree e996fd6fbe94bf2ee3f33122ec61ef1a98ea0b8f
parent 5202ac214f8a35f151c0afba9323e4ae9586eef8 (diff)
Implemented "a new algorithm for identifying loops in decompilation".
-rw-r--r-- python/wmzc.py 236
1 files changed, 236 insertions, 0 deletions
diff --git a/python/wmzc.py b/python/wmzc.py
new file mode 100644
index 0000000..957e247
--- /dev/null
+++ b/python/wmzc.py
@@ -0,0 +1,236 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+
+import argparse
+import sys
+
+# from pychrysalide.features import *
+from pychrysalide.analysis.contents import FileContent
+from pychrysalide.analysis import StudyProject
+from pychrysalide.arch import ArchInstruction
+from pychrysalide.core import wait_for_all_global_works
+
+
def link_type_to_str(t):
    """Return the string form of an instruction link type, minus 4 characters.

    The values of every ``ArchInstruction`` attribute whose name starts with
    ``ILT_`` are collected, the entry equal to *t* is looked up and the first
    four characters of its ``str()`` form are dropped.

    Raises ``ValueError`` when *t* matches none of the collected values.
    """

    known = []

    for attr in dir(ArchInstruction):
        if attr.startswith('ILT_'):
            known.append(getattr(ArchInstruction, attr))

    position = known.index(t)

    # Presumably str() of such a constant yields its 'ILT_xxx' name, so the
    # slice strips the prefix -- TODO confirm against pychrysalide.
    return str(known[position])[4:]
+
+
def stringify_block(blk):
    """Build a one-line description of a basic block and its outgoing links.

    The line starts with '*' when the first instruction of the block has no
    source, and with a blank otherwise. It then gives the physical address of
    the block, the keywords of its first and last instructions and, for each
    destination, the target address with the link type.
    """

    head, tail = blk.boundaries

    if len(head.sources) == 0:
        marker = '*'
    else:
        marker = ' '

    pieces = ['%s Block @ 0x%x: %s - %s' % (marker, head.range.addr.phys, head.keyword, tail.keyword)]

    for target, link in blk.destinations:
        target_addr = target.boundaries[0].range.addr.phys
        pieces.append(' |-> 0x%x (%s)' % (target_addr, link_type_to_str(link)))

    return ''.join(pieces)
+
+
def find_by_addr(grp, addr):
    """Return the first item of *grp* whose block starts at *addr*.

    Items are matched by comparing the physical address of the first
    instruction of their ``_bb`` block with ``addr.phys``. ``None`` is
    returned when no item matches.
    """

    for candidate in grp:

        start = candidate._bb.boundaries[0]

        if start.range.addr.phys == addr.phys:
            return candidate

    return None
+
+
class EnhancedBlock:
    """Basic block wrapper holding the state needed by the loop detection.

    The methods follow the ``tag_lhead`` / ``trav_loops_DFS`` procedures of
    "A New Algorithm for Identifying Loops in Decompilation": each block
    remembers whether it was traversed, its position in the current
    depth-first search path and its innermost loop header.
    """

    def __init__(self, bb, maxlen):
        """Wrap the basic block *bb*.

        *maxlen* is the width used by ``__str__`` for the loop header column.
        """

        self._bb = bb
        self._maxlen = maxlen

        self._traversed = False
        # Position in the current DFS path; 0 means "not on the path".
        self._dfsp_pos = 0
        # Innermost loop header found so far, as another EnhancedBlock.
        self._iloop_header = None

        # Set when the block is reached through a loop re-entry (case E).
        self._irreducible = False

    def __str__(self):
        """Describe the block: re-entry flag, loop header, then the block."""

        if self._iloop_header is None:
            loop_header = ' ' * self._maxlen
        else:
            first = self._iloop_header._bb.boundaries[0]
            # Pad to the same width as the empty case to keep columns aligned.
            loop_header = ('0x%x' % first.range.addr.phys).ljust(self._maxlen)

        desc = ' %s loop=%s ||' % ('I' if self._irreducible else '-', loop_header)

        desc += stringify_block(self._bb)

        return desc

    def get_successors(self, grp):
        """Return the members of *grp* wrapping the destinations of the block.

        Destinations without a matching entry in *grp* are ignored.
        """

        result = []

        for db, _ in self._bb.destinations:

            succ = find_by_addr(grp, db.boundaries[0].range.addr)

            if succ is not None:
                result.append(succ)

        return result

    def tag_lhead(self, h):
        """Register *h* as a loop header of the block.

        The chain of innermost loop headers starting at the block is walked
        and *h* is inserted into it, the comparison of DFS path positions
        deciding where. Nothing happens when *h* is ``None`` or the block
        itself.
        """

        if h is None or h is self:
            return

        cur1 = self
        cur2 = h

        while cur1._iloop_header is not None:

            ih = cur1._iloop_header

            # Header already present in the chain: nothing left to do.
            if ih is cur2:
                return

            if ih._dfsp_pos < cur2._dfsp_pos:

                # cur2 takes the place of ih, which still has to be
                # inserted further up the chain.
                cur1._iloop_header = cur2
                cur1 = cur2
                cur2 = ih

            else:

                cur1 = ih

        cur1._iloop_header = cur2

    def trav_loops_DFS(self, grp, pos):
        """Traverse the blocks reachable from this one and tag loop headers.

        *grp* is the list of all EnhancedBlock instances, and *pos* the
        (strictly positive) position of the block in the DFS path. The
        innermost loop header of the block is returned, or ``None``.

        NOTE(review): the traversal is recursive, one call level per block
        on the DFS path, so very long paths may hit the interpreter
        recursion limit.
        """

        self._traversed = True
        self._dfsp_pos = pos

        for b in self.get_successors(grp):

            if not b._traversed:

                # Case A: new block
                nh = b.trav_loops_DFS(grp, pos + 1)
                self.tag_lhead(nh)

            elif b._dfsp_pos > 0:

                # Case B: b is in the current DFS path, so it is a loop header
                self.tag_lhead(b)

            elif b._iloop_header is None:

                # Case C: b belongs to no loop, nothing to do
                pass

            else:

                h = b._iloop_header

                if h._dfsp_pos > 0:

                    # Case D: the header of b is in the current DFS path
                    self.tag_lhead(h)

                else:

                    # Case E: re-entry into a loop; only b gets flagged,
                    # the irreducible loops themselves are not recorded.
                    b._irreducible = True

                    # Look for an outer header located in the DFS path
                    while h._iloop_header is not None:

                        h = h._iloop_header

                        if h._dfsp_pos > 0:
                            self.tag_lhead(h)
                            break

        # Clear the DFS path position of the block
        self._dfsp_pos = 0

        return self._iloop_header
+
+
if __name__ == '__main__':

    # Command line driver: load a binary, find the requested function and
    # print its basic blocks with the loop headers computed by EnhancedBlock.

    title = '%s - Implement "a new algorithm for identifying loops in decompilation".' % sys.argv[0]

    parser = argparse.ArgumentParser(description=title, add_help=False)

    # The 'help' action prints the options and exits as soon as the flag is
    # parsed. A plain boolean flag was never reached when the required
    # positional arguments were missing, so '-h' alone could not show help.
    parser.add_argument('-h', '--help', action='help', help='Display the command line options understood by %s.' % sys.argv[0])

    parser.add_argument('binfile', type=str, help='The object file to be examined')
    parser.add_argument('fname', type=str, help='The analyzed function to process')

    args = parser.parse_args()

    prj = StudyProject()

    cnt = FileContent(args.binfile)

    prj.discover(cnt)

    wait_for_all_global_works()

    # No loaded content means the file could not be handled.
    try:
        binary = prj.contents[0]
    except IndexError:
        print('Unable to load "%s"!' % args.binfile)
        sys.exit(1)

    sym = binary.format.find_symbol_by_label(args.fname)

    if not sym:
        print('Function "%s" not found!' % args.fname)
        sys.exit(1)

    blocks = list(sym.basic_blocks)

    # Width of the widest block address, used to align the loop column.
    maxlen = max((len('0x%x' % bb.boundaries[0].range.addr.phys) for bb in blocks), default=0)

    elist = [EnhancedBlock(bb, maxlen) for bb in blocks]

    if not elist:
        print('Function "%s" has no basic block!' % args.fname)
        sys.exit(1)

    # The first block is used as the entry point; positions start at 1
    # because 0 means "not in the DFS path".
    elist[0].trav_loops_DFS(elist, 1)

    for e in elist:
        print(e)