| // Copyright (c) 2012-2017, Najjar Chedy |
| // |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| |
| |
| var _ = require('underscore-node'), |
| logger = require('./logger').logger; |
| |
| |
| var InstructionType_jmp = 0; |
| var InstructionType_conditionalJmpInst = 1; |
| var InstructionType_notRetInst = 2; |
| var InstructionType_retInst = 3; |
| |
| |
| // this mess is intended to be ported to another file. |
| function separateCodeFromData(asmArr) { |
| |
| var jmpLabelRegex = /\.L{1}\d+:/; |
| |
| var isFunctionName = function(x){ |
| return !x.text.includes('.') && |
| !x.text.includes(' for ');// deal with typeinfo name for, typeinfo for, vtable for |
| }; |
| |
| var isCode = function(x){ |
| return x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || |
| isFunctionName(x)); |
| }; |
| |
| return _.chain(asmArr) |
| .map(_.clone) |
| .filter(isCode) |
| .value(); |
| } |
| |
| var gccX86 = { |
| filterData: separateCodeFromData, |
| isFunctionEnd:function(x) { return ((x[0] !== ' ') && (x[0] !== '.') && |
| (x.indexOf(':') !== -1)) ;}, |
| |
| isBasicBlockEnd: function(inst, prevInst) { return inst[0] === "." || prevInst.includes(" ret");} , |
| |
| getInstructionType:function(inst) { |
| if(inst.includes("jmp")) return InstructionType_jmp; |
| else if(inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst; |
| else if(!inst.includes(" ret")) return InstructionType_notRetInst; |
| else return InstructionType_retInst; |
| }, |
| |
| extractNodeName:function(inst) { |
| var name = inst.match(/\.L{1}\d+/); |
| return name + ":"; |
| }, |
| |
| isJmpInstruction: function(x) { var trimed = x.trim(); return ( trimed[0] === 'j');} |
| }; |
| |
| function separateCodeFromData_(asmArr) { |
| |
| var jmpLabelRegex = /\.LBB\d+_\d+:/; |
| var isFunctionName = function(x){ |
| var functionNameRegex = /((\w|\(|\))*): # @\1/; |
| return functionNameRegex.test(x); |
| }; |
| |
| var isCode = function(x){ |
| return x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || |
| isFunctionName(x.text)); |
| }; |
| |
| var removeComments = function(x) { |
| var pos = x.text.indexOf("#"); |
| if(pos !== -1) |
| x.text = x.text.substring(0, pos-1); |
| return x; |
| }; |
| |
| return _.chain(asmArr) |
| .map(_.clone) |
| .filter(isCode) |
| .map(removeComments) |
| .value(); |
| } |
| |
| var clangX86 = { |
| filterData: separateCodeFromData_, |
| isFunctionEnd:function(x) { return ((x[0] !== ' ') && (x[0] !== '.') && |
| (x.indexOf(':') !== -1)) ;}, |
| |
| isBasicBlockEnd: function(x) { return x[0] === ".";} , |
| |
| getInstructionType:function(inst) { |
| if(inst.includes("jmp")) return InstructionType_jmp; |
| else if(inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst; |
| else if(!inst.includes(" ret")) return InstructionType_notRetInst; |
| else return InstructionType_retInst; |
| }, |
| |
| extractNodeName:function(inst) { |
| var name = inst.match(/\.LBB\d+_\d+/); |
| return name + ":"; |
| }, |
| |
| isJmpInstruction: function(x) { var trimed = x.trim(); return ( trimed[0] === 'j');} |
| }; |
| |
| function ControlFlowGraph(compiler){ |
| if(compiler.includes("clang")) |
| this.rules = clangX86; |
| else |
| this.rules = gccX86; |
| } |
| |
| ControlFlowGraph.prototype.splitToFunctions = function (asmArr, isEnd) { |
| if(asmArr.length === 0) return []; |
| var result = []; |
| var first = 0; |
| var last = asmArr.length; |
| var fnRange = {start:first, end:null}; |
| ++first; |
| while(first != last) { |
| if(isEnd(asmArr[first].text)) { |
| fnRange.end = first; |
| result.push(_.clone(fnRange)); |
| fnRange.start = first; |
| } |
| ++first; |
| } |
| |
| fnRange.end = last; |
| result.push(_.clone(fnRange)); |
| return result; |
| }; |
| |
| |
| ControlFlowGraph.prototype.splitToBasicBlocks = function (asmArr, range, isEnd, isJmp) { |
| var first = range.start; |
| var last = range.end; |
| if(first == last) return []; |
| var functionName = asmArr[first].text; |
| ++first; |
| |
| var rangeBb = {nameId:functionName.substr(0,50) , start:first , end:null, actionPos:[]}; |
| var result = []; |
| |
| var newRangeWith = function (oldRange, nameId, start) { |
| return {nameId: nameId, start: start, actionPos: [], end: oldRange.end}; |
| }; |
| |
| while(first != last) { |
| var inst = asmArr[first].text; |
| if(isEnd(inst, asmArr[first-1]? asmArr[first-1].text:"")) { |
| rangeBb.end = first; |
| result.push(_.clone(rangeBb)); |
| ++first; |
| //inst is expected to be .L*: where * in 1,2,... |
| rangeBb = newRangeWith(rangeBb, inst, first); |
| } |
| else if(isJmp(inst)) { |
| rangeBb.actionPos.push(first); |
| } |
| ++first; |
| } |
| |
| rangeBb.end = last; |
| result.push(_.clone(rangeBb)); |
| return result; |
| }; |
| |
| ControlFlowGraph.prototype.splitToCanonicalBasicBlock = function (basicBlock) { |
| var actionPos = basicBlock.actionPos; |
| var actPosSz = actionPos.length; |
| if (actionPos[actPosSz-1] + 1 == basicBlock.end){ |
| --actPosSz; |
| } |
| |
| if(actPosSz === 0) |
| return [{ |
| nameId: basicBlock.nameId, |
| start: basicBlock.start, |
| end: basicBlock.end |
| }]; |
| else if(actPosSz == 1) |
| return [ |
| {nameId: basicBlock.nameId,start: basicBlock.start,end:actionPos[0]+1}, |
| {nameId: basicBlock.nameId+"@"+ (actionPos[0]+1),start: actionPos[0]+1,end: basicBlock.end} |
| ]; |
| else { |
| var first = 0; |
| var last = actPosSz; |
| var blockName = basicBlock.nameId; |
| var tmp = {nameId:blockName, start:basicBlock.start, end:actionPos[first]+1 }; |
| var result = []; |
| result.push(_.clone(tmp)); |
| while(first != last-1) { |
| tmp.nameId = blockName + "@" + (actionPos[first]+1); |
| tmp.start = actionPos[first]+1; |
| ++first; |
| tmp.end = actionPos[first]+1; |
| result.push(_.clone(tmp)); |
| } |
| |
| tmp = {nameId:blockName + "@" + (actionPos[first] + 1), start:actionPos[first] + 1, end:basicBlock.end }; |
| result.push(_.clone(tmp)); |
| |
| return result; |
| |
| } |
| |
| }; |
| |
| |
| ControlFlowGraph.prototype.concatInstructions = function (asmArr, first, last) { |
| return _.chain(asmArr.slice(first, last)) |
| .map(function (x) { return x.text.substr(0, 50); }) |
| .value() |
| .join("\n"); |
| }; |
| |
| |
| ControlFlowGraph.prototype.makeNodes = function (asmArr, arrOfCanonicalBasicBlock) { |
| var node = {}; |
| var nodes = []; |
| |
| _.each(arrOfCanonicalBasicBlock, _.bind(function(elm){ |
| logger.debug("node name:"); |
| logger.debug(elm.nameId); |
| node.id = elm.nameId; |
| node.label = elm.nameId + ((elm.nameId.indexOf(":") !== -1)? "":":") +"\n"+ |
| this.concatInstructions(asmArr,elm.start, elm.end); |
| node.color = "#99ccff"; |
| node.shape = 'box'; |
| nodes.push(_.clone(node)); |
| }, this)); |
| return nodes; |
| }; |
| |
| ControlFlowGraph.prototype.makeEdges = function (asmArr, arrOfCanonicalBasicBlock, instructionType, extractNodeNameFromInstruction) { |
| var edge = {}; |
| var edges = []; |
| |
| var setEdge = function(edge, sourceNode, targetNode, color) { |
| edge.from = sourceNode; |
| edge.to = targetNode; |
| edge.arrows = "to"; |
| edge.color = color; |
| }; |
| var isBasicBlockEnd = this.rules.isBasicBlockEnd; |
| |
| var hasName = function(asmArr, cbb){ |
| return isBasicBlockEnd(asmArr[cbb.end].text, ""); |
| }; |
| |
| |
| |
| var generateName = function(name, suffix){ |
| var pos = name.indexOf("@"); |
| if(pos == -1) |
| return name + "@" + suffix; |
| |
| return name.substring(0, pos+1)+ suffix; |
| }; |
| |
| // note: x.end-1 possible value: jmp .L*, {jne,je,jg,...} .L*, ret/rep ret, call and any other instruction that doesn't change control flow |
| |
| _.each(arrOfCanonicalBasicBlock, function(x){ |
| var targetNode; |
| var lastInst = asmArr[x.end-1].text; |
| switch(instructionType(lastInst)) { |
| case InstructionType_jmp: |
| { |
| |
| //we have to deal only with jmp destination, jmp instruction are always taken. |
| //edge from jump inst |
| logger.debug("jmp"); |
| targetNode = extractNodeNameFromInstruction(lastInst); |
| setEdge(edge, x.nameId, targetNode, 'blue'); |
| |
| edges.push(_.clone(edge)); |
| logger.debug(edge); |
| } |
| break; |
| case InstructionType_conditionalJmpInst: |
| { |
| logger.debug("condit jmp"); |
| //deal with : branch taken, branch not taken |
| targetNode = extractNodeNameFromInstruction(lastInst); |
| setEdge(edge, x.nameId, targetNode, 'green'); |
| edges.push(_.clone(edge)); |
| logger.debug(edge); |
| |
| targetNode = hasName(asmArr,x)? asmArr[x.end].text: generateName(x.nameId, x.end); |
| setEdge(edge, x.nameId, targetNode, "red"); |
| edges.push(_.clone(edge)); |
| |
| logger.debug(edge); |
| } |
| break; |
| case InstructionType_notRetInst: |
| { |
| //precondition: lastInst is not last instruction in asmArr (but it is in canonical basic block) |
| //note : asmArr[x.end] expected to be .L*:(name of a basic block) |
| // this .L*: has to be exactly after the last instruction in the current canocial basic block |
| if(asmArr[x.end]){ |
| targetNode = asmArr[x.end].text; |
| setEdge(edge, x.nameId, targetNode, 'grey'); |
| edges.push(_.clone(edge)); |
| logger.debug("not ret inst"); |
| logger.debug(edge); |
| }else { |
| logger.debug(x); |
| } |
| } |
| break; |
| case InstructionType_retInst: |
| logger.debug("expect ret instruction or it's variants(rep ret): "+ lastInst); |
| break; |
| } |
| }); |
| logger.debug(edges); |
| return edges; |
| }; |
| |
| |
| ControlFlowGraph.prototype.generateCfgStructure = function (asmArr){ |
| var code = this.rules.filterData(asmArr); |
| var funcs = this.splitToFunctions(code, this.rules.isFunctionEnd); |
| if (!funcs.length) { |
| return funcs; |
| } |
| var result = {}; |
| _.each(funcs, _.bind(function(rng){ |
| var basicBlocks = this.splitToBasicBlocks(code, rng, this.rules.isBasicBlockEnd, |
| this.rules.isJmpInstruction); |
| var arrOfCanonicalBasicBlock = []; |
| _.each(basicBlocks, _.bind(function(elm){ |
| var tmp = this.splitToCanonicalBasicBlock(elm); |
| arrOfCanonicalBasicBlock = arrOfCanonicalBasicBlock.concat(tmp); |
| }, this)); |
| |
| result[code[rng.start].text] = { |
| nodes:this.makeNodes(code, arrOfCanonicalBasicBlock), |
| edges: this.makeEdges(code, arrOfCanonicalBasicBlock, |
| this.rules.getInstructionType, |
| this.rules.extractNodeName) |
| }; |
| }, this)); |
| |
| return result; |
| }; |
| |
| module.exports.ControlFlowGraph = ControlFlowGraph; |
| |