| // Copyright (c) 2012-2018, Najjar Chedy |
| // |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| var _ = require('underscore-node'), |
| logger = require('./logger').logger; |
| |
| var InstructionType_jmp = 0; |
| var InstructionType_conditionalJmpInst = 1; |
| var InstructionType_notRetInst = 2; |
| var InstructionType_retInst = 3; |
| |
| var isFunctionName = function (x) { |
| return x.text.trim().indexOf('.') !== 0 && |
| !x.text.includes(' for ');// deal with typeinfo name for, typeinfo for, vtable for |
| }; |
| |
| // this mess is intended to be ported to another file. |
| function separateCodeFromData(asmArr) { |
| |
| var jmpLabelRegex = /\.L{1}\d+:/; |
| |
| var isCode = function (x) { |
| return x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || |
| isFunctionName(x)); |
| }; |
| |
| return _.chain(asmArr) |
| .map(_.clone) |
| .filter(isCode) |
| .value(); |
| } |
| |
| var gccX86 = { |
| filterData: separateCodeFromData, |
| isFunctionEnd: function (x) { |
| return ((x[0] !== ' ') && (x[0] !== '.') && |
| (x.indexOf(':') !== -1)); |
| }, |
| |
| isBasicBlockEnd: function (inst, prevInst) { |
| return inst[0] === '.' || prevInst.includes(' ret'); |
| }, |
| |
| getInstructionType: function (inst) { |
| if (inst.includes('jmp')) return InstructionType_jmp; |
| else if (inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst; |
| else if (!inst.includes(' ret')) return InstructionType_notRetInst; |
| else return InstructionType_retInst; |
| }, |
| |
| extractNodeName: function (inst) { |
| var name = inst.match(/\.L{1}\d+/); |
| return name + ':'; |
| }, |
| |
| isJmpInstruction: function (x) { |
| var trimed = x.trim(); |
| return ( trimed[0] === 'j'); |
| } |
| }; |
| |
| function separateCodeFromData_(asmArr) { |
| |
| var jmpLabelRegex = /\.LBB\d+_\d+:/; |
| |
| var isCode = function (x) { |
| return x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || |
| isFunctionName(x)); |
| }; |
| |
| var removeComments = function (x) { |
| var pos = x.text.indexOf('#'); |
| if (pos !== -1) |
| x.text = x.text.substring(0, pos - 1); |
| return x; |
| }; |
| |
| return _.chain(asmArr) |
| .map(_.clone) |
| .filter(isCode) |
| .map(removeComments) |
| .value(); |
| } |
| |
| var clangX86 = { |
| filterData: separateCodeFromData_, |
| isFunctionEnd: function (x) { |
| return ((x[0] !== ' ') && (x[0] !== '.') && |
| (x.indexOf(':') !== -1)); |
| }, |
| |
| isBasicBlockEnd: function (inst, prevInst) { |
| return inst[0] === '.' || prevInst.includes(' ret'); |
| }, |
| |
| getInstructionType: function (inst) { |
| if (inst.includes('jmp')) return InstructionType_jmp; |
| else if (inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst; |
| else if (!inst.includes(' ret')) return InstructionType_notRetInst; |
| else return InstructionType_retInst; |
| }, |
| |
| extractNodeName: function (inst) { |
| var name = inst.match(/\.LBB\d+_\d+/); |
| return name + ':'; |
| }, |
| |
| isJmpInstruction: function (x) { |
| var trimed = x.trim(); |
| return ( trimed[0] === 'j'); |
| } |
| }; |
| |
| function ControlFlowGraph(compiler) { |
| if (compiler.includes('clang')) |
| this.rules = clangX86; |
| else |
| this.rules = gccX86; |
| } |
| |
| ControlFlowGraph.prototype.splitToFunctions = function (asmArr, isEnd) { |
| if (asmArr.length === 0) return []; |
| var result = []; |
| var first = 1; |
| var last = asmArr.length; |
| var fnRange = {start: 0, end: null}; |
| while (first !== last) { |
| if (isEnd(asmArr[first].text)) { |
| fnRange.end = first; |
| result.push(_.clone(fnRange)); |
| fnRange.start = first; |
| } |
| ++first; |
| } |
| |
| fnRange.end = last; |
| result.push(_.clone(fnRange)); |
| return result; |
| }; |
| |
| ControlFlowGraph.prototype.splitToBasicBlocks = function (asmArr, range, isEnd, isJmp) { |
| var first = range.start; |
| var last = range.end; |
| if (first === last) return []; |
| var functionName = asmArr[first].text; |
| ++first; |
| |
| var rangeBb = {nameId: functionName.substr(0, 50), start: first, end: null, actionPos: []}; |
| var result = []; |
| |
| var newRangeWith = function (oldRange, nameId, start) { |
| return {nameId: nameId, start: start, actionPos: [], end: oldRange.end}; |
| }; |
| |
| while (first < last) { |
| var inst = asmArr[first].text; |
| if (isEnd(inst, asmArr[first - 1] ? asmArr[first - 1].text : '')) { |
| rangeBb.end = first; |
| result.push(_.clone(rangeBb)); |
| //inst is expected to be .L*: where * in 1,2,... |
| rangeBb = newRangeWith(rangeBb, inst, first + 1); |
| } |
| else if (isJmp(inst)) { |
| rangeBb.actionPos.push(first); |
| } |
| ++first; |
| } |
| |
| rangeBb.end = last; |
| result.push(_.clone(rangeBb)); |
| return result; |
| }; |
| |
| ControlFlowGraph.prototype.splitToCanonicalBasicBlock = function (basicBlock) { |
| var actionPos = basicBlock.actionPos; |
| var actPosSz = actionPos.length; |
| if (actionPos[actPosSz - 1] + 1 === basicBlock.end) { |
| --actPosSz; |
| } |
| |
| if (actPosSz === 0) |
| return [{ |
| nameId: basicBlock.nameId, |
| start: basicBlock.start, |
| end: basicBlock.end |
| }]; |
| else if (actPosSz === 1) |
| return [ |
| {nameId: basicBlock.nameId, start: basicBlock.start, end: actionPos[0] + 1}, |
| {nameId: basicBlock.nameId + '@' + (actionPos[0] + 1), start: actionPos[0] + 1, end: basicBlock.end} |
| ]; |
| else { |
| var first = 0; |
| var last = actPosSz; |
| var blockName = basicBlock.nameId; |
| var tmp = {nameId: blockName, start: basicBlock.start, end: actionPos[first] + 1}; |
| var result = []; |
| result.push(_.clone(tmp)); |
| while (first !== last - 1) { |
| tmp.nameId = blockName + '@' + (actionPos[first] + 1); |
| tmp.start = actionPos[first] + 1; |
| ++first; |
| tmp.end = actionPos[first] + 1; |
| result.push(_.clone(tmp)); |
| } |
| |
| tmp = {nameId: blockName + '@' + (actionPos[first] + 1), start: actionPos[first] + 1, end: basicBlock.end}; |
| result.push(_.clone(tmp)); |
| |
| return result; |
| |
| } |
| |
| }; |
| |
| ControlFlowGraph.prototype.concatInstructions = function (asmArr, first, last) { |
| return _.chain(asmArr.slice(first, last)) |
| .map(function (x) { |
| return x.text.substr(0, 50); |
| }) |
| .value() |
| .join('\n'); |
| }; |
| |
| ControlFlowGraph.prototype.makeNodes = function (asmArr, arrOfCanonicalBasicBlock) { |
| var node = {}; |
| var nodes = []; |
| |
| _.each(arrOfCanonicalBasicBlock, _.bind(function (elm) { |
| logger.debug('node name:'); |
| logger.debug(elm.nameId); |
| node.id = elm.nameId; |
| node.label = elm.nameId + ((elm.nameId.indexOf(':') !== -1) ? '' : ':') + '\n' + |
| this.concatInstructions(asmArr, elm.start, elm.end); |
| node.color = '#99ccff'; |
| node.shape = 'box'; |
| nodes.push(_.clone(node)); |
| }, this)); |
| return nodes; |
| }; |
| |
| ControlFlowGraph.prototype.makeEdges = function (asmArr, arrOfCanonicalBasicBlock, instructionType, extractNodeNameFromInstruction) { |
| var edge = {}; |
| var edges = []; |
| |
| var setEdge = function (edge, sourceNode, targetNode, color) { |
| edge.from = sourceNode; |
| edge.to = targetNode; |
| edge.arrows = 'to'; |
| edge.color = color; |
| }; |
| var isBasicBlockEnd = this.rules.isBasicBlockEnd; |
| |
| var hasName = function (asmArr, cbb) { |
| var asm = asmArr[cbb.end]; |
| return asm ? isBasicBlockEnd(asm.text, '') : false; |
| }; |
| |
| var generateName = function (name, suffix) { |
| var pos = name.indexOf('@'); |
| if (pos === -1) |
| return name + '@' + suffix; |
| |
| return name.substring(0, pos + 1) + suffix; |
| }; |
| |
| // note: x.end-1 possible value: jmp .L*, {jne,je,jg,...} .L*, ret/rep ret, call and any other instruction that doesn't change control flow |
| |
| _.each(arrOfCanonicalBasicBlock, function (x) { |
| var targetNode; |
| var lastInst = asmArr[x.end - 1].text; |
| switch (instructionType(lastInst)) { |
| case InstructionType_jmp: { |
| |
| //we have to deal only with jmp destination, jmp instruction are always taken. |
| //edge from jump inst |
| logger.debug('jmp'); |
| targetNode = extractNodeNameFromInstruction(lastInst); |
| setEdge(edge, x.nameId, targetNode, 'blue'); |
| |
| edges.push(_.clone(edge)); |
| logger.debug(edge); |
| } |
| break; |
| case InstructionType_conditionalJmpInst: { |
| logger.debug('condit jmp'); |
| //deal with : branch taken, branch not taken |
| targetNode = extractNodeNameFromInstruction(lastInst); |
| setEdge(edge, x.nameId, targetNode, 'green'); |
| edges.push(_.clone(edge)); |
| logger.debug(edge); |
| |
| targetNode = hasName(asmArr, x) ? asmArr[x.end].text : generateName(x.nameId, x.end); |
| setEdge(edge, x.nameId, targetNode, 'red'); |
| edges.push(_.clone(edge)); |
| |
| logger.debug(edge); |
| } |
| break; |
| case InstructionType_notRetInst: { |
| //precondition: lastInst is not last instruction in asmArr (but it is in canonical basic block) |
| //note : asmArr[x.end] expected to be .L*:(name of a basic block) |
| // this .L*: has to be exactly after the last instruction in the current canocial basic block |
| if (asmArr[x.end]) { |
| targetNode = asmArr[x.end].text; |
| setEdge(edge, x.nameId, targetNode, 'grey'); |
| edges.push(_.clone(edge)); |
| logger.debug('not ret inst'); |
| logger.debug(edge); |
| } else { |
| logger.debug(x); |
| } |
| } |
| break; |
| case InstructionType_retInst: |
| logger.debug('expect ret instruction or it\'s variants(rep ret): ' + lastInst); |
| break; |
| } |
| }); |
| logger.debug(edges); |
| return edges; |
| }; |
| |
| |
| ControlFlowGraph.prototype.generateCfgStructure = function (asmArr) { |
| var code = this.rules.filterData(asmArr); |
| var funcs = this.splitToFunctions(code, this.rules.isFunctionEnd); |
| if (!funcs.length) { |
| return funcs; |
| } |
| var result = {}; |
| _.each(funcs, _.bind(function (rng) { |
| var basicBlocks = this.splitToBasicBlocks(code, rng, this.rules.isBasicBlockEnd, |
| this.rules.isJmpInstruction); |
| var arrOfCanonicalBasicBlock = []; |
| _.each(basicBlocks, _.bind(function (elm) { |
| var tmp = this.splitToCanonicalBasicBlock(elm); |
| arrOfCanonicalBasicBlock = arrOfCanonicalBasicBlock.concat(tmp); |
| }, this)); |
| |
| result[code[rng.start].text] = { |
| nodes: this.makeNodes(code, arrOfCanonicalBasicBlock), |
| edges: this.makeEdges(code, arrOfCanonicalBasicBlock, |
| this.rules.getInstructionType, |
| this.rules.extractNodeName) |
| }; |
| }, this)); |
| |
| return result; |
| }; |
| |
| module.exports.ControlFlowGraph = ControlFlowGraph; |
| |