blob: dc99ead61ddbd51f0b40b0b79994bdc2c802a487 [file] [log] [blame] [raw]
// Copyright (c) 2012-2017, Najjar Chedy
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
var _ = require('underscore-node'),
logger = require('./logger').logger;
var InstructionType_jmp = 0;
var InstructionType_conditionalJmpInst = 1;
var InstructionType_notRetInst = 2;
var InstructionType_retInst = 3;
// this mess is intended to be ported to another file.
function separateCodeFromData(asmArr) {
var jmpLabelRegex = /\.L{1}\d+:/;
var isFunctionName = function(x){
return !x.text.includes('.') &&
!x.text.includes(' for ');// deal with typeinfo name for, typeinfo for, vtable for
};
var isCode = function(x){
return x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) ||
isFunctionName(x));
};
return _.chain(asmArr)
.map(_.clone)
.filter(isCode)
.value();
}
var gccX86 = {
filterData: separateCodeFromData,
isFunctionEnd:function(x) { return ((x[0] !== ' ') && (x[0] !== '.') &&
(x.indexOf(':') !== -1)) ;},
isBasicBlockEnd: function(inst, prevInst) { return inst[0] === "." || prevInst.includes(" ret");} ,
getInstructionType:function(inst) {
if(inst.includes("jmp")) return InstructionType_jmp;
else if(inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst;
else if(!inst.includes(" ret")) return InstructionType_notRetInst;
else return InstructionType_retInst;
},
extractNodeName:function(inst) {
var name = inst.match(/\.L{1}\d+/);
return name + ":";
},
isJmpInstruction: function(x) { var trimed = x.trim(); return ( trimed[0] === 'j');}
};
function separateCodeFromData_(asmArr) {
var jmpLabelRegex = /\.LBB\d+_\d+:/;
var isFunctionName = function(x){
var functionNameRegex = /((\w|\(|\))*): # @\1/;
return functionNameRegex.test(x);
};
var isCode = function(x){
return x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) ||
isFunctionName(x.text));
};
var removeComments = function(x) {
var pos = x.text.indexOf("#");
if(pos !== -1)
x.text = x.text.substring(0, pos-1);
return x;
};
return _.chain(asmArr)
.map(_.clone)
.filter(isCode)
.map(removeComments)
.value();
}
var clangX86 = {
filterData: separateCodeFromData_,
isFunctionEnd:function(x) { return ((x[0] !== ' ') && (x[0] !== '.') &&
(x.indexOf(':') !== -1)) ;},
isBasicBlockEnd: function(x) { return x[0] === ".";} ,
getInstructionType:function(inst) {
if(inst.includes("jmp")) return InstructionType_jmp;
else if(inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst;
else if(!inst.includes(" ret")) return InstructionType_notRetInst;
else return InstructionType_retInst;
},
extractNodeName:function(inst) {
var name = inst.match(/\.LBB\d+_\d+/);
return name + ":";
},
isJmpInstruction: function(x) { var trimed = x.trim(); return ( trimed[0] === 'j');}
};
function ControlFlowGraph(compiler){
if(compiler.includes("clang"))
this.rules = clangX86;
else
this.rules = gccX86;
}
ControlFlowGraph.prototype.splitToFunctions = function (asmArr, isEnd) {
if(asmArr.length === 0) return [];
var result = [];
var first = 0;
var last = asmArr.length;
var fnRange = {start:first, end:null};
++first;
while(first != last) {
if(isEnd(asmArr[first].text)) {
fnRange.end = first;
result.push(_.clone(fnRange));
fnRange.start = first;
}
++first;
}
fnRange.end = last;
result.push(_.clone(fnRange));
return result;
};
ControlFlowGraph.prototype.splitToBasicBlocks = function (asmArr, range, isEnd, isJmp) {
var first = range.start;
var last = range.end;
if(first == last) return [];
var functionName = asmArr[first].text;
++first;
var rangeBb = {nameId:functionName.substr(0,50) , start:first , end:null, actionPos:[]};
var result = [];
var newRangeWith = function (oldRange, nameId, start) {
return {nameId: nameId, start: start, actionPos: [], end: oldRange.end};
};
while(first != last) {
var inst = asmArr[first].text;
if(isEnd(inst, asmArr[first-1]? asmArr[first-1].text:"")) {
rangeBb.end = first;
result.push(_.clone(rangeBb));
++first;
//inst is expected to be .L*: where * in 1,2,...
rangeBb = newRangeWith(rangeBb, inst, first);
}
else if(isJmp(inst)) {
rangeBb.actionPos.push(first);
}
++first;
}
rangeBb.end = last;
result.push(_.clone(rangeBb));
return result;
};
ControlFlowGraph.prototype.splitToCanonicalBasicBlock = function (basicBlock) {
var actionPos = basicBlock.actionPos;
var actPosSz = actionPos.length;
if (actionPos[actPosSz-1] + 1 == basicBlock.end){
--actPosSz;
}
if(actPosSz === 0)
return [{
nameId: basicBlock.nameId,
start: basicBlock.start,
end: basicBlock.end
}];
else if(actPosSz == 1)
return [
{nameId: basicBlock.nameId,start: basicBlock.start,end:actionPos[0]+1},
{nameId: basicBlock.nameId+"@"+ (actionPos[0]+1),start: actionPos[0]+1,end: basicBlock.end}
];
else {
var first = 0;
var last = actPosSz;
var blockName = basicBlock.nameId;
var tmp = {nameId:blockName, start:basicBlock.start, end:actionPos[first]+1 };
var result = [];
result.push(_.clone(tmp));
while(first != last-1) {
tmp.nameId = blockName + "@" + (actionPos[first]+1);
tmp.start = actionPos[first]+1;
++first;
tmp.end = actionPos[first]+1;
result.push(_.clone(tmp));
}
tmp = {nameId:blockName + "@" + (actionPos[first] + 1), start:actionPos[first] + 1, end:basicBlock.end };
result.push(_.clone(tmp));
return result;
}
};
ControlFlowGraph.prototype.concatInstructions = function (asmArr, first, last) {
return _.chain(asmArr.slice(first, last))
.map(function (x) { return x.text.substr(0, 50); })
.value()
.join("\n");
};
ControlFlowGraph.prototype.makeNodes = function (asmArr, arrOfCanonicalBasicBlock) {
var node = {};
var nodes = [];
_.each(arrOfCanonicalBasicBlock, _.bind(function(elm){
logger.debug("node name:");
logger.debug(elm.nameId);
node.id = elm.nameId;
node.label = elm.nameId + ((elm.nameId.indexOf(":") !== -1)? "":":") +"\n"+
this.concatInstructions(asmArr,elm.start, elm.end);
node.color = "#99ccff";
node.shape = 'box';
nodes.push(_.clone(node));
}, this));
return nodes;
};
ControlFlowGraph.prototype.makeEdges = function (asmArr, arrOfCanonicalBasicBlock, instructionType, extractNodeNameFromInstruction) {
var edge = {};
var edges = [];
var setEdge = function(edge, sourceNode, targetNode, color) {
edge.from = sourceNode;
edge.to = targetNode;
edge.arrows = "to";
edge.color = color;
};
var isBasicBlockEnd = this.rules.isBasicBlockEnd;
var hasName = function(asmArr, cbb){
return isBasicBlockEnd(asmArr[cbb.end].text, "");
};
var generateName = function(name, suffix){
var pos = name.indexOf("@");
if(pos == -1)
return name + "@" + suffix;
return name.substring(0, pos+1)+ suffix;
};
// note: x.end-1 possible value: jmp .L*, {jne,je,jg,...} .L*, ret/rep ret, call and any other instruction that doesn't change control flow
_.each(arrOfCanonicalBasicBlock, function(x){
var targetNode;
var lastInst = asmArr[x.end-1].text;
switch(instructionType(lastInst)) {
case InstructionType_jmp:
{
//we have to deal only with jmp destination, jmp instruction are always taken.
//edge from jump inst
logger.debug("jmp");
targetNode = extractNodeNameFromInstruction(lastInst);
setEdge(edge, x.nameId, targetNode, 'blue');
edges.push(_.clone(edge));
logger.debug(edge);
}
break;
case InstructionType_conditionalJmpInst:
{
logger.debug("condit jmp");
//deal with : branch taken, branch not taken
targetNode = extractNodeNameFromInstruction(lastInst);
setEdge(edge, x.nameId, targetNode, 'green');
edges.push(_.clone(edge));
logger.debug(edge);
targetNode = hasName(asmArr,x)? asmArr[x.end].text: generateName(x.nameId, x.end);
setEdge(edge, x.nameId, targetNode, "red");
edges.push(_.clone(edge));
logger.debug(edge);
}
break;
case InstructionType_notRetInst:
{
//precondition: lastInst is not last instruction in asmArr (but it is in canonical basic block)
//note : asmArr[x.end] expected to be .L*:(name of a basic block)
// this .L*: has to be exactly after the last instruction in the current canocial basic block
if(asmArr[x.end]){
targetNode = asmArr[x.end].text;
setEdge(edge, x.nameId, targetNode, 'grey');
edges.push(_.clone(edge));
logger.debug("not ret inst");
logger.debug(edge);
}else {
logger.debug(x);
}
}
break;
case InstructionType_retInst:
logger.debug("expect ret instruction or it's variants(rep ret): "+ lastInst);
break;
}
});
logger.debug(edges);
return edges;
};
ControlFlowGraph.prototype.generateCfgStructure = function (asmArr){
var code = this.rules.filterData(asmArr);
var funcs = this.splitToFunctions(code, this.rules.isFunctionEnd);
if (!funcs.length) {
return funcs;
}
var result = {};
_.each(funcs, _.bind(function(rng){
var basicBlocks = this.splitToBasicBlocks(code, rng, this.rules.isBasicBlockEnd,
this.rules.isJmpInstruction);
var arrOfCanonicalBasicBlock = [];
_.each(basicBlocks, _.bind(function(elm){
var tmp = this.splitToCanonicalBasicBlock(elm);
arrOfCanonicalBasicBlock = arrOfCanonicalBasicBlock.concat(tmp);
}, this));
result[code[rng.start].text] = {
nodes:this.makeNodes(code, arrOfCanonicalBasicBlock),
edges: this.makeEdges(code, arrOfCanonicalBasicBlock,
this.rules.getInstructionType,
this.rules.extractNodeName)
};
}, this));
return result;
};
module.exports.ControlFlowGraph = ControlFlowGraph;