blob: 96666eb6afb2c534927cdb16b35a2bb990929235 [file] [log] [blame] [raw]
// Copyright (c) 2012-2017, Najjar Chedy
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
var _ = require('underscore-node'),
logger = require('./logger').logger;
var InstructionType_jmp = 0;
var InstructionType_conditionalJmpInst = 1;
var InstructionType_notRetInst = 2;
var InstructionType_retInst = 3;
function ControlFlowGraph(rules){
this.rules = rules;
}
function separateCodeFromData(asmArr) {
var isCode = function(asmArr, index){
return (((asmArr[index].source !== null) &&
(asmArr[index+1].source !== null)) ||
((asmArr[index].source === null) &&
(asmArr[index+1].source !== null)) ||
((asmArr[index].source !== null) &&
(asmArr[index+1].source === null)));
};
var code = [];
var first = 0;
var last = asmArr.length;
if(first == last) return code;
while(first != last-1){
if(isCode(asmArr, first))
code.push(JSON.parse(JSON.stringify(asmArr[first])));
++first;
}
if(asmArr[first].source){
code.push(JSON.parse(JSON.stringify(asmArr[first])));
}
return code;
}
var gccX86 = {
filterData: separateCodeFromData,
isFunctionEnd:function(x) { return ((x[0] != ' ') && (x[0] != '.') &&
(x.indexOf(':') != -1)) ;},
isBasicBlockEnd: function(x) { return x[0] == ".";} ,
getInstructionType:function(inst) {
inst = inst.trim();
if(inst.includes("jmp")) return InstructionType_jmp;
else if(inst[0] === 'j') return InstructionType_conditionalJmpInst;
else if(!inst.includes("ret")) return InstructionType_notRetInst;
else return InstructionType_retInst;
},
extractNodeName:function(inst) {
var name = inst.match(/.L\d+/);
return name + ":";
},
isJmpInstruction: function(x) { var trimed = x.trim(); return ( trimed[0] == 'j');}
};
ControlFlowGraph.prototype.splitToFunctions = function (asmArr, isEnd) {
if(asmArr.length === 0) return [];
var result = [];
var first = 0;
var last = asmArr.length;
var fnRange = {start:first, end:null};
++first;
while(first != last) {
if(isEnd(asmArr[first].text)) {
fnRange.end = first;
result.push(JSON.parse(JSON.stringify(fnRange)));
fnRange.start = first;
}
++first;
}
fnRange.end = last;
result.push(fnRange);//possiblitiy of bug?! because didn't clone(JSON.parse(JSON.stringify(...))
return result;
};
ControlFlowGraph.prototype.splitToBasicBlocks = function (asmArr, range, isEnd, isJmp) {
var first = range.start;
var last = range.end;
if(first == last) return [];
++first;
var rangeBb = {nameId:"start" , start:first , end:null, actionPos:[]};
var result = [];
var resetRangeWith = function(rangeBb, nameId, start) {
rangeBb.nameId = nameId;//.L1:
rangeBb.start = start;//after .L1:
rangeBb.actionPos = [];
};
while(first != last) {
var inst = asmArr[first].text;
if(isEnd(inst)) {
rangeBb.end = first;
result.push(JSON.parse(JSON.stringify(rangeBb)));
++first;//risk of a bug.
//inst is expected to be .L*: where * in 1,2,...
resetRangeWith(rangeBb, inst, first);
}
else if(isJmp(inst)) {
rangeBb.actionPos.push(first);
}
++first;
}
rangeBb.end = last;
result.push(JSON.parse(JSON.stringify(rangeBb)));
return result;
};
ControlFlowGraph.prototype.splitToCanonicalBasicBlock = function (basicBlock) {
var actionPos = basicBlock.actionPos;
var actPosSz = actionPos.length;
if (actionPos[actPosSz-1] + 1 == basicBlock.end){
--actPosSz;
}
if(actPosSz === 0)
return [{
nameId: basicBlock.nameId,
start: basicBlock.start,
end: basicBlock.end
}];
else if(actPosSz == 1)
return [
{nameId: basicBlock.nameId,start: basicBlock.start,end:actionPos[0]+1},
{nameId: basicBlock.nameId+"@"+ (actionPos[0]+1),start: actionPos[0]+1,end: basicBlock.end}
];
else {
var first = 0;
var last = actPosSz;
var block_name = basicBlock.nameId;
var tmp = {nameId:block_name, start:basicBlock.start, end:actionPos[first]+1 };
var result = [];
result.push(JSON.parse(JSON.stringify(tmp)));
while(first != last-1) {
tmp.nameId = block_name + "@" + (actionPos[first]+1);
tmp.start = actionPos[first]+1;
++first;
tmp.end = actionPos[first]+1;
result.push(JSON.parse(JSON.stringify(tmp)));
}
tmp = {nameId:block_name + "@" + (actionPos[first] + 1), start:actionPos[first] + 1, end:basicBlock.end };
result.push(JSON.parse(JSON.stringify(tmp)));
return result;
}
};
/*
ControlFlowGraph.prototype.concatInstructions = function (asmArr, first, last) {
var makeShorter = function(instruction){
return instruction.substr(0, 50);
};
if(first == last) return "";//if last -1 is changed to last this line is no longuer needed
var result = "";
//TODO: include \n for long templated instructions
while(first != last-1) {//added to delete last \n and handle the last concat outside loop
logger.debug(asmArr[first].text);
result += makeShorter(asmArr[first].text) + "\n";
++first;
}
//last concat withou \n
result += makeShorter(asmArr[first].text);
logger.debug(asmArr[first].text);
return result;
};
*/
ControlFlowGraph.prototype.concatInstructions = function (asmArr, first, last) {
return _.chain(asmArr.slice(first, last))
.map(function (x) { return x.text.substr(0, 50); })
.value()
.join("\n");
};
ControlFlowGraph.prototype.makeNodes = function (asmArr, arrOfCanonicalBasicBlock) {
var node = {};
var nodes = [];
_.each(arrOfCanonicalBasicBlock, _.bind(function(x){
logger.debug("node name:");
logger.debug(x.nameId);
node.id = x.nameId;
node.label = x.nameId + ((x.nameId.indexOf(":") !== -1)? "":":") +"\n"+
this.concatInstructions(asmArr,x.start, x.end);
node.color = "#99ccff";//"#FFAFAF";
node.shape = 'box';
nodes.push(JSON.parse(JSON.stringify(node)));
}, this));
return nodes;
};
ControlFlowGraph.prototype.makeEdges = function (asmArr, arrOfCanonicalBasicBlock, instructionType, extractNodeNameFromInstruction) {
var edge = {};
var edges = [];
var setEdge = function(edge, sourceNode, targetNode, color) {
edge.from = sourceNode;
edge.to = targetNode;
edge.arrows = "to";
edge.color = color;
};
var hasName = function(asmArr, cbb){
return gccX86.isBasicBlockEnd(asmArr[cbb.end].text);
};
var lastInstruction = function(asmArr, cbb){
while(asmArr[cbb.end-1].text === ""){
--cbb.end;
}
return asmArr[cbb.end-1].text;
};
var generateName = function(name, suffix){
var pos = name.indexOf("@");
if(pos == -1)
return name + "@" + suffix;
return name.substring(0, pos+1)+ suffix;
};
// note: x.end-1 possible value: jmp .L*, {jne,je,jg,...} .L*, ret/rep ret, call and any other instruction that doesn't change control flow
//for(var x of arrOfCanonicalBasicBlock) {
_.each(arrOfCanonicalBasicBlock, function(x){
var targetNode;
var lastInst = lastInstruction(asmArr, x);//asmArr[x.end-1].text;
switch(instructionType(lastInst)) {
case InstructionType_jmp:
{
//we have to deal only with jmp destination, jmp instruction are always taken.
//edge from jump inst
logger.debug("jmp");
targetNode = extractNodeNameFromInstruction(lastInst);
setEdge(edge, x.nameId, targetNode, 'blue');
edges.push(JSON.parse(JSON.stringify(edge)));
logger.debug(edge);
}
break;
case InstructionType_conditionalJmpInst:
{
logger.debug("condit jmp");
//deal with : branche taken, branch not taken
targetNode = extractNodeNameFromInstruction(lastInst);
setEdge(edge, x.nameId, targetNode, 'green');
edges.push(JSON.parse(JSON.stringify(edge)));
logger.debug(edge);
targetNode = hasName(asmArr,x)? asmArr[x.end].text: generateName(x.nameId, x.end);
setEdge(edge, x.nameId, targetNode, "red");
edges.push(JSON.parse(JSON.stringify(edge)));
logger.debug(edge);
}
break;
case InstructionType_notRetInst:
{
//precondition: lastInst is not last instruction in asmArr (but it is in canonical basic block)
//note : asmArr[x.end] expected to be .L*:(name of a basic block)
// this .L*: has to be exactly after the last instruction in the current canocial basic block
var nextNodeName = asmArr[x.end].text;
setEdge(edge, x.nameId, nextNodeName, 'grey');
edges.push(JSON.parse(JSON.stringify(edge)));
logger.debug("not ret inst");
logger.debug(edge);
}
break;
default:
logger.debug("expect ret instruction or it's variants(rep ret): "+ lastInst);
break;
}
});
logger.debug(edges);
return edges;
};
ControlFlowGraph.prototype.generateCfgStructure = function (asmArr){
var rules = gccX86;
var self = this;
var code = rules.filterData(asmArr);
var funcs = self.splitToFunctions(code, rules.isFunctionEnd);
if (!funcs.length) {
return funcs;
}
var result = {};
//for (var rng of funcs) {
_.each(funcs, _.bind(function(rng){
var basicBlocks = self.splitToBasicBlocks(code, rng, rules.isBasicBlockEnd,
rules.isJmpInstruction);
var arrOfCanonicalBasicBlock = [];
//for (var elm of basicBlocks) {
_.each(basicBlocks, function(elm){
var tmp = self.splitToCanonicalBasicBlock(elm);
arrOfCanonicalBasicBlock = arrOfCanonicalBasicBlock.concat(tmp);
});
result[code[rng.start].text] = {
nodes:this.makeNodes(code, arrOfCanonicalBasicBlock),
edges: this.makeEdges(code, arrOfCanonicalBasicBlock,
rules.getInstructionType,
rules.extractNodeName)
};
}, this));
return result;
};
module.exports.ControlFlowGraph = ControlFlowGraph;