blob: 6db2f7a16a4d27499bffe2ca8084d0983db4d1c4 [file] [log] [blame] [raw]
// Copyright (c) 2012-2017, Najjar Chedy
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
function ControlFlowGraph(rules){
this.rules = rules;
}
function seperateCodeFromData(asmArr) {
var isCode = function(asmArr, index){
return (((asmArr[index].source != null) &&
(asmArr[index+1].source != null)) ||
((asmArr[index].source == null) &&
(asmArr[index+1].source != null)) ||
((asmArr[index].source != null) &&
(asmArr[index+1].source == null)));
}
var code = [];
var first = 0;
var last = asmArr.length;
if(first == last) return code;
while(first != last-1){
if(isCode(asmArr, first))
code.push(JSON.parse(JSON.stringify(asmArr[first])));
++first;
}
if(asmArr[first].source){
code.push(JSON.parse(JSON.stringify(asmArr[first])));
}
return code;
}
var gccX86 = {
filterData: seperateCodeFromData,
isFunctionEnd:function(x) { return ((x[0] != ' ') && (x[0] != '.')
&& (x.indexOf(':') != -1)) ;},
isBasicBlockEnd: function(x) { return x[0] == ".";} ,
getInstructionType:function(inst) {
inst = inst.trim();
if (inst === "") return -1;
else if(inst.includes("jmp")) return 0;
else if(inst[0] === 'j') return 1;
else if(!inst.includes("ret")) return 2;
else return 3;
},
extractNodeName:function(inst) {
var name = inst.match(/.L\d+/);
return name + ":";
},
isJmpInstruction: function(x) { var trimed = x.trim(); return ( trimed[0] == 'j');}
};
ControlFlowGraph.prototype.splitToFunctions = function (asmArr, isEnd) {
if(asmArr.length == 0) return [];
var result = [];
var first = 0;
var last = asmArr.length;
var fnRange = {start:first, end:null};
++first;
while(first != last) {
if(isEnd(asmArr[first].text)) {
fnRange["end"] = first;
result.push(JSON.parse(JSON.stringify(fnRange)));
fnRange["start"] = first;
}
++first;
}
fnRange["end"] = last;
result.push(fnRange);//possiblitiy of bug?! because didn't clone(JSON.parse(JSON.stringify(...))
return result;
};
ControlFlowGraph.prototype.splitToBasicBlocks = function (asmArr, range, isEnd, isJmp) {
var first = range["start"];
var last = range["end"];
if(first == last) return [];
++first;
var rangeBb = {nameId:"start" , start:first , end:null, actionPos:[]};
var result = [];
var resetRangeWith = function(rangeBb, nameId, start) {
rangeBb["nameId"] = nameId;//.L1:
rangeBb["start"] = start;//after .L1:
rangeBb["actionPos"] = [];
}
while(first != last) {
var inst = asmArr[first].text;
if(isEnd(inst)) {
rangeBb["end"] = first;
result.push(JSON.parse(JSON.stringify(rangeBb)));
++first;//risk of a bug.
//inst is expected to be .L*: where * in 1,2,...
resetRangeWith(rangeBb, inst, first);
}
else if(isJmp(inst)) {
rangeBb["actionPos"].push(first);
}
++first
}
rangeBb["end"] = last;
result.push(JSON.parse(JSON.stringify(rangeBb)));
return result;
};
ControlFlowGraph.prototype.splitToCanonicalBasicBlock = function (basic_block) {
var actionPos = basic_block["actionPos"];
var actPosSz = actionPos.length;
if (actionPos[actPosSz-1] + 1 == basic_block.end){
--actPosSz;
}
if(actPosSz == 0)
return [{
nameId: basic_block.nameId,
start: basic_block.start,
end: basic_block.end
}];
else if(actPosSz == 1)
return [
{nameId: basic_block.nameId,start: basic_block.start,end:actionPos[0]+1},
{nameId: basic_block.nameId+"@"+ (actionPos[0]+1),start: actionPos[0]+1,end: basic_block.end}
];
else {
var first = 0;
var last = actPosSz;
var block_name = basic_block.nameId;
var tmp = {nameId:block_name, start:basic_block.start, end:actionPos[first]+1 };
var result = [];
result.push(JSON.parse(JSON.stringify(tmp)));
while(first != last-1) {
tmp["nameId"] = block_name + "@" + (actionPos[first]+1);
tmp["start"] = actionPos[first]+1;
++first;
tmp["end"] = actionPos[first]+1;
result.push(JSON.parse(JSON.stringify(tmp)));
}
tmp = {nameId:block_name + "@" + (actionPos[first] + 1), start:actionPos[first] + 1, end:basic_block.end };
result.push(JSON.parse(JSON.stringify(tmp)));
return result;
}
};
ControlFlowGraph.prototype.concatInstructions = function (asmArr, first, last) {
if(first == last) return "";//if last -1 is changed to last this line is no longuer needed
console.log("look");
var result = "";
while(first != last-1) {//added to delete last \n and handle the last concat outside loop
console.log(asmArr[first].text);
result += asmArr[first].text + "\n";
++first;
}
//last concat withou \n
result += asmArr[first].text;
console.log(asmArr[first].text);
return result;
};
ControlFlowGraph.prototype.makeNodes = function (asmArr, arrOfCanonicalBasicBlock) {
var node = {};
var nodes = [];
for(let x of arrOfCanonicalBasicBlock) {
console.log("node name:")
console.log(x.nameId);
node["id"] = x.nameId;
node["label"] = x.nameId + ((x.nameId.indexOf(":") != -1)? "":":") +"\n"
+ this.concatInstructions(asmArr,x.start, x.end);
node["color"] = "#99ccff";//"#FFAFAF";
node["shape"] = 'box';
nodes.push(JSON.parse(JSON.stringify(node)));
}
return nodes;
};
ControlFlowGraph.prototype.makeEdges = function (asmArr, arrOfCanonicalBasicBlock, instructionType, extractNodeNameFromInstruction) {
const jmpInst = 0;
const conditionalJmpInst = 1;
const notRetInst = 2;
var edge = {};
var edges = [];
var setEdge = function(edge, sourceNode, targetNode, color) {
edge["from"] = sourceNode;
edge["to"] = targetNode;
edge["arrows"] = "to";
edge["color"] = color;
}
var hasName = function(asmArr, cbb){
return gccX86.isBasicBlockEnd(asmArr[cbb.end].text);
}
var lastInstruction = function(asmArr, cbb){
while(asmArr[cbb.end-1].text === ""){
--cbb.end;
}
return asmArr[cbb.end-1].text;
}
var generateName = function(name, suffix){
var pos = name.indexOf("@");
if(pos == -1)
return name + "@" + suffix;
return name.substring(0, pos+1)+ suffix;
}
// note: x.end-1 possible value: jmp .L*, {jne,je,jg,...} .L*, ret/rep ret, call and any other instruction that doesn't change control flow
for(let x of arrOfCanonicalBasicBlock) {
var lastInst = lastInstruction(asmArr, x);//asmArr[x.end-1].text;
switch(instructionType(lastInst)) {
case jmpInst:
{
//we have to deal only with jmp destination, jmp instruction are always taken.
//edge from jump inst
console.log("jmp");
var targetNode = extractNodeNameFromInstruction(lastInst);
setEdge(edge, x.nameId, targetNode, 'blue');
edges.push(JSON.parse(JSON.stringify(edge)));
console.log(edge);
}
break;
case conditionalJmpInst:
{
console.log("condit jmp");
//deal with : branche taken, branch not taken
targetNode = extractNodeNameFromInstruction(lastInst);
setEdge(edge, x.nameId, targetNode, 'green');
edges.push(JSON.parse(JSON.stringify(edge)));
console.log(edge);
targetNode = hasName(asmArr,x)? asmArr[x.end].text: generateName(x.nameId, x.end);
setEdge(edge, x.nameId, targetNode, "red");
edges.push(JSON.parse(JSON.stringify(edge)));
console.log(edge);
}
break;
case notRetInst:
{
//precondition: lastInst is not last instruction in asmArr (but it is in canonical basic block)
//note : asmArr[x.end] expected to be .L*:(name of a basic block)
// this .L*: has to be exactly after the last instruction in the current canocial basic block
var nextNodeName = asmArr[x.end].text;
setEdge(edge, x.nameId, nextNodeName, 'grey');
edges.push(JSON.parse(JSON.stringify(edge)));
console.log("not ret inst");
console.log(edge);
}
break;
default:
console.log("expect ret instruction or it's variants(rep ret): "+ lastInst);
break;
}
}
console.log(edges);
return edges;
};
ControlFlowGraph.prototype.generateCFGStructure = function (asmArr){
var rules = gccX86;
var self = this;
var code = rules.filterData(asmArr);
var funcs = self.splitToFunctions(code, rules.isFunctionEnd);
if (!funcs.length) {
return funcs;
}
var functionsNodes = [];
var functionsEdges = [];
for (let rng of funcs) {
var basic_blocks = self.splitToBasicBlocks(code, rng, rules.isBasicBlockEnd,
rules.isJmpInstruction);
var arrOfCanonicalBasicBlock = [];
for (let elm of basic_blocks) {
var tmp = self.splitToCanonicalBasicBlock(elm);
arrOfCanonicalBasicBlock = arrOfCanonicalBasicBlock.concat(tmp);
}
functionsNodes.push({name:code[rng.start].text, nodes:self.makeNodes(code, arrOfCanonicalBasicBlock)});
functionsEdges.push({name:code[rng.start].text, edges:self.makeEdges(code, arrOfCanonicalBasicBlock,
rules.getInstructionType,
rules.extractNodeName)});
}
return {n: functionsNodes, e:functionsEdges };
};
module.exports.ControlFlowGraph = ControlFlowGraph;