blob: 9305b6a7e5744f1bd7225cf728f8bff16c92b177 [file] [log] [blame] [raw]
// Copyright (c) 2017, Najjar Chedy
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
const _ = require('underscore'),
logger = require('./logger').logger,
utils = require('./utils');
const InstructionType_jmp = 0;
const InstructionType_conditionalJmpInst = 1;
const InstructionType_notRetInst = 2;
const InstructionType_retInst = 3;
// deal with typeinfo name for, typeinfo for, vtable for
const isFunctionName = x => x.text.trim().indexOf('.') !== 0 && !x.text.includes(' for ');
const gccX86 = {
filterData: asmArr => {
const jmpLabelRegex = /\.L\d+:/;
const isCode = x => x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || isFunctionName(x));
return _.chain(asmArr)
.map(_.clone)
.filter(isCode)
.value();
},
isFunctionEnd: x => x[0] !== ' ' && x[0] !== '.' && x.indexOf(':') !== -1,
isBasicBlockEnd: (inst, prevInst) => inst[0] === '.' || prevInst.includes(' ret'),
getInstructionType: inst => {
if (inst.includes('jmp')) return InstructionType_jmp;
else if (inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst;
else if (!inst.includes(' ret')) return InstructionType_notRetInst;
else return InstructionType_retInst;
},
extractNodeName: inst => inst.match(/\.L\d+/) + ':',
isJmpInstruction: x => x.trim()[0] === 'j'
};
const clangX86 = {
filterData: asmArr => {
const jmpLabelRegex = /\.LBB\d+_\d+:/;
const isCode = x => x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || isFunctionName(x));
const removeComments = x => {
const pos = x.text.indexOf('#');
if (pos !== -1)
x.text = utils.trimRight(x.text.substring(0, pos));
return x;
};
return _.chain(asmArr)
.map(_.clone)
.filter(isCode)
.map(removeComments)
.value();
},
isFunctionEnd: x => x[0] !== ' ' && x[0] !== '.' && x.indexOf(':') !== -1,
isBasicBlockEnd: (inst, prevInst) => inst[0] === '.' || prevInst.includes(' ret'),
getInstructionType: function (inst) {
if (inst.includes('jmp')) return InstructionType_jmp;
else if (inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst;
else if (!inst.includes(' ret')) return InstructionType_notRetInst;
else return InstructionType_retInst;
},
extractNodeName: inst => inst.match(/\.LBB\d+_\d+/) + ':',
isJmpInstruction: x => x.trim()[0] === 'j'
};
function splitToFunctions(asmArr, isEnd) {
if (asmArr.length === 0) return [];
const result = [];
let first = 1;
const last = asmArr.length;
const fnRange = {start: 0, end: null};
while (first !== last) {
if (isEnd(asmArr[first].text)) {
fnRange.end = first;
result.push(_.clone(fnRange));
fnRange.start = first;
}
++first;
}
fnRange.end = last;
result.push(_.clone(fnRange));
return result;
}
function splitToBasicBlocks(asmArr, range, isEnd, isJmp) {
let first = range.start;
const last = range.end;
if (first === last) return [];
const functionName = asmArr[first].text;
++first;
let rangeBb = {nameId: functionName.substr(0, 50), start: first, end: null, actionPos: []};
const result = [];
const newRangeWith = function (oldRange, nameId, start) {
return {nameId: nameId, start: start, actionPos: [], end: oldRange.end};
};
while (first < last) {
const inst = asmArr[first].text;
if (isEnd(inst, asmArr[first - 1] ? asmArr[first - 1].text : '')) {
rangeBb.end = first;
result.push(_.clone(rangeBb));
//inst is expected to be .L*: where * in 1,2,...
rangeBb = newRangeWith(rangeBb, inst, first + 1);
}
else if (isJmp(inst)) {
rangeBb.actionPos.push(first);
}
++first;
}
rangeBb.end = last;
result.push(_.clone(rangeBb));
return result;
}
function splitToCanonicalBasicBlock(basicBlock) {
const actionPos = basicBlock.actionPos;
let actPosSz = actionPos.length;
if (actionPos[actPosSz - 1] + 1 === basicBlock.end) {
--actPosSz;
}
if (actPosSz === 0)
return [{
nameId: basicBlock.nameId,
start: basicBlock.start,
end: basicBlock.end
}];
else if (actPosSz === 1)
return [
{nameId: basicBlock.nameId, start: basicBlock.start, end: actionPos[0] + 1},
{nameId: basicBlock.nameId + '@' + (actionPos[0] + 1), start: actionPos[0] + 1, end: basicBlock.end}
];
else {
let first = 0;
const last = actPosSz;
const blockName = basicBlock.nameId;
let tmp = {nameId: blockName, start: basicBlock.start, end: actionPos[first] + 1};
const result = [];
result.push(_.clone(tmp));
while (first !== last - 1) {
tmp.nameId = blockName + '@' + (actionPos[first] + 1);
tmp.start = actionPos[first] + 1;
++first;
tmp.end = actionPos[first] + 1;
result.push(_.clone(tmp));
}
tmp = {nameId: blockName + '@' + (actionPos[first] + 1), start: actionPos[first] + 1, end: basicBlock.end};
result.push(_.clone(tmp));
return result;
}
}
function concatInstructions(asmArr, first, last) {
return _.chain(asmArr.slice(first, last))
.map(x => x.text.substr(0, 50))
.value()
.join('\n');
}
function makeNodes(asms, arrOfCanonicalBasicBlock) {
return _.map(arrOfCanonicalBasicBlock, e => {
return {
id: e.nameId,
label: `${e.nameId}${e.nameId.indexOf(':') !== -1 ? '' : ':'}\n${concatInstructions(asms, e.start, e.end)}`,
color: '#99ccff',
shape: 'box'
};
});
}
function makeEdges(asmArr, arrOfCanonicalBasicBlock, rules) {
const edge = {};
const edges = [];
const setEdge = function (edge, sourceNode, targetNode, color) {
edge.from = sourceNode;
edge.to = targetNode;
edge.arrows = 'to';
edge.color = color;
};
const isBasicBlockEnd = rules.isBasicBlockEnd;
const hasName = function (asmArr, cbb) {
const asm = asmArr[cbb.end];
return asm ? isBasicBlockEnd(asm.text, '') : false;
};
const generateName = function (name, suffix) {
const pos = name.indexOf('@');
if (pos === -1)
return name + '@' + suffix;
return name.substring(0, pos + 1) + suffix;
};
/* note: x.end-1 possible values:
jmp .L*, {jne,je,jg,...} .L*, ret/rep ret, call and any other instruction that doesn't change control flow
*/
_.each(arrOfCanonicalBasicBlock, function (x) {
let targetNode;
const lastInst = asmArr[x.end - 1].text;
switch (rules.getInstructionType(lastInst)) {
case InstructionType_jmp: {
//we have to deal only with jmp destination, jmp instruction are always taken.
//edge from jump inst
targetNode = rules.extractNodeName(lastInst);
setEdge(edge, x.nameId, targetNode, 'blue');
edges.push(_.clone(edge));
break;
}
case InstructionType_conditionalJmpInst: {
//deal with : branch taken, branch not taken
targetNode = rules.extractNodeName(lastInst);
setEdge(edge, x.nameId, targetNode, 'green');
edges.push(_.clone(edge));
targetNode = hasName(asmArr, x) ? asmArr[x.end].text : generateName(x.nameId, x.end);
setEdge(edge, x.nameId, targetNode, 'red');
edges.push(_.clone(edge));
logger.debug(edge);
break;
}
case InstructionType_notRetInst: {
//precondition: lastInst is not last instruction in asmArr (but it is in canonical basic block)
//note : asmArr[x.end] expected to be .L*:(name of a basic block)
// this .L*: has to be exactly after the last instruction in the current canonical basic block
if (asmArr[x.end]) {
targetNode = asmArr[x.end].text;
setEdge(edge, x.nameId, targetNode, 'grey');
edges.push(_.clone(edge));
}
break;
}
case InstructionType_retInst:
break;
}
});
logger.debug(edges);
return edges;
}
function isLLVMBased(compilerType, version) {
return version.includes('clang') ||
version.includes('LLVM') ||
version.includes('rustc') ||
compilerType === 'swift' ||
compilerType === 'zig' ||
compilerType === 'ispc';
}
function generateCfgStructure(compilerType, version, asmArr) {
const rules = isLLVMBased(compilerType, version) ? clangX86 : gccX86;
const code = rules.filterData(asmArr);
const funcs = splitToFunctions(code, rules.isFunctionEnd);
if (!funcs.length) {
return funcs;
}
const result = {};
_.each(funcs, _.bind(function (rng) {
const basicBlocks = splitToBasicBlocks(code, rng, rules.isBasicBlockEnd, rules.isJmpInstruction);
let arrOfCanonicalBasicBlock = [];
_.each(basicBlocks, _.bind(function (elm) {
const tmp = splitToCanonicalBasicBlock(elm);
arrOfCanonicalBasicBlock = arrOfCanonicalBasicBlock.concat(tmp);
}, this));
result[code[rng.start].text] = {
nodes: makeNodes(code, arrOfCanonicalBasicBlock),
edges: makeEdges(code, arrOfCanonicalBasicBlock, rules)
};
}, this));
return result;
}
module.exports.generateStructure = generateCfgStructure;