|  | // Copyright (c) 2017, Najjar Chedy | 
|  | // All rights reserved. | 
|  | // | 
|  | // Redistribution and use in source and binary forms, with or without | 
|  | // modification, are permitted provided that the following conditions are met: | 
|  | // | 
|  | //     * Redistributions of source code must retain the above copyright notice, | 
|  | //       this list of conditions and the following disclaimer. | 
|  | //     * Redistributions in binary form must reproduce the above copyright | 
|  | //       notice, this list of conditions and the following disclaimer in the | 
|  | //       documentation and/or other materials provided with the distribution. | 
|  | // | 
|  | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
|  | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|  | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
|  | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | 
|  | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
|  | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
|  | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
|  | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
|  | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
|  | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
|  | // POSSIBILITY OF SUCH DAMAGE. | 
|  |  | 
|  | import _ from 'underscore'; | 
|  |  | 
|  | import { logger } from './logger'; | 
|  | import * as utils from './utils'; | 
|  |  | 
|  | const InstructionType_jmp = 0; | 
|  | const InstructionType_conditionalJmpInst = 1; | 
|  | const InstructionType_notRetInst = 2; | 
|  | const InstructionType_retInst = 3; | 
|  |  | 
|  | // deal with typeinfo name for, typeinfo for, vtable for | 
|  | const isFunctionName = x => x.text.trim().indexOf('.') !== 0; | 
|  |  | 
|  | function getAsmDirective(txt) { | 
|  | const pattern = /^\s*(\.[^ L]\S*)/; | 
|  | const match = txt.match(pattern); | 
|  | if (match !== null) { | 
|  | return match[1]; | 
|  | } | 
|  | return null; | 
|  | } | 
|  |  | 
|  | function filterTextSection(data) { | 
|  | let useCurrentSection = true; | 
|  | const result = []; | 
|  | for (let i in data) { | 
|  | const x = data[i]; | 
|  | const directive = getAsmDirective(x.text); | 
|  | if (directive != null) { | 
|  | if (directive === '.text' || directive === '.data') { | 
|  | useCurrentSection = directive === '.text'; | 
|  | } else if (directive === '.section') { | 
|  | // Only patttern match for now. | 
|  | // Extracting section name would require adjusting demangling code | 
|  | // as demangled name could contain various symbols including ','. | 
|  | useCurrentSection = /\.section\s*"?\.text/.test(x.text); | 
|  | } else if (useCurrentSection) { | 
|  | result.push(x); | 
|  | } | 
|  | } else if (useCurrentSection) { | 
|  | result.push(x); | 
|  | } | 
|  | } | 
|  | return result; | 
|  | } | 
|  |  | 
|  | const gccX86 = { | 
|  | filterData: asmArr => { | 
|  | const jmpLabelRegex = /\.L\d+:/; | 
|  | const isCode = x => x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || isFunctionName(x)); | 
|  | return _.chain(filterTextSection(asmArr)) | 
|  | .map(_.clone) | 
|  | .filter(isCode) | 
|  | .value(); | 
|  | }, | 
|  | isFunctionEnd: x => x[0] !== ' ' && x[0] !== '.' && x.includes(':'), | 
|  |  | 
|  | isBasicBlockEnd: (inst, prevInst) => inst[0] === '.' || prevInst.includes(' ret'), | 
|  |  | 
|  | getInstructionType: inst => { | 
|  | if (inst.includes('jmp')) return InstructionType_jmp; | 
|  | else if (inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst; | 
|  | else if (!inst.includes(' ret')) return InstructionType_notRetInst; | 
|  | else return InstructionType_retInst; | 
|  | }, | 
|  |  | 
|  | extractNodeName: inst => inst.match(/\.L\d+/) + ':', | 
|  |  | 
|  | isJmpInstruction: x => x.trim()[0] === 'j', | 
|  | }; | 
|  |  | 
|  | const clangX86 = { | 
|  | filterData: asmArr => { | 
|  | const jmpLabelRegex = /\.LBB\d+_\d+:/; | 
|  | const isCode = x => x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || isFunctionName(x)); | 
|  |  | 
|  | const removeComments = x => { | 
|  | const pos = x.text.indexOf('#'); | 
|  | if (pos !== -1) | 
|  | x.text = utils.trimRight(x.text.substring(0, pos)); | 
|  | return x; | 
|  | }; | 
|  |  | 
|  | return _.chain(filterTextSection(asmArr)) | 
|  | .map(_.clone) | 
|  | .filter(isCode) | 
|  | .map(removeComments) | 
|  | .value(); | 
|  | }, | 
|  | isFunctionEnd: x => x[0] !== ' ' && x[0] !== '.' && x.includes(':'), | 
|  |  | 
|  | isBasicBlockEnd: (inst, prevInst) => inst[0] === '.' || prevInst.includes(' ret'), | 
|  |  | 
|  | getInstructionType: function (inst) { | 
|  | if (inst.includes('jmp')) return InstructionType_jmp; | 
|  | else if (inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst; | 
|  | else if (!inst.includes(' ret')) return InstructionType_notRetInst; | 
|  | else return InstructionType_retInst; | 
|  | }, | 
|  |  | 
|  | extractNodeName: inst => inst.match(/\.LBB\d+_\d+/) + ':', | 
|  |  | 
|  | isJmpInstruction: x => x.trim()[0] === 'j', | 
|  | }; | 
|  |  | 
|  | function splitToFunctions(asmArr, isEnd) { | 
|  | if (asmArr.length === 0) return []; | 
|  | const result = []; | 
|  | let first = 1; | 
|  | const last = asmArr.length; | 
|  | const fnRange = {start: 0, end: null}; | 
|  | while (first !== last) { | 
|  | if (isEnd(asmArr[first].text)) { | 
|  | fnRange.end = first; | 
|  | result.push(_.clone(fnRange)); | 
|  | fnRange.start = first; | 
|  | } | 
|  | ++first; | 
|  | } | 
|  |  | 
|  | fnRange.end = last; | 
|  | result.push(_.clone(fnRange)); | 
|  | return result; | 
|  | } | 
|  |  | 
|  | function splitToBasicBlocks(asmArr, range, isEnd, isJmp) { | 
|  | let first = range.start; | 
|  | const last = range.end; | 
|  | if (first === last) return []; | 
|  | const functionName = asmArr[first].text; | 
|  | ++first; | 
|  |  | 
|  | let rangeBb = {nameId: functionName.substr(0, 50), start: first, end: null, actionPos: []}; | 
|  | const result = []; | 
|  |  | 
|  | const newRangeWith = function (oldRange, nameId, start) { | 
|  | return {nameId: nameId, start: start, actionPos: [], end: oldRange.end}; | 
|  | }; | 
|  |  | 
|  | while (first < last) { | 
|  | const inst = asmArr[first].text; | 
|  | if (isEnd(inst, asmArr[first - 1] ? asmArr[first - 1].text : '')) { | 
|  | rangeBb.end = first; | 
|  | result.push(_.clone(rangeBb)); | 
|  | //inst is expected to be .L*: where * in 1,2,... | 
|  | rangeBb = newRangeWith(rangeBb, inst, first + 1); | 
|  | } else if (isJmp(inst)) { | 
|  | rangeBb.actionPos.push(first); | 
|  | } | 
|  | ++first; | 
|  | } | 
|  |  | 
|  | rangeBb.end = last; | 
|  | result.push(_.clone(rangeBb)); | 
|  | return result; | 
|  | } | 
|  |  | 
|  | function splitToCanonicalBasicBlock(basicBlock) { | 
|  | const actionPos = basicBlock.actionPos; | 
|  | let actPosSz = actionPos.length; | 
|  | if (actionPos[actPosSz - 1] + 1 === basicBlock.end) { | 
|  | --actPosSz; | 
|  | } | 
|  |  | 
|  | if (actPosSz === 0) | 
|  | return [{ | 
|  | nameId: basicBlock.nameId, | 
|  | start: basicBlock.start, | 
|  | end: basicBlock.end, | 
|  | }]; | 
|  | else if (actPosSz === 1) | 
|  | return [ | 
|  | {nameId: basicBlock.nameId, start: basicBlock.start, end: actionPos[0] + 1}, | 
|  | {nameId: basicBlock.nameId + '@' + (actionPos[0] + 1), start: actionPos[0] + 1, end: basicBlock.end}, | 
|  | ]; | 
|  | else { | 
|  | let first = 0; | 
|  | const last = actPosSz; | 
|  | const blockName = basicBlock.nameId; | 
|  | let tmp = {nameId: blockName, start: basicBlock.start, end: actionPos[first] + 1}; | 
|  | const result = []; | 
|  | result.push(_.clone(tmp)); | 
|  | while (first !== last - 1) { | 
|  | tmp.nameId = blockName + '@' + (actionPos[first] + 1); | 
|  | tmp.start = actionPos[first] + 1; | 
|  | ++first; | 
|  | tmp.end = actionPos[first] + 1; | 
|  | result.push(_.clone(tmp)); | 
|  | } | 
|  |  | 
|  | tmp = {nameId: blockName + '@' + (actionPos[first] + 1), start: actionPos[first] + 1, end: basicBlock.end}; | 
|  | result.push(_.clone(tmp)); | 
|  |  | 
|  | return result; | 
|  |  | 
|  | } | 
|  | } | 
|  |  | 
|  | function concatInstructions(asmArr, first, last) { | 
|  | return _.chain(asmArr.slice(first, last)) | 
|  | .map(x => x.text.substr(0, 50)) | 
|  | .value() | 
|  | .join('\n'); | 
|  | } | 
|  |  | 
|  | function makeNodes(asms, arrOfCanonicalBasicBlock) { | 
|  | return _.map(arrOfCanonicalBasicBlock, e => { | 
|  | return { | 
|  | id: e.nameId, | 
|  | label: `${e.nameId}${e.nameId.includes(':') ? '' : ':'}\n${concatInstructions(asms, e.start, e.end)}`, | 
|  | color: '#99ccff', | 
|  | shape: 'box', | 
|  | }; | 
|  | }); | 
|  | } | 
|  |  | 
|  | function makeEdges(asmArr, arrOfCanonicalBasicBlock, rules) { | 
|  | const edge = {}; | 
|  | const edges = []; | 
|  |  | 
|  | const setEdge = function (edge, sourceNode, targetNode, color) { | 
|  | edge.from = sourceNode; | 
|  | edge.to = targetNode; | 
|  | edge.arrows = 'to'; | 
|  | edge.color = color; | 
|  | }; | 
|  | const isBasicBlockEnd = rules.isBasicBlockEnd; | 
|  |  | 
|  | const hasName = function (asmArr, cbb) { | 
|  | const asm = asmArr[cbb.end]; | 
|  | return asm ? isBasicBlockEnd(asm.text, '') : false; | 
|  | }; | 
|  |  | 
|  | const generateName = function (name, suffix) { | 
|  | const pos = name.indexOf('@'); | 
|  | if (pos === -1) | 
|  | return name + '@' + suffix; | 
|  |  | 
|  | return name.substring(0, pos + 1) + suffix; | 
|  | }; | 
|  | /* note: x.end-1 possible values: | 
|  | jmp .L*, {jne,je,jg,...} .L*, ret/rep ret, call and any other instruction that doesn't change control flow | 
|  | */ | 
|  |  | 
|  | _.each(arrOfCanonicalBasicBlock, function (x) { | 
|  | let targetNode; | 
|  | const lastInst = asmArr[x.end - 1].text; | 
|  | switch (rules.getInstructionType(lastInst)) { | 
|  | case InstructionType_jmp: { | 
|  | //we have to deal only with jmp destination, jmp instruction are always taken. | 
|  | //edge from jump inst | 
|  | targetNode = rules.extractNodeName(lastInst); | 
|  | setEdge(edge, x.nameId, targetNode, 'blue'); | 
|  | edges.push(_.clone(edge)); | 
|  | break; | 
|  | } | 
|  | case InstructionType_conditionalJmpInst: { | 
|  | //deal with : branch taken, branch not taken | 
|  | targetNode = rules.extractNodeName(lastInst); | 
|  | setEdge(edge, x.nameId, targetNode, 'green'); | 
|  | edges.push(_.clone(edge)); | 
|  | targetNode = hasName(asmArr, x) ? asmArr[x.end].text : generateName(x.nameId, x.end); | 
|  | setEdge(edge, x.nameId, targetNode, 'red'); | 
|  | edges.push(_.clone(edge)); | 
|  | logger.debug(edge); | 
|  | break; | 
|  | } | 
|  | case InstructionType_notRetInst: { | 
|  | //precondition: lastInst is not last instruction in asmArr (but it is in canonical basic block) | 
|  | //note : asmArr[x.end] expected to be .L*:(name of a basic block) | 
|  | //       this .L*: has to be exactly after the last instruction in the current canonical basic block | 
|  | if (asmArr[x.end]) { | 
|  | targetNode = asmArr[x.end].text; | 
|  | setEdge(edge, x.nameId, targetNode, 'grey'); | 
|  | edges.push(_.clone(edge)); | 
|  | } | 
|  | break; | 
|  | } | 
|  | case InstructionType_retInst: | 
|  | break; | 
|  | } | 
|  | }); | 
|  | logger.debug(edges); | 
|  | return edges; | 
|  | } | 
|  |  | 
|  | function isLLVMBased(compilerType, version) { | 
|  | return version.includes('clang') || | 
|  | version.includes('LLVM') || | 
|  | version.includes('rustc') || | 
|  | compilerType === 'swift' || | 
|  | compilerType === 'zig' || | 
|  | compilerType === 'ispc'; | 
|  | } | 
|  |  | 
|  | export function generateStructure(compilerType, version, asmArr) { | 
|  | const rules = isLLVMBased(compilerType, version) ? clangX86 : gccX86; | 
|  | const code = rules.filterData(asmArr); | 
|  | const funcs = splitToFunctions(code, rules.isFunctionEnd); | 
|  | if (funcs.length === 0) { | 
|  | return funcs; | 
|  | } | 
|  | const result = {}; | 
|  | _.each(funcs, _.bind(function (rng) { | 
|  | const basicBlocks = splitToBasicBlocks(code, rng, rules.isBasicBlockEnd, rules.isJmpInstruction); | 
|  | let arrOfCanonicalBasicBlock = []; | 
|  | _.each(basicBlocks, _.bind(function (elm) { | 
|  | const tmp = splitToCanonicalBasicBlock(elm); | 
|  | arrOfCanonicalBasicBlock = arrOfCanonicalBasicBlock.concat(tmp); | 
|  | }, this)); | 
|  |  | 
|  | result[code[rng.start].text] = { | 
|  | nodes: makeNodes(code, arrOfCanonicalBasicBlock), | 
|  | edges: makeEdges(code, arrOfCanonicalBasicBlock, rules), | 
|  | }; | 
|  | }, this)); | 
|  |  | 
|  | return result; | 
|  | } |