blob: f5d3b454fe5e6720b4964e0570eac4d8a9975b0f [file] [log] [blame] [raw]
// Copyright (c) 2012-2016, Matt Godbolt
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
(function () {
var _ = require('underscore-node');
var tabsRe = /\t/g;
// Replaces every tab in `line` with spaces so that the text following the tab
// starts at the next multiple-of-8 column.
//   line:    a single line of text (no newlines expected).
//   returns: the line with all tabs expanded; lines without tabs are returned unchanged.
function expandTabs(line) {
    var tabWidth = 8;
    // Number of characters by which the expanded text is longer than the
    // original text seen so far. `offset` from replace() is relative to the
    // original string, so this converts it to a column in the output.
    var extraChars = 0;
    return line.replace(tabsRe, function (match, offset) {
        var column = offset + extraChars;
        var spacesNeeded = tabWidth - (column % tabWidth);
        // One tab character is replaced by `spacesNeeded` spaces.
        extraChars += spacesNeeded - 1;
        return new Array(spacesNeeded + 1).join(" ");
    });
}
// Every label-looking token on a line: starts with '.', a letter or '_',
// followed by any number of letters, digits, '$', '_' or '.'.
var labelFind = /[.a-zA-Z_][a-zA-Z0-9$_.]*/g;
// A directive that defines data (.string, .asciz, .ascii, .byte and its
// 1/2/4/8-prefixed forms, .short, .word, .long, .quad, .value, .zero).
var dataDefn = /\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)/;
// A `.file <number> "<name>"` line; captures the number and the quoted name.
var fileFind = /^\s*\.file\s+(\d+)\s+"([^"]+)".*/;
// A line whose first token (after an optional leading `label:`) begins with a
// letter, i.e. it does not begin with a '.' directive or punctuation.
var hasOpcode = /^\s*([a-zA-Z$_][a-zA-Z0-9$_.]*:\s*)?[a-zA-Z].*/;
// A label definition at the very start of a line; captures the label name.
// Uses the same character classes as labelFind, so single-character labels
// such as `f:` are recognised too.
var labelDef = /^([.a-zA-Z_][a-zA-Z0-9$_.]*):/;
// Works out which labels are referenced by the given assembly lines.
//   asmLines:         array of assembly source lines.
//   filterDirectives: when truthy, only label-like tokens on lines matching
//                     `hasOpcode` count as used directly; tokens on other lines
//                     are recorded as weak usages of the most recent label.
//                     When falsy, every label-like token counts as used.
//   returns: a prototype-less object whose own keys are the used label names
//            (each mapped to true). Look names up with `result[name]`.
function findUsedLabels(asmLines, filterDirectives) {
    // Prototype-less maps: label names come from user code, and a name such as
    // "constructor" or "toString" must not collide with Object.prototype members.
    var labelsUsed = Object.create(null);
    var weakUsages = Object.create(null);
    var currentLabel = "";
    // Scan through looking for definite label usages (ones used by opcodes),
    // and ones that are weakly used: that is, their use is conditional on another label.
    // For example:
    // .foo: .string "moo"
    // .baz: .quad .foo
    // mov eax, .baz
    // In this case, the '.baz' is used by an opcode, and so is strongly used.
    // The '.foo' is weakly used by .baz.
    asmLines.forEach(function (line) {
        var match = line.match(labelDef);
        if (match)
            currentLabel = match[1];
        // Empty lines and lines starting with '.' contribute no usages.
        if (!line || line[0] === '.') return;
        match = line.match(labelFind);
        if (!match) return;
        if (!filterDirectives || line.match(hasOpcode)) {
            // Only count a label as used if it's used by an opcode, or else we're not filtering directives.
            match.forEach(function (label) {
                labelsUsed[label] = true;
            });
        } else if (currentLabel) {
            // Note any "weak" usages by this label; that is, usages that are only
            // interesting if the currentLabel is used by an opcode.
            if (!weakUsages[currentLabel]) weakUsages[currentLabel] = [];
            match.forEach(function (label) {
                weakUsages[currentLabel].push(label);
            });
        }
    });
    // Now follow the chains of used labels, marking any weak references they refer
    // to as also used. We iteratively do this until either no new labels are found,
    // or we hit a limit (only here to prevent a pathological case from hanging).
    var MaxLabelIterations = 10;
    var toAdd;
    function collectWeak(label) {
        (weakUsages[label] || []).forEach(function (nowused) {
            if (!labelsUsed[nowused]) toAdd.push(nowused);
        });
    }
    function markUsed(label) { labelsUsed[label] = true; }
    for (var iter = 0; iter < MaxLabelIterations; ++iter) {
        toAdd = [];
        Object.keys(labelsUsed).forEach(collectWeak);
        // An empty array is still truthy, so test its length to detect the fixed point.
        if (toAdd.length === 0) break;
        toAdd.forEach(markUsed);
    }
    return labelsUsed;
}
// Builds a table of the `.file <number> "<name>"` lines found in the assembly.
//   asmLines: array of assembly source lines.
//   returns:  object mapping each file number to the quoted file name; when a
//             number appears more than once the last occurrence wins.
function parseFiles(asmLines) {
    return asmLines.reduce(function (fileTable, asmLine) {
        var found = asmLine.match(fileFind);
        if (found !== null) {
            var fileNumber = parseInt(found[1], 10);
            fileTable[fileNumber] = found[2];
        }
        return fileTable;
    }, {});
}
// Filters compiler assembly output into a list of display lines.
//   asm:     the complete assembly text.
//   filters: object of flags read here: `binary`, `directives`, `commentOnly`, `labels`.
//   returns: array of {text, source} objects, where `source` is the line number
//            taken from the most recent `.loc` that referred to the user's input
//            file, or null.
// Text starting with "; Listing generated by Microsoft" is handed to processClAsm,
// and anything else with `filters.binary` set is handed to processBinaryAsm.
function processAsm(asm, filters) {
    if (asm.match(/^; Listing generated by Microsoft/)) return processClAsm(asm, filters);
    if (filters.binary) return processBinaryAsm(asm, filters);
    var result = [];
    var asmLines = asm.split("\n");
    var labelsUsed = findUsedLabels(asmLines, filters.directives);
    var files = parseFiles(asmLines);
    var prevLabel = "";
    var directive = /^\s*\..*$/;
    var commentOnly = /^\s*(#|@|\/\/).*/;
    var sourceTag = /^\s*\.loc\s+(\d+)\s+(\d+).*/;
    // The user's input is a file whose name contains "<stdin>" or is exactly "-".
    // The "-" alternative is anchored: unanchored it would match any path that
    // merely contains a hyphen (e.g. ".../x86_64-linux-gnu/...").
    var stdInLooking = /<stdin>|^-$/;
    var endBlock = /\.(cfi_endproc|data|text|section)/;
    var source = null;
    asmLines.forEach(function (line) {
        var match;
        if (line.trim() === "") {
            result.push({text: "", source: null});
            return;
        }
        match = line.match(sourceTag);
        if (match) {
            // A `.loc` always replaces the current source line; it only
            // becomes non-null when it refers to the user's input file.
            source = null;
            var file = files[parseInt(match[1], 10)];
            if (file && file.match(stdInLooking)) {
                source = parseInt(match[2], 10);
            }
        }
        if (line.match(endBlock)) {
            source = null;
            prevLabel = null;
        }
        if (filters.commentOnly && line.match(commentOnly)) return;
        match = line.match(labelDef);
        if (match) {
            // It's a label definition.
            if (labelsUsed[match[1]] === undefined) {
                // It's an unused label.
                if (filters.labels) return;
            } else {
                // A used label; only its truthiness is consulted below.
                prevLabel = match[1];
            }
        }
        if (!match && filters.directives) {
            // Check for directives only if it wasn't a label; the regexp would
            // otherwise misinterpret labels as directives.
            // Data definitions following a used label are kept; every other
            // directive is dropped.
            var definesUsedData = line.match(dataDefn) && prevLabel;
            if (!definesUsedData && line.match(directive)) return;
        }
        var hasOpcodeMatch = line.match(hasOpcode);
        line = expandTabs(line);
        result.push({text: line, source: hasOpcodeMatch ? source : null});
    });
    return result;
}
// Settings used when processing binary (disassembled) output. They keep these
// defaults until initialise() is called.
var binaryHideFuncRe = null;
var maxAsmLines = 500;
// Loads the binary-output settings from a property lookup function.
//   compilerProps: function (name, defaultValue) returning the property value.
// Reads 'binaryHideFuncRe' (compiled into a RegExp) and 'maxLinesOfAsm'
// (defaulting to the current maxAsmLines).
function initialise(compilerProps) {
    binaryHideFuncRe = new RegExp(compilerProps('binaryHideFuncRe'));
    var lineLimit = compilerProps('maxLinesOfAsm', maxAsmLines);
    maxAsmLines = lineLimit;
}
// Returns true when the function name `func` does not match binaryHideFuncRe,
// i.e. the function should be shown rather than hidden.
function isUserFunction(func) {
    var hiddenMatch = func.match(binaryHideFuncRe);
    return hiddenMatch === null;
}
// Converts a disassembly listing into display lines.
//   asm:     the disassembly text.
//   filters: accepted for signature parity with the other processors; not read here.
//   returns: array of entries. Function headers are {text, source: null};
//            instructions are {opcodes, address, text, source, links}, where
//            `links` is null or a one-element array describing a jump/call
//            target found at the end of the line.
// Only functions accepted by isUserFunction are emitted, and output stops
// (with a marker line) once maxAsmLines entries have been produced.
function processBinaryAsm(asm, filters) {
    var asmOpcodeRe = /^\s*([0-9a-f]+):\s*(([0-9a-f][0-9a-f] ?)+)\s*(.*)/;
    var lineRe = /^(\/[^:]+):([0-9]+).*/;
    var labelRe = /^([0-9a-f]+)\s+<([^>]+)>:$/;
    var destRe = /.*\s([0-9a-f]+)\s+<([^>]+)>$/;
    var rows = asm.split("\n");
    // Handle "error" documents: a single line starting with '<' is passed through.
    if (rows.length === 1 && rows[0][0] === '<') {
        return [{text: rows[0], source: null}];
    }
    function isNonEmpty(piece) {
        return piece.length > 0;
    }
    function parseHexByte(piece) {
        return parseInt(piece, 16);
    }
    var output = [];
    var currentSource = null;
    var currentFunc = null;
    for (var i = 0; i < rows.length; ++i) {
        var row = rows[i];
        if (output.length >= maxAsmLines) {
            // Emit the truncation marker exactly once, then ignore the rest.
            if (output.length == maxAsmLines) {
                output.push({text: "[truncated; too many lines]", source: null});
            }
            continue;
        }
        // "/path/to/file:NN" lines set the source line for following instructions.
        var found = row.match(lineRe);
        if (found) {
            currentSource = parseInt(found[2]);
            continue;
        }
        // "ADDRESS <name>:" starts a new function.
        found = row.match(labelRe);
        if (found) {
            currentFunc = found[2];
            if (isUserFunction(currentFunc)) {
                output.push({text: currentFunc + ":", source: null});
            }
            continue;
        }
        if (!currentFunc || !isUserFunction(currentFunc)) continue;
        found = row.match(asmOpcodeRe);
        if (!found) continue;
        var disassembly = " " + found[4];
        var target = row.match(destRe);
        var links = target ? [{
            offset: disassembly.indexOf(target[1]),
            length: target[1].length,
            to: parseInt(target[1], 16)
        }] : null;
        output.push({
            opcodes: found[2].split(" ").filter(isNonEmpty).map(parseHexByte),
            address: parseInt(found[1], 16),
            text: disassembly,
            source: currentSource,
            links: links
        });
    }
    return output;
}
// TODO: dedupe with the above code
// TODO: support weak refs etc
// Filters an assembly listing into display lines. processAsm dispatches here
// when the text starts with "; Listing generated by Microsoft".
//   asm:     the complete listing text (split on "\n" or "\r\n").
//   filters: object of flags read here: `directives`, `commentOnly`, `labels`, `binary`.
//   returns: array of {text, source} objects; when `filters.binary` is set and the
//            line carried an offset/opcode prefix, `opcodes` and `address` are added.
function processClAsm(asm, filters) {
var asmLines = asm.split(/\r?\n/);
var labelsUsed = {};
var prevLabel = "";
// With FAsc we rely on seeing a bunch of opcodes on a line to detect an instruction
var hasOpcode = /^\s*([0-9a-f]+\s+)+[a-zA-Z].*/;
// First pass: collect every label-like token that counts as "used". Empty
// lines and lines starting with '.' are skipped.
asmLines.forEach(function (line) {
if (line === "" || line[0] === ".") return;
var match = line.match(labelFind);
if (match && (!filters.directives || line.match(hasOpcode))) {
// Only count a label as used if it's used by an opcode, or else we're not filtering directives.
match.forEach(function (label) {
labelsUsed[label] = true;
// console.log("used label:", label);
});
}
});
// Patterns and state for the second (output) pass. Several of these
// deliberately shadow same-named patterns defined at file level.
var directive = /^\s*(\.|([_A-Z]+\b))/;
var labelDefinition = /^([a-zA-Z0-9$_.]+):/; // NB not same as outer labelDef TODO dedupe
var commentOnly = /^\s*([#@;]|\/\/).*/;
var endBlock = /^[^ ]+\s+ENDP/;
var fileFind = /^; File\s+(.*)$/;
// True while the most recent "; File" line named a path matching gccExplorerDir.
var inMain = false;
var sourceTag = /^;\s*([0-9]+)\s*:/;
var gccExplorerDir = /\\gcc-explorer-compiler/; // has to match part of the path in compile.js (ugly)
var ignoreAll = /^\s*include listing\.inc$/;
var source = null;
var result = [];
// Replaces a mangled identifier on the line with the text of the trailing
// ';' comment and drops the comment. Lines without both are returned unchanged.
function demangle(line) {
// Anything identifier-looking with a "@@" in the middle, and a comment at the end
// is treated as a mangled name. The comment will be used to replace the identifier.
var mangledIdentifier = /\?[^ ]+@@[^ ]+/;
var match, comment;
if (!(match = line.match(mangledIdentifier))) return line;
if (!(comment = line.match(/([^;]+);\s*(.*)/))) return line;
// comment[1] is the text before the first ';', comment[2] the comment body.
// A string pattern replaces only the first occurrence.
return comment[1].trimRight().replace(match[0], comment[2]);
}
// Strips the leading hex "offset opcode opcode ..." columns from each line and
// remembers them. After onLine(), `offset`/`opcodes` describe the line just
// processed (offset is null when the line completed no instruction).
function AddrOpcoder() {
var self = this;
this.opcodes = [];
this.offset = null;
// Leading whitespace, one hex number, then any number of two-digit hex
// numbers; group 1 is all the numbers, group 4 the rest of the line.
var numberRe = /^\s+(([0-9a-f]+\b\s*)([0-9a-f][0-9a-f]\b\s*)*)(.*)/;
// Offset and opcodes carried over from preceding number-only lines;
// prevOffset is -1 when nothing is pending.
var prevOffset = -1;
var prevOpcodes = [];
this.hasOpcodes = function () {
return self.offset !== null;
};
// Processes one line: returns the line with the number columns removed, or
// the line unchanged when it has no such columns.
this.onLine = function (line) {
var match = line.match(numberRe);
self.opcodes = [];
self.offset = null;
if (!match) {
// Not a numbered line: discard anything pending.
prevOffset = -1;
return line;
}
var restOfLine = match[4];
var numbers = match[1].split(/\s+/).filter(function (x) {
return x;
}).map(function (x) {
return parseInt(x, 16);
});
// If restOfLine is empty, we should accumulate offset opcodes...
if (restOfLine === "") {
if (prevOffset < 0) {
// First in a batch of opcodes, so first is the offset
prevOffset = numbers[0];
prevOpcodes = numbers.splice(1);
} else {
prevOpcodes = prevOpcodes.concat(numbers);
}
} else {
if (prevOffset >= 0) {
// we had something from a prior line
self.offset = prevOffset;
self.opcodes = prevOpcodes.concat(numbers);
prevOffset = -1;
} else {
// Self-contained line: first number is the offset, the rest are opcodes.
self.offset = numbers[0];
self.opcodes = numbers.splice(1);
}
}
return restOfLine;
};
}
var addrOpcoder = new AddrOpcoder();
// Appends an entry to the result. An empty-text entry is dropped when the
// result is still empty or the previous entry is also empty, so blank lines
// never lead the output or appear twice in a row.
function add(obj) {
var lastWasEmpty = result.length === 0 || result[result.length - 1].text === "";
if (obj.text === "" && lastWasEmpty) return;
if (filters.binary && addrOpcoder.hasOpcodes()) {
obj.opcodes = addrOpcoder.opcodes;
obj.address = addrOpcoder.offset;
}
result.push(obj);
}
// Second pass: filter and emit each line.
asmLines.forEach(function (line) {
var match;
if (!!line.match(ignoreAll)) return;
// Must run on every remaining line, in order: the opcoder keeps state
// across lines.
line = addrOpcoder.onLine(line);
if (line.trim() === "") {
add({text: "", source: null});
return;
}
line = demangle(line);
// "; File <path>" lines select whether following source tags are honoured;
// the line itself is not emitted.
if (!!(match = line.match(fileFind))) {
inMain = !!match[1].match(gccExplorerDir);
return;
}
// "; <number> :" lines give the current source line; not emitted either.
if (!!(match = line.match(sourceTag))) {
if (inMain)
source = parseInt(match[1]);
return;
}
if (line.match(endBlock)) {
source = null;
prevLabel = null;
}
if (filters.commentOnly && line.match(commentOnly)) return;
match = line.match(labelDefinition);
if (match) {
// It's a label definition.
if (labelsUsed[match[1]] === undefined) {
// It's an unused label.
if (filters.labels) return;
} else {
// A used label.
prevLabel = match;
}
}
if (!match && filters.directives) {
// Check for directives only if it wasn't a label; the regexp would
// otherwise misinterpret labels as directives.
if (line.match(dataDefn) && prevLabel) {
// We're defining data that's being used somewhere.
} else {
if (line.match(directive)) return;
}
}
// Only lines that carried an offset/opcode prefix get a source line attached.
var hasOpcodes = addrOpcoder.hasOpcodes();
if (hasOpcodes) line = " " + line; // Reintroduce some indentation
line = expandTabs(line);
add({text: line, source: hasOpcodes ? source : null});
});
return result;
}
exports.processAsm = processAsm;
exports.initialise = initialise;
}).call(this);