| // Copyright (c) 2012-2016, Matt Godbolt |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| |
| (function () { |
| var _ = require('underscore-node'); |
| var asmCl = require('./asm-cl'); |
var tabsRe = /\t/g;

/**
 * Replace every tab in `line` with the number of spaces needed to reach the
 * next 8-column tab stop.
 *
 * @param {string} line - a single line of text.
 * @returns {string} the line with all tabs expanded to spaces.
 */
function expandTabs(line) {
    var tabWidth = 8;
    var padding = "        "; // exactly tabWidth spaces
    // How many characters longer the output is than the input so far. Needed
    // because `offset` below indexes the original, unexpanded line.
    var extraChars = 0;
    return line.replace(tabsRe, function (match, offset) {
        var column = offset + extraChars;
        var spacesNeeded = tabWidth - (column % tabWidth);
        // One tab character became `spacesNeeded` characters.
        extraChars += spacesNeeded - 1;
        return padding.slice(0, spacesNeeded);
    });
}
| |
// Every identifier-like token on a line; used to collect candidate label references.
var labelFind = /[.a-zA-Z_][a-zA-Z0-9$_.]*/g;
// Directives that emit data (strings, bytes, words, ...).
var dataDefn = /\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)/;
// `.file <number> "<name>"`; captures the file number and the file name.
var fileFind = /^\s*\.file\s+(\d+)\s+"([^"]+)".*/;
// A line whose first token (after an optional `label:` prefix) starts with a letter,
// i.e. something that looks like an instruction rather than a dot-directive.
var hasOpcode = /^\s*([a-zA-Z$_][a-zA-Z0-9$_.]*:\s*)?[a-zA-Z].*/;
// A label definition at the very start of a line; captures the label name.
// The tail is `*` (not `+`) so that single-character labels such as `x:` are
// recognised, consistent with labelFind and hasOpcode above.
var labelDef = /^([.a-zA-Z_][a-zA-Z0-9$_.]*):/;
| |
/**
 * Work out which labels in an assembly listing are actually referenced.
 *
 * A label is "strongly" used when it appears on a line that looks like an
 * instruction (or on any non-dot line when directives are not being filtered).
 * It is "weakly" used when it appears on a non-instruction line that follows
 * another label's definition; weak usages only count once the owning label is
 * itself used. For example:
 *    .foo: .string "moo"
 *    .baz: .quad .foo
 *          mov eax, .baz
 * Here '.baz' is used by an opcode and so is strongly used, and '.foo' is
 * weakly used by '.baz'.
 *
 * @param {string[]} asmLines - the listing, one entry per line.
 * @param {boolean} filterDirectives - when truthy, only instruction-looking
 *        lines create strong usages; other lines create weak usages.
 * @returns {Object} a prototype-less map whose keys are the used tokens (each
 *        mapped to `true`). Unused labels are simply absent.
 */
function findUsedLabels(asmLines, filterDirectives) {
    // Prototype-less maps: keys come straight from compiler output, so a label
    // called e.g. "toString" or "constructor" must not collide with members
    // inherited from Object.prototype.
    var labelsUsed = Object.create(null);
    var weakUsages = Object.create(null);
    var currentLabel = "";

    asmLines.forEach(function (line) {
        var match = line.match(labelDef);
        if (match)
            currentLabel = match[1];
        // Lines that begin with '.' only update currentLabel; they are never
        // scanned for usages.
        if (!line || line[0] === '.') return;

        match = line.match(labelFind);
        if (!match) return;

        if (!filterDirectives || line.match(hasOpcode)) {
            // Only count a label as used if it's used by an opcode, or else
            // we're not filtering directives.
            match.forEach(function (label) {
                labelsUsed[label] = true;
            });
        } else if (currentLabel) {
            // Note any "weak" usages by this label; that is, usages that are
            // only interesting if currentLabel is used by an opcode.
            if (!weakUsages[currentLabel]) weakUsages[currentLabel] = [];
            match.forEach(function (label) {
                weakUsages[currentLabel].push(label);
            });
        }
    });

    // Follow the chains of used labels, marking any weak references they make
    // as used too. Stop as soon as a pass finds nothing new; the iteration cap
    // only guards against a pathological input.
    var MaxLabelIterations = 10;
    for (var iter = 0; iter < MaxLabelIterations; ++iter) {
        var toAdd = [];
        Object.keys(labelsUsed).forEach(function (label) {
            (weakUsages[label] || []).forEach(function (nowused) {
                if (labelsUsed[nowused]) return;
                toAdd.push(nowused);
            });
        });
        // An empty array is still truthy, so test its length explicitly.
        if (toAdd.length === 0) break;
        toAdd.forEach(function (label) {
            labelsUsed[label] = true;
        });
    }
    return labelsUsed;
}
| |
/**
 * Build a table of the files declared by `.file <number> "<name>"` directives.
 *
 * @param {string[]} asmLines - the listing, one entry per line.
 * @returns {Object} map from file number to file name; when a number is
 *          declared more than once the last declaration wins.
 */
function parseFiles(asmLines) {
    return asmLines.reduce(function (table, asmLine) {
        var declaration = asmLine.match(fileFind);
        if (!declaration) return table;
        var fileNumber = parseInt(declaration[1]);
        table[fileNumber] = declaration[2];
        return table;
    }, {});
}
| |
/**
 * Turn raw compiler assembly text into a filtered list of display lines.
 *
 * Listings that start with the Microsoft banner comment are handed to
 * processClAsm, and `filters.binary` requests to processBinaryAsm. Everything
 * else is processed here line by line.
 *
 * @param {string} asm - the complete assembly text.
 * @param {Object} filters - flags read here: `binary`, `directives`,
 *        `commentOnly` and `labels`.
 * @returns {Array} objects of the form `{text, source}`. `source` is the line
 *          number from the most recent `.loc` that refers to a stdin-like
 *          file, and is only attached to instruction-looking lines; otherwise
 *          it is null.
 */
function processAsm(asm, filters) {
    if (asm.match(/^; Listing generated by Microsoft/)) return processClAsm(asm, filters);
    if (filters.binary) return processBinaryAsm(asm, filters);

    var result = [];
    var asmLines = asm.split("\n");
    var labelsUsed = findUsedLabels(asmLines, filters.directives);
    var files = parseFiles(asmLines);
    var prevLabel = "";

    var directive = /^\s*\..*$/;
    var commentOnly = /^\s*(#|@|\/\/).*/;
    var sourceTag = /^\s*\.loc\s+(\d+)\s+(\d+).*/;
    // The user's source is either reported as "<stdin>" or as a file named
    // exactly "-". The "-" alternative is anchored so that ordinary paths
    // which merely contain a hyphen are not mistaken for user source.
    var stdInLooking = /<stdin>|^-$/;
    var endBlock = /\.(cfi_endproc|data|text|section)/;
    var source = null;

    asmLines.forEach(function (line) {
        if (line.trim() === "") {
            result.push({text: "", source: null});
            return;
        }

        var match = line.match(sourceTag);
        if (match) {
            // A new .loc always resets the current source line; it is only
            // set again if the referenced file is the user's input.
            source = null;
            var file = files[parseInt(match[1], 10)];
            if (file && file.match(stdInLooking)) {
                source = parseInt(match[2], 10);
            }
        }
        if (line.match(endBlock)) {
            source = null;
            prevLabel = null;
        }

        if (filters.commentOnly && line.match(commentOnly)) return;

        match = line.match(labelDef);
        if (match) {
            // It's a label definition.
            if (labelsUsed[match[1]] === undefined) {
                // It's an unused label.
                if (filters.labels) return;
            } else {
                // A used label.
                prevLabel = match;
            }
        }
        if (!match && filters.directives) {
            // Check for directives only if it wasn't a label; the regexp would
            // otherwise misinterpret labels as directives.
            // Data definitions that follow a used label are kept: they define
            // data that is being referenced somewhere.
            var isUsedData = line.match(dataDefn) && prevLabel;
            if (!isUsedData && line.match(directive)) return;
        }

        var hasOpcodeMatch = line.match(hasOpcode);
        line = expandTabs(line);
        result.push({text: line, source: hasOpcodeMatch ? source : null});
    });
    return result;
}
| |
// Functions whose names match this regex are hidden from binary output.
// null means no pattern is configured, in which case nothing is hidden.
var binaryHideFuncRe = null;
// Maximum number of lines emitted by processBinaryAsm before truncating.
var maxAsmLines = 500;

/**
 * Load configuration for binary-mode processing.
 *
 * @param {function} compilerProps - property lookup, called as
 *        `compilerProps(name)` and `compilerProps(name, defaultValue)`.
 *        Reads 'binaryHideFuncRe' (a regex source string) and 'maxLinesOfAsm'.
 */
function initialise(compilerProps) {
    var pattern = compilerProps('binaryHideFuncRe');
    // `new RegExp(undefined)` would build a regex that matches every string
    // and so hide every function; treat a missing/empty pattern as "no filter".
    binaryHideFuncRe = pattern ? new RegExp(pattern) : null;
    maxAsmLines = compilerProps('maxLinesOfAsm', maxAsmLines);
}

/**
 * Decide whether a function from a binary listing should be shown.
 *
 * @param {string} func - the function name.
 * @returns {boolean} false when the name matches the configured hide pattern,
 *          true otherwise (including when no pattern is configured).
 */
function isUserFunction(func) {
    // Without this guard `func.match(null)` would be evaluated as /null/.
    if (!binaryHideFuncRe) return true;
    return !binaryHideFuncRe.test(func);
}
| |
/**
 * Convert a disassembly listing of a binary into display lines.
 *
 * Recognises three kinds of input line: `/path:NN` source markers,
 * `ADDRESS <name>:` function headers and `ADDRESS: BYTES  TEXT` instructions.
 * Instructions are only emitted while inside a function accepted by
 * isUserFunction, and output stops (with a marker line) once maxAsmLines
 * entries have been produced.
 *
 * @param {string} asm - the listing text.
 * @param {Object} filters - accepted for signature parity; not read here.
 * @returns {Array} label entries `{text, source}` and instruction entries
 *          `{opcodes, address, text, source, links}`.
 */
function processBinaryAsm(asm, filters) {
    var sourceMarkerRe = /^(\/[^:]+):([0-9]+).*/;
    var functionHeaderRe = /^([0-9a-f]+)\s+<([^>]+)>:$/;
    var instructionRe = /^\s*([0-9a-f]+):\s*(([0-9a-f][0-9a-f] ?)+)\s*(.*)/;
    var jumpTargetRe = /.*\s([0-9a-f]+)\s+<([^>]+)>$/;

    var rows = asm.split("\n");

    // Handle "error" documents.
    if (rows.length === 1 && rows[0][0] === '<') {
        return [{text: rows[0], source: null}];
    }

    function isNonEmpty(piece) {
        return piece;
    }

    function hexToNumber(piece) {
        return parseInt(piece, 16);
    }

    var output = [];
    var currentSource = null;
    var currentFunc = null;

    for (var index = 0; index < rows.length; ++index) {
        var row = rows[index];

        if (output.length >= maxAsmLines) {
            // Emit the truncation marker exactly once, then ignore the rest.
            if (output.length == maxAsmLines) {
                output.push({text: "[truncated; too many lines]", source: null});
            }
            continue;
        }

        var found = row.match(sourceMarkerRe);
        if (found) {
            currentSource = parseInt(found[2]);
            continue;
        }

        found = row.match(functionHeaderRe);
        if (found) {
            currentFunc = found[2];
            if (isUserFunction(currentFunc)) {
                output.push({text: currentFunc + ":", source: null});
            }
            continue;
        }

        // Skip anything outside a function, or inside a hidden one.
        if (!currentFunc || !isUserFunction(currentFunc)) continue;

        found = row.match(instructionRe);
        if (!found) continue;

        var text = " " + found[4];
        var target = row.match(jumpTargetRe);
        var links = null;
        if (target) {
            // Describe where the destination address sits within the text.
            links = [{
                offset: text.indexOf(target[1]),
                length: target[1].length,
                to: parseInt(target[1], 16)
            }];
        }
        output.push({
            opcodes: found[2].split(" ").filter(isNonEmpty).map(hexToNumber),
            address: parseInt(found[1], 16),
            text: text,
            source: currentSource,
            links: links
        });
    }
    return output;
}
| |
| // parser changes? |
| // Assume opcode stripping, assume preserves initial whitespace. |
| // Opcode is either: |
| // * beginning of line shouty directive: INCLUDELIB or PUBLIC |
| // (maybe group DD in here?) |
| // * beginning of line label/definition |
| // name <whitespace> [directives] |
| // name <comma> <whitespace?> [directives] |
| // name <colon> |
| // name = value |
| // * whitespace and then assembly instructions |
| // -- padding 'npad' is maybe a directive? |
| |
/**
 * Process a Microsoft `cl` listing by feeding it, line by line, to an
 * asmCl.ClParser and returning whatever the parser produces.
 *
 * @param {string} asm - the listing text; lines may end in "\n" or "\r\n".
 * @param {Object} filters - passed through to the ClParser constructor.
 * @returns {*} the value of the parser's `get()`.
 */
function processClAsm(asm, filters) {
    var clParser = new asmCl.ClParser(filters);
    var listingLines = asm.split(/\r?\n/);
    for (var i = 0; i < listingLines.length; ++i) {
        clParser.addLine(listingLines[i]);
    }
    return clParser.get();
}
| |
// TODO: dedupe with the above code
// TODO: support weak refs etc
/**
 * Line-based processor for Microsoft `cl` assembly listings.
 *
 * Strips the leading offset/opcode-byte columns from instruction lines,
 * replaces mangled identifiers with the demangled name found in the trailing
 * comment, tracks `; File` / `; NN :` comments to attribute source lines, and
 * applies the `commentOnly`, `labels`, `directives` and `binary` filters.
 *
 * @param {string} asm - the listing text; lines may end in "\n" or "\r\n".
 * @param {Object} filters - flags read here: `directives`, `commentOnly`,
 *        `labels` and `binary`.
 * @returns {Array} objects of the form `{text, source}`; when `filters.binary`
 *          is set, instruction lines also carry `opcodes` and `address`.
 */
function processClAsm2(asm, filters) {
    var asmLines = asm.split(/\r?\n/);
    // Prototype-less so that label names cannot collide with members
    // inherited from Object.prototype.
    var labelsUsed = Object.create(null);
    var prevLabel = "";
    // With FAsc we rely on seeing a bunch of opcodes on a line to detect an instruction
    var hasOpcode = /^\s*([0-9a-f]+\s+)+[a-zA-Z].*/;
    asmLines.forEach(function (line) {
        if (line === "" || line[0] === ".") return;
        var match = line.match(labelFind);
        if (match && (!filters.directives || line.match(hasOpcode))) {
            // Only count a label as used if it's used by an opcode, or else
            // we're not filtering directives.
            match.forEach(function (label) {
                labelsUsed[label] = true;
            });
        }
    });

    var labelDefinition = /^([a-zA-Z0-9$_.]+):/; // NB not same as outer labelDef TODO dedupe
    // covers "dot" directives, and anything whose first word is SHOUTY
    var shoutyDirective = /^\s*(\.|([_A-Z]+\b))/;
    // covers any line which starts on the margin and ends with SHOUTING
    // e.g. 'xdata ENDS'
    var endDirective = /^[_a-zA-Z]+.*[A-Z]+$/;
    // Directives we want to keep
    var keepDirectives = /\b(ENDP|PROC)\b/;
    var commentOnly = /^\s*([#@;]|\/\/).*/;
    var proc = /.*\bPROC$/;
    var endBlock = /^[^ ]+\s+ENDP/;
    var fileFind = /^; File\s+(.*)$/;
    var inMain = false;
    var sourceTag = /^;\s*([0-9]+)\s*:/;
    var gccExplorerDir = /\\gcc-explorer-compiler/; // has to match part of the path in compile.js (ugly)
    var ignoreAll = /^\s*include listing\.inc$/;
    var source = null;
    var result = [];

    // Anything identifier-looking with a "@@" in the middle, and a comment at
    // the end is treated as a mangled name. The comment replaces the
    // identifier and is then dropped from the line.
    function demangle(line) {
        var mangledIdentifier = /\?[^ ]+@@[^ ]+/;
        var match, comment;
        if (!(match = line.match(mangledIdentifier))) return line;
        if (!(comment = line.match(/([^;]+);\s*(.*)/))) return line;
        return comment[1].trimRight().replace(match[0], comment[2]);
    }

    // Splits the leading "offset byte byte ..." columns off a listing line.
    // Lines that consist solely of numbers are accumulated and attached to
    // the next line that has text after its numbers.
    function AddrOpcoder() {
        var self = this;
        this.opcodes = [];
        this.offset = null;
        var numberRe = /^\s+(([0-9a-f]+\b\s*)([0-9a-f][0-9a-f]\b\s*)*)(.*)/;
        var prevOffset = -1;
        var prevOpcodes = [];
        // True when the most recent onLine() call found an instruction.
        this.hasOpcodes = function () {
            return self.offset !== null;
        };
        // Returns the line with its number columns removed (or the line
        // unchanged when it has none), updating `offset` and `opcodes`.
        this.onLine = function (line) {
            var match = line.match(numberRe);
            self.opcodes = [];
            self.offset = null;
            if (!match) {
                prevOffset = -1;
                return line;
            }
            var restOfLine = match[4];
            var numbers = match[1].split(/\s+/).filter(function (x) {
                return x;
            }).map(function (x) {
                return parseInt(x, 16);
            });
            if (restOfLine === "") {
                // Numbers only: accumulate until the text arrives.
                if (prevOffset < 0) {
                    // First in a batch of opcodes, so first is the offset
                    prevOffset = numbers[0];
                    prevOpcodes = numbers.slice(1);
                } else {
                    prevOpcodes = prevOpcodes.concat(numbers);
                }
            } else if (prevOffset >= 0) {
                // we had something from a prior line
                self.offset = prevOffset;
                self.opcodes = prevOpcodes.concat(numbers);
                prevOffset = -1;
            } else {
                self.offset = numbers[0];
                self.opcodes = numbers.slice(1);
            }
            return restOfLine;
        };
    }

    var addrOpcoder = new AddrOpcoder();

    // Append an entry, collapsing runs of blank lines (and dropping leading
    // ones), and attaching opcode information in binary mode.
    function add(obj) {
        var lastWasEmpty = result.length === 0 || result[result.length - 1].text === "";
        if (obj.text === "" && lastWasEmpty) return;
        if (filters.binary && addrOpcoder.hasOpcodes()) {
            obj.opcodes = addrOpcoder.opcodes;
            obj.address = addrOpcoder.offset;
        }
        result.push(obj);
    }

    asmLines.forEach(function (line) {
        var match;
        if (line.match(ignoreAll)) return;
        line = addrOpcoder.onLine(line);
        if (line.trim() === "") {
            add({text: "", source: null});
            return;
        }
        line = demangle(line);

        match = line.match(fileFind);
        if (match) {
            // Source lines are only tracked for files under the explorer's
            // own compilation directory.
            inMain = !!match[1].match(gccExplorerDir);
            return;
        }
        match = line.match(sourceTag);
        if (match) {
            if (inMain)
                source = parseInt(match[1], 10);
            return;
        }
        if (line.match(endBlock)) {
            source = null;
            prevLabel = null;
        }

        if (filters.commentOnly && line.match(commentOnly)) return;

        match = line.match(labelDefinition);
        if (match) {
            // It's a label definition.
            if (labelsUsed[match[1]] === undefined) {
                // It's an unused label.
                if (filters.labels) return;
            } else {
                // A used label.
                prevLabel = match;
            }
        }
        var hasOpcodes = addrOpcoder.hasOpcodes();
        if (hasOpcodes) line = " " + line; // Reintroduce some indentation

        if (!match && filters.directives) {
            // Check for directives only if it wasn't a label; the regexp would
            // otherwise misinterpret labels as directives.
            if (line.match(dataDefn) && prevLabel) {
                // We're defining data that's being used somewhere.
            } else if (line.match(proc)) {
                // this is a directive indicating the beginning of a function.
            } else {
                var isDirective = line.match(shoutyDirective) || line.match(endDirective);
                if (isDirective && !line.match(keepDirectives)) return;
            }
        }

        line = expandTabs(line);
        add({text: line, source: hasOpcodes ? source : null});
    });
    return result;
}
| |
| exports.processAsm = processAsm; |
| exports.initialise = initialise; |
| |
| }).call(this); |