| // Copyright (c) 2015, Compiler Explorer Authors |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| |
| import _ from 'underscore'; |
| |
| import { AsmRegex } from './asmregex'; |
| import * as utils from './utils'; |
| |
| export class AsmParser extends AsmRegex { |
| constructor(compilerProps) { |
| super(); |
| |
| this.labelFindNonMips = /[.A-Z_a-z][\w$.]*/g; |
| // MIPS labels can start with a $ sign, but other assemblers use $ to mean literal. |
| this.labelFindMips = /[$.A-Z_a-z][\w$.]*/g; |
| this.mipsLabelDefinition = /^\$[\w$.]+:/; |
| this.dataDefn = /^\s*\.(string|asciz|ascii|[1248]?byte|short|x?word|long|quad|value|zero)/; |
| this.fileFind = /^\s*\.file\s+(\d+)\s+"([^"]+)"(\s+"([^"]+)")?.*/; |
| // Opcode expression here matches LLVM-style opcodes of the form `%blah = opcode` |
| this.hasOpcodeRe = /^\s*(%[$.A-Z_a-z][\w$.]*\s*=\s*)?[A-Za-z]/; |
| this.instructionRe = /^\s*[A-Za-z]+/; |
| this.identifierFindRe = /[$.@A-Z_a-z][\dA-z]*/g; |
| this.hasNvccOpcodeRe = /^\s*[@A-Za-z|]/; |
| this.definesFunction = /^\s*\.(type.*,\s*[#%@]function|proc\s+[.A-Z_a-z][\w$.]*:.*)$/; |
| this.definesGlobal = /^\s*\.(?:globa?l|GLB|export)\s*([.A-Z_a-z][\w$.]*)/; |
| this.definesWeak = /^\s*\.(?:weakext|weak)\s*([.A-Z_a-z][\w$.]*)/; |
| this.indentedLabelDef = /^\s*([$.A-Z_a-z][\w$.]*):/; |
| this.assignmentDef = /^\s*([$.A-Z_a-z][\w$.]*)\s*=/; |
| this.directive = /^\s*\..*$/; |
| this.startAppBlock = /\s*#APP.*/; |
| this.endAppBlock = /\s*#NO_APP.*/; |
| this.startAsmNesting = /\s*# Begin ASM.*/; |
| this.endAsmNesting = /\s*# End ASM.*/; |
| this.cudaBeginDef = /\.(entry|func)\s+(?:\([^)]*\)\s*)?([$.A-Z_a-z][\w$.]*)\($/; |
| this.cudaEndDef = /^\s*\)\s*$/; |
| |
| this.binaryHideFuncRe = null; |
| this.maxAsmLines = 5000; |
| if (compilerProps) { |
| const binaryHideFuncReValue = compilerProps('binaryHideFuncRe'); |
| if (binaryHideFuncReValue) { |
| this.binaryHideFuncRe = new RegExp(binaryHideFuncReValue); |
| } |
| |
| this.maxAsmLines = compilerProps('maxLinesOfAsm', this.maxAsmLines); |
| } |
| |
| this.asmOpcodeRe = /^\s*(?<address>[\da-f]+):\s*(?<opcodes>([\da-f]{2} ?)+)\s*(?<disasm>.*)/; |
| this.lineRe = /^(\/[^:]+):(?<line>\d+).*/; |
| |
| // labelRe is made very greedy as it's also used with demangled objdump |
| // output (eg. it can have c++ template with <>). |
| this.labelRe = /^([\da-f]+)\s+<(.+)>:$/; |
| this.destRe = /\s([\da-f]+)\s+<([^+>]+)(\+0x[\da-f]+)?>$/; |
| this.commentRe = /[#;]/; |
| this.instOpcodeRe = /(\.inst\.?\w?)\s*(.*)/; |
| } |
| |
| hasOpcode(line, inNvccCode) { |
| // Remove any leading label definition... |
| const match = line.match(this.labelDef); |
| if (match) { |
| line = line.substr(match[0].length); |
| } |
| // Strip any comments |
| line = line.split(this.commentRe, 1)[0]; |
| // .inst generates an opcode, so also counts |
| if (this.instOpcodeRe.test(line)) return true; |
| // Detect assignment, that's not an opcode... |
| if (this.assignmentDef.test(line)) return false; |
| if (inNvccCode) { |
| return !!this.hasNvccOpcodeRe.test(line); |
| } |
| return !!this.hasOpcodeRe.test(line); |
| } |
| |
| labelFindFor(asmLines) { |
| const isMips = _.any(asmLines, line => !!this.mipsLabelDefinition.test(line)); |
| return isMips ? this.labelFindMips : this.labelFindNonMips; |
| } |
| |
| findUsedLabels(asmLines, filterDirectives) { |
| const labelsUsed = {}; |
| const weakUsages = {}; |
| const labelFind = this.labelFindFor(asmLines); |
| // The current label set is the set of labels all pointing at the current code, so: |
| // foo: |
| // bar: |
| // add r0, r0, #1 |
| // in this case [foo, bar] would be the label set for the add instruction. |
| let currentLabelSet = []; |
| let inLabelGroup = false; |
| let inCustomAssembly = 0; |
| const startBlock = /\.cfi_startproc/; |
| const endBlock = /\.cfi_endproc/; |
| let inFunction = false; |
| |
| // Scan through looking for definite label usages (ones used by opcodes), |
| // and ones that are weakly used: that is, their use is conditional on another label. |
| // For example: |
| // .foo: .string "moo" |
| // .baz: .quad .foo |
| // mov eax, .baz |
| // In this case, the '.baz' is used by an opcode, and so is strongly used. |
| // The '.foo' is weakly used by .baz. |
| // Also, if we have random data definitions within a block of a function (between |
| // cfi_startproc and cfi_endproc), we assume they are strong usages. This covers things |
| // like jump tables embedded in ARM code. |
| // See https://github.com/compiler-explorer/compiler-explorer/issues/2788 |
| for (let line of asmLines) { |
| if (this.startAppBlock.test(line) || this.startAsmNesting.test(line)) { |
| inCustomAssembly++; |
| } else if (this.endAppBlock.test(line) || this.endAsmNesting.test(line)) { |
| inCustomAssembly--; |
| } else if (startBlock.test(line)) { |
| inFunction = true; |
| } else if (endBlock.test(line)) { |
| inFunction = false; |
| } |
| |
| if (inCustomAssembly > 0) |
| line = this.fixLabelIndentation(line); |
| |
| let match = line.match(this.labelDef); |
| if (match) { |
| if (inLabelGroup) |
| currentLabelSet.push(match[1]); |
| else |
| currentLabelSet = [match[1]]; |
| inLabelGroup = true; |
| } else { |
| inLabelGroup = false; |
| } |
| match = line.match(this.definesGlobal); |
| if (!match) |
| match = line.match(this.definesWeak); |
| if (!match) |
| match = line.match(this.cudaBeginDef); |
| if (match) { |
| labelsUsed[match[1]] = true; |
| } |
| |
| const definesFunction = line.match(this.definesFunction); |
| if (!definesFunction && (!line || line[0] === '.')) continue; |
| |
| match = line.match(labelFind); |
| if (!match) continue; |
| |
| if (!filterDirectives || this.hasOpcode(line, false) || definesFunction) { |
| // Only count a label as used if it's used by an opcode, or else we're not filtering directives. |
| for (const label of match) labelsUsed[label] = true; |
| } else { |
| // If we have a current label, then any subsequent opcode or data definition's labels are referred to |
| // weakly by that label. |
| const isDataDefinition = !!this.dataDefn.test(line); |
| const isOpcode = this.hasOpcode(line, false); |
| if (isDataDefinition || isOpcode) { |
| for (const currentLabel of currentLabelSet) { |
| if (inFunction && isDataDefinition) { |
| // Data definitions in the middle of code should be treated as if they were used strongly. |
| for (const label of match) labelsUsed[label] = true; |
| } else { |
| if (!weakUsages[currentLabel]) weakUsages[currentLabel] = []; |
| for (const label of match) weakUsages[currentLabel].push(label); |
| } |
| } |
| } |
| } |
| } |
| |
| // Now follow the chains of used labels, marking any weak references they refer |
| // to as also used. We iteratively do this until either no new labels are found, |
| // or we hit a limit (only here to prevent a pathological case from hanging). |
| function markUsed(label) { |
| labelsUsed[label] = true; |
| } |
| |
| const MaxLabelIterations = 10; |
| for (let iter = 0; iter < MaxLabelIterations; ++iter) { |
| let toAdd = []; |
| _.each(labelsUsed, (t, label) => { // jshint ignore:line |
| _.each(weakUsages[label], nowused => { |
| if (labelsUsed[nowused]) return; |
| toAdd.push(nowused); |
| }); |
| }); |
| if (!toAdd) break; |
| _.each(toAdd, markUsed); |
| } |
| return labelsUsed; |
| } |
| |
| parseFiles(asmLines) { |
| const files = {}; |
| for (const line of asmLines) { |
| const match = line.match(this.fileFind); |
| if (match) { |
| const lineNum = parseInt(match[1]); |
| if (match[4]) { |
| // Clang-style file directive '.file X "dir" "filename"' |
| files[lineNum] = match[2] + '/' + match[4]; |
| } else { |
| files[lineNum] = match[2]; |
| } |
| } |
| } |
| return files; |
| } |
| |
| // Remove labels which do not have a definition. |
| removeLabelsWithoutDefinition(asm, labelDefinitions) { |
| _.each(asm, obj => { |
| obj.labels = obj.labels.filter(label => labelDefinitions[label.name]); |
| }); |
| } |
| |
| // Get labels which are used in the given line. |
| getUsedLabelsInLine(line) { |
| const labelsInLine = []; |
| |
| // Strip any comments |
| const instruction = line.split(this.commentRe, 1)[0]; |
| |
| // Remove the instruction. |
| const params = instruction.replace(this.instructionRe, ''); |
| |
| const removedCol = instruction.length - params.length + 1; |
| params.replace(this.identifierFindRe, (label, index) => { |
| const startCol = removedCol + index; |
| labelsInLine.push({ |
| name: label, |
| range: { |
| startCol: startCol, |
| endCol: startCol + label.length, |
| }, |
| }); |
| }); |
| |
| return labelsInLine; |
| } |
| |
| processAsm(asmResult, filters) { |
| if (filters.binary) return this.processBinaryAsm(asmResult, filters); |
| |
| const startTime = process.hrtime.bigint(); |
| |
| if (filters.commentOnly) { |
| // Remove any block comments that start and end on a line if we're removing comment-only lines. |
| const blockComments = /^[\t ]*\/\*(\*(?!\/)|[^*])*\*\/\s*/gm; |
| asmResult = asmResult.replace(blockComments, ''); |
| } |
| |
| const asm = []; |
| const labelDefinitions = {}; |
| |
| let asmLines = utils.splitLines(asmResult); |
| const startingLineCount = asmLines.length; |
| if (filters.preProcessLines !== undefined) { |
| asmLines = filters.preProcessLines(asmLines); |
| } |
| |
| const labelsUsed = this.findUsedLabels(asmLines, filters.directives); |
| const files = this.parseFiles(asmLines); |
| let prevLabel = ''; |
| |
| // Lines matching the following pattern are considered comments: |
| // - starts with '#', '@', '//' or a single ';' (non repeated) |
| // - starts with ';;' and the first non-whitespace before end of line is not # |
| const commentOnly = /^\s*(((#|@|\/\/).*)|(\/\*.*\*\/)|(;\s*)|(;[^;].*)|(;;\s*[^\s#].*))$/; |
| |
| const commentOnlyNvcc = /^\s*(((#|;|\/\/).*)|(\/\*.*\*\/))$/; |
| const sourceTag = /^\s*\.loc\s+(\d+)\s+(\d+)\s+(.*)/; |
| const sourceD2Tag = /^\s*\.d2line\s+(\d+),?\s*(\d*).*/; |
| const source6502Dbg = /^\s*\.dbg\s+line,\s*"([^"]+)",\s*(\d+)/; |
| const source6502DbgEnd = /^\s*\.dbg\s+line[^,]/; |
| const sourceStab = /^\s*\.stabn\s+(\d+),0,(\d+),.*/; |
| const stdInLooking = /<stdin>|^-$|example\.[^/]+$|<source>/; |
| const endBlock = /\.(cfi_endproc|data|text|section)/; |
| let source = null; |
| let mayRemovePreviousLabel = true; |
| let keepInlineCode = false; |
| |
| let lastOwnSource = null; |
| const dontMaskFilenames = filters.dontMaskFilenames; |
| |
| function maybeAddBlank() { |
| const lastBlank = asm.length === 0 || asm[asm.length - 1].text === ''; |
| if (!lastBlank) |
| asm.push({text: '', source: null, labels: []}); |
| } |
| |
| function handleSource(line) { |
| let match = line.match(sourceTag); |
| if (match) { |
| const file = utils.maskRootdir(files[parseInt(match[1])]); |
| const sourceLine = parseInt(match[2]); |
| if (file) { |
| if (dontMaskFilenames) { |
| source = { |
| file: file, |
| line: sourceLine, |
| mainsource: !!stdInLooking.test(file), |
| }; |
| } else { |
| source = { |
| file: !stdInLooking.test(file) ? file : null, |
| line: sourceLine, |
| }; |
| } |
| const sourceCol = parseInt(match[3]); |
| if (!isNaN(sourceCol) && sourceCol !== 0) { |
| source.column = sourceCol; |
| } |
| } else { |
| source = null; |
| } |
| } else { |
| match = line.match(sourceD2Tag); |
| if (match) { |
| const sourceLine = parseInt(match[1]); |
| source = { |
| file: null, |
| line: sourceLine, |
| }; |
| } |
| } |
| } |
| |
| function handleStabs(line) { |
| const match = line.match(sourceStab); |
| if (!match) return; |
| // cf http://www.math.utah.edu/docs/info/stabs_11.html#SEC48 |
| switch (parseInt(match[1])) { |
| case 68: |
| source = {file: null, line: parseInt(match[2])}; |
| break; |
| case 132: |
| case 100: |
| source = null; |
| prevLabel = null; |
| break; |
| } |
| } |
| |
| function handle6502(line) { |
| const match = line.match(source6502Dbg); |
| if (match) { |
| const file = utils.maskRootdir(match[1]); |
| const sourceLine = parseInt(match[2]); |
| if (dontMaskFilenames) { |
| source = { |
| file: file, |
| line: sourceLine, |
| mainsource: !!stdInLooking.test(file), |
| }; |
| } else { |
| source = { |
| file: !stdInLooking.test(file) ? file : null, |
| line: sourceLine, |
| }; |
| } |
| } else if (source6502DbgEnd.test(line)) { |
| source = null; |
| } |
| } |
| |
| let inNvccDef = false; |
| let inNvccCode = false; |
| |
| let inCustomAssembly = 0; |
| |
| // TODO: Make this function smaller |
| // eslint-disable-next-line max-statements |
| for (let line of asmLines) { |
| if (line.trim() === '') { |
| maybeAddBlank(); |
| continue; |
| } |
| |
| if (this.startAppBlock.test(line) || this.startAsmNesting.test(line)) { |
| inCustomAssembly++; |
| } else if (this.endAppBlock.test(line) || this.endAsmNesting.test(line)) { |
| inCustomAssembly--; |
| } |
| |
| handleSource(line); |
| handleStabs(line); |
| handle6502(line); |
| |
| if (source && (source.file === null || source.mainsource)) { |
| lastOwnSource = source; |
| } |
| |
| if (endBlock.test(line) || (inNvccCode && /}/.test(line))) { |
| source = null; |
| prevLabel = null; |
| lastOwnSource = null; |
| } |
| |
| if (filters.libraryCode && !lastOwnSource && source && (source.file !== null) && !source.mainsource) { |
| if (mayRemovePreviousLabel && asm.length > 0) { |
| const lastLine = asm[asm.length - 1]; |
| |
| const labelDef = lastLine.text |
| ? lastLine.text.match(this.labelDef) : null; |
| |
| if (labelDef) { |
| asm.pop(); |
| keepInlineCode = false; |
| delete labelDefinitions[labelDef[1]]; |
| } else { |
| keepInlineCode = true; |
| } |
| mayRemovePreviousLabel = false; |
| } |
| |
| if (!keepInlineCode) { |
| continue; |
| } |
| } else { |
| mayRemovePreviousLabel = true; |
| } |
| |
| if (filters.commentOnly && |
| ((commentOnly.test(line) && !inNvccCode) || |
| (commentOnlyNvcc.test(line) && inNvccCode)) |
| ) { |
| continue; |
| } |
| |
| if (inCustomAssembly > 0) |
| line = this.fixLabelIndentation(line); |
| |
| let match = line.match(this.labelDef); |
| if (!match) match = line.match(this.assignmentDef); |
| if (!match) { |
| match = line.match(this.cudaBeginDef); |
| if (match) { |
| inNvccDef = true; |
| inNvccCode = true; |
| } |
| } |
| if (match) { |
| // It's a label definition. |
| if (labelsUsed[match[1]] === undefined) { |
| // It's an unused label. |
| if (filters.labels) { |
| continue; |
| } |
| } else { |
| // A used label. |
| prevLabel = match; |
| labelDefinitions[match[1]] = asm.length + 1; |
| } |
| } |
| if (inNvccDef) { |
| if (this.cudaEndDef.test(line)) |
| inNvccDef = false; |
| } else if (!match && filters.directives) { |
| // Check for directives only if it wasn't a label; the regexp would |
| // otherwise misinterpret labels as directives. |
| if (this.dataDefn.test(line) && prevLabel) { |
| // We're defining data that's being used somewhere. |
| } else { |
| // .inst generates an opcode, so does not count as a directive |
| if (this.directive.test(line) && !this.instOpcodeRe.test(line)) { |
| continue; |
| } |
| } |
| } |
| |
| line = utils.expandTabs(line); |
| const text = AsmRegex.filterAsmLine(line, filters); |
| |
| const labelsInLine = match ? [] : this.getUsedLabelsInLine(text); |
| |
| asm.push({ |
| text: text, |
| source: this.hasOpcode(line, inNvccCode) ? source : null, |
| labels: labelsInLine, |
| }); |
| } |
| |
| this.removeLabelsWithoutDefinition(asm, labelDefinitions); |
| |
| const endTime = process.hrtime.bigint(); |
| return { |
| asm: asm, |
| labelDefinitions: labelDefinitions, |
| parsingTime: ((endTime - startTime) / BigInt(1000000)).toString(), |
| filteredCount: startingLineCount - asm.length, |
| }; |
| } |
| |
| fixLabelIndentation(line) { |
| const match = line.match(this.indentedLabelDef); |
| if (match) { |
| return line.replace(/^\s+/, ''); |
| } else { |
| return line; |
| } |
| } |
| |
| isUserFunction(func) { |
| if (this.binaryHideFuncRe === null) return true; |
| |
| return !this.binaryHideFuncRe.test(func); |
| } |
| |
| processBinaryAsm(asmResult, filters) { |
| const startTime = process.hrtime.bigint(); |
| const asm = []; |
| const labelDefinitions = {}; |
| const dontMaskFilenames = filters.dontMaskFilenames; |
| |
| let asmLines = asmResult.split('\n'); |
| const startingLineCount = asmLines.length; |
| let source = null; |
| let func = null; |
| let mayRemovePreviousLabel = true; |
| |
| // Handle "error" documents. |
| if (asmLines.length === 1 && asmLines[0][0] === '<') { |
| return { |
| asm: [{text: asmLines[0], source: null}], |
| }; |
| } |
| |
| if (filters.preProcessBinaryAsmLines !== undefined) { |
| asmLines = filters.preProcessBinaryAsmLines(asmLines); |
| } |
| |
| for (const line of asmLines) { |
| const labelsInLine = []; |
| |
| if (asm.length >= this.maxAsmLines) { |
| if (asm.length === this.maxAsmLines) { |
| asm.push({ |
| text: '[truncated; too many lines]', |
| source: null, |
| labels: labelsInLine, |
| }); |
| } |
| continue; |
| } |
| let match = line.match(this.lineRe); |
| if (match) { |
| if (dontMaskFilenames) { |
| source = { |
| file: utils.maskRootdir(match[1]), |
| line: parseInt(match.groups.line), |
| mainsource: true, |
| }; |
| } else { |
| source = {file: null, line: parseInt(match.groups.line), mainsource: true}; |
| } |
| continue; |
| } |
| |
| match = line.match(this.labelRe); |
| if (match) { |
| func = match[2]; |
| if (this.isUserFunction(func)) { |
| asm.push({ |
| text: func + ':', |
| source: null, |
| labels: labelsInLine, |
| }); |
| labelDefinitions[func] = asm.length; |
| } |
| continue; |
| } |
| |
| if (func && line === `${func}():`) continue; |
| |
| if (!func || !this.isUserFunction(func)) continue; |
| |
| // note: normally the source.file will be null if it's code from example.ext |
| // but with filters.dontMaskFilenames it will be filled with the actual filename |
| // instead we can test source.mainsource in that situation |
| const isMainsource = source && ((source.file === null) || source.mainsource); |
| if (filters.libraryCode && !isMainsource) { |
| if (mayRemovePreviousLabel && asm.length > 0) { |
| const lastLine = asm[asm.length - 1]; |
| if (lastLine.text && this.labelDef.test(lastLine.text)) { |
| asm.pop(); |
| } |
| mayRemovePreviousLabel = false; |
| } |
| continue; |
| } else { |
| mayRemovePreviousLabel = true; |
| } |
| |
| match = line.match(this.asmOpcodeRe); |
| if (match) { |
| const address = parseInt(match.groups.address, 16); |
| const opcodes = match.groups.opcodes.split(' ').filter(x => !!x); |
| const disassembly = ' ' + AsmRegex.filterAsmLine(match.groups.disasm, filters); |
| const destMatch = line.match(this.destRe); |
| if (destMatch) { |
| const labelName = destMatch[2]; |
| const startCol = disassembly.indexOf(labelName) + 1; |
| labelsInLine.push({ |
| name: labelName, |
| range: { |
| startCol: startCol, |
| endCol: startCol + labelName.length, |
| }, |
| }); |
| } |
| asm.push({ |
| opcodes: opcodes, |
| address: address, |
| text: disassembly, |
| source: source, |
| labels: labelsInLine, |
| }); |
| } |
| } |
| |
| this.removeLabelsWithoutDefinition(asm, labelDefinitions); |
| |
| const endTime = process.hrtime.bigint(); |
| |
| return { |
| asm: asm, |
| labelDefinitions: labelDefinitions, |
| parsingTime: ((endTime - startTime) / BigInt(1000000)).toString(), |
| filteredCount: startingLineCount - asm.length, |
| }; |
| } |
| |
| process(asm, filters) { |
| return this.processAsm(asm, filters); |
| } |
| } |