lib/asm-parser.js - compiler-explorer - Rivoreo Source Code Repositories

 // Copyright (c) 2015, Compiler Explorer Authors
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 //     * Redistributions of source code must retain the above copyright notice,
 //       this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above copyright
 //       notice, this list of conditions and the following disclaimer in the
 //       documentation and/or other materials provided with the distribution.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.

 import _ from 'underscore';

 import { AsmRegex } from './asmregex';
 import * as utils from './utils';

 export class AsmParser extends AsmRegex {
     constructor(compilerProps) {
         super();

         this.labelFindNonMips = /[.A-Z_a-z][\w$.]*/g;
         // MIPS labels can start with a $ sign, but other assemblers use $ to mean literal.
         this.labelFindMips = /[$.A-Z_a-z][\w$.]*/g;
         this.mipsLabelDefinition = /^\$[\w$.]+:/;
         this.dataDefn = /^\s*\.(string|asciz|ascii|[1248]?byte|short|x?word|long|quad|value|zero)/;
         this.fileFind = /^\s*\.file\s+(\d+)\s+"([^"]+)"(\s+"([^"]+)")?.*/;
         // Opcode expression here matches LLVM-style opcodes of the form `%blah = opcode`
         this.hasOpcodeRe = /^\s*(%[$.A-Z_a-z][\w$.]*\s*=\s*)?[A-Za-z]/;
         this.instructionRe = /^\s*[A-Za-z]+/;
         this.identifierFindRe = /[$.@A-Z_a-z][\dA-z]*/g;
         this.hasNvccOpcodeRe = /^\s*[@A-Za-z|]/;
         this.definesFunction = /^\s*\.(type.*,\s*[#%@]function|proc\s+[.A-Z_a-z][\w$.]*:.*)$/;
         this.definesGlobal = /^\s*\.(?:globa?l|GLB|export)\s*([.A-Z_a-z][\w$.]*)/;
         this.definesWeak = /^\s*\.(?:weakext|weak)\s*([.A-Z_a-z][\w$.]*)/;
         this.indentedLabelDef = /^\s*([$.A-Z_a-z][\w$.]*):/;
         this.assignmentDef = /^\s*([$.A-Z_a-z][\w$.]*)\s*=/;
         this.directive = /^\s*\..*$/;
         this.startAppBlock = /\s*#APP.*/;
         this.endAppBlock = /\s*#NO_APP.*/;
         this.startAsmNesting = /\s*# Begin ASM.*/;
         this.endAsmNesting = /\s*# End ASM.*/;
         this.cudaBeginDef = /\.(entry|func)\s+(?:\([^)]*\)\s*)?([$.A-Z_a-z][\w$.]*)\($/;
         this.cudaEndDef = /^\s*\)\s*$/;

         this.binaryHideFuncRe = null;
         this.maxAsmLines = 5000;
         if (compilerProps) {
             const binaryHideFuncReValue = compilerProps('binaryHideFuncRe');
             if (binaryHideFuncReValue) {
                 this.binaryHideFuncRe = new RegExp(binaryHideFuncReValue);
             }

             this.maxAsmLines = compilerProps('maxLinesOfAsm', this.maxAsmLines);
         }

         this.asmOpcodeRe = /^\s*(?<address>[\da-f]+):\s*(?<opcodes>([\da-f]{2} ?)+)\s*(?<disasm>.*)/;
         this.lineRe = /^(\/[^:]+):(?<line>\d+).*/;

         // labelRe is made very greedy as it's also used with demangled objdump
         // output (eg. it can have c++ template with <>).
         this.labelRe = /^([\da-f]+)\s+<(.+)>:$/;
         this.destRe = /\s([\da-f]+)\s+<([^+>]+)(\+0x[\da-f]+)?>$/;
         this.commentRe = /[#;]/;
         this.instOpcodeRe = /(\.inst\.?\w?)\s*(.*)/;
     }

     hasOpcode(line, inNvccCode) {
         // Remove any leading label definition...
         const match = line.match(this.labelDef);
         if (match) {
             line = line.substr(match[0].length);
         }
         // Strip any comments
         line = line.split(this.commentRe, 1)[0];
         // .inst generates an opcode, so also counts
         if (this.instOpcodeRe.test(line)) return true;
         // Detect assignment, that's not an opcode...
         if (this.assignmentDef.test(line)) return false;
         if (inNvccCode) {
             return !!this.hasNvccOpcodeRe.test(line);
         }
         return !!this.hasOpcodeRe.test(line);
     }

     labelFindFor(asmLines) {
         const isMips = _.any(asmLines, line => !!this.mipsLabelDefinition.test(line));
         return isMips ? this.labelFindMips : this.labelFindNonMips;
     }

     findUsedLabels(asmLines, filterDirectives) {
         const labelsUsed = {};
         const weakUsages = {};
         const labelFind = this.labelFindFor(asmLines);
         // The current label set is the set of labels all pointing at the current code, so:
         // foo:
         // bar:
         //    add r0, r0, #1
         // in this case [foo, bar] would be the label set for the add instruction.
         let currentLabelSet = [];
         let inLabelGroup = false;
         let inCustomAssembly = 0;
         const startBlock = /\.cfi_startproc/;
         const endBlock = /\.cfi_endproc/;
         let inFunction = false;

         // Scan through looking for definite label usages (ones used by opcodes),
         // and ones that are weakly used: that is, their use is conditional on another label.
         // For example:
         // .foo: .string "moo"
         // .baz: .quad .foo
         //       mov eax, .baz
         // In this case, the '.baz' is used by an opcode, and so is strongly used.
         // The '.foo' is weakly used by .baz.
         // Also, if we have random data definitions within a block of a function (between
         // cfi_startproc and cfi_endproc), we assume they are strong usages. This covers things
         // like jump tables embedded in ARM code.
         // See https://github.com/compiler-explorer/compiler-explorer/issues/2788
         for (let line of asmLines) {
             if (this.startAppBlock.test(line) || this.startAsmNesting.test(line)) {
                 inCustomAssembly++;
             } else if (this.endAppBlock.test(line) || this.endAsmNesting.test(line)) {
                 inCustomAssembly--;
             } else if (startBlock.test(line)) {
                 inFunction = true;
             } else if (endBlock.test(line)) {
                 inFunction = false;
             }

             if (inCustomAssembly > 0)
                 line = this.fixLabelIndentation(line);

             let match = line.match(this.labelDef);
             if (match) {
                 if (inLabelGroup)
                     currentLabelSet.push(match[1]);
                 else
                     currentLabelSet = [match[1]];
                 inLabelGroup = true;
             } else {
                 inLabelGroup = false;
             }
             match = line.match(this.definesGlobal);
             if (!match)
                 match = line.match(this.definesWeak);
             if (!match)
                 match = line.match(this.cudaBeginDef);
             if (match) {
                 labelsUsed[match[1]] = true;
             }

             const definesFunction = line.match(this.definesFunction);
             if (!definesFunction && (!line || line[0] === '.')) continue;

             match = line.match(labelFind);
             if (!match) continue;

             if (!filterDirectives || this.hasOpcode(line, false) || definesFunction) {
                 // Only count a label as used if it's used by an opcode, or else we're not filtering directives.
                 for (const label of match) labelsUsed[label] = true;
             } else {
                 // If we have a current label, then any subsequent opcode or data definition's labels are referred to
                 // weakly by that label.
                 const isDataDefinition = !!this.dataDefn.test(line);
                 const isOpcode = this.hasOpcode(line, false);
                 if (isDataDefinition || isOpcode) {
                     for (const currentLabel of currentLabelSet) {
                         if (inFunction && isDataDefinition) {
                             // Data definitions in the middle of code should be treated as if they were used strongly.
                             for (const label of match) labelsUsed[label] = true;
                         } else {
                             if (!weakUsages[currentLabel]) weakUsages[currentLabel] = [];
                             for (const label of match) weakUsages[currentLabel].push(label);
                         }
                     }
                 }
             }
         }

         // Now follow the chains of used labels, marking any weak references they refer
         // to as also used. We iteratively do this until either no new labels are found,
         // or we hit a limit (only here to prevent a pathological case from hanging).
         function markUsed(label) {
             labelsUsed[label] = true;
         }

         const MaxLabelIterations = 10;
         for (let iter = 0; iter < MaxLabelIterations; ++iter) {
             let toAdd = [];
             _.each(labelsUsed, (t, label) => { // jshint ignore:line
                 _.each(weakUsages[label], nowused => {
                     if (labelsUsed[nowused]) return;
                     toAdd.push(nowused);
                 });
             });
             if (!toAdd) break;
             _.each(toAdd, markUsed);
         }
         return labelsUsed;
     }

     parseFiles(asmLines) {
         const files = {};
         for (const line of asmLines) {
             const match = line.match(this.fileFind);
             if (match) {
                 const lineNum = parseInt(match[1]);
                 if (match[4]) {
                     // Clang-style file directive '.file X "dir" "filename"'
                     files[lineNum] = match[2] + '/' + match[4];
                 } else {
                     files[lineNum] = match[2];
                 }
             }
         }
         return files;
     }

     // Remove labels which do not have a definition.
     removeLabelsWithoutDefinition(asm, labelDefinitions) {
         _.each(asm, obj => {
             obj.labels = obj.labels.filter(label => labelDefinitions[label.name]);
         });
     }

     // Get labels which are used in the given line.
     getUsedLabelsInLine(line) {
         const labelsInLine = [];

         // Strip any comments
         const instruction = line.split(this.commentRe, 1)[0];

         // Remove the instruction.
         const params = instruction.replace(this.instructionRe, '');

         const removedCol = instruction.length - params.length + 1;
         params.replace(this.identifierFindRe, (label, index) => {
             const startCol = removedCol + index;
             labelsInLine.push({
                 name: label,
                 range: {
                     startCol: startCol,
                     endCol: startCol + label.length,
                 },
             });
         });

         return labelsInLine;
     }

     /**
      * Turn assembly text into a filtered list of line objects.
      *
      * When `filters.binary` is set the work is delegated to `processBinaryAsm`.
      *
      * @param {string} asmResult - the assembly text
      * @param {Object} filters - options read here: `binary`, `commentOnly`, `preProcessLines`,
      *     `directives`, `dontMaskFilenames`, `libraryCode` and `labels`. The object is also
      *     passed on to `AsmRegex.filterAsmLine`.
      * @returns {Object} `{asm, labelDefinitions, parsingTime, filteredCount}`: `asm` holds
      *     `{text, source, labels}` entries; `labelDefinitions` maps each kept, used label to its
      *     one-based position in `asm`; `parsingTime` is the elapsed time in milliseconds as a
      *     string; `filteredCount` is the number of input lines minus `asm.length`.
      */
     processAsm(asmResult, filters) {
         if (filters.binary) return this.processBinaryAsm(asmResult, filters);

         const startTime = process.hrtime.bigint();

         if (filters.commentOnly) {
             // Remove any block comments that start and end on a line if we're removing comment-only lines.
             const blockComments = /^[\t ]*\/\*(\*(?!\/)|[^*])*\*\/\s*/gm;
             asmResult = asmResult.replace(blockComments, '');
         }

         const asm = [];
         const labelDefinitions = {};

         let asmLines = utils.splitLines(asmResult);
         // Counted before any preprocessing hook runs.
         const startingLineCount = asmLines.length;
         if (filters.preProcessLines !== undefined) {
             asmLines = filters.preProcessLines(asmLines);
         }

         const labelsUsed = this.findUsedLabels(asmLines, filters.directives);
         const files = this.parseFiles(asmLines);
         // Only ever tested for truthiness; holds '', null or the match of the last used label.
         let prevLabel = '';

         // Lines matching the following pattern are considered comments:
         // - starts with '#', '@', '//' or a single ';' (non repeated)
         // - starts with ';;' and the first non-whitespace before end of line is not #
         const commentOnly = /^\s*(((#|@|\/\/).*)|(\/\*.*\*\/)|(;\s*)|(;[^;].*)|(;;\s*[^\s#].*))$/;

         const commentOnlyNvcc = /^\s*(((#|;|\/\/).*)|(\/\*.*\*\/))$/;
         const sourceTag = /^\s*\.loc\s+(\d+)\s+(\d+)\s+(.*)/;
         const sourceD2Tag = /^\s*\.d2line\s+(\d+),?\s*(\d*).*/;
         const source6502Dbg = /^\s*\.dbg\s+line,\s*"([^"]+)",\s*(\d+)/;
         const source6502DbgEnd = /^\s*\.dbg\s+line[^,]/;
         const sourceStab = /^\s*\.stabn\s+(\d+),0,(\d+),.*/;
         const stdInLooking = /<stdin>|^-$|example\.[^/]+$|<source>/;
         const endBlock = /\.(cfi_endproc|data|text|section)/;
         // The source position attached to opcode lines; updated by the handlers below.
         let source = null;
         let mayRemovePreviousLabel = true;
         let keepInlineCode = false;

         // The most recent source position that belongs to the user's own file.
         let lastOwnSource = null;
         const dontMaskFilenames = filters.dontMaskFilenames;

         // Append an empty line unless the output is empty or already ends with one.
         function maybeAddBlank() {
             const lastBlank = asm.length === 0 || asm[asm.length - 1].text === '';
             if (!lastBlank)
                 asm.push({text: '', source: null, labels: []});
         }

         // Update `source` from a `.loc` or `.d2line` directive, if the line is one.
         function handleSource(line) {
             let match = line.match(sourceTag);
             if (match) {
                 const file = utils.maskRootdir(files[parseInt(match[1])]);
                 const sourceLine = parseInt(match[2]);
                 if (file) {
                     if (dontMaskFilenames) {
                         source = {
                             file: file,
                             line: sourceLine,
                             mainsource: !!stdInLooking.test(file),
                         };
                     } else {
                         // A file that looks like the user's own input is reported as null.
                         source = {
                             file: !stdInLooking.test(file) ? file : null,
                             line: sourceLine,
                         };
                     }
                     // The third capture is the rest of the line; only a leading non-zero
                     // number is taken as the column.
                     const sourceCol = parseInt(match[3]);
                     if (!isNaN(sourceCol) && sourceCol !== 0) {
                         source.column = sourceCol;
                     }
                 } else {
                     source = null;
                 }
             } else {
                 match = line.match(sourceD2Tag);
                 if (match) {
                     const sourceLine = parseInt(match[1]);
                     source = {
                         file: null,
                         line: sourceLine,
                     };
                 }
             }
         }

         // Update `source` from a `.stabn` directive, if the line is one.
         function handleStabs(line) {
             const match = line.match(sourceStab);
             if (!match) return;
             // cf http://www.math.utah.edu/docs/info/stabs_11.html#SEC48
             // NOTE(review): 68, 132 and 100 are presumably N_SLINE, N_SOL and N_SO from the
             // document linked above - confirm.
             switch (parseInt(match[1])) {
                 case 68:
                     source = {file: null, line: parseInt(match[2])};
                     break;
                 case 132:
                 case 100:
                     source = null;
                     prevLabel = null;
                     break;
             }
         }

         // Update `source` from a `.dbg line` directive, if the line is one.
         function handle6502(line) {
             const match = line.match(source6502Dbg);
             if (match) {
                 const file = utils.maskRootdir(match[1]);
                 const sourceLine = parseInt(match[2]);
                 if (dontMaskFilenames) {
                     source = {
                         file: file,
                         line: sourceLine,
                         mainsource: !!stdInLooking.test(file),
                     };
                 } else {
                     source = {
                         file: !stdInLooking.test(file) ? file : null,
                         line: sourceLine,
                     };
                 }
             } else if (source6502DbgEnd.test(line)) {
                 // `.dbg line` without a file/line pair clears the current position.
                 source = null;
             }
         }

         // inNvccDef is true between a cudaBeginDef header and the line that closes it;
         // inNvccCode is set at such a header and is never reset.
         let inNvccDef = false;
         let inNvccCode = false;

         // Nesting depth of custom assembly regions, as counted from the start/end markers.
         let inCustomAssembly = 0;

         // TODO: Make this function smaller
         // eslint-disable-next-line max-statements
         for (let line of asmLines) {
             if (line.trim() === '') {
                 maybeAddBlank();
                 continue;
             }

             if (this.startAppBlock.test(line) || this.startAsmNesting.test(line)) {
                 inCustomAssembly++;
             } else if (this.endAppBlock.test(line) || this.endAsmNesting.test(line)) {
                 inCustomAssembly--;
             }

             handleSource(line);
             handleStabs(line);
             handle6502(line);

             if (source && (source.file === null || source.mainsource)) {
                 lastOwnSource = source;
             }

             // The end of a block resets all source tracking.
             if (endBlock.test(line) || (inNvccCode && /}/.test(line))) {
                 source = null;
                 prevLabel = null;
                 lastOwnSource = null;
             }

             // Library-code filtering: the line is attributed to a file other than the user's and
             // no position in the user's own source has been seen since the last reset.
             if (filters.libraryCode && !lastOwnSource && source && (source.file !== null) && !source.mainsource) {
                 if (mayRemovePreviousLabel && asm.length > 0) {
                     const lastLine = asm[asm.length - 1];

                     const labelDef = lastLine.text
                         ? lastLine.text.match(this.labelDef) : null;

                     if (labelDef) {
                         // The label just emitted belongs to the library code: take it back.
                         asm.pop();
                         keepInlineCode = false;
                         delete labelDefinitions[labelDef[1]];
                     } else {
                         // The previous line was not a label, so the following lines are kept.
                         keepInlineCode = true;
                     }
                     mayRemovePreviousLabel = false;
                 }

                 if (!keepInlineCode) {
                     continue;
                 }
             } else {
                 mayRemovePreviousLabel = true;
             }

             if (filters.commentOnly &&
                 ((commentOnly.test(line) && !inNvccCode) ||
                     (commentOnlyNvcc.test(line) && inNvccCode))
             ) {
                 continue;
             }

             if (inCustomAssembly > 0)
                 line = this.fixLabelIndentation(line);

             // A label definition, an assignment or a CUDA function header all count as a definition here.
             let match = line.match(this.labelDef);
             if (!match) match = line.match(this.assignmentDef);
             if (!match) {
                 match = line.match(this.cudaBeginDef);
                 if (match) {
                     inNvccDef = true;
                     inNvccCode = true;
                 }
             }
             if (match) {
                 // It's a label definition.
                 if (labelsUsed[match[1]] === undefined) {
                     // It's an unused label.
                     if (filters.labels) {
                         continue;
                     }
                 } else {
                     // A used label.
                     prevLabel = match;
                     // The line has not been pushed yet, so this is its one-based position in asm.
                     labelDefinitions[match[1]] = asm.length + 1;
                 }
             }
             if (inNvccDef) {
                 if (this.cudaEndDef.test(line))
                     inNvccDef = false;
             } else if (!match && filters.directives) {
                 // Check for directives only if it wasn't a label; the regexp would
                 // otherwise misinterpret labels as directives.
                 if (this.dataDefn.test(line) && prevLabel) {
                     // We're defining data that's being used somewhere.
                 } else {
                     // .inst generates an opcode, so does not count as a directive
                     if (this.directive.test(line) && !this.instOpcodeRe.test(line)) {
                         continue;
                     }
                 }
             }

             line = utils.expandTabs(line);
             const text = AsmRegex.filterAsmLine(line, filters);

             // Definition lines get no label references of their own.
             const labelsInLine = match ? [] : this.getUsedLabelsInLine(text);

             asm.push({
                 text: text,
                 // Only lines with an opcode carry a source position.
                 source: this.hasOpcode(line, inNvccCode) ? source : null,
                 labels: labelsInLine,
             });
         }

         this.removeLabelsWithoutDefinition(asm, labelDefinitions);

         const endTime = process.hrtime.bigint();
         return {
             asm: asm,
             labelDefinitions: labelDefinitions,
             // hrtime.bigint() is in nanoseconds; the division converts to whole milliseconds.
             parsingTime: ((endTime - startTime) / BigInt(1000000)).toString(),
             filteredCount: startingLineCount - asm.length,
         };
     }

     fixLabelIndentation(line) {
         const match = line.match(this.indentedLabelDef);
         if (match) {
             return line.replace(/^\s+/, '');
         } else {
             return line;
         }
     }

     isUserFunction(func) {
         if (this.binaryHideFuncRe === null) return true;

         return !this.binaryHideFuncRe.test(func);
     }

     /**
      * Turn a disassembly listing into a filtered list of line objects.
      *
      * The patterns used here match `/path:line` source markers, `<address> <name>:` function
      * headers and `address: bytes disassembly` instruction lines.
      *
      * @param {string} asmResult - the listing text
      * @param {Object} filters - options read here: `dontMaskFilenames`,
      *     `preProcessBinaryAsmLines` and `libraryCode`. The object is also passed on to
      *     `AsmRegex.filterAsmLine`.
      * @returns {Object} normally `{asm, labelDefinitions, parsingTime, filteredCount}`, where
      *     instruction entries in `asm` are `{opcodes, address, text, source, labels}` and label
      *     entries are `{text, source, labels}`. For a one-line input starting with `<` only
      *     `{asm: [{text, source}]}` is returned.
      */
     processBinaryAsm(asmResult, filters) {
         const startTime = process.hrtime.bigint();
         const asm = [];
         const labelDefinitions = {};
         const dontMaskFilenames = filters.dontMaskFilenames;

         let asmLines = asmResult.split('\n');
         const startingLineCount = asmLines.length;
         // State carried from one line to the next: the current source position, the function
         // whose header was seen last, and whether a preceding label may still be removed.
         let source = null;
         let func = null;
         let mayRemovePreviousLabel = true;

         // Handle "error" documents.
         if (asmLines.length === 1 && asmLines[0][0] === '<') {
             return {
                 asm: [{text: asmLines[0], source: null}],
             };
         }

         if (filters.preProcessBinaryAsmLines !== undefined) {
             asmLines = filters.preProcessBinaryAsmLines(asmLines);
         }

         for (const line of asmLines) {
             const labelsInLine = [];

             // Once the limit is reached a single truncation notice is appended and every
             // remaining line is skipped.
             if (asm.length >= this.maxAsmLines) {
                 if (asm.length === this.maxAsmLines) {
                     asm.push({
                         text: '[truncated; too many lines]',
                         source: null,
                         labels: labelsInLine,
                     });
                 }
                 continue;
             }
             // A source marker only updates the current position; it produces no output.
             let match = line.match(this.lineRe);
             if (match) {
                 if (dontMaskFilenames) {
                     source = {
                         file: utils.maskRootdir(match[1]),
                         line: parseInt(match.groups.line),
                         mainsource: true,
                     };
                 } else {
                     source = {file: null, line: parseInt(match.groups.line), mainsource: true};
                 }
                 continue;
             }

             // A function header starts a new function; it is emitted only for user functions.
             match = line.match(this.labelRe);
             if (match) {
                 func = match[2];
                 if (this.isUserFunction(func)) {
                     asm.push({
                         text: func + ':',
                         source: null,
                         labels: labelsInLine,
                     });
                     // Recorded after the push, so this is the one-based position in asm.
                     labelDefinitions[func] = asm.length;
                 }
                 continue;
             }

             // Skip a line that merely repeats the current function name as `name():`.
             if (func && line === `${func}():`) continue;

             // Skip everything outside a function and everything inside a hidden one.
             if (!func || !this.isUserFunction(func)) continue;

             // note: normally the source.file will be null if it's code from example.ext
             //  but with filters.dontMaskFilenames it will be filled with the actual filename
             //  instead we can test source.mainsource in that situation
             const isMainsource = source && ((source.file === null) || source.mainsource);
             if (filters.libraryCode && !isMainsource) {
                 if (mayRemovePreviousLabel && asm.length > 0) {
                     const lastLine = asm[asm.length - 1];
                     // A label emitted immediately before library code is taken back.
                     if (lastLine.text && this.labelDef.test(lastLine.text)) {
                         asm.pop();
                     }
                     mayRemovePreviousLabel = false;
                 }
                 continue;
             } else {
                 mayRemovePreviousLabel = true;
             }

             match = line.match(this.asmOpcodeRe);
             if (match) {
                 const address = parseInt(match.groups.address, 16);
                 // Split the byte pairs and drop empty pieces.
                 const opcodes = match.groups.opcodes.split(' ').filter(x => !!x);
                 const disassembly = ' ' + AsmRegex.filterAsmLine(match.groups.disasm, filters);
                 // A trailing `<symbol>` or `<symbol+0x..>` names the destination of the instruction.
                 const destMatch = line.match(this.destRe);
                 if (destMatch) {
                     const labelName = destMatch[2];
                     // Offset of the name within the emitted text, plus one.
                     const startCol = disassembly.indexOf(labelName) + 1;
                     labelsInLine.push({
                         name: labelName,
                         range: {
                             startCol: startCol,
                             endCol: startCol + labelName.length,
                         },
                     });
                 }
                 asm.push({
                     opcodes: opcodes,
                     address: address,
                     text: disassembly,
                     source: source,
                     labels: labelsInLine,
                 });
             }
         }

         this.removeLabelsWithoutDefinition(asm, labelDefinitions);

         const endTime = process.hrtime.bigint();

         return {
             asm: asm,
             labelDefinitions: labelDefinitions,
             // hrtime.bigint() is in nanoseconds; the division converts to whole milliseconds.
             parsingTime: ((endTime - startTime) / BigInt(1000000)).toString(),
             filteredCount: startingLineCount - asm.length,
         };
     }

     process(asm, filters) {
         return this.processAsm(asm, filters);
     }
 }
	// Copyright (c) 2015, Compiler Explorer Authors
	// All rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are met:
	//
	// * Redistributions of source code must retain the above copyright notice,
	// this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above copyright
	// notice, this list of conditions and the following disclaimer in the
	// documentation and/or other materials provided with the distribution.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	// POSSIBILITY OF SUCH DAMAGE.

	import _ from 'underscore';

	import { AsmRegex } from './asmregex';
	import * as utils from './utils';

	export class AsmParser extends AsmRegex {
	constructor(compilerProps) {
	super();

	this.labelFindNonMips = /[.A-Z_a-z][\w$.]*/g;
	// MIPS labels can start with a $ sign, but other assemblers use $ to mean literal.
	this.labelFindMips = /[$.A-Z_a-z][\w$.]*/g;
	this.mipsLabelDefinition = /^\$[\w$.]+:/;
	this.dataDefn = /^\s*\.(string\|asciz\|ascii\|[1248]?byte\|short\|x?word\|long\|quad\|value\|zero)/;
	this.fileFind = /^\s\.file\s+(\d+)\s+"([^"]+)"(\s+"([^"]+)")?./;
	// Opcode expression here matches LLVM-style opcodes of the form `%blah = opcode`
	this.hasOpcodeRe = /^\s(%[$.A-Z_a-z][\w$.]\s=\s)?[A-Za-z]/;
	this.instructionRe = /^\s*[A-Za-z]+/;
	this.identifierFindRe = /[$.@A-Z_a-z][\dA-z]*/g;
	this.hasNvccOpcodeRe = /^\s*[@A-Za-z\|]/;
	this.definesFunction = /^\s\.(type.,\s[#%@]function\|proc\s+[.A-Z_a-z][\w$.]:.*)$/;
	this.definesGlobal = /^\s\.(?:globa?l\|GLB\|export)\s([.A-Z_a-z][\w$.]*)/;
	this.definesWeak = /^\s\.(?:weakext\|weak)\s([.A-Z_a-z][\w$.]*)/;
	this.indentedLabelDef = /^\s([$.A-Z_a-z][\w$.]):/;
	this.assignmentDef = /^\s([$.A-Z_a-z][\w$.])\s*=/;
	this.directive = /^\s\..$/;
	this.startAppBlock = /\s#APP./;
	this.endAppBlock = /\s#NO_APP./;
	this.startAsmNesting = /\s# Begin ASM./;
	this.endAsmNesting = /\s# End ASM./;
	this.cudaBeginDef = /\.(entry\|func)\s+(?:\([^)]\)\s)?([$.A-Z_a-z][\w$.]*)\($/;
	this.cudaEndDef = /^\s\)\s$/;

	this.binaryHideFuncRe = null;
	this.maxAsmLines = 5000;
	if (compilerProps) {
	const binaryHideFuncReValue = compilerProps('binaryHideFuncRe');
	if (binaryHideFuncReValue) {
	this.binaryHideFuncRe = new RegExp(binaryHideFuncReValue);
	}

	this.maxAsmLines = compilerProps('maxLinesOfAsm', this.maxAsmLines);
	}

	this.asmOpcodeRe = /^\s(?<address>[\da-f]+):\s(?<opcodes>([\da-f]{2} ?)+)\s(?<disasm>.)/;
	this.lineRe = /^(\/[^:]+):(?<line>\d+).*/;

	// labelRe is made very greedy as it's also used with demangled objdump
	// output (eg. it can have c++ template with <>).
	this.labelRe = /^([\da-f]+)\s+<(.+)>:$/;
	this.destRe = /\s([\da-f]+)\s+<([^+>]+)(\+0x[\da-f]+)?>$/;
	this.commentRe = /[#;]/;
	this.instOpcodeRe = /(\.inst\.?\w?)\s(.)/;
	}

	hasOpcode(line, inNvccCode) {
	// Remove any leading label definition...
	const match = line.match(this.labelDef);
	if (match) {
	line = line.substr(match[0].length);
	}
	// Strip any comments
	line = line.split(this.commentRe, 1)[0];
	// .inst generates an opcode, so also counts
	if (this.instOpcodeRe.test(line)) return true;
	// Detect assignment, that's not an opcode...
	if (this.assignmentDef.test(line)) return false;
	if (inNvccCode) {
	return !!this.hasNvccOpcodeRe.test(line);
	}
	return !!this.hasOpcodeRe.test(line);
	}

	labelFindFor(asmLines) {
	const isMips = _.any(asmLines, line => !!this.mipsLabelDefinition.test(line));
	return isMips ? this.labelFindMips : this.labelFindNonMips;
	}

	findUsedLabels(asmLines, filterDirectives) {
	const labelsUsed = {};
	const weakUsages = {};
	const labelFind = this.labelFindFor(asmLines);
	// The current label set is the set of labels all pointing at the current code, so:
	// foo:
	// bar:
	// add r0, r0, #1
	// in this case [foo, bar] would be the label set for the add instruction.
	let currentLabelSet = [];
	let inLabelGroup = false;
	let inCustomAssembly = 0;
	const startBlock = /\.cfi_startproc/;
	const endBlock = /\.cfi_endproc/;
	let inFunction = false;

	// Scan through looking for definite label usages (ones used by opcodes),
	// and ones that are weakly used: that is, their use is conditional on another label.
	// For example:
	// .foo: .string "moo"
	// .baz: .quad .foo
	// mov eax, .baz
	// In this case, the '.baz' is used by an opcode, and so is strongly used.
	// The '.foo' is weakly used by .baz.
	// Also, if we have random data definitions within a block of a function (between
	// cfi_startproc and cfi_endproc), we assume they are strong usages. This covers things
	// like jump tables embedded in ARM code.
	// See https://github.com/compiler-explorer/compiler-explorer/issues/2788
	for (let line of asmLines) {
	if (this.startAppBlock.test(line) \|\| this.startAsmNesting.test(line)) {
	inCustomAssembly++;
	} else if (this.endAppBlock.test(line) \|\| this.endAsmNesting.test(line)) {
	inCustomAssembly--;
	} else if (startBlock.test(line)) {
	inFunction = true;
	} else if (endBlock.test(line)) {
	inFunction = false;
	}

	if (inCustomAssembly > 0)
	line = this.fixLabelIndentation(line);

	let match = line.match(this.labelDef);
	if (match) {
	if (inLabelGroup)
	currentLabelSet.push(match[1]);
	else
	currentLabelSet = [match[1]];
	inLabelGroup = true;
	} else {
	inLabelGroup = false;
	}
	match = line.match(this.definesGlobal);
	if (!match)
	match = line.match(this.definesWeak);
	if (!match)
	match = line.match(this.cudaBeginDef);
	if (match) {
	labelsUsed[match[1]] = true;
	}

	const definesFunction = line.match(this.definesFunction);
	if (!definesFunction && (!line \|\| line[0] === '.')) continue;

	match = line.match(labelFind);
	if (!match) continue;

	if (!filterDirectives \|\| this.hasOpcode(line, false) \|\| definesFunction) {
	// Only count a label as used if it's used by an opcode, or else we're not filtering directives.
	for (const label of match) labelsUsed[label] = true;
	} else {
	// If we have a current label, then any subsequent opcode or data definition's labels are referred to
	// weakly by that label.
	const isDataDefinition = !!this.dataDefn.test(line);
	const isOpcode = this.hasOpcode(line, false);
	if (isDataDefinition \|\| isOpcode) {
	for (const currentLabel of currentLabelSet) {
	if (inFunction && isDataDefinition) {
	// Data definitions in the middle of code should be treated as if they were used strongly.
	for (const label of match) labelsUsed[label] = true;
	} else {
	if (!weakUsages[currentLabel]) weakUsages[currentLabel] = [];
	for (const label of match) weakUsages[currentLabel].push(label);
	}
	}
	}
	}
	}

	// Now follow the chains of used labels, marking any weak references they refer
	// to as also used. We iteratively do this until either no new labels are found,
	// or we hit a limit (only here to prevent a pathological case from hanging).
	function markUsed(label) {
	labelsUsed[label] = true;
	}

	const MaxLabelIterations = 10;
	for (let iter = 0; iter < MaxLabelIterations; ++iter) {
	let toAdd = [];
	_.each(labelsUsed, (t, label) => { // jshint ignore:line
	_.each(weakUsages[label], nowused => {
	if (labelsUsed[nowused]) return;
	toAdd.push(nowused);
	});
	});
	if (!toAdd) break;
	_.each(toAdd, markUsed);
	}
	return labelsUsed;
	}

	parseFiles(asmLines) {
	const files = {};
	for (const line of asmLines) {
	const match = line.match(this.fileFind);
	if (match) {
	const lineNum = parseInt(match[1]);
	if (match[4]) {
	// Clang-style file directive '.file X "dir" "filename"'
	files[lineNum] = match[2] + '/' + match[4];
	} else {
	files[lineNum] = match[2];
	}
	}
	}
	return files;
	}

	// Remove labels which do not have a definition.
	removeLabelsWithoutDefinition(asm, labelDefinitions) {
	_.each(asm, obj => {
	obj.labels = obj.labels.filter(label => labelDefinitions[label.name]);
	});
	}

	// Get labels which are used in the given line.
	getUsedLabelsInLine(line) {
	const labelsInLine = [];

	// Strip any comments
	const instruction = line.split(this.commentRe, 1)[0];

	// Remove the instruction.
	const params = instruction.replace(this.instructionRe, '');

	const removedCol = instruction.length - params.length + 1;
	params.replace(this.identifierFindRe, (label, index) => {
	const startCol = removedCol + index;
	labelsInLine.push({
	name: label,
	range: {
	startCol: startCol,
	endCol: startCol + label.length,
	},
	});
	});

	return labelsInLine;
	}

	processAsm(asmResult, filters) {
	if (filters.binary) return this.processBinaryAsm(asmResult, filters);

	const startTime = process.hrtime.bigint();

	if (filters.commentOnly) {
	// Remove any block comments that start and end on a line if we're removing comment-only lines.
	const blockComments = /^[\t ]\/\(\(?!\/)\|[^])\\/\s*/gm;
	asmResult = asmResult.replace(blockComments, '');
	}

	const asm = [];
	const labelDefinitions = {};

	let asmLines = utils.splitLines(asmResult);
	const startingLineCount = asmLines.length;
	if (filters.preProcessLines !== undefined) {
	asmLines = filters.preProcessLines(asmLines);
	}

	const labelsUsed = this.findUsedLabels(asmLines, filters.directives);
	const files = this.parseFiles(asmLines);
	let prevLabel = '';

	// Lines matching the following pattern are considered comments:
	// - starts with '#', '@', '//' or a single ';' (non repeated)
	// - starts with ';;' and the first non-whitespace before end of line is not #
	const commentOnly = /^\s(((#\|@\|\/\/).)\|(\/\.\\/)\|(;\s)\|(;[^;].)\|(;;\s[^\s#].*))$/;

	const commentOnlyNvcc = /^\s(((#\|;\|\/\/).)\|(\/\.\*\/))$/;
	const sourceTag = /^\s\.loc\s+(\d+)\s+(\d+)\s+(.)/;
	const sourceD2Tag = /^\s\.d2line\s+(\d+),?\s(\d)./;
	const source6502Dbg = /^\s\.dbg\s+line,\s"([^"]+)",\s*(\d+)/;
	const source6502DbgEnd = /^\s*\.dbg\s+line[^,]/;
	const sourceStab = /^\s\.stabn\s+(\d+),0,(\d+),./;
	const stdInLooking = /<stdin>\|^-$\|example\.[^/]+$\|<source>/;
	const endBlock = /\.(cfi_endproc\|data\|text\|section)/;
	let source = null;
	let mayRemovePreviousLabel = true;
	let keepInlineCode = false;

	let lastOwnSource = null;
	const dontMaskFilenames = filters.dontMaskFilenames;

	function maybeAddBlank() {
	const lastBlank = asm.length === 0 \|\| asm[asm.length - 1].text === '';
	if (!lastBlank)
	asm.push({text: '', source: null, labels: []});
	}

	function handleSource(line) {
	let match = line.match(sourceTag);
	if (match) {
	const file = utils.maskRootdir(files[parseInt(match[1])]);
	const sourceLine = parseInt(match[2]);
	if (file) {
	if (dontMaskFilenames) {
	source = {
	file: file,
	line: sourceLine,
	mainsource: !!stdInLooking.test(file),
	};
	} else {
	source = {
	file: !stdInLooking.test(file) ? file : null,
	line: sourceLine,
	};
	}
	const sourceCol = parseInt(match[3]);
	if (!isNaN(sourceCol) && sourceCol !== 0) {
	source.column = sourceCol;
	}
	} else {
	source = null;
	}
	} else {
	match = line.match(sourceD2Tag);
	if (match) {
	const sourceLine = parseInt(match[1]);
	source = {
	file: null,
	line: sourceLine,
	};
	}
	}
	}

	function handleStabs(line) {
	const match = line.match(sourceStab);
	if (!match) return;
	// cf http://www.math.utah.edu/docs/info/stabs_11.html#SEC48
	switch (parseInt(match[1])) {
	case 68:
	source = {file: null, line: parseInt(match[2])};
	break;
	case 132:
	case 100:
	source = null;
	prevLabel = null;
	break;
	}
	}

	function handle6502(line) {
	const match = line.match(source6502Dbg);
	if (match) {
	const file = utils.maskRootdir(match[1]);
	const sourceLine = parseInt(match[2]);
	if (dontMaskFilenames) {
	source = {
	file: file,
	line: sourceLine,
	mainsource: !!stdInLooking.test(file),
	};
	} else {
	source = {
	file: !stdInLooking.test(file) ? file : null,
	line: sourceLine,
	};
	}
	} else if (source6502DbgEnd.test(line)) {
	source = null;
	}
	}

	let inNvccDef = false;
	let inNvccCode = false;

	let inCustomAssembly = 0;

	// TODO: Make this function smaller
	// eslint-disable-next-line max-statements
	for (let line of asmLines) {
	if (line.trim() === '') {
	maybeAddBlank();
	continue;
	}

	if (this.startAppBlock.test(line) \|\| this.startAsmNesting.test(line)) {
	inCustomAssembly++;
	} else if (this.endAppBlock.test(line) \|\| this.endAsmNesting.test(line)) {
	inCustomAssembly--;
	}

	handleSource(line);
	handleStabs(line);
	handle6502(line);

	if (source && (source.file === null \|\| source.mainsource)) {
	lastOwnSource = source;
	}

	if (endBlock.test(line) \|\| (inNvccCode && /}/.test(line))) {
	source = null;
	prevLabel = null;
	lastOwnSource = null;
	}

	if (filters.libraryCode && !lastOwnSource && source && (source.file !== null) && !source.mainsource) {
	if (mayRemovePreviousLabel && asm.length > 0) {
	const lastLine = asm[asm.length - 1];

	const labelDef = lastLine.text
	? lastLine.text.match(this.labelDef) : null;

	if (labelDef) {
	asm.pop();
	keepInlineCode = false;
	delete labelDefinitions[labelDef[1]];
	} else {
	keepInlineCode = true;
	}
	mayRemovePreviousLabel = false;
	}

	if (!keepInlineCode) {
	continue;
	}
	} else {
	mayRemovePreviousLabel = true;
	}

	if (filters.commentOnly &&
	((commentOnly.test(line) && !inNvccCode) \|\|
	(commentOnlyNvcc.test(line) && inNvccCode))
	) {
	continue;
	}

	if (inCustomAssembly > 0)
	line = this.fixLabelIndentation(line);

	let match = line.match(this.labelDef);
	if (!match) match = line.match(this.assignmentDef);
	if (!match) {
	match = line.match(this.cudaBeginDef);
	if (match) {
	inNvccDef = true;
	inNvccCode = true;
	}
	}
	if (match) {
	// It's a label definition.
	if (labelsUsed[match[1]] === undefined) {
	// It's an unused label.
	if (filters.labels) {
	continue;
	}
	} else {
	// A used label.
	prevLabel = match;
	labelDefinitions[match[1]] = asm.length + 1;
	}
	}
	if (inNvccDef) {
	if (this.cudaEndDef.test(line))
	inNvccDef = false;
	} else if (!match && filters.directives) {
	// Check for directives only if it wasn't a label; the regexp would
	// otherwise misinterpret labels as directives.
	if (this.dataDefn.test(line) && prevLabel) {
	// We're defining data that's being used somewhere.
	} else {
	// .inst generates an opcode, so does not count as a directive
	if (this.directive.test(line) && !this.instOpcodeRe.test(line)) {
	continue;
	}
	}
	}

	line = utils.expandTabs(line);
	const text = AsmRegex.filterAsmLine(line, filters);

	const labelsInLine = match ? [] : this.getUsedLabelsInLine(text);

	asm.push({
	text: text,
	source: this.hasOpcode(line, inNvccCode) ? source : null,
	labels: labelsInLine,
	});
	}

	this.removeLabelsWithoutDefinition(asm, labelDefinitions);

	const endTime = process.hrtime.bigint();
	return {
	asm: asm,
	labelDefinitions: labelDefinitions,
	parsingTime: ((endTime - startTime) / BigInt(1000000)).toString(),
	filteredCount: startingLineCount - asm.length,
	};
	}

	fixLabelIndentation(line) {
	const match = line.match(this.indentedLabelDef);
	if (match) {
	return line.replace(/^\s+/, '');
	} else {
	return line;
	}
	}

	isUserFunction(func) {
	if (this.binaryHideFuncRe === null) return true;

	return !this.binaryHideFuncRe.test(func);
	}

	processBinaryAsm(asmResult, filters) {
	const startTime = process.hrtime.bigint();
	const asm = [];
	const labelDefinitions = {};
	const dontMaskFilenames = filters.dontMaskFilenames;

	let asmLines = asmResult.split('\n');
	const startingLineCount = asmLines.length;
	let source = null;
	let func = null;
	let mayRemovePreviousLabel = true;

	// Handle "error" documents.
	if (asmLines.length === 1 && asmLines[0][0] === '<') {
	return {
	asm: [{text: asmLines[0], source: null}],
	};
	}

	if (filters.preProcessBinaryAsmLines !== undefined) {
	asmLines = filters.preProcessBinaryAsmLines(asmLines);
	}

	for (const line of asmLines) {
	const labelsInLine = [];

	if (asm.length >= this.maxAsmLines) {
	if (asm.length === this.maxAsmLines) {
	asm.push({
	text: '[truncated; too many lines]',
	source: null,
	labels: labelsInLine,
	});
	}
	continue;
	}
	let match = line.match(this.lineRe);
	if (match) {
	if (dontMaskFilenames) {
	source = {
	file: utils.maskRootdir(match[1]),
	line: parseInt(match.groups.line),
	mainsource: true,
	};
	} else {
	source = {file: null, line: parseInt(match.groups.line), mainsource: true};
	}
	continue;
	}

	match = line.match(this.labelRe);
	if (match) {
	func = match[2];
	if (this.isUserFunction(func)) {
	asm.push({
	text: func + ':',
	source: null,
	labels: labelsInLine,
	});
	labelDefinitions[func] = asm.length;
	}
	continue;
	}

	if (func && line === `${func}():`) continue;

	if (!func \|\| !this.isUserFunction(func)) continue;

	// note: normally the source.file will be null if it's code from example.ext
	// but with filters.dontMaskFilenames it will be filled with the actual filename
	// instead we can test source.mainsource in that situation
	const isMainsource = source && ((source.file === null) \|\| source.mainsource);
	if (filters.libraryCode && !isMainsource) {
	if (mayRemovePreviousLabel && asm.length > 0) {
	const lastLine = asm[asm.length - 1];
	if (lastLine.text && this.labelDef.test(lastLine.text)) {
	asm.pop();
	}
	mayRemovePreviousLabel = false;
	}
	continue;
	} else {
	mayRemovePreviousLabel = true;
	}

	match = line.match(this.asmOpcodeRe);
	if (match) {
	const address = parseInt(match.groups.address, 16);
	const opcodes = match.groups.opcodes.split(' ').filter(x => !!x);
	const disassembly = ' ' + AsmRegex.filterAsmLine(match.groups.disasm, filters);
	const destMatch = line.match(this.destRe);
	if (destMatch) {
	const labelName = destMatch[2];
	const startCol = disassembly.indexOf(labelName) + 1;
	labelsInLine.push({
	name: labelName,
	range: {
	startCol: startCol,
	endCol: startCol + labelName.length,
	},
	});
	}
	asm.push({
	opcodes: opcodes,
	address: address,
	text: disassembly,
	source: source,
	labels: labelsInLine,
	});
	}
	}

	this.removeLabelsWithoutDefinition(asm, labelDefinitions);

	const endTime = process.hrtime.bigint();

	return {
	asm: asm,
	labelDefinitions: labelDefinitions,
	parsingTime: ((endTime - startTime) / BigInt(1000000)).toString(),
	filteredCount: startingLineCount - asm.length,
	};
	}

	process(asm, filters) {
	return this.processAsm(asm, filters);
	}
	}