| // Copyright (c) 2022, Compiler Explorer Authors |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| |
| import _ from 'underscore'; |
| |
| import { |
| LLVMOptPipelineBackendOptions, |
| LLVMOptPipelineResults, |
| Pass, |
| } from '../../types/compilation/llvm-opt-pipeline-output.interfaces.js'; |
| import {ParseFiltersAndOutputOptions} from '../../types/features/filters.interfaces.js'; |
| import {ResultLine} from '../../types/resultline/resultline.interfaces.js'; |
| import {assert} from '../assert.js'; |
| |
| // Note(jeremy-rifkin): |
| // For now this filters out a bunch of metadata we aren't interested in |
| // Maybe at a later date we'll want to allow user-controllable filters |
| // It'd be helpful to better display annotations about branch weights |
| // and parse debug info too at some point. |
| |
| // Just a sanity check |
| function passesMatch(before: string, after: string) { |
| assert(before.startsWith('IR Dump Before ')); |
| assert(after.startsWith('IR Dump After ')); |
| before = before.slice('IR Dump Before '.length); |
| after = after.slice('IR Dump After '.length); |
| // Observed to happen in clang 13+ for LoopDeletionPass |
| // Also for SimpleLoopUnswitchPass |
| if (after.endsWith(' (invalidated)')) { |
| after = after.slice(0, after.length - ' (invalidated)'.length); |
| } |
| return before === after; |
| } |
| |
| // Ir Dump for a pass with raw lines |
| type PassDump = { |
| header: string; |
| affectedFunction: string | undefined; |
| machine: boolean; |
| lines: ResultLine[]; |
| }; |
| // Ir Dump for a pass with raw lines broken into affected functions (or "<loop>") |
| type SplitPassDump = { |
| header: string; |
| machine: boolean; |
| functions: Record<string, ResultLine[]>; |
| }; |
| |
| export class LlvmPassDumpParser { |
| filters: RegExp[]; |
| lineFilters: RegExp[]; |
| debugInfoFilters: RegExp[]; |
| debugInfoLineFilters: RegExp[]; |
| metadataLineFilters: RegExp[]; |
| irDumpHeader: RegExp; |
| machineCodeDumpHeader: RegExp; |
| functionDefine: RegExp; |
| machineFunctionBegin: RegExp; |
| functionEnd: RegExp; |
| //label: RegExp; |
| //instruction: RegExp; |
| |
| constructor(compilerProps) { |
| //this.maxIrLines = 5000; |
| //if (compilerProps) { |
| // this.maxIrLines = compilerProps('maxLinesOfAsm', this.maxIrLines); |
| //} |
| |
| this.filters = [ |
| /^; ModuleID = '.+'$/, // module id line |
| /^(source_filename|target datalayout|target triple) = ".+"$/, // module metadata |
| /^; Function Attrs: .+$/, // function attributes |
| /^declare .+$/, // declare directives |
| /^attributes #\d+ = { .+ }$/, // attributes directive |
| ]; |
| this.lineFilters = [ |
| /,? #\d+((?=( {)?$))/, // attribute annotation |
| ]; |
| |
| // Additional filters conditionally enabled by `filterDebugInfo` |
| this.debugInfoFilters = [ |
| /^\s+call void @llvm.dbg.+$/, // dbg calls |
| /^\s+DBG_.+$/, // dbg pseudo-instructions |
| /^(!\d+) = (?:distinct )?!DI([A-Za-z]+)\(([^)]+?)\)/, // meta |
| /^(!\d+) = (?:distinct )?!{.*}/, // meta |
| /^(![.A-Z_a-z-]+) = (?:distinct )?!{.*}/, // meta |
| ]; |
| this.debugInfoLineFilters = [ |
| /,? !dbg !\d+/, // instruction/function debug metadata |
| /,? debug-location !\d+/, // Direct source locations like 'example.cpp:8:1' not filtered |
| ]; |
| |
| // Conditionally enabled by `filterIRMetadata` |
| this.metadataLineFilters = [ |
| /,?(?: ![\d.A-Za-z]+){2}/, // any instruction metadata |
| ]; |
| |
| // Ir dump headers look like "*** IR Dump After XYZ ***" |
| // Machine dump headers look like "# *** IR Dump After XYZ ***:", possibly with a comment or "(function: xyz)" |
| // or "(loop: %x)" at the end |
| this.irDumpHeader = /^\*{3} (.+) \*{3}(?:\s+\((?:function: |loop: )(%?[\w$.]+)\))?(?:;.+)?$/; |
| this.machineCodeDumpHeader = /^# \*{3} (.+) \*{3}:$/; |
| // Ir dumps are "define T @_Z3fooi(...) . .. {" or "# Machine code for function _Z3fooi: <attributes>" |
| // Some interesting edge cases found when testing: |
| // `define internal %"struct.libassert::detail::assert_static_parameters"* @"_ZZ4mainENK3$_0clEv"( |
| // %class.anon* nonnull dereferenceable(1) %0) #5 align 2 !dbg !2 { ... }` |
| // `define internal void @__cxx_global_var_init.1() #0 section ".text.startup" {` |
| this.functionDefine = /^define .+ @([\w.]+|"[^"]+")\(.+$/; |
| this.machineFunctionBegin = /^# Machine code for function ([\w$.]+):.*$/; |
| // Functions end with either a closing brace or "# End machine code for function _Z3fooi." |
| this.functionEnd = /^(?:}|# End machine code for function ([\w$.]+).)$/; |
| // Either "123:" with a possible comment or something like "bb.3 (%ir-block.13):" |
| //this.label = /^(?:\d+:(\s+;.+)?|\w+.+:)$/; |
| //this.instruction = /^\s+.+$/; |
| } |
| |
| breakdownOutputIntoPassDumps(ir: ResultLine[]) { |
| // break down output by "*** IR Dump After XYZ ***" markers |
| const raw_passes: PassDump[] = []; |
| let pass: PassDump | null = null; |
| let lastWasBlank = false; // skip duplicate blank lines |
| for (const line of ir) { |
| // stop once the machine code passes start, can't handle these yet |
| //if (this.machineCodeDumpHeader.test(line.text)) { |
| // break; |
| //} |
| const irMatch = line.text.match(this.irDumpHeader); |
| const machineMatch = line.text.match(this.machineCodeDumpHeader); |
| const header = irMatch || machineMatch; |
| if (header) { |
| if (pass !== null) { |
| raw_passes.push(pass); |
| } |
| pass = { |
| header: header[1], |
| // in dump full module mode some headers are annotated for what function (or loop) they operate on |
| // if we aren't in full module mode or this is a header that isn't function/loop specific this will |
| // be undefined |
| affectedFunction: header[2], |
| machine: !!machineMatch, |
| lines: [], |
| }; |
| lastWasBlank = true; // skip leading newlines after the header |
| } else { |
| assert(pass); |
| if (line.text.trim() === '') { |
| if (!lastWasBlank) { |
| pass.lines.push(line); |
| } |
| lastWasBlank = true; |
| } else { |
| pass.lines.push(line); |
| lastWasBlank = false; |
| } |
| } |
| } |
| if (pass !== null) { |
| raw_passes.push(pass); |
| } |
| return raw_passes; |
| } |
| |
| breakdownPassDumpsIntoFunctions(dump: PassDump) { |
| // break up down dumps for each pass into functions (or absence of functions in the case of loop passes) |
| // we have three cases of ir dumps to consider: |
| // - Most passes dump a single function |
| // - Some passes dump every function |
| // - Some loop passes only dump loops - these are challenging to deal with |
| const pass: SplitPassDump = { |
| header: dump.header, |
| machine: dump.machine, |
| functions: {}, |
| }; |
| let func: { |
| name: string; |
| lines: ResultLine[]; |
| } | null = null; |
| for (const line of dump.lines) { |
| const irFnMatch = line.text.match(this.functionDefine); |
| const machineFnMatch = line.text.match(this.machineFunctionBegin); |
| // function define line |
| if (irFnMatch || machineFnMatch) { |
| // if the last function has not been closed... |
| assert(func === null); |
| func = { |
| // eslint-disable-next-line @typescript-eslint/no-non-null-assertion |
| name: (irFnMatch || machineFnMatch)![1], |
| lines: [line], // include the current line |
| }; |
| } else if (line.text.startsWith('; Preheader:')) { |
| // loop dump |
| // every line in this dump should be part of the loop, exit condition will be end of the for loop |
| assert(func === null); |
| func = { |
| name: '<loop>', |
| lines: [line], // include the current line |
| }; |
| } else { |
| // close function |
| if (this.functionEnd.test(line.text.trim())) { |
| // if not currently in a function |
| assert(func); |
| const {name, lines} = func; |
| lines.push(line); // include the } |
| // loop dumps can't be terminated with } |
| assert(name !== '<loop>'); |
| // somehow dumped twice? |
| assert(!(name in pass.functions)); |
| pass.functions[name] = lines; |
| func = null; |
| } else { |
| // lines outside a function definition |
| if (func === null) { |
| if (line.text.trim() === '') { |
| // may be a blank line |
| continue; |
| } else { |
| ///console.log('ignoring ------>', line.text); |
| // ignore |
| continue; |
| } |
| } |
| func.lines.push(line); |
| } |
| } |
| } |
| // unterminated function, either a loop dump or an error |
| if (func !== null) { |
| assert(func.name === '<loop>'); |
| // loop dumps must be alone |
| assert(Object.entries(pass.functions).length === 0); |
| pass.functions[func.name] = func.lines; |
| } |
| return pass; |
| } |
| |
| breakdownIntoPassDumpsByFunction(passDumps: SplitPassDump[]) { |
| // Currently we have an array of passes with a map of functions altered in each pass |
| // We want to transpose to a map of functions with an array of passes on the function |
| const passDumpsByFunction: Record<string, PassDump[]> = {}; |
| // I'm assuming loop dumps should correspond to the previous function dumped |
| let previousFunction: string | null = null; |
| for (const pass of passDumps) { |
| const {header, machine, functions} = pass; |
| const functionEntries = Object.entries(functions); |
| for (const [function_name, lines] of functionEntries) { |
| const name: string | null = function_name === '<loop>' ? previousFunction : function_name; |
| assert(name !== null, 'Loop dump without preceding dump'); |
| if (!(name in passDumpsByFunction)) { |
| passDumpsByFunction[name] = []; |
| } |
| passDumpsByFunction[name].push({ |
| header, |
| affectedFunction: undefined, |
| machine, |
| lines, |
| }); |
| } |
| if (functionEntries.length === 0) { |
| // This can happen as a result of "Printing <null> Function" |
| //throw 'Internal error during breakdownOutput (2)'; |
| } else if (functionEntries.length === 1) { |
| const name = functionEntries[0][0]; |
| if (name !== '<loop>') { |
| previousFunction = name; |
| } |
| } else if (!header.endsWith('(invalidated)')) { |
| // Issue #4195, before SimpleLoopUnswitchPass dumps just the loop but after can dump the full IR if the |
| // loop is invalidated. The next pass can also be loop-only and should be a loop in the same function |
| // so we preserve function name. |
| previousFunction = null; |
| } |
| } |
| return passDumpsByFunction; |
| } |
| |
| // used for full module dump mode |
| associateFullDumpsWithFunctions(passDumps: PassDump[]) { |
| // Currently we have an array of passes that'll have target annotations |
| const passDumpsByFunction: Record<string, PassDump[]> = {}; |
| // First figure out what all the functions are |
| for (const pass of passDumps) { |
| // If it starts with a % it's a loop |
| if (pass.affectedFunction && !pass.affectedFunction.startsWith('%')) { |
| passDumpsByFunction[pass.affectedFunction] = []; |
| } |
| } |
| passDumpsByFunction['<Full Module>'] = []; |
| // I'm assuming loop dumps should correspond to the previous function dumped |
| //const functions = Object.keys(passDumpsByFunction); |
| let previousFunction: string | null = null; |
| for (const pass of passDumps) { |
| const {header, affectedFunction, machine, lines} = pass; |
| if (affectedFunction) { |
| let fn = affectedFunction; |
| if (affectedFunction.startsWith('%')) { |
| assert(previousFunction !== null); |
| fn = previousFunction; |
| } |
| assert(fn in passDumpsByFunction); |
| [passDumpsByFunction[fn], passDumpsByFunction['<Full Module>']].map(entry => |
| entry.push({ |
| header: `${header} (${fn})`, |
| affectedFunction: fn, |
| machine, |
| lines, |
| }), |
| ); |
| previousFunction = fn; |
| } else { |
| // applies to everything |
| for (const [_, entry] of Object.entries(passDumpsByFunction)) { |
| entry.push({ |
| header, |
| affectedFunction: undefined, |
| machine, |
| lines, |
| }); |
| } |
| previousFunction = null; |
| } |
| } |
| return passDumpsByFunction; |
| } |
| |
| matchPassDumps(passDumpsByFunction: Record<string, PassDump[]>) { |
| // We have all the passes for each function, now we will go through each function and match the before/after |
| // dumps |
| const final_output: LLVMOptPipelineResults = {}; |
| for (const [function_name, passDumps] of Object.entries(passDumpsByFunction)) { |
| // I had a fantastic chunk2 method to iterate the passes in chunks of 2 but I've been foiled by an edge |
| // case: At least the "Delete dead loops" may only print a before dump and no after dump |
| const passes: Pass[] = []; |
| // i incremented appropriately later |
| for (let i = 0; i < passDumps.length; ) { |
| const pass: Pass = { |
| name: '', |
| machine: false, |
| after: [], |
| before: [], |
| irChanged: true, |
| }; |
| const current_dump = passDumps[i]; |
| const next_dump = i < passDumps.length - 1 ? passDumps[i + 1] : null; |
| if (current_dump.header.startsWith('IR Dump After ')) { |
| // An after dump without a before dump - I don't think this can happen but we'll handle just in case |
| pass.name = current_dump.header.slice('IR Dump After '.length); |
| pass.after = current_dump.lines; |
| i++; |
| } else if (current_dump.header.startsWith('IR Dump Before ')) { |
| if (next_dump !== null && next_dump.header.startsWith('IR Dump After ')) { |
| assert( |
| passesMatch(current_dump.header, next_dump.header), |
| '', |
| current_dump.header, |
| next_dump.header, |
| ); |
| assert(current_dump.machine === next_dump.machine); |
| pass.name = current_dump.header.slice('IR Dump Before '.length); |
| pass.before = current_dump.lines; |
| pass.after = next_dump.lines; |
| i += 2; |
| } else { |
| // Before with no after - this can happen with Delete dead loops |
| pass.name = current_dump.header.slice('IR Dump Before '.length); |
| pass.before = current_dump.lines; |
| i++; |
| } |
| } else { |
| assert(false, 'Unexpected pass header', current_dump.header); |
| } |
| pass.machine = current_dump.machine; |
| |
| // The first machine pass outputs the same MIR before and after, |
| // making it seem like it did nothing. |
| // Assuming we ran some IR pass before this, grab its output as |
| // the before text, ensuring the first MIR pass appears when |
| // inconsequential passes are filtered away. |
| const previousPass = passes.at(-1); |
| if (previousPass && previousPass.machine !== pass.machine) { |
| pass.before = previousPass.after; |
| } |
| |
| // check for equality |
| pass.irChanged = pass.before.map(x => x.text).join('\n') !== pass.after.map(x => x.text).join('\n'); |
| passes.push(pass); |
| } |
| //console.dir(passes, { |
| // depth: 5, |
| // maxArrayLength: 100000 |
| //}); |
| assert(!(function_name in final_output), 'xxx'); |
| final_output[function_name] = passes; |
| } |
| return final_output; |
| } |
| |
| breakdownOutput(ir: ResultLine[], llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) { |
| // break down output by "*** IR Dump After XYZ ***" markers |
| const raw_passes = this.breakdownOutputIntoPassDumps(ir); |
| if (llvmOptPipelineOptions.fullModule) { |
| const passDumpsByFunction = this.associateFullDumpsWithFunctions(raw_passes); |
| // Match before / after pass dumps and we're done |
| return this.matchPassDumps(passDumpsByFunction); |
| } else { |
| // Further break down by functions in each dump |
| const passDumps = raw_passes.map(this.breakdownPassDumpsIntoFunctions.bind(this)); |
| // Transform array of passes containing multiple functions into a map from functions to arrays of passes on |
| // those functions |
| const passDumpsByFunction = this.breakdownIntoPassDumpsByFunction(passDumps); |
| // Match before / after pass dumps and we're done |
| return this.matchPassDumps(passDumpsByFunction); |
| } |
| } |
| |
| applyIrFilters(ir: ResultLine[], llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) { |
| // Additional filters conditionally enabled by `filterDebugInfo`/`filterIRMetadata` |
| let filters = this.filters; |
| let lineFilters = this.lineFilters; |
| if (llvmOptPipelineOptions.filterDebugInfo) { |
| filters = filters.concat(this.debugInfoFilters); |
| lineFilters = lineFilters.concat(this.debugInfoLineFilters); |
| } |
| if (llvmOptPipelineOptions.filterIRMetadata) { |
| lineFilters = lineFilters.concat(this.metadataLineFilters); |
| } |
| |
| return ( |
| ir |
| // whole-line filters |
| .filter(line => filters.every(re => line.text.match(re) === null)) |
| // intra-line filters |
| .map(_line => { |
| let line = _line.text; |
| // eslint-disable-next-line no-constant-condition |
| while (true) { |
| let newLine = line; |
| for (const re of lineFilters) { |
| newLine = newLine.replace(re, ''); |
| } |
| if (newLine === line) { |
| break; |
| } else { |
| line = newLine; |
| } |
| } |
| _line.text = line; |
| return _line; |
| }) |
| ); |
| } |
| |
| process( |
| output: ResultLine[], |
| _: ParseFiltersAndOutputOptions, |
| llvmOptPipelineOptions: LLVMOptPipelineBackendOptions, |
| ) { |
| // Crop out any junk before the pass dumps (e.g. warnings) |
| const ir = output.slice( |
| output.findIndex(line => line.text.match(this.irDumpHeader) || line.text.match(this.machineCodeDumpHeader)), |
| ); |
| const preprocessed_lines = this.applyIrFilters(ir, llvmOptPipelineOptions); |
| return this.breakdownOutput(preprocessed_lines, llvmOptPipelineOptions); |
| } |
| } |