blob: fc8fe209957faa7e846e48a0498029306d98e0ad [file] [log] [blame] [raw]
// Copyright (c) 2022, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
import _ from 'underscore';
import {
LLVMOptPipelineBackendOptions,
LLVMOptPipelineResults,
Pass,
} from '../../types/compilation/llvm-opt-pipeline-output.interfaces';
import {ParseFiltersAndOutputOptions} from '../../types/features/filters.interfaces';
import {ResultLine} from '../../types/resultline/resultline.interfaces';
import {assert} from '../assert';
// Note(jeremy-rifkin):
// For now this filters out a bunch of metadata we aren't interested in
// Maybe at a later date we'll want to allow user-controllable filters
// It'd be helpful to better display annotations about branch weights
// and parse debug info too at some point.
// Just a sanity check
function passesMatch(before: string, after: string) {
assert(before.startsWith('IR Dump Before '));
assert(after.startsWith('IR Dump After '));
before = before.slice('IR Dump Before '.length);
after = after.slice('IR Dump After '.length);
// Observed to happen in clang 13+ for LoopDeletionPass
if (after.endsWith(' (invalidated)')) {
after = after.slice(0, after.length - ' (invalidated)'.length);
}
return before === after;
}
// Ir Dump for a pass with raw lines
type PassDump = {
header: string;
affectedFunction: string | undefined;
machine: boolean;
lines: ResultLine[];
};
// Ir Dump for a pass with raw lines broken into affected functions (or "<loop>")
type SplitPassDump = {
header: string;
machine: boolean;
functions: Record<string, ResultLine[]>;
};
export class LlvmPassDumpParser {
filters: RegExp[];
lineFilters: RegExp[];
debugInfoFilters: RegExp[];
debugInfoLineFilters: RegExp[];
metadataLineFilters: RegExp[];
irDumpHeader: RegExp;
machineCodeDumpHeader: RegExp;
functionDefine: RegExp;
machineFunctionBegin: RegExp;
functionEnd: RegExp;
//label: RegExp;
//instruction: RegExp;
constructor(compilerProps) {
//this.maxIrLines = 5000;
//if (compilerProps) {
// this.maxIrLines = compilerProps('maxLinesOfAsm', this.maxIrLines);
//}
this.filters = [
/^; ModuleID = '.+'$/, // module id line
/^(source_filename|target datalayout|target triple) = ".+"$/, // module metadata
/^; Function Attrs: .+$/, // function attributes
/^declare .+$/, // declare directives
/^attributes #\d+ = { .+ }$/, // attributes directive
];
this.lineFilters = [
/,? #\d+((?=( {)?$))/, // attribute annotation
];
// Additional filters conditionally enabled by `filterDebugInfo`
this.debugInfoFilters = [
/^\s+call void @llvm.dbg.+$/, // dbg calls
/^\s+DBG_.+$/, // dbg pseudo-instructions
/^(!\d+) = (?:distinct )?!DI([A-Za-z]+)\(([^)]+?)\)/, // meta
/^(!\d+) = (?:distinct )?!{.*}/, // meta
/^(![.A-Z_a-z-]+) = (?:distinct )?!{.*}/, // meta
];
this.debugInfoLineFilters = [
/,? !dbg !\d+/, // instruction/function debug metadata
/,? debug-location !\d+/, // Direct source locations like 'example.cpp:8:1' not filtered
];
// Conditionally enabled by `filterIRMetadata`
this.metadataLineFilters = [
/,?(?: ![\d.A-Za-z]+){2}/, // any instruction metadata
];
// Ir dump headers look like "*** IR Dump After XYZ ***"
// Machine dump headers look like "# *** IR Dump After XYZ ***:", possibly with a comment or "(function: xyz)"
// or "(loop: %x)" at the end
this.irDumpHeader = /^\*{3} (.+) \*{3}(?:\s+\((?:function: |loop: )(%?[\w$.]+)\))?(?:;.+)?$/;
this.machineCodeDumpHeader = /^# \*{3} (.+) \*{3}:$/;
// Ir dumps are "define T @_Z3fooi(...) . .. {" or "# Machine code for function _Z3fooi: <attributes>"
// Some interesting edge cases found when testing:
// `define internal %"struct.libassert::detail::assert_static_parameters"* @"_ZZ4mainENK3$_0clEv"(
// %class.anon* nonnull dereferenceable(1) %0) #5 align 2 !dbg !2 { ... }`
// `define internal void @__cxx_global_var_init.1() #0 section ".text.startup" {`
this.functionDefine = /^define .+ @([\w.]+|"[^"]+")\(.+$/;
this.machineFunctionBegin = /^# Machine code for function ([\w$.]+):.*$/;
// Functions end with either a closing brace or "# End machine code for function _Z3fooi."
this.functionEnd = /^(?:}|# End machine code for function ([\w$.]+).)$/;
// Either "123:" with a possible comment or something like "bb.3 (%ir-block.13):"
//this.label = /^(?:\d+:(\s+;.+)?|\w+.+:)$/;
//this.instruction = /^\s+.+$/;
}
breakdownOutputIntoPassDumps(ir: ResultLine[]) {
// break down output by "*** IR Dump After XYZ ***" markers
const raw_passes: PassDump[] = [];
let pass: PassDump | null = null;
let lastWasBlank = false; // skip duplicate blank lines
for (const line of ir) {
// stop once the machine code passes start, can't handle these yet
//if (this.machineCodeDumpHeader.test(line.text)) {
// break;
//}
const irMatch = line.text.match(this.irDumpHeader);
const machineMatch = line.text.match(this.machineCodeDumpHeader);
const header = irMatch || machineMatch;
if (header) {
if (pass !== null) {
raw_passes.push(pass);
}
pass = {
header: header[1],
// in dump full module mode some headers are annotated for what function (or loop) they operate on
// if we aren't in full module mode or this is a header that isn't function/loop specific this will
// be undefined
affectedFunction: header[2],
machine: !!machineMatch,
lines: [],
};
lastWasBlank = true; // skip leading newlines after the header
} else {
assert(pass);
if (line.text.trim() === '') {
if (!lastWasBlank) {
pass.lines.push(line);
}
lastWasBlank = true;
} else {
pass.lines.push(line);
lastWasBlank = false;
}
}
}
if (pass !== null) {
raw_passes.push(pass);
}
return raw_passes;
}
breakdownPassDumpsIntoFunctions(dump: PassDump) {
// break up down dumps for each pass into functions (or absence of functions in the case of loop passes)
// we have three cases of ir dumps to consider:
// - Most passes dump a single function
// - Some passes dump every function
// - Some loop passes only dump loops - these are challenging to deal with
const pass: SplitPassDump = {
header: dump.header,
machine: dump.machine,
functions: {},
};
let func: {
name: string;
lines: ResultLine[];
} | null = null;
for (const line of dump.lines) {
const irFnMatch = line.text.match(this.functionDefine);
const machineFnMatch = line.text.match(this.machineFunctionBegin);
// function define line
if (irFnMatch || machineFnMatch) {
// if the last function has not been closed...
assert(func === null);
func = {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
name: (irFnMatch || machineFnMatch)![1],
lines: [line], // include the current line
};
} else if (line.text.startsWith('; Preheader:')) {
// loop dump
// every line in this dump should be part of the loop, exit condition will be end of the for loop
assert(func === null);
func = {
name: '<loop>',
lines: [line], // include the current line
};
} else {
// close function
if (this.functionEnd.test(line.text.trim())) {
// if not currently in a function
assert(func);
const {name, lines} = func;
lines.push(line); // include the }
// loop dumps can't be terminated with }
assert(name !== '<loop>');
// somehow dumped twice?
assert(!(name in pass.functions));
pass.functions[name] = lines;
func = null;
} else {
// lines outside a function definition
if (func === null) {
if (line.text.trim() === '') {
// may be a blank line
continue;
} else {
///console.log('ignoring ------>', line.text);
// ignore
continue;
}
}
func.lines.push(line);
}
}
}
// unterminated function, either a loop dump or an error
if (func !== null) {
assert(func.name === '<loop>');
// loop dumps must be alone
assert(Object.entries(pass.functions).length === 0);
pass.functions[func.name] = func.lines;
}
return pass;
}
breakdownIntoPassDumpsByFunction(passDumps: SplitPassDump[]) {
// Currently we have an array of passes with a map of functions altered in each pass
// We want to transpose to a map of functions with an array of passes on the function
const passDumpsByFunction: Record<string, PassDump[]> = {};
// I'm assuming loop dumps should correspond to the previous function dumped
let previousFunction: string | null = null;
for (const pass of passDumps) {
const {header, machine, functions} = pass;
const functionEntries = Object.entries(functions);
for (const [function_name, lines] of functionEntries) {
const name: string | null = function_name === '<loop>' ? previousFunction : function_name;
assert(name !== null, 'Loop dump without preceding dump');
if (!(name in passDumpsByFunction)) {
passDumpsByFunction[name] = [];
}
passDumpsByFunction[name].push({
header,
affectedFunction: undefined,
machine,
lines,
});
}
if (functionEntries.length === 0) {
// This can happen as a result of "Printing <null> Function"
//throw 'Internal error during breakdownOutput (2)';
} else if (functionEntries.length === 1) {
const name = functionEntries[0][0];
if (name !== '<loop>') {
previousFunction = name;
}
} else {
previousFunction = null;
}
}
return passDumpsByFunction;
}
// used for full module dump mode
associateFullDumpsWithFunctions(passDumps: PassDump[]) {
// Currently we have an array of passes that'll have target annotations
const passDumpsByFunction: Record<string, PassDump[]> = {};
// First figure out what all the functions are
for (const pass of passDumps) {
// If it starts with a % it's a loop
if (pass.affectedFunction && !pass.affectedFunction.startsWith('%')) {
passDumpsByFunction[pass.affectedFunction] = [];
}
}
passDumpsByFunction['<Full Module>'] = [];
// I'm assuming loop dumps should correspond to the previous function dumped
//const functions = Object.keys(passDumpsByFunction);
let previousFunction: string | null = null;
for (const pass of passDumps) {
const {header, affectedFunction, machine, lines} = pass;
if (affectedFunction) {
let fn = affectedFunction;
if (affectedFunction.startsWith('%')) {
assert(previousFunction !== null);
fn = previousFunction;
}
assert(fn in passDumpsByFunction);
[passDumpsByFunction[fn], passDumpsByFunction['<Full Module>']].map(entry =>
entry.push({
header: `${header} (${fn})`,
affectedFunction: fn,
machine,
lines,
}),
);
previousFunction = fn;
} else {
// applies to everything
for (const [_, entry] of Object.entries(passDumpsByFunction)) {
entry.push({
header,
affectedFunction: undefined,
machine,
lines,
});
}
previousFunction = null;
}
}
return passDumpsByFunction;
}
matchPassDumps(passDumpsByFunction: Record<string, PassDump[]>) {
// We have all the passes for each function, now we will go through each function and match the before/after
// dumps
const final_output: LLVMOptPipelineResults = {};
for (const [function_name, passDumps] of Object.entries(passDumpsByFunction)) {
// I had a fantastic chunk2 method to iterate the passes in chunks of 2 but I've been foiled by an edge
// case: At least the "Delete dead loops" may only print a before dump and no after dump
const passes: Pass[] = [];
// i incremented appropriately later
for (let i = 0; i < passDumps.length; ) {
const pass: Pass = {
name: '',
machine: false,
after: [],
before: [],
irChanged: true,
};
const current_dump = passDumps[i];
const next_dump = i < passDumps.length - 1 ? passDumps[i + 1] : null;
if (current_dump.header.startsWith('IR Dump After ')) {
// An after dump without a before dump - I don't think this can happen but we'll handle just in case
pass.name = current_dump.header.slice('IR Dump After '.length);
pass.after = current_dump.lines;
i++;
} else if (current_dump.header.startsWith('IR Dump Before ')) {
if (next_dump !== null && next_dump.header.startsWith('IR Dump After ')) {
assert(
passesMatch(current_dump.header, next_dump.header),
'',
current_dump.header,
next_dump.header,
);
assert(current_dump.machine === next_dump.machine);
pass.name = current_dump.header.slice('IR Dump Before '.length);
pass.before = current_dump.lines;
pass.after = next_dump.lines;
i += 2;
} else {
// Before with no after - this can happen with Delete dead loops
pass.name = current_dump.header.slice('IR Dump Before '.length);
pass.before = current_dump.lines;
i++;
}
} else {
assert(false, 'Unexpected pass header', current_dump.header);
}
pass.machine = current_dump.machine;
// The first machine pass outputs the same MIR before and after,
// making it seem like it did nothing.
// Assuming we ran some IR pass before this, grab its output as
// the before text, ensuring the first MIR pass appears when
// inconsequential passes are filtered away.
const previousPass = passes.at(-1);
if (previousPass && previousPass.machine !== pass.machine) {
pass.before = previousPass.after;
}
// check for equality
pass.irChanged = pass.before.map(x => x.text).join('\n') !== pass.after.map(x => x.text).join('\n');
passes.push(pass);
}
//console.dir(passes, {
// depth: 5,
// maxArrayLength: 100000
//});
assert(!(function_name in final_output), 'xxx');
final_output[function_name] = passes;
}
return final_output;
}
breakdownOutput(ir: ResultLine[], llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) {
// break down output by "*** IR Dump After XYZ ***" markers
const raw_passes = this.breakdownOutputIntoPassDumps(ir);
if (llvmOptPipelineOptions.fullModule) {
const passDumpsByFunction = this.associateFullDumpsWithFunctions(raw_passes);
// Match before / after pass dumps and we're done
return this.matchPassDumps(passDumpsByFunction);
} else {
// Further break down by functions in each dump
const passDumps = raw_passes.map(this.breakdownPassDumpsIntoFunctions.bind(this));
// Transform array of passes containing multiple functions into a map from functions to arrays of passes on
// those functions
const passDumpsByFunction = this.breakdownIntoPassDumpsByFunction(passDumps);
// Match before / after pass dumps and we're done
return this.matchPassDumps(passDumpsByFunction);
}
}
applyIrFilters(ir: ResultLine[], llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) {
// Additional filters conditionally enabled by `filterDebugInfo`/`filterIRMetadata`
let filters = this.filters;
let lineFilters = this.lineFilters;
if (llvmOptPipelineOptions.filterDebugInfo) {
filters = filters.concat(this.debugInfoFilters);
lineFilters = lineFilters.concat(this.debugInfoLineFilters);
}
if (llvmOptPipelineOptions.filterIRMetadata) {
lineFilters = lineFilters.concat(this.metadataLineFilters);
}
return (
ir
// whole-line filters
.filter(line => filters.every(re => line.text.match(re) === null))
// intra-line filters
.map(_line => {
let line = _line.text;
// eslint-disable-next-line no-constant-condition
while (true) {
let newLine = line;
for (const re of lineFilters) {
newLine = newLine.replace(re, '');
}
if (newLine === line) {
break;
} else {
line = newLine;
}
}
_line.text = line;
return _line;
})
);
}
process(
output: ResultLine[],
_: ParseFiltersAndOutputOptions,
llvmOptPipelineOptions: LLVMOptPipelineBackendOptions,
) {
// Crop out any junk before the pass dumps (e.g. warnings)
const ir = output.slice(
output.findIndex(line => line.text.match(this.irDumpHeader) || line.text.match(this.machineCodeDumpHeader)),
);
const preprocessed_lines = this.applyIrFilters(ir, llvmOptPipelineOptions);
return this.breakdownOutput(preprocessed_lines, llvmOptPipelineOptions);
}
}