blob: a8cdabe93968c034093f6423dd7faa611b3ea789 [file] [log] [blame] [raw]
// Copyright (c) 2021, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
/**
 * Post-processes a line-oriented AST dump: drops top-level declarations that
 * do not come from the user's source file, strips noise (symbol addresses,
 * invalid source locations) and links every AST line to the span of source
 * lines it mentions.
 */
export class LlvmAstParser {
    /**
     * @param {Function} [compilerProps] - optional property lookup, invoked as
     *     `compilerProps(key, defaultValue)`.
     */
    constructor(compilerProps) {
        // Stored for consumers of the parser; no method of this class reads it.
        this.maxAstLines = 500;
        if (compilerProps) {
            this.maxAstLines = compilerProps('maxLinesOfAst', this.maxAstLines);
        }

        // Almost every line of AST includes a span of related source lines,
        // in different forms like <line:a:b, line:c:d>
        this.spanTypes = {
            NONE: 'none', // No span specified
            EMPTY: 'empty', // span mentions no lines
            LEFT: 'left', // span mentions only the starting line
            RIGHT: 'right', // span mentions only the finishing line
            FULL: 'full', // span mentions both lines
        };
    }

    /**
     * Extract one, two, or no line numbers from the first span found in `line`:
     *   <line:a:b, line:c:d> -> left a, right c - FULL
     *   <col:b, line:c:d>    -> right c         - RIGHT
     *   <line:a:b, col:c>    -> left a          - LEFT
     *   <col:a, col:b>       ->                 - EMPTY
     *   <no source span>     ->                 - NONE
     *
     * @param {string} line - one line of AST text.
     * @returns {{type: string, left?: number, right?: number}} the span type
     *     (one of `this.spanTypes`) and whichever line numbers were present.
     */
    parseSpan(line) {
        const spanRegex = /<((?:line|col)[\d ,:ceilno]+)>/;
        const twoLineRegex = /line:(\d+)[,:].*line:(\d+)[:>]/;
        const leftLineRegex = /^line:(\d+)/;
        const rightLineRegex = / line:(\d+):/;

        const spanMatch = line.match(spanRegex);
        if (!spanMatch) {
            return {type: this.spanTypes.NONE};
        }
        const span = spanMatch[1];

        const both = span.match(twoLineRegex);
        if (both) {
            return {type: this.spanTypes.FULL, left: Number(both[1]), right: Number(both[2])};
        }
        const left = span.match(leftLineRegex);
        if (left) {
            return {type: this.spanTypes.LEFT, left: Number(left[1])};
        }
        const right = span.match(rightLineRegex);
        if (right) {
            return {type: this.spanTypes.RIGHT, right: Number(right[1])};
        }
        return {type: this.spanTypes.EMPTY};
    }

    /**
     * Link the AST lines with spans of source lines.
     *
     * Sets `source = {from, to}` on every entry whose text contains a span.
     * Spans that omit a line number inherit it from the most recent span that
     * had one, so the running `from`/`to` pair is carried across entries.
     *
     * @param {Array<{text: string}>} astDump - AST lines, mutated in place.
     */
    parseAndSetSourceLines(astDump) {
        let lfrom;
        let lto;
        for (const line of astDump) {
            const span = this.parseSpan(line.text);
            switch (span.type) {
                case this.spanTypes.NONE:
                case this.spanTypes.EMPTY:
                    break;
                case this.spanTypes.LEFT:
                    lfrom = span.left;
                    lto = lfrom;
                    break;
                case this.spanTypes.RIGHT:
                    // Only the end line is given: the span starts where the
                    // previous one ended.
                    lfrom = lto;
                    lto = span.right;
                    break;
                case this.spanTypes.FULL:
                    lfrom = span.left;
                    lto = span.right;
                    break;
            }
            if (span.type !== this.spanTypes.NONE) {
                line.source = {from: lfrom, to: lto};
            }
        }
    }

    /**
     * Filter and normalise an AST dump.
     *
     * Removes every top-level declaration (and its children) that does not
     * belong to `<source>`, strips symbol addresses and invalid source
     * locations, rewrites `<source>` file references to `line`, and finally
     * annotates the remaining lines with their source spans.
     *
     * @param {{stdout: Array<{text: string}>}} output - result object whose
     *     `stdout` array holds the AST lines; the array is mutated in place.
     * @returns {Array<{text: string, source?: {from: number, to: number}}>}
     *     the same `stdout` array after filtering.
     */
    processAst(output) {
        const lines = output.stdout;

        // Top level decls start with |- or `-
        const topLevelRegex = /^([`|])-/;
        // Refers to the user's source file rather than a system header
        const sourceRegex = /<source>/g;
        // Refers to whatever the most recent file specified was
        const lineRegex = /<line:/;
        // Top level decls with invalid sloc as the file don't change the most recent file
        const invalidFileRegex = /<<invalid sloc>>/;
        // Symbol addresses following the node name
        const addressRegex = /^([^A-Za-z]*[A-Za-z]+) 0x[\da-z]+/gm;
        // <invalid sloc> and <<invalid sloc>>
        const invalidSlocRegex = / ?<?<invalid sloc>>?/g;

        let mostRecentIsSource = false;

        // Remove all AST nodes which aren't directly from the user's source code
        for (let i = 0; i < lines.length; ++i) {
            const text = lines[i].text;

            if (topLevelRegex.test(text)) {
                const continuesSourceFile = mostRecentIsSource && lineRegex.test(text);
                if (!continuesSourceFile) {
                    if (text.includes('<source>')) {
                        mostRecentIsSource = true;
                    } else {
                        // This is a system header or implicit definition,
                        // remove everything up to the next top level decl
                        if (!invalidFileRegex.test(text)) {
                            mostRecentIsSource = false;
                        }
                        let spliceMax = i + 1;
                        while (lines[spliceMax] && !topLevelRegex.test(lines[spliceMax].text)) {
                            spliceMax++;
                        }
                        lines.splice(i, spliceMax - i);
                        // Re-examine index i, which now holds the next decl.
                        // Skipping the filtering below is essential: entry i
                        // was just removed, and i - 1 may be -1.
                        --i;
                        continue;
                    }
                }
            }

            lines[i].text = text
                .replace(addressRegex, '$1') // Filter out the symbol addresses
                .replace(invalidSlocRegex, '') // Filter out invalid slocs
                .replace(sourceRegex, 'line'); // Unify file references
        }

        this.parseAndSetSourceLines(lines);
        return lines;
    }
}