// blob: b3d239c0135d3fbf0b5fac6b119184c70fa04e77 [file] [log] [blame] [raw]
// Copyright (c) 2021, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
/**
 * Post-processes the lines of an LLVM AST dump: drops top-level declarations
 * that do not come from the user's `<source>` file, strips symbol addresses
 * and invalid source locations, and attaches the parsed source span
 * (line/column) to every AST line that carries one.
 */
export class LlvmAstParser {
    /**
     * @param {Function} [compilerProps] - Optional property getter, invoked as
     *     `compilerProps('maxLinesOfAst', defaultValue)`.
     */
    constructor(compilerProps) {
        // Stored for consumers of the parser; not read by the methods of this class.
        this.maxAstLines = 500;
        if (compilerProps) {
            this.maxAstLines = compilerProps('maxLinesOfAst', this.maxAstLines);
        }

        // Almost every line of AST includes a span of related source lines:
        // In different forms like <line:a:b, line:c:d>
        this.locTypes = {
            NONE: 'none', // No location specified
            POINT: 'point', // A single location: beginning of a token
            SPAN: 'span', // Two locations: first token to last token (beginning)
        };
    }

    /**
     * Parses a single location. Accepts "line:a:b" and "col:b".
     *
     * @param {string} ptLine - Text of the location.
     * @param {?number} lastLineNo - Line number used when `ptLine` only holds a column.
     * @returns {{line: ?number, col: ?number}} `col` is null only for malformed input.
     */
    parsePoint(ptLine, lastLineNo) {
        const lineMatch = ptLine.match(/line:(\d+):/);
        // The column is the number after "col:" or after the "<digit>:" that ends the line number
        const colMatch = ptLine.match(/(?:col|\d):(\d+)(?::|$)/);
        return {
            line: lineMatch ? Number(lineMatch[1]) : lastLineNo,
            col: colMatch ? Number(colMatch[1]) : null, // Does not happen for well-formed strings
        };
    }

    /**
     * Extracts the first location span from an AST line.
     * Accepts "<X, X>" and "<X>", where X can be "col:a" or "line:a:b".
     *
     * @param {string} line - Text of the AST line.
     * @param {?number} lastLineNo - The line number of the previous node,
     *     reused when only a column is specified.
     * @returns {{type: string, begin?: object, end?: object, loc?: object}}
     *     `type` is one of `this.locTypes`; SPAN carries `begin`/`end`,
     *     POINT carries `loc`, NONE carries nothing else.
     */
    parseSpan(line, lastLineNo) {
        const match = line.match(/<((?:line|col)[\d ,:ceilno]+)>/);
        if (!match) {
            return {type: this.locTypes.NONE};
        }

        const span = match[1];
        const beginEnd = span.split(',');
        if (beginEnd.length !== 2) {
            return {type: this.locTypes.POINT, loc: this.parsePoint(span, lastLineNo)};
        }

        const begin = this.parsePoint(beginEnd[0], lastLineNo);
        // A column-only end point is on the same line as the begin point
        const end = this.parsePoint(beginEnd[1], begin.line);
        return {type: this.locTypes.SPAN, begin, end};
    }

    /**
     * Links the AST lines with spans of source locations (lines+columns).
     * Sets `line.source = {from, to}` on every entry whose text contains a
     * location; entries without one are left untouched.
     *
     * @param {Array<{text: string}>} astDump - AST lines, modified in place.
     */
    parseAndSetSourceLines(astDump) {
        // Begin point of the most recent located node; its line number is the
        // fallback for column-only locations on the following lines.
        let from = {line: null, col: null};
        let to = from;
        for (const line of astDump) {
            const span = this.parseSpan(line.text, from.line);
            switch (span.type) {
                case this.locTypes.POINT:
                    from = span.loc;
                    to = span.loc;
                    break;
                case this.locTypes.SPAN:
                    from = span.begin;
                    to = span.end;
                    break;
                default:
                    // No location on this line: keep the previous state
                    continue;
            }
            line.source = {from, to};
        }
    }

    /**
     * Filters and normalizes an AST dump.
     *
     * @param {{stdout: Array<{text: string}>}} output - Result whose `stdout`
     *     holds the AST lines. The array and its entries are modified in place.
     * @returns {Array<{text: string}>} The same `output.stdout` array, filtered.
     */
    processAst(output) {
        const lines = output.stdout;

        // Top level decls start with |- or `-
        const topLevelRegex = /^([`|])-/;
        // Refers to the user's source file rather than a system header
        const sourceMarker = '<source>';
        const sourceRegex = /<source>/g;
        // Refers to whatever the most recent file specified was
        const lineRegex = /<(col|line):/;
        // Top level decls with invalid sloc as the file don't change the most recent file
        const invalidFileMarker = '<<invalid sloc>>';
        // Symbol addresses following the node name
        const addressRegex = /^([^A-Za-z]*[A-Za-z]+) 0x[\da-z]+/gm;
        // <invalid sloc> and <<invalid sloc>>
        const slocRegex = / ?<?<invalid sloc>>?/g;

        let mostRecentIsSource = false;
        let i = 0;
        while (i < lines.length) {
            const text = lines[i].text;

            // Remove all AST nodes which aren't directly from the user's source code
            if (topLevelRegex.test(text)) {
                const continuesSourceFile = mostRecentIsSource && lineRegex.test(text);
                if (!continuesSourceFile) {
                    if (text.includes(sourceMarker)) {
                        mostRecentIsSource = true;
                    } else {
                        // This is a system header or implicit definition,
                        // remove everything up to the next top level decl
                        if (!text.includes(invalidFileMarker)) {
                            mostRecentIsSource = false;
                        }
                        let spliceMax = i + 1;
                        while (lines[spliceMax] && !topLevelRegex.test(lines[spliceMax].text)) {
                            spliceMax++;
                        }
                        lines.splice(i, spliceMax - i);
                        // The next candidate now sits at index i; re-examine it
                        // without rewriting any other line.
                        continue;
                    }
                }
            }

            lines[i].text = text
                .replace(addressRegex, '$1') // Filter out the symbol addresses
                .replace(slocRegex, '') // Filter out <invalid sloc> and <<invalid sloc>>
                .replace(sourceRegex, 'line'); // Unify file references
            i++;
        }

        this.parseAndSetSourceLines(lines);
        return lines;
    }
}