lib/llvm-ast.js - compiler-explorer - Rivoreo Source Code Repositories

 // Copyright (c) 2021, Compiler Explorer Authors
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 //     * Redistributions of source code must retain the above copyright notice,
 //       this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above copyright
 //       notice, this list of conditions and the following disclaimer in the
 //       documentation and/or other materials provided with the distribution.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.

 /**
  * Trims an AST dump down to the nodes that originate from the user's source
  * and annotates the remaining lines with the source line range they refer to.
  */
 export class LlvmAstParser {
     /**
      * @param {Function} [compilerProps] - optional lookup, invoked as
      *     `compilerProps('maxLinesOfAst', 500)`, whose result replaces the
      *     default `maxAstLines` of 500.
      */
     constructor(compilerProps) {
         this.maxAstLines = 500;
         if (compilerProps) {
             this.maxAstLines = compilerProps('maxLinesOfAst', this.maxAstLines);
         }

         // Almost every line of AST includes a span of related source lines:
         // In different forms like <line:a:b, line:c:d>
         this.spanTypes = {
             NONE: 'none', // No span specified
             EMPTY: 'empty', // span mentions no lines
             LEFT: 'left', // span mentions only the starting line
             RIGHT: 'right', // span mentions only the finishing line
             FULL: 'full', // span mentions both lines
         };
     }

     /**
      * Extract one, two, or no line numbers from the first span in `line`:
      *   <line:a:b, line:c:d> -> FULL  { left: a, right: c }
      *   <col:b, line:c:d>    -> RIGHT { right: c }
      *   <line:a:b, col:c>    -> LEFT  { left: a }
      *   <col:a, col:b>       -> EMPTY
      *   <no source span>     -> NONE
      *
      * @param {string} line - one line of AST text
      * @returns {{type: string, left?: number, right?: number}} `type` is one of
      *     the `this.spanTypes` values
      */
     parseSpan(line) {
         const spanRegex = /<((?:line|col)[\d ,:ceilno]+)>/;
         const twoLineRegex = /line:(\d+)[,:].*line:(\d+)[:>]/;
         const leftLineRegex = /^line:(\d+)/;
         const rightLineRegex = / line:(\d+):/;

         const m = line.match(spanRegex);
         if (!m) {
             return {type: this.spanTypes.NONE};
         }

         // Only the text between the angle brackets is inspected from here on
         const span = m[1];
         const both = span.match(twoLineRegex);
         if (both) {
             return {type: this.spanTypes.FULL, left: Number(both[1]), right: Number(both[2])};
         }
         const left = span.match(leftLineRegex);
         if (left) {
             return {type: this.spanTypes.LEFT, left: Number(left[1])};
         }
         const right = span.match(rightLineRegex);
         if (right) {
             return {type: this.spanTypes.RIGHT, right: Number(right[1])};
         }
         return {type: this.spanTypes.EMPTY};
     }

     /**
      * Link the AST lines with spans of source lines. Every entry whose text
      * contains a span gets a `source: {from, to}` property; entries without a
      * span are left untouched.
      *
      * @param {Array<{text: string}>} astDump - AST entries, mutated in place
      */
     parseAndSetSourceLines(astDump) {
         // The range carries over between entries: spans that omit a line
         // number reuse what earlier entries established.
         let lfrom;
         let lto;
         for (const line of astDump) {
             const span = this.parseSpan(line.text);
             switch (span.type) {
                 case this.spanTypes.NONE:
                 case this.spanTypes.EMPTY:
                     break;
                 case this.spanTypes.LEFT:
                     lfrom = span.left;
                     lto = lfrom;
                     break;
                 case this.spanTypes.RIGHT:
                     // Starts where the previous range ended
                     lfrom = lto;
                     lto = span.right;
                     break;
                 case this.spanTypes.FULL:
                     lfrom = span.left;
                     lto = span.right;
                     break;
             }
             if (span.type !== this.spanTypes.NONE) {
                 line.source = {from: lfrom, to: lto};
             }
         }
     }

     /**
      * Remove the top level decls (and their children) that do not come from
      * the user's source, strip symbol addresses and invalid-sloc markers,
      * rewrite `<source>` references to `line`, and attach source line ranges.
      *
      * @param {{stdout: Array<{text: string}>}} output - compiler result; the
      *     `stdout` array and its entries are modified in place
      * @returns {Array<{text: string, source?: {from: number, to: number}}>}
      *     the same `stdout` array after filtering
      */
     processAst(output) {
         const lines = output.stdout;

         // Top level decls start with |- or `-
         const topLevelRegex = /^([`|])-/;

         // Refers to the user's source file rather than a system header
         const sourceRegex = /<source>/g;

         // Refers to whatever the most recent file specified was
         const lineRegex = /<line:/;

         // Top level decls with invalid sloc as the file don't change the most recent file
         const invalidFileRegex = /<<invalid sloc>>/;

         // Symbol addresses, and <invalid sloc> / <<invalid sloc>> markers
         const addressRegex = /^([^A-Za-z]*[A-Za-z]+) 0x[\da-z]+/gm;
         const slocRegex = / ?<?<invalid sloc>>?/g;

         let mostRecentIsSource = false;

         // Remove all AST nodes which aren't directly from the user's source code
         for (let i = 0; i < lines.length; ++i) {
             if (lines[i].text.match(topLevelRegex)) {
                 if (lines[i].text.match(lineRegex) && mostRecentIsSource) {
                     // Continues in the user's source file: keep it
                 } else if (!lines[i].text.match(sourceRegex)) {
                     // This is a system header or implicit definition,
                     // remove everything up to the next top level decl
                     if (!lines[i].text.match(invalidFileRegex)) {
                         mostRecentIsSource = false;
                     }

                     let spliceMax = i + 1;
                     while (lines[spliceMax] && !lines[spliceMax].text.match(topLevelRegex)) {
                         spliceMax++;
                     }
                     lines.splice(i, spliceMax - i);
                     --i;
                     if (i < 0) {
                         // The removed decl was the very first entry, so there is
                         // no preceding entry to clean up; indexing lines[-1]
                         // would throw.
                         continue;
                     }
                 } else {
                     mostRecentIsSource = true;
                 }
             }

             // Filter out the symbol addresses
             lines[i].text = lines[i].text.replace(addressRegex, '$1');

             // Filter out <invalid sloc> and <<invalid sloc>>
             lines[i].text = lines[i].text.replace(slocRegex, '');

             // Unify file references
             lines[i].text = lines[i].text.replace(sourceRegex, 'line');
         }
         this.parseAndSetSourceLines(lines);
         return lines;
     }
 }
	// Copyright (c) 2021, Compiler Explorer Authors
	// All rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are met:
	//
	// * Redistributions of source code must retain the above copyright notice,
	// this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above copyright
	// notice, this list of conditions and the following disclaimer in the
	// documentation and/or other materials provided with the distribution.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	// POSSIBILITY OF SUCH DAMAGE.

	/**
	 * Trims an AST dump down to the nodes that originate from the user's source
	 * and annotates the remaining lines with the source line range they refer to.
	 */
	export class LlvmAstParser {
	    /**
	     * @param {Function} [compilerProps] - optional lookup, invoked as
	     *     `compilerProps('maxLinesOfAst', 500)`, whose result replaces the
	     *     default `maxAstLines` of 500.
	     */
	    constructor(compilerProps) {
	        this.maxAstLines = 500;
	        if (compilerProps) {
	            this.maxAstLines = compilerProps('maxLinesOfAst', this.maxAstLines);
	        }

	        // Almost every line of AST includes a span of related source lines:
	        // In different forms like <line:a:b, line:c:d>
	        this.spanTypes = {
	            NONE: 'none', // No span specified
	            EMPTY: 'empty', // span mentions no lines
	            LEFT: 'left', // span mentions only the starting line
	            RIGHT: 'right', // span mentions only the finishing line
	            FULL: 'full', // span mentions both lines
	        };
	    }

	    /**
	     * Extract one, two, or no line numbers from the first span in `line`:
	     *   <line:a:b, line:c:d> -> FULL  { left: a, right: c }
	     *   <col:b, line:c:d>    -> RIGHT { right: c }
	     *   <line:a:b, col:c>    -> LEFT  { left: a }
	     *   <col:a, col:b>       -> EMPTY
	     *   <no source span>     -> NONE
	     *
	     * @param {string} line - one line of AST text
	     * @returns {{type: string, left?: number, right?: number}} `type` is one of
	     *     the `this.spanTypes` values
	     */
	    parseSpan(line) {
	        // The alternation must be an unescaped `|`; an escaped `\|` would only
	        // match the literal text "line|col" and no span would ever be found.
	        const spanRegex = /<((?:line|col)[\d ,:ceilno]+)>/;
	        const twoLineRegex = /line:(\d+)[,:].*line:(\d+)[:>]/;
	        const leftLineRegex = /^line:(\d+)/;
	        const rightLineRegex = / line:(\d+):/;

	        const m = line.match(spanRegex);
	        if (!m) {
	            return {type: this.spanTypes.NONE};
	        }

	        // Only the text between the angle brackets is inspected from here on
	        const span = m[1];
	        const both = span.match(twoLineRegex);
	        if (both) {
	            return {type: this.spanTypes.FULL, left: Number(both[1]), right: Number(both[2])};
	        }
	        const left = span.match(leftLineRegex);
	        if (left) {
	            return {type: this.spanTypes.LEFT, left: Number(left[1])};
	        }
	        const right = span.match(rightLineRegex);
	        if (right) {
	            return {type: this.spanTypes.RIGHT, right: Number(right[1])};
	        }
	        return {type: this.spanTypes.EMPTY};
	    }

	    /**
	     * Link the AST lines with spans of source lines. Every entry whose text
	     * contains a span gets a `source: {from, to}` property; entries without a
	     * span are left untouched.
	     *
	     * @param {Array<{text: string}>} astDump - AST entries, mutated in place
	     */
	    parseAndSetSourceLines(astDump) {
	        // The range carries over between entries: spans that omit a line
	        // number reuse what earlier entries established.
	        let lfrom;
	        let lto;
	        for (const line of astDump) {
	            const span = this.parseSpan(line.text);
	            switch (span.type) {
	                case this.spanTypes.NONE:
	                case this.spanTypes.EMPTY:
	                    break;
	                case this.spanTypes.LEFT:
	                    lfrom = span.left;
	                    lto = lfrom;
	                    break;
	                case this.spanTypes.RIGHT:
	                    // Starts where the previous range ended
	                    lfrom = lto;
	                    lto = span.right;
	                    break;
	                case this.spanTypes.FULL:
	                    lfrom = span.left;
	                    lto = span.right;
	                    break;
	            }
	            if (span.type !== this.spanTypes.NONE) {
	                line.source = {from: lfrom, to: lto};
	            }
	        }
	    }

	    /**
	     * Remove the top level decls (and their children) that do not come from
	     * the user's source, strip symbol addresses and invalid-sloc markers,
	     * rewrite `<source>` references to `line`, and attach source line ranges.
	     *
	     * @param {{stdout: Array<{text: string}>}} output - compiler result; the
	     *     `stdout` array and its entries are modified in place
	     * @returns {Array<{text: string, source?: {from: number, to: number}}>}
	     *     the same `stdout` array after filtering
	     */
	    processAst(output) {
	        const lines = output.stdout;

	        // Top level decls start with |- or `-
	        const topLevelRegex = /^([`|])-/;

	        // Refers to the user's source file rather than a system header
	        const sourceRegex = /<source>/g;

	        // Refers to whatever the most recent file specified was
	        const lineRegex = /<line:/;

	        // Top level decls with invalid sloc as the file don't change the most recent file
	        const invalidFileRegex = /<<invalid sloc>>/;

	        // Symbol addresses, and <invalid sloc> / <<invalid sloc>> markers
	        const addressRegex = /^([^A-Za-z]*[A-Za-z]+) 0x[\da-z]+/gm;
	        const slocRegex = / ?<?<invalid sloc>>?/g;

	        let mostRecentIsSource = false;

	        // Remove all AST nodes which aren't directly from the user's source code
	        for (let i = 0; i < lines.length; ++i) {
	            if (lines[i].text.match(topLevelRegex)) {
	                if (lines[i].text.match(lineRegex) && mostRecentIsSource) {
	                    // Continues in the user's source file: keep it
	                } else if (!lines[i].text.match(sourceRegex)) {
	                    // This is a system header or implicit definition,
	                    // remove everything up to the next top level decl
	                    if (!lines[i].text.match(invalidFileRegex)) {
	                        mostRecentIsSource = false;
	                    }

	                    let spliceMax = i + 1;
	                    while (lines[spliceMax] && !lines[spliceMax].text.match(topLevelRegex)) {
	                        spliceMax++;
	                    }
	                    lines.splice(i, spliceMax - i);
	                    --i;
	                    if (i < 0) {
	                        // The removed decl was the very first entry, so there is
	                        // no preceding entry to clean up; indexing lines[-1]
	                        // would throw.
	                        continue;
	                    }
	                } else {
	                    mostRecentIsSource = true;
	                }
	            }

	            // Filter out the symbol addresses
	            lines[i].text = lines[i].text.replace(addressRegex, '$1');

	            // Filter out <invalid sloc> and <<invalid sloc>>
	            lines[i].text = lines[i].text.replace(slocRegex, '');

	            // Unify file references
	            lines[i].text = lines[i].text.replace(sourceRegex, 'line');
	        }
	        this.parseAndSetSourceLines(lines);
	        return lines;
	    }
	}