#! /usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import json
import os
import re
import sys
import tarfile
import urllib
from urllib import request
from urllib import parse

try:
    from bs4 import BeautifulSoup
except ImportError:
    raise ImportError("Please install BeautifulSoup (apt-get install python3-bs4 or pip install beautifulsoup4 should do it)")

parser = argparse.ArgumentParser(description='Docenizes HTML version of the official Intel Asm PDFs')
parser.add_argument('-i', '--inputfolder', type=str,
                    help='Folder where the input files reside as .html. Default is ./asm-docs/',
                    default='asm-docs')
parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .js file. Default is ./asm-docs.js',
                    default='./asm-docs.js')
parser.add_argument('-d', '--downloadfolder', type=str,
                    help='Folder where the archive will be downloaded and extracted', default='asm-docs')

# The maximum number of paragraphs from the description to copy.
MAX_DESC_PARAS = 5
STRIP_PREFIX = re.compile(r'^(([0-9a-fA-F]{2}|m64|NP|(REX|E?VEX\.)[.0-9A-Z]*|/[0-9a-z]+|[a-z]+)\b\s*)*')
INSTRUCTION_RE = re.compile(r'^([A-Z][A-Z0-9]+)\*?(\s+|$)')
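# Illustrative note (my reading of the two regexes above, not taken from the docs):
# for an encoding line such as "66 0f 38 30 /r PMOVZXBW xmm1, xmm2/m64",
# STRIP_PREFIX removes the leading hex bytes and the "/r" operand-encoding token,
# and INSTRUCTION_RE then captures "PMOVZXBW" as the mnemonic.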
# Some instructions are so broken we just take their names from the filename
UNPARSEABLE_INSTR_NAMES = ['PSRLW:PSRLD:PSRLQ', 'PSLLW:PSLLD:PSLLQ', 'MOVBE']
# Some files contain instructions which cannot be parsed and which compilers are unlikely to emit
IGNORED_FILE_NAMES = [
    # SGX pseudo-instructions
    "EADD",
    "EACCEPT",
    "EAUG",
    "EACCEPTCOPY",
    "EDECVIRTCHILD",
    "EINCVIRTCHILD",
    "EINIT",
    "ELDB:ELDU:ELDBC:ELBUC",
    "EMODPE",
    "EMODPR",
    "EMODT",
    "ERDINFO",
    "ESETCONTEXT",
    "ETRACKC",
    "EBLOCK",
    "ECREATE",
    "EDBGRD",
    "EDBGWR",
    "EENTER",
    "EEXIT",
    "EEXTEND",
    "EGETKEY",
    "ELDB",
    "ELDU",
    "ENCLS",
    "ENCLU",
    "EPA",
    "EREMOVE",
    "EREPORT",
    "ERESUME",
    "ETRACK",
    "EWB",
    # VMX instructions
    "INVEPT",
    "INVVPID",
    "VMCALL",
    "VMCLEAR",
    "VMFUNC",
    "VMLAUNCH",
    "VMLAUNCH:VMRESUME",
    "VMPTRLD",
    "VMPTRST",
    "VMREAD",
    "VMRESUME",
    "VMWRITE",
    "VMXOFF",
    "VMXON",
    # Other instructions
    "INVLPG",
    "LAHF",
    "RDMSR",
    "SGDT",
    # Unparsable instructions
    # These instructions should be supported in the future
    "MONITOR",
    "MOVDQ2Q",
    "MFENCE",
]
# Some instructions are defined in multiple files. We ignore a specific set of the
# duplicates here.
IGNORED_DUPLICATES = [
    'MOV-1',  # move to control reg
    'MOV-2',  # move to debug reg
    'CMPSD',  # compare doubleword (defined in CMPS:CMPSB:CMPSW:CMPSD:CMPSQ)
    'MOVQ',  # defined in MOVD:MOVQ
    'MOVSD',  # defined in MOVS:MOVSB:MOVSW:MOVSD:MOVSQ
    'VPBROADCASTB:VPBROADCASTW:VPBROADCASTD:VPBROADCASTQ',  # defined in VPBROADCAST
    "VGATHERDPS:VGATHERDPD",
    "VGATHERQPS:VGATHERQPD",
    "VPGATHERDD:VPGATHERQD",
    "VPGATHERDQ:VPGATHERQQ",
]
# Where to extract the asmdoc archive.
ASMDOC_DIR = "asm-docs"
ARCHIVE_URL = "http://www.felixcloutier.com/x86/x86.tbz2"
ARCHIVE_NAME = "x86.tbz2"


class Instruction(object):
    def __init__(self, name, names, tooltip, body):
        self.name = name
        self.names = names
        self.tooltip = tooltip.rstrip(': ,')
        self.body = body

    def __str__(self):
        return f"{self.name} = {self.tooltip}\n{self.body}"


def get_url_for_instruction(instr):
    return f"http://www.felixcloutier.com/x86/{urllib.parse.quote(instr.name)}.html"


def download_asm_doc_archive(downloadfolder):
    if not os.path.exists(downloadfolder):
        print(f"Creating {downloadfolder} as download folder")
        os.makedirs(downloadfolder)
    elif not os.path.isdir(downloadfolder):
        print(f"Error: download folder {downloadfolder} is not a directory")
        sys.exit(1)
    archive_name = os.path.join(downloadfolder, ARCHIVE_NAME)
    print("Downloading archive...")
    urllib.request.urlretrieve(ARCHIVE_URL, archive_name)


def extract_asm_doc_archive(downloadfolder, inputfolder):
    print("Extracting file...")
    if os.path.isdir(os.path.join(inputfolder, "html")):
        for root, dirs, files in os.walk(os.path.join(inputfolder, "html")):
            for file in files:
                if os.path.splitext(file)[1] == ".html":
                    os.remove(os.path.join(root, file))
    tar = tarfile.open(os.path.join(downloadfolder, ARCHIVE_NAME))
    tar.extractall(path=inputfolder)


def strip_non_instr(i):
    # removes junk from encodings where the opcode is in the middle
    # of prefix stuff. e.g.
    # 66 0f 38 30 /r PMOVZXBW xmm1, xmm2/m64
    return STRIP_PREFIX.sub('', i)


def instr_name(i):
    match = INSTRUCTION_RE.match(strip_non_instr(i))
    if match:
        return match.group(1)


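# A sketch of the page layout the function below assumes (inferred from the code,
# not from a spec): each instruction page has an element with id="description",
# and the descriptive <p> paragraphs follow it as siblings separated by newline
# text nodes, hence the repeated next_sibling.next_sibling steps.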
def get_description_paragraphs(document_soup):
    description_header_node = document_soup.find(id="description")
    i = 0
    description_paragraph_node = description_header_node.next_sibling.next_sibling
    description_paragraphs = []
    while i < MAX_DESC_PARAS and len(description_paragraph_node.text) > 20:
        if description_paragraph_node.name == "p":
            description_paragraphs.append(description_paragraph_node)
            i = i + 1
        # Move two siblings forward. Next sibling is the line feed.
        description_paragraph_node = description_paragraph_node.next_sibling.next_sibling
    return description_paragraphs


def parse(filename, f):
    doc = BeautifulSoup(f, 'html.parser')
    if doc.table is None:
        print(f"{filename}: Failed to find table")
        return None
    table = read_table(doc.table)
    names = set()

    def add_all(instrs):
        for i in instrs:
            instruction_name = instr_name(i)
            if instruction_name:
                names.add(instruction_name)

    for inst in table:
        if 'Opcode/Instruction' in inst:
            add_all(inst['Opcode/Instruction'].split("\n"))
        elif 'OpcodeInstruction' in inst:
            add_all(inst['OpcodeInstruction'].split("\n"))
        elif 'Opcode Instruction' in inst:
            add_all(inst['Opcode Instruction'].split("\n"))
        elif 'Opcode*/Instruction' in inst:
            add_all(inst['Opcode*/Instruction'].split("\n"))
        elif 'Opcode / Instruction' in inst:
            add_all(inst['Opcode / Instruction'].split("\n"))
        elif 'Instruction' in inst:
            instruction_name = instr_name(inst['Instruction'])
            if not instruction_name:
                print(f"Unable to get instruction from: {inst['Instruction']}")
            else:
                names.add(instruction_name)
        # else, skip the line
    if not names:
        if filename in UNPARSEABLE_INSTR_NAMES:
            for inst in filename.split(":"):
                names.add(inst)
        else:
            print(f"{filename}: Failed to read instruction table")
            return None

    description_paragraphs = get_description_paragraphs(doc)

    for para in description_paragraphs:
        for link in para.find_all('a'):
            # this urljoin will only ensure relative urls are prefixed
            # if a url is already absolute it does nothing
            link['href'] = urllib.parse.urljoin('http://www.felixcloutier.com/x86/', link['href'])
            link['target'] = '_blank'
            link['rel'] = 'noreferrer noopener'

    return Instruction(
        filename,
        names,
        description_paragraphs[0].text.strip(),
        ''.join(map(lambda x: str(x), description_paragraphs)).strip())


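# Illustrative shape of what read_table() returns for a typical opcode table
# (column names vary between pages; this sample row is an assumption, not real data):
#   [{'Opcode/Instruction': '66 0f 38 30 /r PMOVZXBW xmm1, xmm2/m64', 'Description': '...'}, ...]
# i.e. one dict per table row, keyed by the header text with newlines removed.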
def read_table(start_table):
    # Tables on felixcloutier may be split in half, e.g. on https://www.felixcloutier.com/x86/sal:sar:shl:shr
    # This traverses the immediate siblings of the input table
    tables = []
    current_node = start_table
    while current_node:
        if current_node.name == 'table':
            tables.append(current_node)
        elif current_node.name is not None:  # whitespace between the tables, i.e. the \n, is a none tag
            break
        current_node = current_node.next_sibling
    # Finding all 'th' is not enough, since some headers are 'td'.
    # Instead, walk through all children of the first 'tr', filter out those
    # that are only whitespace, keep `get_text()` on the others.
    headers = list(
        map(lambda th: th.get_text(),
            filter(lambda th: str(th).strip(), tables[0].tr.children)))

    result = []
    if headers:
        # common case
        for table in tables:
            for row in table.find_all('tr'):
                obj = {}
                for column, name in zip(row.find_all('td'), headers):
                    # Remove '\n's in names that contain it.
                    obj[name.replace('\n', '')] = column.get_text()
                if obj:
                    result.append(obj)
    else:
        # Cases like BEXTR and BZHI
        for table in tables:
            rows = table.find_all('tr')
            if len(rows) != 1:
                return []
            obj = {}
            for td in rows[0].find_all('td'):
                header = td.p.strong.get_text()
                td.p.strong.decompose()
                obj[header] = td.get_text()
            result.append(obj)

    return result


def parse_html(directory):
    print("Parsing instructions...")
    instructions = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".html") and file != 'index.html':
                with open(os.path.join(root, file), encoding='utf-8') as f2:
                    name = os.path.splitext(file)[0]
                    if name in IGNORED_DUPLICATES or name in IGNORED_FILE_NAMES:
                        continue
                    try:
                        instruction = parse(name, f2)
                        if not instruction:
                            continue
                        patch_instruction(instruction)
                        instructions.append(instruction)
                    except Exception as e:
                        print(f"Error parsing {name}:\n{e}")
    return instructions


def self_test(instructions, directory):
    # For each generated instruction, check that there is a path to a file in
    # the documentation.
    directory = os.path.join(directory, "html")
    ok = True
    for inst in instructions:
        if not os.path.isfile(os.path.join(directory, inst.name + ".html")):
            print(f"Warning: {inst.name} has no associated file")
            ok = False
    return ok


def patch_instruction(instruction):
    if instruction.name == "ADDSS":
        print("\nPatching ADDSS")
        print("REMINDER: Check if https://github.com/compiler-explorer/compiler-explorer/issues/2380 is still relevant\n")

        old_body = instruction.body
        old_tooltip = instruction.tooltip
        instruction.body = old_body.replace("stores the double-precision", "stores the single-precision")
        instruction.tooltip = old_tooltip.replace("stores the double-precision", "stores the single-precision")


def main():
    args = parser.parse_args()
    print(f"Called with: {args}")
    # If we don't have the html folder already...
    if not os.path.isdir(os.path.join(args.inputfolder, 'html')):
        # We don't, try with the compressed file
        if not os.path.isfile(os.path.join(args.downloadfolder, "x86.tbz2")):
            # We can't find that either. Download it
            try:
                download_asm_doc_archive(args.downloadfolder)
                extract_asm_doc_archive(args.downloadfolder, args.inputfolder)
            except IOError as e:
                print("Error when downloading archive:")
                print(e)
                sys.exit(1)
        else:
            # We have a file already downloaded
            extract_asm_doc_archive(args.downloadfolder, args.inputfolder)
    instructions = parse_html(args.inputfolder)
    instructions.sort(key=lambda b: b.name)
    all_inst = set()
    for inst in instructions:
        if not all_inst.isdisjoint(inst.names):
            print(f"Overlap in instruction names: {inst.names.intersection(all_inst)} for {inst.name}")
        all_inst = all_inst.union(inst.names)
    if not self_test(instructions, args.inputfolder):
        print("Tests do not pass. Not writing output file. Aborting.")
        sys.exit(3)
    print(f"Writing {len(instructions)} instructions")
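    # For reference, each emitted case block roughly looks like this (illustrative
    # only; the JSON payload is elided, not copied from real output):
    #     case "PMOVZXBW":
    #         return {"html": ..., "tooltip": ..., "url": ...};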
    with open(args.outputpath, 'w') as f:
        f.write("""
export function getAsmOpcode(opcode) {
    if (!opcode) return;
    switch (opcode.toUpperCase()) {
""")
        for inst in instructions:
            for name in sorted(inst.names):
                f.write(f'        case "{name}":\n')
            f.write('            return {}'.format(json.dumps({
                "tooltip": inst.tooltip,
                "html": inst.body,
                "url": get_url_for_instruction(inst)
            }, indent=16, separators=(',', ': '), sort_keys=True))[:-1] + '            };\n\n')
        f.write("""
    }
}
""")


if __name__ == '__main__':
    main()