2011-12-22 13:04:09 +08:00
|
|
|
# Quick hack:
# grabs opcode data from the XML reference published at http://ref.x86asm.net,
# then auto-comments the cpux86 emulator code.
#
# (super brittle hack)
#
|
|
|
|
|
|
|
|
from BeautifulSoup import BeautifulStoneSoup #thank you soup, fuck XML parsers
|
|
|
|
import json, re
|
|
|
|
|
|
|
|
#
|
|
|
|
# Let me reiterate how much I despise scraping data from XML
|
|
|
|
#
|
2011-12-22 13:16:35 +08:00
|
|
|
# Read the whole opcode reference up front. The with-block closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open("x86reference.xml", "r") as xml_file:
    infile = xml_file.read()

soup = BeautifulStoneSoup(infile)

# All <pri_opcd> elements found below the <one-byte> and <two-byte> sections.
onesies = soup.find('one-byte').findAll('pri_opcd')
twosies = soup.find('two-byte').findAll('pri_opcd')
|
|
|
|
|
|
|
|
def hexRepOfOp(op):
    """Return the opcode's 'value' attribute as a lowercase '0x..' string.

    The attribute is parsed as hexadecimal; values below 16 get a leading
    zero so the result always has at least two hex digits (e.g. '0x0a').
    """
    value = int(op['value'], 16)
    digits = hex(value)[2:]
    # Single-digit values need the padding zero to line up with 0xXX labels.
    prefix = "0x0" if value < 16 else "0x"
    return (prefix + digits).lower()
|
2011-12-22 13:04:09 +08:00
|
|
|
def mnem(op):
    """Return the `.string` of the first 'mnem' element found in op.

    Returns "" when op.find('mnem') yields nothing.
    """
    found = op.find('mnem')
    return found.string if found else ""
|
|
|
|
def src(op):
    """Return the text of the 'src' element inside op's 'syntax' element.

    Returns "" when either the 'syntax' element or its 'src' child is
    missing, instead of raising AttributeError on a missing 'syntax'.
    """
    syntax = op.find('syntax')
    if syntax is None:
        # No <syntax> at all: nothing to describe for this opcode.
        return ""
    operand = syntax.find('src')
    if operand:
        return operand.getText()
    return ""
|
|
|
|
def dst(op):
    """Return the text of the 'dst' element inside op's 'syntax' element.

    Returns "" when either the 'syntax' element or its 'dst' child is
    missing, instead of raising AttributeError on a missing 'syntax'.
    """
    syntax = op.find('syntax')
    if syntax is None:
        # No <syntax> at all: nothing to describe for this opcode.
        return ""
    operand = syntax.find('dst')
    if operand:
        return operand.getText()
    return ""
|
|
|
|
def note(op):
    """Return the text of the 'brief' element inside op's 'note' element.

    Returns "" when either the 'note' element or its 'brief' child is
    missing, instead of raising AttributeError on a missing 'note'.
    """
    note_tag = op.find('note')
    if note_tag is None:
        # No <note> at all: this opcode carries no description.
        return ""
    brief = note_tag.find('brief')
    if brief:
        return brief.getText()
    return ""
|
|
|
|
def opstr(op):
    """Return 'mnem src dst note' for op, the four parts joined by spaces.

    Empty parts are kept, so consecutive spaces can appear in the result.
    """
    parts = [mnem(op), src(op), dst(op), note(op)]
    return " ".join(parts)
|
|
|
|
|
|
|
|
# Map each opcode's hex label (e.g. "0x0f") to its description string,
# one table for the one-byte opcodes and one for the two-byte opcodes.
onedict = {}
twodict = {}
for table, ops in ((onedict, onesies), (twodict, twosies)):
    for op in ops:
        table[hexRepOfOp(op)] = opstr(op)
|
|
|
|
|
|
|
|
# barf some temporaries just for reference later
|
|
|
|
# Dump each opcode table to its own JSON file. The with-blocks guarantee the
# files are closed even if json.dump raises part-way through.
with open("onebyte_dict.json", 'w') as outfile:
    json.dump(onedict, outfile)

with open("twobyte_dict.json", 'w') as outfile:
    json.dump(twodict, outfile)
|
|
|
|
|
2011-12-24 12:21:01 +08:00
|
|
|
# now transform source file --------------------------------------------------------------------------------
|
|
|
|
|
2011-12-24 13:08:07 +08:00
|
|
|
# - for weird exec counting function
|
|
|
|
# Matches a `case 0x..:` label line; group 1 is the " case " prefix and
# group 2 is the lowercase hex literal.
caseline = re.compile(r"( case )(0x[0-9a-f]+):.*")
|
|
|
|
def strip_1(str):
    """Turn a matched case label into the key used for the opcode tables.

    For this configuration the label is already the key, so it is returned
    unchanged.
    """
    # NOTE: the parameter shadows the builtin `str`; the name is kept so the
    # function's signature stays the same for existing callers.
    return str
|
|
|
|
# 0-based positions in the emulator source's line list (they are compared
# against enumerate() indices): lines in [onebyte_start, twobyte_start) are
# annotated from the one-byte table, lines in [twobyte_start, twobyte_end)
# from the two-byte table.
onebyte_start, twobyte_start, twobyte_end = 3176, 3177, 3546
|
|
|
|
|
2011-12-24 12:21:01 +08:00
|
|
|
# - for normal instruction format: 0xXX
|
|
|
|
#caseline = re.compile("(\s+case )(0x[0-9a-f]+):.*")
|
2011-12-24 13:08:07 +08:00
|
|
|
#def strip_1(str):
|
|
|
|
# return str
|
2011-12-24 12:21:01 +08:00
|
|
|
#onebyte_start = 5662
|
|
|
|
#twobyte_start = 7551
|
|
|
|
#twobyte_end = 8291
|
|
|
|
|
|
|
|
# - for 16bit compat instruction format: 0x1XX
|
2011-12-24 13:08:07 +08:00
|
|
|
#caseline = re.compile("(\s+case )(0x1[0-9a-f]+):.*")
|
|
|
|
#def strip_1(str):
|
|
|
|
# return "0x"+str[-2:]
|
|
|
|
#onebyte_start = 8472
|
|
|
|
#twobyte_start = 9245
|
|
|
|
#twobyte_end = 9647
|
2011-12-22 13:04:09 +08:00
|
|
|
|
|
|
|
# Load the emulator source as a list of lines (line endings kept). The
# with-block closes the file handle instead of leaking it.
with open("cpux86-ta.js", "r") as emulator_file:
    emulatorlines = emulator_file.readlines()

# Output lines, filled in by the annotation pass over emulatorlines.
newlines = []
|
|
|
|
def _annotate_case(line, opcode_table):
    """Return `line` with its case label commented from opcode_table.

    Lines that are not case labels, and labels whose opcode has no entry in
    the table, are returned unchanged. An annotated line keeps only the
    matched prefix and hex literal, followed by "://" and the description.
    """
    hit = caseline.match(line)
    if not hit:
        return line
    prefix, opcode = hit.group(1), hit.group(2)
    try:
        description = opcode_table[strip_1(opcode)]
    except KeyError:
        # Unknown opcode: leave the original line alone.
        return line
    return prefix + opcode + "://" + description + "\n"


# Each range is tested independently (no elif); the ranges are disjoint as
# long as onebyte_start <= twobyte_start <= twobyte_end.
for index, line in enumerate(emulatorlines):
    if index < onebyte_start:
        newlines.append(line)
    if onebyte_start <= index < twobyte_start:  # one-byte instructions
        newlines.append(_annotate_case(line, onedict))
    if twobyte_start <= index < twobyte_end:  # two-byte instructions
        newlines.append(_annotate_case(line, twodict))
    if twobyte_end <= index:
        newlines.append(line)
|
|
|
|
|
|
|
|
# Write the annotated source. The with-block closes the file even if
# writelines raises.
with open("cpux86-ta-auto-annotated.js", 'w') as outfile:
    outfile.writelines(newlines)
|