# quick hack: # grabs data from XML file describing opcodes from http://ref.x86asm.net # then autocomments the cpux86 emulator code # # (super brittle hack) # from BeautifulSoup import BeautifulStoneSoup #thank you soup, fuck XML parsers import json, re # # Let me reiterate how much I despise scraping data from XML # infile = open("x86reference.xml","r").read() soup=BeautifulStoneSoup(infile) onesies=soup.find('one-byte').findAll('pri_opcd') twosies=soup.find('two-byte').findAll('pri_opcd') def hexRepOfOp(op): i=int(op['value'],16) if i < 16: return ("0x0"+hex(i)[2:]).lower() else: return ("0x" +hex(i)[2:]).lower() def mnem(op): res = op.find('mnem') if res: return res.string else: return "" def src(op): res = op.find('syntax').find('src') if res: return res.getText() else: return "" def dst(op): res = op.find('syntax').find('dst') if res: return res.getText() else: return "" def note(op): res = op.find('note').find('brief') if res: return res.getText() else: return "" def opstr(op): return mnem(op)+" "+src(op)+" "+dst(op)+" "+note(op) onedict = {} for op in onesies: onedict[hexRepOfOp(op)] = opstr(op) twodict = {} for op in twosies: twodict[hexRepOfOp(op)] = opstr(op) # barf some temporaries just for reference later outfile=open("onebyte_dict.json",'w') json.dump(onedict,outfile) outfile.close() outfile=open("twobyte_dict.json",'w') json.dump(twodict,outfile) outfile.close() # now transform source file -------------------------------------------------------------------------------- # - for weird exec counting function caseline = re.compile("( case )(0x[0-9a-f]+):.*") def strip_1(str): return str onebyte_start = 3176 twobyte_start = 3177 twobyte_end = 3546 # - for normal instruction format: 0xXX #caseline = re.compile("(\s+case )(0x[0-9a-f]+):.*") #def strip_1(str): # return str #onebyte_start = 5662 #twobyte_start = 7551 #twobyte_end = 8291 # - for 16bit compat instruction format: 0x1XX #caseline = re.compile("(\s+case )(0x1[0-9a-f]+):.*") #def strip_1(str): # return "0x"+str[-2:] #onebyte_start = 8472 #twobyte_start = 9245 #twobyte_end = 9647 emulatorlines = open("cpux86-ta.js","r").readlines() newlines=[] for i,line in enumerate(emulatorlines): if i < onebyte_start: newlines.append(line) if onebyte_start <= i < twobyte_start: #one-byte instructions linematch=caseline.match(line) if linematch: try: newlines.append(linematch.group(1)+linematch.group(2)+"://"+onedict[strip_1(linematch.group(2))]+"\n") except KeyError: newlines.append(line) else: newlines.append(line) if twobyte_start <= i < twobyte_end: #two-byte instructions linematch=caseline.match(line) if linematch: try: newlines.append(linematch.group(1)+linematch.group(2)+"://"+twodict[strip_1(linematch.group(2))]+"\n") except KeyError: newlines.append(line) else: newlines.append(line) if twobyte_end <= i: newlines.append(line) outfile=open("cpux86-ta-auto-annotated.js",'w') outfile.writelines(newlines) outfile.close()