553 lines
23 KiB
C
553 lines
23 KiB
C
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdbool.h>
|
|
#include "parser.h"
|
|
#include "a64instruction/a64instruction.h"
|
|
|
|
// Takes an input string read from an asm file and returns it as an a64 instruction.
|
|
|
|
//TODO:
|
|
// - use string matching to get opcode, and operands (DONE)
|
|
// - check operand count (DONE)
|
|
// - match opcode to a64 struct types (DONE)
|
|
// - count operands and match type/values (DONE)
|
|
// - generate final a64inst and return (TODO: DP instrs)
|
|
// - ASK ABOUT OFFSET CALCULATION
|
|
// - CREATE FUNC TO TIDY UP OPERANDS IN DP
|
|
|
|
//Extracts the decimal number that follows the one-character prefix of an
//operand token, e.g. "x12" -> 12, "w3" -> 3, "#42" -> 42.
//
// operand: operand token; the first character is skipped unconditionally.
// returns: the parsed number, or 0 when operand is NULL, empty, or has no
//          digits after its first character.
int getOperandNumber(char *operand){
    if (operand == NULL || operand[0] == '\0') {
        return 0;
    }
    //parse straight from the second character; no copy of the token is needed
    return (int)strtol(operand + 1, NULL, 10);
}
|
|
|
|
//Reports whether an operand token names a register, i.e. whether its first
//character is 'x' or 'w'. Only the first character is inspected, so "x0",
//"w12" and "x2]" all count as registers.
//
// operand: operand token (may be NULL).
// returns: 1 if the token starts with 'x' or 'w', 0 otherwise (including NULL).
int isOperandRegister(char *operand){
    if (operand == NULL) {
        return 0;
    }
    return (operand[0] == 'x' || operand[0] == 'w');
}
|
|
|
|
//calculate offsets from string
|
|
void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){
|
|
char *endptr;
|
|
uint8_t base = strtol(&(operandList[1][2]), &endptr, 10);
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base;
|
|
|
|
if(strcmp(&(operandList[2][strlen(operandList[1])-1]), "!")==0){
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED;
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10);
|
|
} else if(strcmp(&(operandList[1][strlen(operandList[0])-1]), "]") == 0) {
|
|
//post-indexed
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED;
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10);
|
|
} else if( (isOperandRegister(&(operandList[2][0])) == 1)
|
|
|| (isOperandRegister(&(operandList[2][0])) == 1)){
|
|
//register
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET;
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = strtol(&(operandList[2][1]), &endptr, 10);
|
|
} else {
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
|
|
if(numOperands==3){
|
|
int offset = strtol(&(operandList[2][1]), &endptr, 10);
|
|
if(instr->data.SingleTransferData.regType == 1){
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8;
|
|
} else {
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/4;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//Fills in the target register and register type of a load/store instruction
//and, for single data transfers, the addressing-mode fields.
//
// instr:       instruction whose type has already been classified.
// opcode:      unused; kept so the signature matches the other generators.
// operandList: operandList[0] is the target register token (e.g. "x3").
// numOperands: forwarded to calcluateAddressFormat.
//
//Instruction types other than SINGLETRANSFER and LOADLITERAL are left untouched.
void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){
    (void)opcode;

    switch(instr->type){
        case a64inst_SINGLETRANSFER:
        case a64inst_LOADLITERAL:
            if (operandList[0] == NULL) {
                break;
            }
            //regType 1 marks an x-register, 0 anything else; only the first
            //character of the token decides
            instr->data.SingleTransferData.regType = (operandList[0][0] == 'x') ? 1 : 0;
            instr->data.SingleTransferData.target = strtol(operandList[0] + 1, NULL, 10);

            //NOTE(review): target/regType are set for load literal as well,
            //on the assumption that it shares these SingleTransferData
            //fields; confirm against a64instruction.h
            if (instr->type == a64inst_SINGLETRANSFER) {
                //must run after regType is set: the unsigned offset scales by it
                calcluateAddressFormat(instr, operandList, numOperands);
            }
            break;
        default:
            break;
    }
}
|
|
|
|
//Fills in the operand fields of a branch instruction according to the
//BranchType already stored in instr.
//
// instr:       branch instruction with data.BranchData.BranchType set.
// opcode:      mnemonic; for conditional branches the condition is the text
//              after the leading "b." (e.g. "b.eq" -> "eq").
// operandList: operandList[0] is the register token for register branches.
//
//Unconditional branches only print a marker for now: their offset needs the
//symbol table, as does the offset of conditional branches. An unrecognised
//condition leaves the cond field unchanged.
void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){
    switch(instr->data.BranchData.BranchType){
        case a64inst_UNCONDITIONAL:
            //define and sign extend immediate offset
            //use symbol table
            printf("unconditional");
            break;
        case a64inst_REGISTER:
            //skip the register prefix character to reach the number
            instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, NULL, 10);
            break;
        case a64inst_CONDITIONAL:
        {
            if (opcode == NULL || strlen(opcode) < 2) {
                break;
            }
            //the condition mnemonic follows the two-character "b." prefix
            const char *condition = opcode + 2;
            if(strcmp(condition, "eq")==0){
                instr->data.BranchData.processOpData.conditionalData.cond = EQ;
            } else if (strcmp(condition, "ne")==0){
                instr->data.BranchData.processOpData.conditionalData.cond = NE;
            } else if (strcmp(condition, "ge")==0){
                instr->data.BranchData.processOpData.conditionalData.cond = GE;
            } else if (strcmp(condition, "lt")==0){
                instr->data.BranchData.processOpData.conditionalData.cond = LT;
            } else if (strcmp(condition, "gt")==0){
                instr->data.BranchData.processOpData.conditionalData.cond = GT;
            } else if (strcmp(condition, "le")==0){
                instr->data.BranchData.processOpData.conditionalData.cond = LE;
            } else if (strcmp(condition, "al")==0){
                instr->data.BranchData.processOpData.conditionalData.cond = AL;
            }
            //calculate offset from symbol table.
            break;
        }
        default:
            break;
    }
}
|
|
|
|
//Decides whether a data-processing instruction is the register form.
//Checks the first three operand tokens in order with isOperandRegister and
//stops at the first one that is not a register.
//
// returns: 1 when all three tokens are registers, 0 otherwise.
int classifyDPInst(char *operandList[]){
    for (int idx = 0; idx < 3; idx++) {
        if (!isOperandRegister(operandList[idx])) {
            return 0;
        }
    }
    return 1;
}
|
|
|
|
//Maps the shift mnemonic at the start of a shift operand (e.g. "lsl #3") to
//its shiftType code: lsl -> 0, lsr -> 1, ror -> 3, anything else -> 2 (asr).
//Upper- and lower-case mnemonics are both accepted.
static int dpShiftTypeOf(const char *shiftOperand){
    while (*shiftOperand == ' ') {
        shiftOperand++;
    }
    switch (shiftOperand[0]) {
        case 'L':
        case 'l':
            //the third letter tells lsr apart from lsl
            if (shiftOperand[1] != '\0' &&
                (shiftOperand[2] == 'R' || shiftOperand[2] == 'r')) {
                return 1;
            }
            return 0;
        case 'R':
        case 'r':
            return 3;
        default:
            return 2;
    }
}

//True for the multiply mnemonics mul, madd, msub and mneg. An explicit list
//is used so that "mov" and "mvn" are not mistaken for multiplies.
static bool dpIsMultiplyOpcode(const char *opcode){
    return strcmp(opcode, "mul") == 0 || strcmp(opcode, "madd") == 0 ||
           strcmp(opcode, "msub") == 0 || strcmp(opcode, "mneg") == 0;
}

//Marks instr as a branch, picks the branch type from the mnemonic
//("b" / "br" / "b.<cond>") and generates the branch operands.
static void classifyBranchOpcode(char *opcode, a64inst_instruction *instr, char *operandList[]){
    instr->type = a64inst_BRANCH;
    if (strcmp(opcode, "b") == 0) {
        instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL;
    } else if (strcmp(opcode, "br") == 0) {
        instr->data.BranchData.BranchType = a64inst_REGISTER;
    } else {
        instr->data.BranchData.BranchType = a64inst_CONDITIONAL;
    }
    generateBranchOperands(instr, opcode, operandList);
}

//Classifies an ldr/str as a single data transfer (address token starts with
//'[') or a load literal, then generates the load/store operands.
static void classifyLoadStoreOpcode(char *opcode, a64inst_instruction *instr, char *operandList[], int numOperands, bool isLoad){
    char *address = operandList[1];
    if (address == NULL) {
        //no address operand: nothing can be classified
        return;
    }

    if (*address == '[') {
        //address is register based
        instr->type = a64inst_SINGLETRANSFER;
        instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
        if (isLoad) {
            instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD;
        } else {
            instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE;
        }
    } else {
        instr->type = a64inst_LOADLITERAL;
        if (address[0] == '#') {
            //offset is an immediate: the number follows the '#'
            instr->data.SingleTransferData.processOpData.loadLiteralData.offset = (int)strtol(address + 1, NULL, 10);
        } else {
            //offset is literal, use symbol table and calculate difference
        }
    }
    generateLoadStoreOperands(instr, opcode, operandList, numOperands);
}

//Fills instr->data.DPRegisterData for a register-form data-processing
//instruction: multiplies (mul/madd/msub/mneg), the aliases tst/mvn/mov, and
//the logical operations and/ands/bic/bics/orr/orn/eor/eon.
//NOTE(review): register-form arithmetic (add/sub/cmp/...) is not encoded
//here; such opcodes fall into the logical switch below, as in the original.
//NOTE(review): only the shift type is recorded; the shift amount is not
//stored because no field for it is visible in this file.
static void classifyDPRegisterOperands(char *opcode, a64inst_instruction *instr, char *operandList[], int numOperands){
    instr->type = a64inst_DPREGISTER;
    //write through a pointer so the fields land in instr, not in a local copy
    a64inst_DPRegisterData *data = &instr->data.DPRegisterData;

    data->regType = (operandList[0][0] == 'w') ? 0 : 1;

    if (dpIsMultiplyOpcode(opcode)) {
        data->DPROpType = 1;
        data->dest = getOperandNumber(operandList[0]);
        data->src1 = getOperandNumber(operandList[1]);
        data->src2 = getOperandNumber(operandList[2]);

        //madd/msub carry a fourth register operand; fall back to the zero
        //register if it is missing
        bool hasSummand = (numOperands >= 4) && (operandList[3] != NULL);
        switch (opcode[1]) {
            // madd
            case 'a':
                data->processOpData.multiplydata.summand = hasSummand ? getOperandNumber(operandList[3]) : ZERO_REGISTER;
                data->processOpData.multiplydata.negProd = false;
                break;
            // mneg
            case 'n':
                data->processOpData.multiplydata.summand = ZERO_REGISTER;
                data->processOpData.multiplydata.negProd = true;
                break;
            // msub
            case 's':
                data->processOpData.multiplydata.summand = hasSummand ? getOperandNumber(operandList[3]) : ZERO_REGISTER;
                data->processOpData.multiplydata.negProd = true;
                break;
            // mul
            default:
                data->processOpData.multiplydata.summand = ZERO_REGISTER;
                data->processOpData.multiplydata.negProd = false;
                break;
        }
        return;
    }

    // arithmlogic
    data->DPROpType = 0;
    data->processOpData.arithmLogicData.negShiftedSrc2 = 0;
    // logical
    data->processOpData.arithmLogicData.type = 0;
    //default to shift type 0 (lsl) when no shift operand is given
    data->processOpData.arithmLogicData.shiftType = 0;

    char *shiftOperand = NULL;

    // three special cases
    if (strcmp(opcode, "tst") == 0) {
        data->dest = ZERO_REGISTER;
        data->src1 = getOperandNumber(operandList[0]);
        data->src2 = getOperandNumber(operandList[1]);
        data->processOp = 3;
        if (numOperands == 3) {
            shiftOperand = operandList[2];
        }
    } else if (strcmp(opcode, "mvn") == 0) {
        data->dest = getOperandNumber(operandList[0]);
        data->src1 = ZERO_REGISTER;
        data->src2 = getOperandNumber(operandList[1]);
        data->processOp = 1;
        data->processOpData.arithmLogicData.negShiftedSrc2 = 1;
        if (numOperands == 3) {
            shiftOperand = operandList[2];
        }
    } else if (strcmp(opcode, "mov") == 0) {
        data->dest = getOperandNumber(operandList[0]);
        data->src1 = ZERO_REGISTER;
        data->src2 = getOperandNumber(operandList[1]);
        data->processOp = 1;
    } else {
        data->dest = getOperandNumber(operandList[0]);
        data->src1 = getOperandNumber(operandList[1]);
        data->src2 = getOperandNumber(operandList[2]);
        // handles shifts
        if (numOperands == 4) {
            shiftOperand = operandList[3];
        }

        //third mnemonic letter, read only if the mnemonic is long enough
        char third = (opcode[0] != '\0' && opcode[1] != '\0') ? opcode[2] : '\0';
        switch (opcode[0]) {
            // and, ands
            case 'a':
                data->processOp = (strlen(opcode) == 4) ? 3 : 0;
                break;
            // bic, bics
            case 'b':
                data->processOpData.arithmLogicData.negShiftedSrc2 = 1;
                data->processOp = (strlen(opcode) == 4) ? 3 : 0;
                break;
            // orr, orn
            case 'o':
                data->processOp = 1;
                if (third == 'n') {
                    data->processOpData.arithmLogicData.negShiftedSrc2 = 1;
                }
                break;
            // eor, eon
            default:
                data->processOp = 2;
                if (third == 'n') {
                    data->processOpData.arithmLogicData.negShiftedSrc2 = 1;
                }
                break;
        }
    }

    if (shiftOperand != NULL) {
        data->processOpData.arithmLogicData.shiftType = dpShiftTypeOf(shiftOperand);
    }
}

//Fills instr->data.DPImmediateData for an immediate-form data-processing
//instruction: add/adds, cmn, sub/subs, cmp, neg/negs, and otherwise a wide
//move (processOp is chosen from the fourth mnemonic letter: n, z, k).
//NOTE(review): shiftImmediate is set from the trailing 's' of the mnemonic,
//exactly as the original did; confirm this is the intended meaning.
static void classifyDPImmediateOperands(char *opcode, a64inst_instruction *instr, char *operandList[], int numOperands){
    instr->type = a64inst_DPIMMEDIATE;
    //write through a pointer so the fields land in instr, not in a local copy
    a64inst_DPImmediateData *data = &instr->data.DPImmediateData;

    size_t opcodeLen = strlen(opcode);
    //last character of the mnemonic, e.g. the 's' of "adds"
    bool endsInS = (opcodeLen > 0) && (opcode[opcodeLen - 1] == 's');

    data->regType = (operandList[0][0] == 'w') ? 0 : 1;

    // arithmetic
    if (opcodeLen > 1 && opcode[1] == 'd') {
        // add, adds
        data->DPIOpType = 0;
        data->dest = getOperandNumber(operandList[0]);
        data->processOpData.arithmData.src = getOperandNumber(operandList[1]);
        data->processOpData.arithmData.immediate = getOperandNumber(operandList[2]);
        data->processOpData.arithmData.shiftImmediate = endsInS;
        data->processOp = endsInS ? 1 : 0;
    } else if (strcmp(opcode, "cmn") == 0) {
        // cmn
        data->DPIOpType = 0;
        data->dest = ZERO_REGISTER;
        data->processOpData.arithmData.src = getOperandNumber(operandList[0]);
        data->processOpData.arithmData.immediate = getOperandNumber(operandList[1]);
        data->processOpData.arithmData.shiftImmediate = true;
        data->processOp = 1;
    } else if (opcode[0] == 's') {
        // sub, subs
        data->DPIOpType = 0;
        data->dest = getOperandNumber(operandList[0]);
        data->processOpData.arithmData.src = getOperandNumber(operandList[1]);
        data->processOpData.arithmData.immediate = getOperandNumber(operandList[2]);
        data->processOpData.arithmData.shiftImmediate = endsInS;
        data->processOp = endsInS ? 3 : 2;
    } else if (strcmp(opcode, "cmp") == 0) {
        // cmp
        data->DPIOpType = 0;
        data->dest = ZERO_REGISTER;
        data->processOpData.arithmData.src = getOperandNumber(operandList[0]);
        data->processOpData.arithmData.immediate = getOperandNumber(operandList[1]);
        data->processOpData.arithmData.shiftImmediate = true;
        data->processOp = 3;
    } else if (opcode[0] == 'n') {
        // neg, negs: two operands, destination first, then the immediate
        data->DPIOpType = 0;
        data->dest = getOperandNumber(operandList[0]);
        data->processOpData.arithmData.src = ZERO_REGISTER;
        data->processOpData.arithmData.immediate = getOperandNumber(operandList[1]);
        data->processOpData.arithmData.shiftImmediate = endsInS;
        data->processOp = endsInS ? 3 : 2;
    } else {
        // wide move
        data->DPIOpType = 1;
        data->dest = getOperandNumber(operandList[0]);

        //fourth mnemonic letter selects the variant; '\0' for short mnemonics
        char variant = (opcodeLen > 3) ? opcode[3] : '\0';
        switch (variant) {
            case 'k':
                data->processOp = 3;
                break;
            case 'n':
                data->processOp = 0;
                break;
            case 'z':
                data->processOp = 2;
                break;
            default:
                data->processOp = 1;
                break;
        }
        data->processOpData.wideMovData.immediate = getOperandNumber(operandList[1]);

        //optional third operand such as "lsl #16": the amount follows the '#'
        data->processOpData.wideMovData.shiftScalar = 0;
        if (numOperands == 3 && operandList[2] != NULL) {
            char *amount = strchr(operandList[2], '#');
            if (amount != NULL) {
                data->processOpData.wideMovData.shiftScalar = getOperandNumber(amount);
            }
        }
    }
}

//Classifies an instruction from its mnemonic and operand tokens, sets
//instr->type and fills in the matching data fields.
//
// opcode:      instruction mnemonic (e.g. "b.eq", "ldr", "madd").
// instr:       instruction to fill in.
// operandList: operand tokens in source order.
// numOperands: number of operand tokens in operandList.
//
//Dispatch order: branches ("b", "br", "b.<cond>"), then ldr/str, then
//data-processing, where classifyDPInst chooses register or immediate form.
void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[], int numOperands){
    bool isLoad = (strcmp(opcode, "ldr") == 0);
    bool isStore = (strcmp(opcode, "str") == 0);

    if (strcmp(opcode, "b") == 0 ||
        strcmp(opcode, "br") == 0 ||
        strncmp(opcode, "b.", 2) == 0) {
        classifyBranchOpcode(opcode, instr, operandList);
    } else if (isLoad || isStore) {
        //loading/storing instruction; classify operands
        classifyLoadStoreOpcode(opcode, instr, operandList, numOperands, isLoad);
    } else if (classifyDPInst(operandList)) {
        classifyDPRegisterOperands(opcode, instr, operandList, numOperands);
    } else {
        classifyDPImmediateOperands(opcode, instr, operandList, numOperands);
    }
}
|
|
|
|
void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){
|
|
assert(str != NULL);
|
|
char operandsDupe[strlen(str)+1];
|
|
strcpy(operandsDupe, str);
|
|
char *operand = strtok(operandsDupe, OPERAND_DELIMITER);
|
|
operands[0] = operand;
|
|
|
|
while (operand != NULL){
|
|
*operandCount = *(operandCount)+1;
|
|
operand = strtok(NULL, OPERAND_DELIMITER);
|
|
operands[*(operandCount)] = operand;
|
|
}
|
|
*(numOperands) = *(operandCount)+1;
|
|
}
|
|
|
|
//takes inputted assembly line and returns a
|
|
//pointer to an abstract representation of the instruction
|
|
void parser_instruction(char asmLine[], a64inst_instruction *instr) {
|
|
int numOperands = 0;
|
|
if (instr == NULL){
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
if(strcmp(asmLine, HALT_ASM_CMD) == 0){
|
|
instr->type = a64inst_HALT;
|
|
return;
|
|
}
|
|
|
|
//"opcode operand1, {operand2}, ..."
|
|
//duplicated as strtok modifies the input string
|
|
char stringptr[strlen(asmLine) + 1];
|
|
strcpy(stringptr, asmLine);
|
|
|
|
char *opcode = strtok(stringptr, " ");
|
|
char *operands = strtok(NULL, "");
|
|
|
|
if(strcmp(opcode, ".int") == 0){
|
|
//type is directive
|
|
instr->type = a64inst_DIRECTIVE;
|
|
|
|
} else if(opcode[strlen(opcode)-1]== ':') {
|
|
//type is label
|
|
//add to symbol table
|
|
instr->type = a64inst_LABEL;
|
|
char *opcodeCpy = NULL;
|
|
opcodeCpy = strcpy(opcodeCpy, opcode);
|
|
char *labelData = strtok(opcodeCpy, ":");
|
|
instr->data.LabelData.label = labelData;
|
|
} else {
|
|
//type is instruction
|
|
int operandCount = 0;
|
|
char *operandList[4];
|
|
//generate list of operands
|
|
tokeniseOperands(operands, &operandCount, operandList, &numOperands);
|
|
//categorise instruction type from opcode and operands
|
|
classifyOpcode(opcode, instr, operandList, operandCount);
|
|
//define struct values according to operands and type
|
|
switch(instr->type){
|
|
case a64inst_BRANCH:
|
|
generateBranchOperands(instr, opcode, operandList);
|
|
break;
|
|
case a64inst_SINGLETRANSFER:
|
|
generateLoadStoreOperands(instr, opcode, operandList, numOperands);
|
|
break;
|
|
case a64inst_LOADLITERAL:
|
|
generateLoadStoreOperands(instr, opcode, operandList, numOperands);
|
|
break;
|
|
case a64inst_DPREGISTER:
|
|
//generate DP operands;
|
|
break;
|
|
case a64inst_DPIMMEDIATE:
|
|
//generate DP operands;
|
|
break;
|
|
default:
|
|
printf("INVALID INSTRUCTION");
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Takes an array of strings, each string representing an assembly instruction.
|
|
// Returns an array of a64inst_instruction pointers, each representing an instruction.
|
|
a64inst_instruction *parse(char **asmLines, int lineCount) {
|
|
a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount);
|
|
|
|
int i = 0;
|
|
while (asmLines[i] != NULL) {
|
|
parser_instruction(asmLines[i], &instructions[i]);
|
|
i++;
|
|
}
|
|
|
|
return instructions;
|
|
}
|