#include #include #include #include #include #include "parser.h" #include "a64instruction/a64instruction.h" //takes input string, read from asm file and returns //input as an a64 instruction //TODO: // - use string matching to get opcode, and operands (DONE) // - check operand count (DONE) // - match opcode to a64 struct types (DONE) // - count operands and match type/values (DONE) // - generate final a64inst and return (TODO: DP instrs) // - ASK ABOUT OFFSET CALCULATION // - CREATE FUNC TO TIDY UP OPERANDS IN DP int getOperandNumber(char *operand){ char *operandCpy = strcpy(operandCpy, operand); operandCpy++; char **endptr; int number = strtol(operandCpy, endptr, 10); return number; } int isOperandRegister(char *operand){ return((strcmp(&(operand[0]), "x")==0) || (strcmp(&(operand[0]), "w")==0)); } //calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){ char *endptr; uint8_t base = strtol(&(operandList[1][2]), &endptr, 10); instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base; if(strcmp(&(operandList[2][strlen(operandList[1])-1]), "!")==0){ instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); } else if(strcmp(&(operandList[1][strlen(operandList[0])-1]), "]") == 0) { //post-indexed instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); } else if( (isOperandRegister(&(operandList[2][0])) == 1) || (isOperandRegister(&(operandList[2][0])) == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = strtol(&(operandList[2][1]), &endptr, 10); } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; if(numOperands==3){ int offset = strtol(&(operandList[2][1]), &endptr, 10); if(instr->data.SingleTransferData.regType == 1){ instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/4; } } } } void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){ switch(instr->type){ case a64inst_SINGLETRANSFER: if(strcmp(&(operandList[0][0]), "x")==0){ //x-register instr->data.SingleTransferData.regType = 1; } else { instr->data.SingleTransferData.regType = 0; } char *endptr; instr->data.SingleTransferData.target = strtol(&(operandList[0][0])+1, &endptr, 10); calcluateAddressFormat(instr, operandList, numOperands); break; case a64inst_LOADLITERAL: break; default: break; } } void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ char *endptr; switch(instr->data.BranchData.BranchType){ case a64inst_UNCONDITIONAL: //define and sign extend immediate offset //use symbol table printf("unconditional"); break; case a64inst_REGISTER: instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, &endptr, 10); break; case a64inst_CONDITIONAL: { char *condition = NULL; condition = strcpy(condition, opcode); condition += 2; if(strcmp(condition, "eq")==0){ instr->data.BranchData.processOpData.conditionalData.cond = EQ; } else if (strcmp(condition, "ne")==0){ instr->data.BranchData.processOpData.conditionalData.cond = NE; } else if (strcmp(condition, "ge")==0){ instr->data.BranchData.processOpData.conditionalData.cond = GE; } else if (strcmp(condition, "lt")==0){ instr->data.BranchData.processOpData.conditionalData.cond = LT; } else if (strcmp(condition, "gt")==0){ instr->data.BranchData.processOpData.conditionalData.cond = GT; } else if (strcmp(condition, "le")==0){ instr->data.BranchData.processOpData.conditionalData.cond = LE; } else if (strcmp(condition, "al")==0){ instr->data.BranchData.processOpData.conditionalData.cond = AL; } break; //calculate offset from symbol table. } } } int classifyDPInst(char *operandList[]){ return(isOperandRegister(operandList[0]) && isOperandRegister(operandList[1]) && isOperandRegister(operandList[2])); } void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[], int numOperands){ int isUnconditional = strcmp(opcode, "b"); int isRegister = strcmp(opcode, "br"); int isLoad = strcmp(opcode, "ldr"); int isStore = strcmp(opcode, "str"); if(isUnconditional == 0 || isRegister == 0 || strncmp(opcode, "b.", 2) == 0){ instr->type = a64inst_BRANCH; if(isUnconditional){ instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; } else if (isRegister){ instr->data.BranchData.BranchType = a64inst_REGISTER; } else { instr->data.BranchData.BranchType = a64inst_CONDITIONAL; } generateBranchOperands(instr, opcode, operandList); } else if(isLoad == 0 || isStore == 0){ //loading/storing instruction; classify operands char *address = operandList[1]; if( *address == '['){ //type is register instr->type = a64inst_SINGLETRANSFER; instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; if(isLoad == 0){ instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD; } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE; } } else { instr->type = a64inst_LOADLITERAL; if(operandList[0][0] =='#'){ //offset is immediate char *immOffset = NULL; immOffset = strcpy(immOffset, operandList[0]); immOffset++; char *endptr = NULL; int offset = strtol(immOffset, &endptr, 10); instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; } else { //offset is literal, use symbol table and calculate difference } } generateLoadStoreOperands(instr, opcode, operandList, numOperands); } else { if(classifyDPInst(operandList)){ instr->type = a64inst_DPREGISTER; } else { instr->type = a64inst_DPIMMEDIATE; a64inst_DPImmediateData data = instr->data.DPImmediateData; char t = operandList[0][0]; int reg = getOperandNumber(operandList[0]); data.dest=reg; if (t == 'w') { data.regType=0; } else { data.regType=1; } // arithmetic // add, adds if (opcode[1] == 'd') { data.DPIOpType = 0; data.processOpData.arithmData.src = getOperandNumber(operandList[1]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); if (opcode[-1] == 's') { data.processOpData.arithmData.shiftImmediate = true; data.processOp = 1; } else { data.processOpData.arithmData.shiftImmediate = false; data.processOp = 0; } } // cmn else if (opcode == "cmn") { data.DPIOpType = 0; data.processOpData.arithmData.src = ZERO_REGISTER; data.processOpData.arithmData.immediate = getOperandNumber(operandList[1]); data.processOpData.arithmData.shiftImmediate = true; data.processOp = 1; } // sub, subs else if (opcode[0] == 's') { data.DPIOpType = 0; data.processOpData.arithmData.src = getOperandNumber(operandList[1]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); if (opcode[-1] == 's') { data.processOpData.arithmData.shiftImmediate = true; data.processOp = 3; } else { data.processOpData.arithmData.shiftImmediate = false; data.processOp = 2; } } // cmp else if (opcode == 'cmp') { data.DPIOpType = 0; data.processOpData.arithmData.src = ZERO_REGISTER; data.processOpData.arithmData.immediate = getOperandNumber(operandList[1]); data.processOpData.arithmData.shiftImmediate = true; data.processOp = 3; } // wide move } } } void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){ assert(str != NULL); char operandsDupe[strlen(str)+1]; strcpy(operandsDupe, str); char *operand = strtok(operandsDupe, OPERAND_DELIMITER); operands[0] = operand; while (operand != NULL){ *operandCount = *(operandCount)+1; operand = strtok(NULL, OPERAND_DELIMITER); operands[*(operandCount)] = operand; } *(numOperands) = *(operandCount)+1; } //takes inputted assembly line and returns a //pointer to an abstract representation of the instruction void parser_instruction(char asmLine[], a64inst_instruction *instr) { int numOperands = 0; if (instr == NULL){ exit(EXIT_FAILURE); } if(strcmp(asmLine, HALT_ASM_CMD) == 0){ instr->type = a64inst_HALT; return; } //"opcode operand1, {operand2}, ..." //duplicated as strtok modifies the input string char stringptr[strlen(asmLine) + 1]; strcpy(stringptr, asmLine); char *opcode = strtok(stringptr, " "); char *operands = strtok(NULL, ""); if(strcmp(opcode, ".int") == 0){ //type is directive instr->type = a64inst_DIRECTIVE; } else if(opcode[strlen(opcode)-1]== ':') { //type is label //add to symbol table instr->type = a64inst_LABEL; char *opcodeCpy = NULL; opcodeCpy = strcpy(opcodeCpy, opcode); char *labelData = strtok(opcodeCpy, ":"); instr->data.LabelData.label = labelData; } else { //type is instruction int operandCount = 0; char *operandList[4]; //generate list of operands tokeniseOperands(operands, &operandCount, operandList, &numOperands); //categorise instruction type from opcode and operands classifyOpcode(opcode, instr, operandList, operandCount); //define struct values according to operands and type switch(instr->type){ case a64inst_BRANCH: generateBranchOperands(instr, opcode, operandList); break; case a64inst_SINGLETRANSFER: generateLoadStoreOperands(instr, opcode, operandList, numOperands); break; case a64inst_LOADLITERAL: generateLoadStoreOperands(instr, opcode, operandList, numOperands); break; case a64inst_DPREGISTER: //generate DP operands; break; case a64inst_DPIMMEDIATE: //generate DP operands; break; default: printf("INVALID INSTRUCTION"); break; } } } // Takes an array of strings, each string representing an assembly instruction. // Returns an array of a64inst_instruction pointers, each representing an instruction. a64inst_instruction *parse(char **asmLines, int lineCount) { a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); int i = 0; while (asmLines[i] != NULL) { parser_instruction(asmLines[i], &instructions[i]); i++; } return instructions; }