#include #include #include #include #include #include "parser.h" #include "a64instruction/a64instruction.h" //takes input string, read from asm file and returns //input as an a64 instruction //TODO: // - use string matching to get opcode, and operands (DONE) // - check operand count (DONE) // - match opcode to a64 struct types (DONE) // - count operands and match type/values (DONE) // - generate final a64inst and return (TODO: DP instrs) // - ASK ABOUT OFFSET CALCULATION // - CREATE FUNC TO TIDY UP OPERANDS IN DP //takes inputted char array and returns the integer of the operand, skipping the first character //e.g. for a passed "R32", it skips the 'R' and returns 32 int getOperandNumber(char *operand){ char operandCpy[strlen(operand)]; strcpy(operandCpy, operand+1); char **endptr = NULL; int number = strtol(operandCpy, endptr, 10); return number; } int isOperandRegister(char regStartChar){ return((regStartChar == 'x') || (regStartChar == 'w')); } //calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){ char *endptr; char baseRegParam[strlen(operandList[1])]; strcpy(baseRegParam, operandList[1]); char *startptr = &baseRegParam[1]; int base = getOperandNumber(startptr); instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base; if(operandList[2][strlen(operandList[2])-1] == '!'){ instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); } else if(operandList[1][strlen(operandList[1])-1] == ']') { //post-indexed instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); } else if( (isOperandRegister(operandList[1][0]) == 1) || (isOperandRegister(operandList[2][0]) == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = strtol(&(operandList[2][1]), &endptr, 10); } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; if(numOperands==3){ int offset = strtol(&(operandList[2][1]), &endptr, 10); if(instr->data.SingleTransferData.regType == 1){ instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/4; } } } } void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){ switch(instr->type){ case a64inst_SINGLETRANSFER: { if(operandList[0][0] == 'x'){ //x-register instr->data.SingleTransferData.regType = 1; } else { instr->data.SingleTransferData.regType = 0; } instr->data.SingleTransferData.target = getOperandNumber(operandList[0]); break; } case a64inst_LOADLITERAL: break; default: break; } } void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ char *endptr; switch(instr->data.BranchData.BranchType){ case a64inst_UNCONDITIONAL: //define and sign extend immediate offset //use symbol table printf("unconditional"); break; case a64inst_REGISTER: instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, &endptr, 10); break; case a64inst_CONDITIONAL: { char condition[strlen(opcode)+1]; strcpy(condition, opcode+2); if(strcmp(condition, "eq")==0){ instr->data.BranchData.processOpData.conditionalData.cond = EQ; } else if (strcmp(condition, "ne")==0){ instr->data.BranchData.processOpData.conditionalData.cond = NE; } else if (strcmp(condition, "ge")==0){ instr->data.BranchData.processOpData.conditionalData.cond = GE; } else if (strcmp(condition, "lt")==0){ instr->data.BranchData.processOpData.conditionalData.cond = LT; } else if (strcmp(condition, "gt")==0){ instr->data.BranchData.processOpData.conditionalData.cond = GT; } else if (strcmp(condition, "le")==0){ instr->data.BranchData.processOpData.conditionalData.cond = LE; } else if (strcmp(condition, "al")==0){ instr->data.BranchData.processOpData.conditionalData.cond = AL; } break; //calculate offset from symbol table. } } } int classifyDPInst(char *operandList[]){ return(isOperandRegister(operandList[0][0]) && isOperandRegister(operandList[1][0]) && isOperandRegister(operandList[2][0])); } void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[], int numOperands){ int isUnconditional = strcmp(opcode, "b"); int isRegister = strcmp(opcode, "br"); int isLoad = strcmp(opcode, "ldr"); int isStore = strcmp(opcode, "str"); if(isUnconditional == 0 || isRegister == 0 || strncmp(opcode, "b.", 2) == 0){ instr->type = a64inst_BRANCH; if(isUnconditional){ instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; } else if (isRegister){ instr->data.BranchData.BranchType = a64inst_REGISTER; } else { instr->data.BranchData.BranchType = a64inst_CONDITIONAL; } } else if(isLoad == 0 || isStore == 0){ //loading/storing instruction; classify operands if( operandList[1][0] == '['){ //type is register instr->type = a64inst_SINGLETRANSFER; instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; if(isLoad == 0){ instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE; } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD; } } else { instr->type = a64inst_LOADLITERAL; if(operandList[0][0] =='#'){ //offset is immediate int offset = getOperandNumber(operandList[0]); instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; } else { //offset is literal, use symbol table and calculate difference } } } else { if(classifyDPInst(operandList)){ instr->type = a64inst_DPREGISTER; } else { instr->type = a64inst_DPIMMEDIATE; } } } void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){ assert(str != NULL); char *operandsDupe = malloc(strlen(str)+1); assert(operandsDupe != NULL); strcpy(operandsDupe, str); char *operand = strtok(operandsDupe, OPERAND_DELIMITER); operands[0] = operand; while (operand != NULL){ *operandCount = *(operandCount)+1; operand = strtok(NULL, OPERAND_DELIMITER); operands[*(operandCount)] = operand; } *(numOperands) = *(operandCount)+1; } //takes inputted assembly line and returns a //pointer to an abstract representation of the instruction void parser_instruction(char asmLine[], a64inst_instruction *instr) { printf("%s", asmLine); int numOperands = 0; if (instr == NULL){ exit(EXIT_FAILURE); } if(strcmp(asmLine, HALT_ASM_CMD) == 0){ instr->type = a64inst_HALT; return; } //"opcode operand1, {operand2}, ..." //duplicated as strtok modifies the input string char stringptr[strlen(asmLine) + 1]; strcpy(stringptr, asmLine); char *token; token = strtok(stringptr, " "); char opcode[strlen(token)+1]; strcpy(opcode, token); token = strtok(NULL, ""); char operands[strlen(token)+1]; strcpy(operands, token); if(strcmp(opcode, ".int") == 0){ //type is directive instr->type = a64inst_DIRECTIVE; } else if(opcode[strlen(opcode)-1]== ':') { //type is label //add to symbol table instr->type = a64inst_LABEL; char opcodeCpy[strlen(opcode)+1]; strcpy(opcodeCpy, opcode); char *labelData = strtok(opcodeCpy, ":"); instr->data.LabelData.label = labelData; } else { //type is instruction int operandCount = 0; char *operandList[5]; //generate list of operands tokeniseOperands(operands, &operandCount, operandList, &numOperands); //categorise instruction type from opcode and operands classifyOpcode(opcode, instr, operandList, operandCount); //define struct values according to operands and type printf("got to here"); switch(instr->type){ case a64inst_BRANCH: generateBranchOperands(instr, opcode, operandList); break; case a64inst_SINGLETRANSFER: generateLoadStoreOperands(instr, opcode, operandList, numOperands); calcluateAddressFormat(instr, operandList, numOperands); break; case a64inst_LOADLITERAL: generateLoadStoreOperands(instr, opcode, operandList, numOperands); break; case a64inst_DPREGISTER: //generate DP operands; break; case a64inst_DPIMMEDIATE: //generate DP operands; break; default: printf("INVALID INSTRUCTION"); break; } } } // Takes an array of strings, each string representing an assembly instruction. // Returns an array of a64inst_instruction pointers, each representing an instruction. a64inst_instruction *parse(char **asmLines, int lineCount) { a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); int i = 0; while (asmLines[i] != NULL) { parser_instruction(asmLines[i], &instructions[i]); i++; } return instructions; }