#include #include #include #include #include #include "parser.h" #include "a64instruction/a64instruction.h" #include "tokeniser.c" /** Prototypes */ void parse_instruction(char asmLine[], a64inst_instruction *instr); static char *duplicateString(char *str); void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands); void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands); void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount); /** Constants */ static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"}; static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"}; static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"}; static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"}; a64inst_instruction *parse(char **asmLines, int lineCount) { a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); int i = 0; while (asmLines[i] != NULL) { parse_instruction(asmLines[i], &instructions[i]); i++; } return instructions; } static char *duplicateString(char *str) { char *newStr = malloc(strlen(str) + 1); strcpy(newStr, str); return newStr; } static bool isStringIn(char *str, const char *arr[], int arrSize) { for (int i = 0; i < arrSize; i++) { if (strcmp(str, arr[i]) == 0) { return true; } } return false; } // If more than one occurance, return the last index static int indexStringIn(char *str, const char *arr[], int arrSize) { for (int i = arrSize - 1; i >= 0; i--) { if (strcmp(str, arr[i]) == 0) { return i; } } return -1; } int isOperandRegister(char regStartChar) { return((regStartChar == 'x') || (regStartChar == 'w')); } int classifyDPInst(char *operandList[]){ return(isOperandRegister(operandList[1][0]) && isOperandRegister(operandList[2][0]) && isOperandRegister(operandList[3][0])); } void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int tokensCount){ if (isStringIn(opcode, BRANCH_OPCODES, 9)) { instr->type = a64inst_BRANCH; if (strcmp(opcode, "br") == 0) { instr->data.BranchData.BranchType = a64inst_REGISTER; } else if (strcmp(opcode, "b") == 0) { instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; } else { instr->data.BranchData.BranchType = a64inst_CONDITIONAL; } } else if (isStringIn(opcode, SINGLE_TRANSFER_OPCODES, 2)) { instr->type = a64inst_SINGLETRANSFER; if (*tokens[2] == '[') { instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0; } else { instr->type = a64inst_LOADLITERAL; } } else if (classifyDPInst(tokens)) { instr->type = a64inst_DPREGISTER; } else { instr->type = a64inst_DPIMMEDIATE; } } void parse_instruction(char asmLine[], a64inst_instruction *instr) { if (instr == NULL){ exit(EXIT_FAILURE); } if(strcmp(asmLine, HALT_ASM_CMD) == 0){ instr->type = a64inst_HALT; return; } char *asmLineCopy = duplicateString(asmLine); int tokensCount = 0; char **tokens = tokenise(asmLineCopy, &tokensCount); char *opcode = tokens[0]; if(strcmp(opcode, ".int") == 0){ // Directive instr->type = a64inst_DIRECTIVE; } else if(opcode[strlen(opcode)-1]== ':') { // Label instr->type = a64inst_LABEL; opcode[strlen(opcode) - 1] = '\0'; // Remove the colon instr->data.LabelData.label = opcode; } else { // Instruction classifyOpcode(opcode, instr, tokens, tokensCount); switch(instr->type){ case a64inst_BRANCH: parseBranch(instr, opcode, tokens); break; case a64inst_SINGLETRANSFER: parseSingleTransfer(instr, opcode, tokens, tokensCount); calcluateAddressFormat(instr, tokens, tokensCount); break; case a64inst_LOADLITERAL: parseSingleTransfer(instr, opcode, tokens, tokensCount); break; case a64inst_DPREGISTER: //generate DP operands; parseDPRegister(instr, tokens, tokensCount); break; case a64inst_DPIMMEDIATE: parseDPImmediate(instr, tokens, tokensCount); break; default: printf("Error: Invalid Instruction\n"); break; } } /* TODO: FREE MEMORY! */ } //takes inputted char array and returns the integer of the operand, skipping the first character //e.g. for a passed "R32", it skips the 'R' and returns 32 int getOperandNumber(char *operand){ char operandCpy[strlen(operand)]; strcpy(operandCpy, operand+1); char **endptr = NULL; int number; if(strncmp(operandCpy, "0x", 2)==0){ //hex value strcpy(operandCpy, operand+3); number = strtol(operandCpy, endptr, 16); } else { number = strtol(operandCpy, endptr, 10); } return number; } void calcluateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) { assert(*tokens[2] == '['); int operandCount = 0; char **operands = tokeniseOperands(tokens[2], &operandCount); int baseRegister = getOperandNumber(operands[0]); instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister; if(operands[1][strlen(operands[1])-1] == '!') { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]); } else if(operands[1][strlen(operands[1])-1] == ']') { // POST_INDEXED instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]); } else if( (isOperandRegister(*operands[0]) == 1) && (isOperandRegister(*operands[1]) == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; if(operandCount > 1){ int offset = getOperandNumber(operands[1]); instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; //NEED TO SCALE IMMEDIATE VALUE BASED ON REGISTER TYPE IN ASSEMBLER } } } static int parseRegisterType(char *operand) { return operand[0] == 'x'; } void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) { switch(instr->type){ case a64inst_SINGLETRANSFER: instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]); instr->data.SingleTransferData.target = getOperandNumber(tokens[1]); break; case a64inst_LOADLITERAL: instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]); instr->data.SingleTransferData.target = getOperandNumber(tokens[1]); if(*tokens[2] =='#'){ //offset is immediate int offset = getOperandNumber(tokens[1]); instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; } else { //offset is literal, use symbol table and calculate difference } break; default: break; } } void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) { switch(instr->data.BranchData.BranchType){ case a64inst_UNCONDITIONAL: //define and sign extend immediate offset //use symbol table printf("unconditional"); break; case a64inst_REGISTER: instr->data.BranchData.processOpData.registerData.src = getOperandNumber(operandList[0]); break; case a64inst_CONDITIONAL: { char condition[strlen(opcode)+1]; strcpy(condition, opcode+2); if(strcmp(condition, "eq")==0){ instr->data.BranchData.processOpData.conditionalData.cond = EQ; } else if (strcmp(condition, "ne")==0){ instr->data.BranchData.processOpData.conditionalData.cond = NE; } else if (strcmp(condition, "ge")==0){ instr->data.BranchData.processOpData.conditionalData.cond = GE; } else if (strcmp(condition, "lt")==0){ instr->data.BranchData.processOpData.conditionalData.cond = LT; } else if (strcmp(condition, "gt")==0){ instr->data.BranchData.processOpData.conditionalData.cond = GT; } else if (strcmp(condition, "le")==0){ instr->data.BranchData.processOpData.conditionalData.cond = LE; } else if (strcmp(condition, "al")==0){ instr->data.BranchData.processOpData.conditionalData.cond = AL; } break; //calculate offset from symbol table. } } } void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) { a64inst_DPImmediateData *data = &inst->data.DPImmediateData; data->dest = getOperandNumber(tokens[1]); data->regType = parseRegisterType(tokens[1]); if (isStringIn(tokens[0], WIDE_MOV_OPCODES, 3)) { data->DPIOpType = a64inst_DPI_WIDEMOV; data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 3); data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]); if (tokensCount >= 4) { data->processOpData.wideMovData.shiftScalar = getOperandNumber(tokens[3]); } } else { data->DPIOpType = a64inst_DPI_ARITHM; data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4); data->processOpData.arithmData.src = getOperandNumber(tokens[2]); data->processOpData.arithmData.immediate = getOperandNumber(tokens[3]); if (tokensCount >= 5) { int numTokens = 0; char **shiftOperands = tokenise(tokens[4], &numTokens); int shiftAmount = getOperandNumber(shiftOperands[1]); if (shiftAmount > 0) { data->processOpData.arithmData.shiftImmediate = true; } } } } void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) { a64inst_DPRegisterData *data = &inst->data.DPRegisterData; data->dest = getOperandNumber(tokens[1]); data->regType = parseRegisterType(tokens[1]); data->src1 = getOperandNumber(tokens[2]); data->src2 = getOperandNumber(tokens[3]); if (isStringIn(tokens[0], MULTIPLY_OPCODES, 4)) { // Multiply data->DPROpType = a64inst_DPR_MULTIPLY; if (tokensCount >= 5) { data->processOpData.multiplydata.summand = getOperandNumber(tokens[4]); data->processOpData.multiplydata.negProd = strcmp(tokens[4], "mneg") == 0; } } else { // Arithmetic/Logic data->DPROpType = a64inst_DPR_ARITHMLOGIC; if (isStringIn(tokens[0], ARITHMETIC_OPCODES, 4)) { // Arithmetic data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4); data->processOpData.arithmLogicData.type = 1; if(tokensCount == 5) { //has a shift int numTokens = 0; char **shiftOperands = tokenise(tokens[4], &numTokens); data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]); } } else { // Logic } } }