From 995c6d02fa8e2023623bf4dd50debcd036a7775c Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 17:24:09 +0100 Subject: [PATCH] Rewrite the parser for better structure, Add DPI parsing --- src/parser.c | 421 ++++++++++++++++++++++++++++----------------------- 1 file changed, 233 insertions(+), 188 deletions(-) diff --git a/src/parser.c b/src/parser.c index 8696230..793bb68 100644 --- a/src/parser.c +++ b/src/parser.c @@ -5,18 +5,159 @@ #include #include "parser.h" #include "a64instruction/a64instruction.h" +#include "tokeniser.c" -//takes input string, read from asm file and returns -//input as an a64 instruction +/** Prototypes */ +void parse_instruction(char asmLine[], a64inst_instruction *instr); +static char *duplicateString(char *str); +void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands); +void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); +void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands); +void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); + +/** Constants */ +static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"}; +static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"}; +static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"}; +static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; +static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; + +a64inst_instruction *parse(char **asmLines, int lineCount) { + a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); + + int i = 0; + while (asmLines[i] != NULL) { + parse_instruction(asmLines[i], &instructions[i]); + i++; + } + + return instructions; +} + +static char *duplicateString(char *str) { + char *newStr = malloc(strlen(str) + 1); + strcpy(newStr, str); + return newStr; +} + +static bool isStringIn(char *str, const char *arr[], int arrSize) { + for (int i = 0; i < arrSize; i++) { + if (strcmp(str, arr[i]) == 0) { + return true; + } + } + return false; +} + +// If more than one occurance, return the last index +static int indexStringIn(char *str, const char *arr[], int arrSize) { + for (int i = arrSize - 1; i >= 0; i--) { + if (strcmp(str, arr[i]) == 0) { + return i; + } + } + return -1; +} + +int isOperandRegister(char regStartChar) { + return((regStartChar == 'x') || (regStartChar == 'w')); +} + +int classifyDPInst(char *operandList[]){ + return(isOperandRegister(operandList[1][0]) && + isOperandRegister(operandList[2][0]) && + isOperandRegister(operandList[3][0])); +} + +void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int tokensCount){ + + if (isStringIn(opcode, BRANCH_OPCODES, 9)) { + instr->type = a64inst_BRANCH; + + if (strcmp(opcode, "br") == 0) { + instr->data.BranchData.BranchType = a64inst_REGISTER; + } else if (strcmp(opcode, "b") == 0) { + instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; + } else { + instr->data.BranchData.BranchType = a64inst_CONDITIONAL; + } + + } else if (isStringIn(opcode, SINGLE_TRANSFER_OPCODES, 2)) { + instr->type = a64inst_SINGLETRANSFER; + if (*tokens[2] == '[') { + instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; + instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0; + + } else { + instr->type = a64inst_LOADLITERAL; + } + } else if (classifyDPInst(tokens)) { + instr->type = a64inst_DPREGISTER; + } else { + instr->type = a64inst_DPIMMEDIATE; + } + +} + + +void parse_instruction(char asmLine[], a64inst_instruction *instr) { + if (instr == NULL){ + exit(EXIT_FAILURE); + } + + if(strcmp(asmLine, HALT_ASM_CMD) == 0){ + instr->type = a64inst_HALT; + return; + } + + char *asmLineCopy = duplicateString(asmLine); + int tokensCount = 0; + char **tokens = tokenise(asmLineCopy, &tokensCount); + char *opcode = tokens[0]; + + if(strcmp(opcode, ".int") == 0){ + // Directive + instr->type = a64inst_DIRECTIVE; + + } else if(opcode[strlen(opcode)-1]== ':') { + // Label + instr->type = a64inst_LABEL; + opcode[strlen(opcode) - 1] = '\0'; // Remove the colon + instr->data.LabelData.label = opcode; + + } else { + // Instruction + classifyOpcode(opcode, instr, tokens, tokensCount); + + switch(instr->type){ + case a64inst_BRANCH: + parseBranch(instr, opcode, tokens); + break; + + case a64inst_SINGLETRANSFER: + parseSingleTransfer(instr, opcode, tokens, tokensCount); + calcluateAddressFormat(instr, tokens, tokensCount); + break; + case a64inst_LOADLITERAL: + parseSingleTransfer(instr, opcode, tokens, tokensCount); + break; + case a64inst_DPREGISTER: + //generate DP operands; + break; + case a64inst_DPIMMEDIATE: + parseDPImmediate(instr, tokens, tokensCount); + break; + default: + printf("Error: Invalid Instruction\n"); + break; + } + + } + + /* TODO: FREE MEMORY! */ + +} -//TODO: -// - use string matching to get opcode, and operands (DONE) -// - check operand count (DONE) -// - match opcode to a64 struct types (DONE) -// - count operands and match type/values (DONE) -// - generate final a64inst and return (TODO: DP instrs) -// - ASK ABOUT OFFSET CALCULATION -// - CREATE FUNC TO TIDY UP OPERANDS IN DP //takes inputted char array and returns the integer of the operand, skipping the first character //e.g. for a passed "R32", it skips the 'R' and returns 32 @@ -28,64 +169,61 @@ int getOperandNumber(char *operand){ return number; } -int isOperandRegister(char regStartChar){ - return((regStartChar == 'x') || (regStartChar == 'w')); -} -//calculate offsets from string -void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){ - char baseRegParam[strlen(operandList[1])]; - strcpy(baseRegParam, operandList[1]); - char *startptr = &baseRegParam[1]; - int base = getOperandNumber(startptr); - instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base; +void calcluateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) { + assert(*tokens[2] == '['); - if(operandList[2][strlen(operandList[2])-2] == '!'){ + int operandCount = 0; + char **operands = tokeniseOperands(tokens[2], &operandCount); + + int baseRegister = getOperandNumber(operands[0]); + + instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister; + + if(operands[1][strlen(operands[1])-1] == '!') { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operandList[2]); - } else if(operandList[1][strlen(operandList[1])-2] == ']') { - //post-indexed - char immOffset[strlen(operandList[2])+1]; - strcpy(immOffset, operandList[2]); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]); + + } else if(operands[1][strlen(operands[1])-1] == ']') { + // POST_INDEXED instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(immOffset); - } else if( (isOperandRegister(operandList[1][0]) == 1) - || (isOperandRegister(operandList[2][0]) == 1)){ + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]); + + } else if( (isOperandRegister(*operands[0]) == 1) + && (isOperandRegister(*operands[1]) == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operandList[2]); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); + } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; - if(numOperands==3){ - int offset = getOperandNumber(operandList[2]); + if(operandCount > 1){ + int offset = getOperandNumber(operands[1]); instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; //NEED TO SCALE IMMEDIATE VALUE BASED ON REGISTER TYPE IN ASSEMBLER } } } -void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){ +static int parseRegisterType(char *operand) { + return operand[0] == 'x'; +} + +void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) { + switch(instr->type){ - case a64inst_SINGLETRANSFER: { - if(operandList[0][0] == 'x'){ - //x-register - instr->data.SingleTransferData.regType = 1; - } else { - instr->data.SingleTransferData.regType = 0; - } - instr->data.SingleTransferData.target = getOperandNumber(operandList[0]); + case a64inst_SINGLETRANSFER: + instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]); + instr->data.SingleTransferData.target = getOperandNumber(tokens[1]); break; - } + case a64inst_LOADLITERAL: - if(operandList[0][0] == 'x') { - instr->data.SingleTransferData.regType = 1; - } else { - instr->data.SingleTransferData.regType = 0; - } - instr->data.SingleTransferData.target = getOperandNumber(operandList[0]); - if(operandList[1][0] =='#'){ + instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]); + instr->data.SingleTransferData.target = getOperandNumber(tokens[1]); + + if(*tokens[2] =='#'){ //offset is immediate - int offset = getOperandNumber(operandList[1]); + int offset = getOperandNumber(tokens[1]); instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; } else { //offset is literal, use symbol table and calculate difference @@ -97,7 +235,7 @@ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *o } } -void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ +void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) { switch(instr->data.BranchData.BranchType){ case a64inst_UNCONDITIONAL: //define and sign extend immediate offset @@ -132,155 +270,62 @@ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *oper } } -int classifyDPInst(char *operandList[]){ - return(isOperandRegister(operandList[0][0]) && - isOperandRegister(operandList[1][0]) && - isOperandRegister(operandList[2][0])); -} +void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) { + a64inst_DPImmediateData *data = &inst->data.DPImmediateData; + data->dest = getOperandNumber(tokens[1]); + data->regType = parseRegisterType(tokens[1]); -void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[], int numOperands){ - int isUnconditional = strcmp(opcode, "b"); - int isRegister = strcmp(opcode, "br"); - int isLoad = strcmp(opcode, "ldr"); - int isStore = strcmp(opcode, "str"); - - if(isUnconditional == 0 || - isRegister == 0 || - strncmp(opcode, "b.", 2) == 0){ - instr->type = a64inst_BRANCH; - if(isRegister == 0){ - instr->data.BranchData.BranchType = a64inst_REGISTER; - } else if (isUnconditional == 0){ - instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; - } else { - instr->data.BranchData.BranchType = a64inst_CONDITIONAL; - } - } else if(isLoad == 0 || isStore == 0){ - //loading/storing instruction; classify operands - if( operandList[1][0] == '['){ - //type is register - instr->type = a64inst_SINGLETRANSFER; - instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; - if(isLoad == 0){ - instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD; - } else { - instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE; - } - } else { - instr->type = a64inst_LOADLITERAL; + if (isStringIn(tokens[0], WIDE_MOV_OPCODES, 3)) { + data->DPIOpType = a64inst_DPI_WIDEMOV; + data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 3); + data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]); + if (tokensCount >= 3) { + data->processOpData.wideMovData.shiftScalar = getOperandNumber(tokens[3]); } } else { - if(classifyDPInst(operandList)){ - instr->type = a64inst_DPREGISTER; - } else { - instr->type = a64inst_DPIMMEDIATE; + data->DPIOpType = a64inst_DPI_ARITHM; + data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4); + data->processOpData.arithmData.src = getOperandNumber(tokens[2]); + data->processOpData.arithmData.immediate = getOperandNumber(tokens[3]); + + if (tokensCount >= 5) { + int shiftAmount = getOperandNumber(tokens[4]); + if (shiftAmount > 0) { + data->processOpData.arithmData.shiftImmediate = true; + } } } } -void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){ - assert(str != NULL); - char *operandsDupe = malloc(strlen(str)+1); - assert(operandsDupe != NULL); - strcpy(operandsDupe, str); - char *operand = strtok(operandsDupe, OPERAND_DELIMITER); - operands[0] = operand; +void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) { + a64inst_DPRegisterData *data = &inst->data.DPRegisterData; + data->dest = getOperandNumber(tokens[1]); + data->regType = parseRegisterType(tokens[1]); + data->src1 = getOperandNumber(tokens[2]); + data->src2 = getOperandNumber(tokens[3]); - while (operand != NULL){ - *operandCount = *(operandCount)+1; - operand = strtok(NULL, OPERAND_DELIMITER); - operands[*(operandCount)] = operand; - } - *(numOperands) = *(operandCount); -} + if (isStringIn(tokens[0], MULTIPLY_OPCODES, 4)) { + // Multiply + data->DPROpType = a64inst_DPR_MULTIPLY; + if (tokensCount >= 5) { + data->processOpData.multiplydata.summand = getOperandNumber(tokens[4]); + data->processOpData.multiplydata.negProd = strcmp(tokens[4], "mneg") == 0; + } -//takes inputted assembly line and returns a -//pointer to an abstract representation of the instruction -void parser_instruction(char asmLine[], a64inst_instruction *instr) { - printf("%s", asmLine); - int numOperands = 0; - if (instr == NULL){ - exit(EXIT_FAILURE); - } - - if(strcmp(asmLine, HALT_ASM_CMD) == 0){ - instr->type = a64inst_HALT; - return; - } - - //"opcode operand1, {operand2}, ..." - //duplicated as strtok modifies the input string - - char stringptr[strlen(asmLine) + 1]; - strcpy(stringptr, asmLine); - char *token; - token = strtok(stringptr, " "); - char opcode[strlen(token)+1]; - strcpy(opcode, token); - token = strtok(NULL, ""); - char operands[strlen(token)+1]; - strcpy(operands, token); - - if(strcmp(opcode, ".int") == 0){ - //type is directive - instr->type = a64inst_DIRECTIVE; - - } else if(opcode[strlen(opcode)-1]== ':') { - //type is label - //add to symbol table - instr->type = a64inst_LABEL; - char opcodeCpy[strlen(opcode)+1]; - strcpy(opcodeCpy, opcode); - char *labelData = strtok(opcodeCpy, ":"); - instr->data.LabelData.label = labelData; } else { - //type is instruction - int operandCount = 0; - char *operandList[5]; - //generate list of operands - tokeniseOperands(operands, &operandCount, operandList, &numOperands); - //categorise instruction type from opcode and operands - classifyOpcode(opcode, instr, operandList, operandCount); - //define struct values according to operands and type - printf("got to here"); - switch(instr->type){ - case a64inst_BRANCH: - generateBranchOperands(instr, opcode, operandList); - break; - case a64inst_SINGLETRANSFER: - generateLoadStoreOperands(instr, opcode, operandList, numOperands); - calcluateAddressFormat(instr, operandList, numOperands); - break; - case a64inst_LOADLITERAL: - generateLoadStoreOperands(instr, opcode, operandList, numOperands); - break; - case a64inst_DPREGISTER: - //generate DP operands; - break; - case a64inst_DPIMMEDIATE: - //generate DP operands; - break; - default: - printf("INVALID INSTRUCTION"); - break; - } + // Arithmetic/Logic + data->DPROpType = a64inst_DPR_ARITHMLOGIC; + if (isStringIn(tokens[0], ARITHMETIC_OPCODES, 4)) { + // Arithmetic + data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4); + data->processOpData.arithmLogicData.type = 1; + + } else { + // Logic + + } } - -} - -// Takes an array of strings, each string representing an assembly instruction. -// Returns an array of a64inst_instruction pointers, each representing an instruction. -a64inst_instruction *parse(char **asmLines, int lineCount) { - a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); - - int i = 0; - while (asmLines[i] != NULL) { - parser_instruction(asmLines[i], &instructions[i]); - i++; - } - - return instructions; }