ARMv8/src/parser.c
2024-06-13 19:25:22 +01:00

351 lines
13 KiB
C

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "parser.h"
#include "a64instruction/a64instruction.h"
#include "tokeniser.c"
/** Prototypes: forward declarations for functions that are defined later in this file. */
void parse_instruction(char asmLine[], a64inst_instruction *instr);
static char *duplicateString(char *str);
void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands);
void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]);
void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands);
void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount);
void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount);
/**
 * Constants: opcode mnemonic tables.
 * The tables are searched with isStringIn()/indexStringIn(). Some callers store
 * the index returned by indexStringIn() directly in the parsed instruction, so
 * the ORDER of the entries is significant and must not be changed casually.
 */
/* Mnemonics that classifyOpcode() treats as branch instructions. */
static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"};
/* Mnemonics that classifyOpcode() treats as single data transfers / load literals. */
static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"};
/*
 * Wide-move mnemonics. "movz" is listed twice on purpose: indexStringIn()
 * returns the LAST matching index, so "movn" maps to 0, "movz" to 2 and
 * "movk" to 3.
 * NOTE(review): presumably chosen so the index equals the A64 opc field - confirm.
 */
static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"};
/* Arithmetic mnemonics; the table index is stored as processOp by the data-processing parsers. */
static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"};
/* Multiply-family mnemonics; parseDPRegister() uses membership to select the multiply path. */
static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"};
/* Shift mnemonics; the table index is stored as the shiftType of a shifted register operand. */
static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"};
/**
 * Parses an array of assembly source lines into an array of instructions.
 *
 * Lines are parsed in order with parse_instruction(). Parsing stops at the
 * first NULL entry of asmLines or after lineCount lines, whichever comes
 * first, so the function never writes past the array it allocated.
 *
 * @param asmLines  array of NUL-terminated source lines; may be terminated
 *                  early by a NULL entry
 * @param lineCount number of elements allocated for the result and the
 *                  maximum number of lines that will be read
 * @return heap-allocated array of lineCount instructions, owned by the caller
 *         (release with free()). Entries beyond the last parsed line are
 *         zero-filled. The process exits with EXIT_FAILURE when the
 *         allocation fails.
 */
a64inst_instruction *parse(char **asmLines, int lineCount) {
    size_t capacity = lineCount > 0 ? (size_t)lineCount : 0;

    /* calloc checks the size multiplication and leaves unparsed entries zeroed. */
    a64inst_instruction *instructions = calloc(capacity, sizeof *instructions);
    if (instructions == NULL && capacity > 0) {
        fprintf(stderr, "Error: out of memory while allocating instructions\n");
        exit(EXIT_FAILURE);
    }
    if (asmLines == NULL) {
        return instructions;
    }

    for (size_t i = 0; i < capacity && asmLines[i] != NULL; i++) {
        parse_instruction(asmLines[i], &instructions[i]);
    }
    return instructions;
}
/**
 * Returns a heap-allocated copy of a string.
 *
 * @param str NUL-terminated string to copy; must not be NULL
 * @return newly allocated copy of str that the caller must free(). The
 *         process exits with EXIT_FAILURE when the allocation fails, so the
 *         result is never NULL.
 */
static char *duplicateString(char *str) {
    size_t size = strlen(str) + 1; /* include the terminating NUL */
    char *newStr = malloc(size);
    if (newStr == NULL) {
        fprintf(stderr, "Error: out of memory while copying a string\n");
        exit(EXIT_FAILURE);
    }
    memcpy(newStr, str, size);
    return newStr;
}
/**
 * Tells whether a string occurs in a table of strings.
 *
 * @param str     string to look for
 * @param arr     table of NUL-terminated strings
 * @param arrSize number of leading entries of arr to examine
 * @return true when one of the first arrSize entries equals str, else false
 */
static bool isStringIn(char *str, const char *arr[], int arrSize) {
    int idx = 0;
    while (idx < arrSize) {
        if (!strcmp(arr[idx], str)) {
            return true;
        }
        idx++;
    }
    return false;
}
/**
 * Finds the position of a string in a table of strings.
 * If the string occurs more than once, the index of the last occurrence is
 * returned.
 *
 * @param str     string to look for
 * @param arr     table of NUL-terminated strings
 * @param arrSize number of leading entries of arr to examine
 * @return index of the last matching entry, or -1 when there is no match
 */
static int indexStringIn(char *str, const char *arr[], int arrSize) {
    int lastMatch = -1;
    /* Scan forwards and remember the most recent hit, which is the last occurrence. */
    for (int pos = 0; pos < arrSize; pos++) {
        if (!strcmp(arr[pos], str)) {
            lastMatch = pos;
        }
    }
    return lastMatch;
}
/**
 * Tells whether an operand's first character marks it as a register.
 *
 * @param regStartChar first character of the operand text
 * @return 1 when the character is 'x' or 'w', otherwise 0
 */
int isOperandRegister(char regStartChar) {
    switch (regStartChar) {
    case 'x':
    case 'w':
        return 1;
    default:
        return 0;
    }
}
/**
 * Tells whether the three operands of a data-processing instruction are all
 * registers.
 *
 * @param operandList token array; entries 1, 2 and 3 are inspected, so the
 *                    caller must supply at least four tokens
 * @return 1 when entries 1..3 all start with a register prefix, otherwise 0
 */
int classifyDPInst(char *operandList[]){
    /* Stops at the first non-register operand, like the && chain it replaces. */
    for (int slot = 1; slot <= 3; slot++) {
        if (!isOperandRegister(operandList[slot][0])) {
            return 0;
        }
    }
    return 1;
}
/**
 * Determines the instruction type for an opcode and records it in instr.
 *
 * - Branch mnemonics set type a64inst_BRANCH and the branch sub-type
 *   ("br" -> register, "b" -> unconditional, anything else -> conditional).
 * - "ldr"/"str" set a64inst_SINGLETRANSFER when the third token starts with
 *   '[' (also recording whether the opcode is a load), otherwise
 *   a64inst_LOADLITERAL.
 * - Everything else is data processing: a64inst_DPREGISTER when tokens 1..3
 *   are all registers, otherwise a64inst_DPIMMEDIATE.
 *
 * @param opcode      instruction mnemonic (tokens[0])
 * @param instr       instruction whose type fields are filled in
 * @param tokens      tokenised source line, mnemonic first
 * @param tokensCount number of valid entries in tokens
 */
void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int tokensCount){
    /* Derive the table sizes from the tables so the two cannot drift apart. */
    const int branchCount = (int)(sizeof BRANCH_OPCODES / sizeof BRANCH_OPCODES[0]);
    const int transferCount = (int)(sizeof SINGLE_TRANSFER_OPCODES / sizeof SINGLE_TRANSFER_OPCODES[0]);

    if (isStringIn(opcode, BRANCH_OPCODES, branchCount)) {
        instr->type = a64inst_BRANCH;
        if (strcmp(opcode, "br") == 0) {
            instr->data.BranchData.BranchType = a64inst_REGISTER;
        } else if (strcmp(opcode, "b") == 0) {
            instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL;
        } else {
            instr->data.BranchData.BranchType = a64inst_CONDITIONAL;
        }
        return;
    }

    if (isStringIn(opcode, SINGLE_TRANSFER_OPCODES, transferCount)) {
        /* Only look at tokens[2] when the line actually has a third token. */
        if (tokensCount >= 3 && *tokens[2] == '[') {
            instr->type = a64inst_SINGLETRANSFER;
            instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
            instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0;
        } else {
            instr->type = a64inst_LOADLITERAL;
        }
        return;
    }

    /* classifyDPInst() reads tokens[1..3], so it needs at least four tokens. */
    if (tokensCount >= 4 && classifyDPInst(tokens)) {
        instr->type = a64inst_DPREGISTER;
    } else {
        instr->type = a64inst_DPIMMEDIATE;
    }
}
/**
 * Parses one line of assembly into an instruction record.
 *
 * The line is classified as the halt command, a ".int" directive, a label
 * (first token ends with ':') or an instruction. Instructions are classified
 * by classifyOpcode() and then handed to the parser for their type.
 *
 * @param asmLine NUL-terminated source line; it is copied before tokenising,
 *                so the caller's buffer is not modified
 * @param instr   instruction record to fill in
 *
 * The process exits with EXIT_FAILURE when instr or asmLine is NULL, or when
 * the line yields no usable first token.
 */
void parse_instruction(char asmLine[], a64inst_instruction *instr) {
    if (instr == NULL || asmLine == NULL) {
        exit(EXIT_FAILURE);
    }

    if (strcmp(asmLine, HALT_ASM_CMD) == 0) {
        instr->type = a64inst_HALT;
        return;
    }

    char *asmLineCopy = duplicateString(asmLine);
    if (asmLineCopy == NULL) {
        exit(EXIT_FAILURE);
    }

    int tokensCount = 0;
    char **tokens = tokenise(asmLineCopy, &tokensCount);

    /* Without a non-empty first token there is nothing to classify, and
     * indexing opcode[strlen(opcode) - 1] below would read out of bounds. */
    if (tokens == NULL || tokens[0] == NULL || tokens[0][0] == '\0') {
        fprintf(stderr, "Error: Invalid Instruction\n");
        exit(EXIT_FAILURE);
    }

    char *opcode = tokens[0];
    size_t opcodeLen = strlen(opcode);

    if (strcmp(opcode, ".int") == 0) {
        // Directive
        instr->type = a64inst_DIRECTIVE;
    } else if (opcode[opcodeLen - 1] == ':') {
        // Label
        instr->type = a64inst_LABEL;
        opcode[opcodeLen - 1] = '\0'; // Remove the colon
        instr->data.LabelData.label = opcode;
    } else {
        // Instruction
        classifyOpcode(opcode, instr, tokens, tokensCount);
        switch (instr->type) {
            case a64inst_BRANCH:
                parseBranch(instr, opcode, tokens);
                break;
            case a64inst_SINGLETRANSFER:
                parseSingleTransfer(instr, opcode, tokens, tokensCount);
                calcluateAddressFormat(instr, tokens, tokensCount);
                break;
            case a64inst_LOADLITERAL:
                parseSingleTransfer(instr, opcode, tokens, tokensCount);
                break;
            case a64inst_DPREGISTER:
                parseDPRegister(instr, tokens, tokensCount);
                break;
            case a64inst_DPIMMEDIATE:
                parseDPImmediate(instr, tokens, tokensCount);
                break;
            default:
                fprintf(stderr, "Error: Invalid Instruction\n");
                break;
        }
    }
    /* TODO: free memory.
     * NOTE(review): asmLineCopy and tokens are never released here. The label
     * case keeps the pointer taken from tokens[0] inside instr, so who owns
     * these buffers has to be settled (check tokenise()) before freeing. */
}
/**
 * Returns the number embedded in an operand, skipping its first character.
 *
 * Examples: "x12" -> 12, "#42" -> 42, "#0x1F" -> 31. The text after the first
 * character is read as hexadecimal when it starts with "0x", otherwise as
 * decimal. Parsing stops at the first character that is not part of the
 * number, so trailing punctuation such as ']' or '!' is ignored.
 *
 * @param operand NUL-terminated operand text; NULL and "" are accepted
 * @return the parsed value, or 0 when operand is NULL, empty or holds no digits
 */
int getOperandNumber(char *operand){
    if (operand == NULL || operand[0] == '\0') {
        return 0;
    }

    /* Parse in place: no temporary copy (and no zero-length VLA) is needed. */
    const char *digits = operand + 1;
    int base = 10;
    if (strncmp(digits, "0x", 2) == 0) {
        digits += 2;
        base = 16;
    }
    return (int)strtol(digits, NULL, base);
}
/**
 * Works out the addressing mode of a single data transfer and stores the base
 * register together with the mode-specific offset in instr.
 *
 * tokens[2] must be the bracketed address expression; it is split into
 * operands with tokeniseOperands(). The mode is chosen as follows:
 *  - second operand ends with '!'                      -> pre-indexed
 *  - a fourth token follows the bracket and the bracket
 *    holds one operand or its second operand ends ']'  -> post-indexed,
 *                                                         offset from tokens[3]
 *  - both operands are registers                       -> register offset
 *  - otherwise                                         -> unsigned offset,
 *                                                         stored as value / 8
 *                                                         (0 if no offset)
 *
 * @param instr      instruction whose single-data-transfer fields are filled in
 * @param tokens     tokenised source line, mnemonic first
 * @param tokenCount number of valid entries in tokens
 *
 * NOTE(review): the one-operand post-index form ("[xn], #imm") is inferred
 * from A64 syntax; confirm against tokeniseOperands(). The array it returns
 * is not released here - confirm who owns it.
 */
void calcluateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) {
    assert(*tokens[2] == '[');
    int operandCount = 0;
    char **operands = tokeniseOperands(tokens[2], &operandCount);
    assert(operands != NULL && operandCount >= 1);

    instr->data.SingleTransferData.processOpData.singleDataTransferData.base = getOperandNumber(operands[0]);

    /* Only touch operands[1] when the bracket really contained two operands. */
    char *second = operandCount > 1 ? operands[1] : NULL;
    size_t secondLen = second != NULL ? strlen(second) : 0;
    char secondLast = secondLen > 0 ? second[secondLen - 1] : '\0';
    /* The post-index immediate lives in tokens[3], which exists only then. */
    bool hasTrailingToken = tokenCount > 3;

    if (secondLast == '!') {
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED;
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(second);
    } else if (hasTrailingToken && (second == NULL || secondLast == ']')) {
        // POST_INDEXED
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED;
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]);
    } else if (second != NULL
               && isOperandRegister(*operands[0]) == 1
               && isOperandRegister(*second) == 1) {
        // register offset
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET;
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(second);
    } else {
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
        /* A bare "[xn]" means offset zero; store it instead of leaving the field unset. */
        int offset = second != NULL ? getOperandNumber(second) : 0;
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset / 8;
        //NEED TO SCALE IMMEDIATE VALUE BASED ON REGISTER TYPE IN ASSEMBLER
    }
}
/**
 * Reports the register type encoded by the first character of a register
 * operand.
 *
 * @param operand NUL-terminated register operand text
 * @return 1 when the operand starts with 'x', otherwise 0
 */
static int parseRegisterType(char *operand) {
    if (*operand == 'x') {
        return 1;
    }
    return 0;
}
/**
 * Fills in the fields shared by single data transfers and load literals: the
 * target register number and its register type, both taken from tokens[1].
 * For a load literal whose third token is an immediate ('#...'), the
 * immediate is also stored as the literal offset.
 *
 * Instructions of any other type are left untouched.
 *
 * @param instr       instruction to fill in; instr->type must already be set
 * @param opcode      instruction mnemonic (currently unused)
 * @param tokens      tokenised source line, mnemonic first
 * @param tokensCount number of valid entries in tokens
 */
void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) {
    (void)opcode;

    if (instr->type != a64inst_SINGLETRANSFER && instr->type != a64inst_LOADLITERAL) {
        return;
    }

    instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]);
    instr->data.SingleTransferData.target = getOperandNumber(tokens[1]);

    if (instr->type == a64inst_LOADLITERAL && tokensCount >= 3 && *tokens[2] == '#') {
        /* The immediate is the third token; tokens[1] is the target register. */
        instr->data.SingleTransferData.processOpData.loadLiteralData.offset = getOperandNumber(tokens[2]);
    }
    /* Otherwise the offset is a label: use symbol table and calculate difference (not implemented here). */
}
/**
 * Fills in the branch-specific fields of instr according to the branch type
 * that classifyOpcode() stored in it.
 *
 *  - register branch: the source register number is read from the first
 *    operand, operandList[1]
 *  - conditional branch: the condition code is taken from the mnemonic
 *    suffix after "b." (eq, ne, ge, lt, gt, le, al); an unrecognised suffix
 *    leaves the condition unchanged
 *  - unconditional branch: nothing is stored yet (target resolution through
 *    the symbol table is not implemented here)
 *
 * @param instr       branch instruction to fill in
 * @param opcode      branch mnemonic, e.g. "b.eq"
 * @param operandList tokenised source line, mnemonic at index 0
 */
void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) {
    switch (instr->data.BranchData.BranchType) {
        case a64inst_UNCONDITIONAL:
            //define and sign extend immediate offset
            //use symbol table
            /* NOTE(review): looks like leftover debug output on stdout - confirm before removing. */
            printf("unconditional");
            break;
        case a64inst_REGISTER:
            /* operandList[0] is the mnemonic ("br"); the register is the next token. */
            instr->data.BranchData.processOpData.registerData.src = getOperandNumber(operandList[1]);
            break;
        case a64inst_CONDITIONAL:
        {
            /* Skip the leading "b." to reach the condition suffix. */
            const char *condition = opcode + 2;
            if (strcmp(condition, "eq") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = EQ;
            } else if (strcmp(condition, "ne") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = NE;
            } else if (strcmp(condition, "ge") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = GE;
            } else if (strcmp(condition, "lt") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = LT;
            } else if (strcmp(condition, "gt") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = GT;
            } else if (strcmp(condition, "le") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = LE;
            } else if (strcmp(condition, "al") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = AL;
            }
            //calculate offset from symbol table.
            break;
        }
        default:
            break;
    }
}
/**
 * Parses a data-processing (immediate) instruction into inst.
 *
 * tokens[1] supplies the destination register and register type. A
 * wide-move mnemonic (movn/movz/movk) then stores its immediate and optional
 * shift operand; any other mnemonic is treated as arithmetic and stores the
 * source register, the immediate and whether a non-zero shift was given.
 * processOp is the index of the mnemonic in the matching opcode table.
 *
 * @param inst        instruction whose DPImmediateData is filled in
 * @param tokens      tokenised source line, mnemonic first
 * @param tokensCount number of valid entries in tokens
 */
void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) {
    /* Search the whole table: a hard-coded 3 here hid "movk", the fourth entry. */
    const int wideMovCount = (int)(sizeof WIDE_MOV_OPCODES / sizeof WIDE_MOV_OPCODES[0]);
    const int arithmeticCount = (int)(sizeof ARITHMETIC_OPCODES / sizeof ARITHMETIC_OPCODES[0]);

    a64inst_DPImmediateData *data = &inst->data.DPImmediateData;
    data->dest = getOperandNumber(tokens[1]);
    data->regType = parseRegisterType(tokens[1]);

    if (isStringIn(tokens[0], WIDE_MOV_OPCODES, wideMovCount)) {
        data->DPIOpType = a64inst_DPI_WIDEMOV;
        data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, wideMovCount);
        data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]);
        /* No shift operand means a shift of zero, not an uninitialised field. */
        data->processOpData.wideMovData.shiftScalar =
            tokensCount >= 4 ? getOperandNumber(tokens[3]) : 0;
    } else {
        data->DPIOpType = a64inst_DPI_ARITHM;
        data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, arithmeticCount);
        data->processOpData.arithmData.src = getOperandNumber(tokens[2]);
        data->processOpData.arithmData.immediate =
            tokensCount >= 4 ? getOperandNumber(tokens[3]) : 0;

        bool shifted = false;
        if (tokensCount >= 5) {
            int numTokens = 0;
            char **shiftOperands = tokenise(tokens[4], &numTokens);
            /* The shift amount is the second sub-token; read it only if present. */
            if (shiftOperands != NULL && numTokens >= 2) {
                shifted = getOperandNumber(shiftOperands[1]) > 0;
            }
        }
        /* Always assign, so the flag is false (not garbage) when no shift is given. */
        data->processOpData.arithmData.shiftImmediate = shifted;
    }
}
/**
 * Parses a data-processing (register) instruction into inst.
 *
 * tokens[1..3] supply the destination and the two source registers.
 *  - Multiply family (mul/madd/msub/mneg): the summand is the fifth token
 *    when present, otherwise register 31; negProd is set for the mnemonics
 *    that subtract the product (msub, mneg).
 *  - Arithmetic (add/adds/sub/subs): processOp is the mnemonic's index in
 *    ARITHMETIC_OPCODES; an optional fifth token such as "lsl #2" supplies
 *    the shift type and amount, which default to 0.
 *  - Any other mnemonic is left for the logic path, which is not
 *    implemented here.
 *
 * @param inst        instruction whose DPRegisterData is filled in
 * @param tokens      tokenised source line, mnemonic first
 * @param tokensCount number of valid entries in tokens
 *
 * NOTE(review): register 31 as the default summand follows the A64 alias
 * rules (mul/mneg are madd/msub with the zero register) - confirm it matches
 * how the rest of the project represents the zero register.
 */
void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) {
    enum { ZERO_REGISTER = 31 };
    const int multiplyCount = (int)(sizeof MULTIPLY_OPCODES / sizeof MULTIPLY_OPCODES[0]);
    const int arithmeticCount = (int)(sizeof ARITHMETIC_OPCODES / sizeof ARITHMETIC_OPCODES[0]);
    const int shiftCount = (int)(sizeof SHIFT_TYPE_OPCODES / sizeof SHIFT_TYPE_OPCODES[0]);

    a64inst_DPRegisterData *data = &inst->data.DPRegisterData;
    data->dest = getOperandNumber(tokens[1]);
    data->regType = parseRegisterType(tokens[1]);
    data->src1 = getOperandNumber(tokens[2]);
    data->src2 = getOperandNumber(tokens[3]);

    if (isStringIn(tokens[0], MULTIPLY_OPCODES, multiplyCount)) {
        // Multiply
        data->DPROpType = a64inst_DPR_MULTIPLY;
        data->processOpData.multiplydata.summand =
            tokensCount >= 5 ? getOperandNumber(tokens[4]) : ZERO_REGISTER;
        /* The negate flag depends on the mnemonic (tokens[0]), not on an operand. */
        data->processOpData.multiplydata.negProd =
            strcmp(tokens[0], "msub") == 0 || strcmp(tokens[0], "mneg") == 0;
    } else {
        // Arithmetic/Logic
        data->DPROpType = a64inst_DPR_ARITHMLOGIC;
        if (isStringIn(tokens[0], ARITHMETIC_OPCODES, arithmeticCount)) {
            // Arithmetic
            data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, arithmeticCount);
            data->processOpData.arithmLogicData.type = 1;
            /* Default to "no shift" so the fields are never left uninitialised. */
            data->processOpData.arithmLogicData.shiftType = 0;
            data->processOpData.arithmLogicData.shiftAmount = 0;
            if (tokensCount == 5) {
                //has a shift
                int numTokens = 0;
                char **shiftOperands = tokenise(tokens[4], &numTokens);
                if (shiftOperands != NULL && numTokens >= 2) {
                    data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, shiftCount);
                    data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]);
                }
            }
        } else {
            // Logic (not implemented here)
        }
    }
}