ARMv8/src/parser.c
2024-06-12 19:25:05 +01:00

284 lines
11 KiB
C

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "parser.h"
#include "a64instruction/a64instruction.h"
//takes input string, read from asm file and returns
//input as an a64 instruction
//TODO:
// - use string matching to get opcode, and operands (DONE)
// - check operand count (DONE)
// - match opcode to a64 struct types (DONE)
// - count operands and match type/values (DONE)
// - generate final a64inst and return (TODO: DP instrs)
// - ASK ABOUT OFFSET CALCULATION
// - CREATE FUNC TO TIDY UP OPERANDS IN DP
// Parses the number that follows the first character of an operand token,
// e.g. "x12" -> 12, "#8" -> 8, "R32" -> 32.
// The first character is skipped unconditionally. Parsing is base 10 and stops at the
// first character that cannot be part of the number (so "x1]" -> 1).
// Returns 0 when operand is NULL, empty, or has no digits after its first character.
int getOperandNumber(char *operand){
    if (operand == NULL || operand[0] == '\0') {
        // Nothing after a first character to parse; operand + 1 would be out of bounds.
        return 0;
    }
    // strtol reads directly from the caller's string, so no scratch copy is needed.
    return (int)strtol(operand + 1, NULL, 10);
}
// Reports whether a character is one of the register prefixes 'x' or 'w'.
// Returns 1 for a register prefix and 0 for any other character.
int isOperandRegister(char regStartChar){
    switch (regStartChar) {
    case 'x':
    case 'w':
        return 1;
    default:
        return 0;
    }
}
// Works out the base register, addressing mode and offset of a single data transfer
// from its tokenised operands and stores them in instr.
//   instr       - instruction being filled in; SingleTransferData.regType must already
//                 be set, because it selects the scale of an unsigned offset
//   operandList - operandList[1] is the base token and must be a non-empty string
//                 (the caller only routes here when it starts with '[');
//                 operandList[2] is the offset token, or NULL when there is none
//   numOperands - unused; whether an offset exists is decided by operandList[2] itself
// Mode selection:
//   no offset token                -> unsigned offset of 0
//   offset token ends with '!'     -> pre-indexed
//   base token ends with ']'       -> post-indexed
//   either token starts with x / w -> register offset
//   anything else                  -> unsigned offset (byte offset / 8 when regType is 1,
//                                     otherwise / 4)
void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){
    (void)numOperands;
    char *baseOperand = operandList[1];
    char *offsetOperand = operandList[2];

    // Step over the '[' so getOperandNumber sees "xN..." and drops the register letter.
    instr->data.SingleTransferData.processOpData.singleDataTransferData.base = getOperandNumber(baseOperand + 1);

    if (offsetOperand == NULL || offsetOperand[0] == '\0') {
        // Bare "[xN]": there is no offset token to inspect, so this is a zero offset.
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = 0;
        return;
    }

    size_t baseLen = strlen(baseOperand);
    size_t offsetLen = strlen(offsetOperand);

    if (offsetOperand[offsetLen - 1] == '!') {
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED;
        // offsetOperand + 1 skips the leading '#'.
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(offsetOperand + 1, NULL, 10);
    } else if (baseLen > 0 && baseOperand[baseLen - 1] == ']') {
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED;
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(offsetOperand + 1, NULL, 10);
    } else if (isOperandRegister(baseOperand[0]) || isOperandRegister(offsetOperand[0])) {
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET;
        // offsetOperand + 1 skips the register letter.
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = strtol(offsetOperand + 1, NULL, 10);
    } else {
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
        int offset = strtol(offsetOperand + 1, NULL, 10);
        if (instr->data.SingleTransferData.regType == 1) {
            instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset / 8;
        } else {
            instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset / 4;
        }
    }
}
// Fills in the target-register fields of a load/store instruction.
// Only a64inst_SINGLETRANSFER instructions are modified: regType becomes 1 when the
// first operand starts with 'x' and 0 otherwise, and target becomes the register number
// read from the first operand. Every other instruction type is left untouched.
// opcode and numOperands are accepted but not used.
void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){
    if (instr->type != a64inst_SINGLETRANSFER) {
        return;
    }

    char *targetOperand = operandList[0];
    instr->data.SingleTransferData.regType = (targetOperand[0] == 'x') ? 1 : 0;
    instr->data.SingleTransferData.target = getOperandNumber(targetOperand);
}
// Fills in the operand fields of a branch instruction whose BranchType is already set.
//   unconditional - prints "unconditional"; no field is written
//   register      - src is the number following the first character of operandList[0]
//   conditional   - cond is chosen from the text after the first two characters of
//                   opcode ("eq", "ne", "ge", "lt", "gt", "le", "al"); an unrecognised
//                   suffix leaves cond untouched
// Any other branch type is ignored.
void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){
    if (instr->data.BranchData.BranchType == a64inst_UNCONDITIONAL) {
        printf("unconditional");
        return;
    }

    if (instr->data.BranchData.BranchType == a64inst_REGISTER) {
        instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, NULL, 10);
        return;
    }

    if (instr->data.BranchData.BranchType != a64inst_CONDITIONAL) {
        return;
    }

    static const char *const suffixes[] = {"eq", "ne", "ge", "lt", "gt", "le", "al"};
    const size_t suffixCount = sizeof suffixes / sizeof suffixes[0];
    const char *suffix = opcode + 2;

    // Find the position of the suffix in the table; suffixCount means "not found".
    size_t match = 0;
    while (match < suffixCount && strcmp(suffix, suffixes[match]) != 0) {
        match++;
    }

    switch (match) {
    case 0:
        instr->data.BranchData.processOpData.conditionalData.cond = EQ;
        break;
    case 1:
        instr->data.BranchData.processOpData.conditionalData.cond = NE;
        break;
    case 2:
        instr->data.BranchData.processOpData.conditionalData.cond = GE;
        break;
    case 3:
        instr->data.BranchData.processOpData.conditionalData.cond = LT;
        break;
    case 4:
        instr->data.BranchData.processOpData.conditionalData.cond = GT;
        break;
    case 5:
        instr->data.BranchData.processOpData.conditionalData.cond = LE;
        break;
    case 6:
        instr->data.BranchData.processOpData.conditionalData.cond = AL;
        break;
    default:
        break;
    }
}
// Decides whether a data-processing instruction uses only register operands.
// Looks at up to the first three entries of operandList, stopping early at a NULL entry
// (the list must be NULL-terminated when it holds fewer than three operands).
// Returns 1 when at least one operand was seen and every operand seen starts with a
// register prefix, otherwise 0. Two-operand forms such as "cmp x1, x2" therefore count
// as register instructions instead of dereferencing the missing third operand.
int classifyDPInst(char *operandList[]){
    int seen = 0;
    for (int i = 0; i < 3; i++) {
        if (operandList[i] == NULL) {
            break;
        }
        if (!isOperandRegister(operandList[i][0])) {
            return 0;
        }
        seen++;
    }
    return seen > 0;
}
// Sets instr->type (and the sub-type fields that follow directly from the mnemonic)
// from the opcode text and the shape of the operands.
//   opcode      - mnemonic, e.g. "b", "br", "b.eq", "ldr", "str", "add"
//   instr       - instruction to fill in
//   operandList - tokenised operands, NULL-terminated
//   numOperands - unused
// Classification:
//   "b" / "br" / "b.<cond>" -> a64inst_BRANCH with the matching BranchType
//   "ldr" / "str" with a second operand starting with '[' -> a64inst_SINGLETRANSFER,
//       transferType a64inst_LOAD for "ldr" and a64inst_STORE for "str"
//   "ldr" / "str" otherwise -> a64inst_LOADLITERAL; when the second operand starts with
//       '#' its number is stored as the literal offset (symbolic literals are not
//       resolved here)
//   anything else -> a64inst_DPREGISTER or a64inst_DPIMMEDIATE according to classifyDPInst
void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[], int numOperands){
    (void)numOperands;

    // strcmp returns 0 on a match, so turn each comparison into a real boolean once.
    bool isUnconditional = strcmp(opcode, "b") == 0;
    bool isRegister = strcmp(opcode, "br") == 0;
    bool isConditional = strncmp(opcode, "b.", 2) == 0;
    bool isLoad = strcmp(opcode, "ldr") == 0;
    bool isStore = strcmp(opcode, "str") == 0;

    if (isUnconditional || isRegister || isConditional) {
        instr->type = a64inst_BRANCH;
        if (isUnconditional) {
            instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL;
        } else if (isRegister) {
            instr->data.BranchData.BranchType = a64inst_REGISTER;
        } else {
            instr->data.BranchData.BranchType = a64inst_CONDITIONAL;
        }
        return;
    }

    if (isLoad || isStore) {
        // Operand 0 is the target register; operand 1 carries the address or literal.
        char *addressOperand = operandList[1];

        if (addressOperand != NULL && addressOperand[0] == '[') {
            instr->type = a64inst_SINGLETRANSFER;
            instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
            if (isLoad) {
                instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD;
            } else {
                instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE;
            }
        } else {
            instr->type = a64inst_LOADLITERAL;
            if (addressOperand != NULL && addressOperand[0] == '#') {
                // Immediate literal: the number after '#'.
                int offset = getOperandNumber(addressOperand);
                instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset;
            } else {
                //offset is literal, use symbol table and calculate difference
            }
        }
        return;
    }

    if (classifyDPInst(operandList)) {
        instr->type = a64inst_DPREGISTER;
    } else {
        instr->type = a64inst_DPIMMEDIATE;
    }
}
// Splits an operand string into tokens separated by OPERAND_DELIMITER.
//   str          - operand text; must not be NULL and is not modified
//   operandCount - in/out: incremented once per token found (callers pass in 0)
//   operands     - out: token pointers, followed by a NULL entry after the last token
//   numOperands  - out: set to *operandCount + 1
// The tokens point into a heap copy of str that this function allocates and does not
// free, so they stay valid after it returns.
// NOTE(review): nothing releases that copy, and the size of operands is not known here,
// so the caller must supply enough slots for every token plus the NULL terminator.
void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){
    assert(str != NULL);

    size_t copySize = strlen(str) + 1;
    char *scratch = malloc(copySize);
    assert(scratch != NULL);
    memcpy(scratch, str, copySize);

    char *tok = strtok(scratch, OPERAND_DELIMITER);
    operands[0] = tok;
    while (tok != NULL) {
        ++*operandCount;
        tok = strtok(NULL, OPERAND_DELIMITER);
        // Also stores the terminating NULL once the tokens run out.
        operands[*operandCount] = tok;
    }

    *numOperands = *operandCount + 1;
}
// Parses one line of assembly and fills *instr with its abstract representation.
//   asmLine - NUL-terminated source line of the form "opcode operand1, {operand2}, ...";
//             it is echoed to stdout and is not modified
//   instr   - instruction to fill in; the process exits with EXIT_FAILURE when it is NULL
// Line kinds:
//   HALT_ASM_CMD          -> a64inst_HALT
//   opcode ".int"         -> a64inst_DIRECTIVE (the operand is not parsed here)
//   opcode ending in ':'  -> a64inst_LABEL; LabelData.label is a heap copy of the name
//                            without the ':' which the owner of *instr must free
//   anything else         -> classified by classifyOpcode, then operands are filled in
//                            for branch and load/store instructions
// A blank line, or an instruction with no operands, cannot be represented: a message is
// written to stderr and the process exits with EXIT_FAILURE.
void parser_instruction(char asmLine[], a64inst_instruction *instr) {
    printf("%s", asmLine);
    if (instr == NULL){
        exit(EXIT_FAILURE);
    }
    if(strcmp(asmLine, HALT_ASM_CMD) == 0){
        instr->type = a64inst_HALT;
        return;
    }

    //"opcode operand1, {operand2}, ..."
    // strtok writes NULs into its input, so split a private copy of the line.
    char lineCopy[strlen(asmLine) + 1];
    strcpy(lineCopy, asmLine);

    char *opcode = strtok(lineCopy, " ");
    if (opcode == NULL) {
        fprintf(stderr, "parser: blank line cannot be parsed as an instruction\n");
        exit(EXIT_FAILURE);
    }
    // Everything after the opcode; NULL when the line has nothing else (e.g. a label).
    char *operands = strtok(NULL, "");

    if(strcmp(opcode, ".int") == 0){
        //type is directive
        instr->type = a64inst_DIRECTIVE;
    } else if(opcode[strlen(opcode)-1]== ':') {
        //type is label
        //add to symbol table
        instr->type = a64inst_LABEL;
        char *name = strtok(opcode, ":");
        char *label = NULL;
        if (name != NULL) {
            // The name must outlive this call, so it cannot point into lineCopy.
            size_t nameSize = strlen(name) + 1;
            label = malloc(nameSize);
            if (label == NULL) {
                exit(EXIT_FAILURE);
            }
            memcpy(label, name, nameSize);
        }
        instr->data.LabelData.label = label;
    } else {
        //type is instruction
        if (operands == NULL) {
            fprintf(stderr, "parser: instruction '%s' has no operands\n", opcode);
            exit(EXIT_FAILURE);
        }

        // A string of length n holds at most (n + 1) / 2 tokens, and the tokeniser writes
        // one extra NULL terminator. Keep at least five slots, all starting as NULL, so
        // that later stages reading the first few operands never see garbage.
        size_t listCap = strlen(operands) / 2 + 2;
        if (listCap < 5) {
            listCap = 5;
        }
        char *operandList[listCap];
        for (size_t i = 0; i < listCap; i++) {
            operandList[i] = NULL;
        }

        int operandCount = 0;
        int numOperands = 0;
        //generate list of operands
        // (the tokens live in storage allocated by tokeniseOperands; it is not released here)
        tokeniseOperands(operands, &operandCount, operandList, &numOperands);
        //categorise instruction type from opcode and operands
        classifyOpcode(opcode, instr, operandList, operandCount);
        //define struct values according to operands and type
        printf("got to here");
        switch(instr->type){
            case a64inst_BRANCH:
                generateBranchOperands(instr, opcode, operandList);
                break;
            case a64inst_SINGLETRANSFER:
                generateLoadStoreOperands(instr, opcode, operandList, numOperands);
                calcluateAddressFormat(instr, operandList, numOperands);
                break;
            case a64inst_LOADLITERAL:
                generateLoadStoreOperands(instr, opcode, operandList, numOperands);
                break;
            case a64inst_DPREGISTER:
                //generate DP operands;
                break;
            case a64inst_DPIMMEDIATE:
                //generate DP operands;
                break;
            default:
                printf("INVALID INSTRUCTION");
                break;
        }
    }
}
// Parses an array of assembly lines, one instruction per line.
//   asmLines  - array of NUL-terminated lines; parsing stops at the first NULL entry
//               or after lineCount lines, whichever comes first
//   lineCount - number of instructions to allocate room for
// Returns a heap-allocated, zero-initialised array of lineCount instructions in which
// entry i was parsed from asmLines[i]; the caller frees it. Returns NULL when asmLines
// is NULL, lineCount is negative, or the allocation fails.
a64inst_instruction *parse(char **asmLines, int lineCount) {
    if (asmLines == NULL || lineCount < 0) {
        return NULL;
    }

    a64inst_instruction *instructions = calloc((size_t)lineCount, sizeof *instructions);
    if (instructions == NULL) {
        return NULL;
    }

    // Bound the walk by lineCount as well as the NULL terminator so a longer input
    // cannot write past the end of the allocation.
    for (int i = 0; i < lineCount && asmLines[i] != NULL; i++) {
        parser_instruction(asmLines[i], &instructions[i]);
    }
    return instructions;
}