Merge branch 'assembler-e' into 'assembler'

merge compiler fixes into assembler

See merge request lab2324_summer/armv8_43!7
This commit is contained in:
Niedringhaus, George 2024-06-12 14:51:55 +00:00
commit 654d6fdbb9
6 changed files with 195 additions and 101 deletions

View File

@ -1,7 +1,8 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include "parser.c"
#include "fileio.c"
int main(int argc, char **argv) { int main(int argc, char **argv) {
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

6
src/emulate.c Executable file → Normal file
View File

@ -8,6 +8,11 @@
#include "decode.h" #include "decode.h"
#include "execute.h" #include "execute.h"
int main(int arg, char **argv){
return EXIT_SUCCESS;
}
/*
extern a64inst_instruction *decode(word w); extern a64inst_instruction *decode(word w);
int main(int argc, char **argv) { int main(int argc, char **argv) {
@ -59,3 +64,4 @@ int main(int argc, char **argv) {
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
*/

View File

@ -1,48 +1,80 @@
#include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "fileio.h"
#include "global.h"
/* Loads a binary file located at filePath to memory, taking up a block of exactly memorySize bytes, #define MAX_ASM_LINE_LENGTH 30
and returns the starting address of the data. If memorySize is insufficient to store the entire file,
an appropriate error is reported. Excess memory is set to 0 bit values. */
byte *fileio_loadBin(const char *filePath, size_t memorySize) { int isValidFileFormat(char filename[], char expectedExtension[]){
FILE *file = fopen(filePath, "rb"); char *pointLoc = strrchr(filename, '.');
if (file == NULL) {
fprintf(stderr, "Couldn't open %s!\n", filePath);
exit(EXIT_FAILURE);
}
byte *fileData = malloc(memorySize); if(pointLoc != NULL){
if (fileData == NULL) { if(strcmp(pointLoc, expectedExtension)==0){
fprintf(stderr, "Ran out of memory attempting to load %s!\n", filePath); return(1);
exit(EXIT_FAILURE);
}
// Loop while reading from the file yields data. Only terminates if EOF is reached or ERROR occurs.
// Explicitly deal with attempting to write too much data to memory block, rather than allow segfault.
const size_t byteCount = memorySize/sizeof(byte);
int i = 0;
while (fread(fileData + i, sizeof(byte), 1, file)) {
if (i >= byteCount) {
fprintf(stderr, "Attempting to load binary %s to memory of smaller size %zu!\n", filePath, memorySize);
exit(EXIT_FAILURE);
} }
i++;
} }
return(0);
if (ferror(file)) { }
fprintf(stderr, "Encountered error attempting to read %s!\n", filePath);
exit(EXIT_FAILURE); int writeBinaryFile(word instrs[], char outputFile[], int numInstrs){
}
assert(fclose(file) != EOF); if (!isValidFileFormat(outputFile, "bin")){
return(-1);
// If part of memory block was left uninitialized, initialize it to zero. }
if (i < byteCount) {
memset(fileData + i, 0, (byteCount - i) * sizeof(byte)); FILE *fp;
}
return fileData; fp = fopen(outputFile, "wb");
if(fp == NULL){
return(-1);
}
fwrite(instrs, 4, sizeof(word) * numInstrs, fp);
fclose(fp);
return(0);
}
char **readAssemblyFile(char inputFile[]) {
if (!isValidFileFormat(inputFile, "s")){
return(NULL);
}
FILE *fp = fopen(inputFile, "r");
if (fp == NULL){
return(NULL);
}
int lineCount = 0;
char ch;
while ((ch = fgetc(fp)) != EOF)
{
if (ch == '\n' || ch == '\0')
{
lineCount++;
}
}
char **heap = malloc(sizeof(char *) * lineCount);
rewind(fp);
for( int i=0; i<lineCount; i++) {
char tmp[512];
// read line into tmp
fgets(tmp, MAX_ASM_LINE_LENGTH-1, fp);
int size = strlen(tmp);
char *line = malloc(size+1); // allocate mem for text line
strcpy(line, tmp);
heap[i] = line; // store line pointer
}
return(heap);
} }

View File

@ -12,90 +12,111 @@
// - use string matching to get opcode, and operands (DONE) // - use string matching to get opcode, and operands (DONE)
// - check operand count (DONE) // - check operand count (DONE)
// - match opcode to a64 struct types (DONE) // - match opcode to a64 struct types (DONE)
// - count operands and match type/values // - count operands and match type/values (DONE)
// - generate final a64inst and return // - generate final a64inst and return (TODO: DP instrs)
// - ASK ABOUT OFFSET CALCULATION
// - CREATE FUNC TO TIDY UP OPERANDS IN DP // - CREATE FUNC TO TIDY UP OPERANDS IN DP
//calculate offsets from string int isOperandRegister(char *operand){
void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ return((strcmp(&(operand[0]), "x")==0) || (strcmp(&(operand[0]), "w")==0));
}
if(strcmp(operandList[2][strlen(operandList[1])-1], "!")==0){ //calculate offsets from string
void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){
char *endptr;
uint8_t base = strtol(&(operandList[1][2]), &endptr, 10);
instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base;
if(strcmp(&(operandList[2][strlen(operandList[1])-1]), "!")==0){
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED;
} else if(strcmp(operandList[1][strlen(operandList[0])-1], "]") == 0) { instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10);
} else if(strcmp(&(operandList[1][strlen(operandList[0])-1]), "]") == 0) {
//post-indexed //post-indexed
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED;
} else if( (strcmp(operandList[2][strlen(operandList[1])-1], "x") == 0) instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10);
|| (strcmp(operandList[2][strlen(operandList[1])-1], "w") == 0)){ } else if( (isOperandRegister(&(operandList[2][0])) == 1)
|| (isOperandRegister(&(operandList[2][0])) == 1)){
//register //register
instr->data.SingleTransferData.processOpData.singleDataTransferData.processOpData.addressingMode = a64inst_REGISTER_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET;
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = strtol(&(operandList[2][1]), &endptr, 10);
} else { } else {
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
if(numOperands==3){
int offset = strtol(&(operandList[2][1]), &endptr, 10);
if(instr->data.SingleTransferData.regType == 1){
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8;
} else {
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/4;
}
}
} }
} }
void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[]){ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){
switch(instr->type){ switch(instr->type){
case a64inst_SINGLETRANSFER: case a64inst_SINGLETRANSFER:
if(strcmp(operandList[0][0], "x")==0){ if(strcmp(&(operandList[0][0]), "x")==0){
//x-register //x-register
instr->data.SingleTransferData.regType = 1; instr->data.SingleTransferData.regType = 1;
} else { } else {
instr->data.SingleTransferData.regType = 0; instr->data.SingleTransferData.regType = 0;
} }
char *endptr; char *endptr;
instr->data.SingleTransferData.target = strtol(operandList[0][0]+1, endptr, 2); instr->data.SingleTransferData.target = strtol(&(operandList[0][0])+1, &endptr, 10);
calcluateAddressFormat(instr, operandList); calcluateAddressFormat(instr, operandList, numOperands);
break; break;
case a64inst_LOADLITERAL: case a64inst_LOADLITERAL:
break; break;
default:
break;
} }
} }
void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){
char *endptr;
switch(instr->data.BranchData.BranchType){ switch(instr->data.BranchData.BranchType){
case a64inst_UNCONDITIONAL: case a64inst_UNCONDITIONAL:
//define and sign extend immediate offset //define and sign extend immediate offset
//use symbol table //use symbol table
printf("unconditional");
break; break;
case a64inst_REGISTER: case a64inst_REGISTER:
char *endptr; instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, &endptr, 10);
instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, endptr, 2)
break; break;
case a64inst_CONDITIONAL: case a64inst_CONDITIONAL:
char* condition = strtok(strdup(opcode), "b."); {
condition = strtok(NULL, ""); char *condition = NULL;
if(strcmp(condition, "eq")==0){ condition = strcpy(condition, opcode);
instr->data.branchData.processOpData.conditionalData.cond = EQ; condition += 2;
} else if (strcmp(condition, "ne")==0){ if(strcmp(condition, "eq")==0){
instr->data.branchData.processOpData.conditionalData.cond = NE; instr->data.BranchData.processOpData.conditionalData.cond = EQ;
} else if (strcmp(condition, "ge")==0){ } else if (strcmp(condition, "ne")==0){
instr->data.branchData.processOpData.conditionalData.cond = GE; instr->data.BranchData.processOpData.conditionalData.cond = NE;
} else if (strcmp(condition, "lt")==0){ } else if (strcmp(condition, "ge")==0){
instr->data.branchData.processOpData.conditionalData.cond = LT; instr->data.BranchData.processOpData.conditionalData.cond = GE;
} else if (strcmp(condition, "gt")==0){ } else if (strcmp(condition, "lt")==0){
instr->data.branchData.processOpData.conditionalData.cond = GT; instr->data.BranchData.processOpData.conditionalData.cond = LT;
} else if (strcmp(condition, "le")==0){ } else if (strcmp(condition, "gt")==0){
instr->data.branchData.processOpData.conditionalData.cond = LE; instr->data.BranchData.processOpData.conditionalData.cond = GT;
} else if (srtcmp(condition, "al")==0){ } else if (strcmp(condition, "le")==0){
instr->data.branchData.processOpData.conditionalData.cond = AL; instr->data.BranchData.processOpData.conditionalData.cond = LE;
} else if (strcmp(condition, "al")==0){
instr->data.BranchData.processOpData.conditionalData.cond = AL;
}
break;
//calculate offset from symbol table.
} }
break;
//calculate offset from symbol table.
} }
} }
int isOperandRegister(char *operand){
return((strcmp(operand[0], "x")==0) || (strcmp(operand[0], "w")==0));
}
int classifyDPInst(char *operandList[]){ int classifyDPInst(char *operandList[]){
return(isOperandRegister(operandList[0]) && return(isOperandRegister(operandList[0]) &&
isOperandRegister(operandList[1]) && isOperandRegister(operandList[1]) &&
isOperandRegister(operandList[2])); isOperandRegister(operandList[2]));
} }
void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[]){ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[], int numOperands){
int isUnconditional = strcmp(opcode, "b"); int isUnconditional = strcmp(opcode, "b");
int isRegister = strcmp(opcode, "br"); int isRegister = strcmp(opcode, "br");
int isLoad = strcmp(opcode, "ldr"); int isLoad = strcmp(opcode, "ldr");
@ -111,7 +132,6 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[
instr->data.BranchData.BranchType = a64inst_REGISTER; instr->data.BranchData.BranchType = a64inst_REGISTER;
} else { } else {
instr->data.BranchData.BranchType = a64inst_CONDITIONAL; instr->data.BranchData.BranchType = a64inst_CONDITIONAL;
//instr->data.branchData.processOpData.cond = {remove first two chars of opcode}
} }
generateBranchOperands(instr, opcode, operandList); generateBranchOperands(instr, opcode, operandList);
} else if(isLoad == 0 || isStore == 0){ } else if(isLoad == 0 || isStore == 0){
@ -120,19 +140,28 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[
if( *address == '['){ if( *address == '['){
//type is register //type is register
instr->type = a64inst_SINGLETRANSFER; instr->type = a64inst_SINGLETRANSFER;
instr->data.singleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
if(isLoad == 0){ if(isLoad == 0){
instr->data.SingleTransferData.transferType = a64inst_LOAD; instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD;
} else { } else {
instr->data.SingleTransferData.processOpData.transferType = a64inst_STORE; instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE;
} }
} else { } else {
instr->type = a64inst_LOADLITERAL; instr->type = a64inst_LOADLITERAL;
//instr->data.processOpData.offset = {} to be defined by symbol table if(operandList[0][0] =='#'){
//offset is immediate
char *immOffset = NULL;
immOffset = strcpy(immOffset, operandList[0]);
immOffset++;
char *endptr = NULL;
int offset = strtol(immOffset, &endptr, 10);
instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset;
} else {
//offset is literal, use symbol table and calculate difference
}
} }
} else { } else {
int numOperands = sizeof(operandList) / sizeof(operandList[0])
if(classifyDPInst(operandList)){ if(classifyDPInst(operandList)){
instr->type = a64inst_DPREGISTER; instr->type = a64inst_DPREGISTER;
} else { } else {
@ -142,21 +171,24 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[
} }
} }
char *tokeniseOperands(char* str, int operandCount, char *operands[]){ void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){
char *operandsDupe = strdup(str); char *operandsDupe = NULL;
operandsDupe = strcpy(operandsDupe, str);
char *operand = strtok(operandsDupe, OPERAND_DELIMITER); char *operand = strtok(operandsDupe, OPERAND_DELIMITER);
operands[0] = operand; operands[0] = operand;
while (operand != NULL){ while (operand != NULL){
operandCount++; *operandCount = *(operandCount)+1;
operand = strtok(NULL, OPERAND_DELIMITER); operand = strtok(NULL, OPERAND_DELIMITER);
operands[operandCount] = operand; operands[*(operandCount)] = operand;
} }
*(numOperands) = *(operandCount)+1;
} }
//takes inputted assembly line and returns a //takes inputted assembly line and returns a
//pointer to an abstract representation of the instruction //pointer to an abstract representation of the instruction
a64inst_instruction *parser(char asmLine[]){ a64inst_instruction *parser(char asmLine[]){
int numOperands = 0;
a64inst_instruction *instr = malloc(sizeof(a64inst_instruction)); a64inst_instruction *instr = malloc(sizeof(a64inst_instruction));
if (instr == NULL){ if (instr == NULL){
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
@ -169,7 +201,8 @@ a64inst_instruction *parser(char asmLine[]){
//"opcode operand1, {operand2}, ..." //"opcode operand1, {operand2}, ..."
//duplicated as strtok modifies the input string //duplicated as strtok modifies the input string
char *stringptr = strdup(asmLine); char *stringptr = NULL;
stringptr = strcpy(stringptr, asmLine);
char *opcode = strtok(stringptr, " "); char *opcode = strtok(stringptr, " ");
char *operands = strtok(NULL, ""); char *operands = strtok(NULL, "");
@ -178,18 +211,43 @@ a64inst_instruction *parser(char asmLine[]){
//type is directive //type is directive
instr->type = a64inst_DIRECTIVE; instr->type = a64inst_DIRECTIVE;
} else if(strcmp(opcode[strlen(opcode)-1], ":") == 0) { } else if(opcode[strlen(opcode)-1]== ':') {
//type is label //type is label
//add to symbol table //add to symbol table
instr->type = a64inst_LABEL; instr->type = a64inst_LABEL;
char *opcodeCpy = strdup(opcode); char *opcodeCpy = NULL;
opcodeCpy = strcpy(opcodeCpy, opcode);
char *labelData = strtok(opcodeCpy, ":"); char *labelData = strtok(opcodeCpy, ":");
instr->data.labelData.label = labelData; instr->data.LabelData.label = labelData;
} else { } else {
//type is instruction //type is instruction
int operandCount = 0; int operandCount = 0;
const char *operandList[4]; char *operandList[4];
tokeniseOperands(operands, &operandCount, operandList); //generate list of operands
tokeniseOperands(operands, &operandCount, operandList, &numOperands);
//categorise instruction type from opcode and operands
classifyOpcode(opcode, instr, operandList, operandCount);
//define struct values according to operands and type
switch(instr->type){
case a64inst_BRANCH:
generateBranchOperands(instr, opcode, operandList);
break;
case a64inst_SINGLETRANSFER:
generateLoadStoreOperands(instr, opcode, operandList, numOperands);
break;
case a64inst_LOADLITERAL:
generateLoadStoreOperands(instr, opcode, operandList, numOperands);
break;
case a64inst_DPREGISTER:
//generate DP operands;
break;
case a64inst_DPIMMEDIATE:
//generate DP operands;
break;
default:
printf("INVALID INSTRUCTION");
break;
}
} }

View File

@ -1,5 +1,2 @@
#ifndef __PARSERCONSTS__
#define __PARSERCONSTS__
#define OPERAND_DELIMITER ", " #define OPERAND_DELIMITER ", "
#define HALT_ASM_CMD "and x0, x0, x0" #define HALT_ASM_CMD "and x0, x0, x0"
#endif

View File

@ -1,6 +1,6 @@
# include "global.h" #include "global.h"
# include "a64instruction.h" #include "a64instruction.h"
# include "symboltable.h" #include "symboltable.h"
//generates assembled code based on two pass assembly method //generates assembled code based on two pass assembly method