From 61f6fc9506a9348046ba15780fd5c63388713dd9 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 31 May 2024 15:42:35 +0100 Subject: [PATCH 001/113] create binary file write function --- src/fileaccess.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 src/fileaccess.c diff --git a/src/fileaccess.c b/src/fileaccess.c new file mode 100644 index 0000000..aec7195 --- /dev/null +++ b/src/fileaccess.c @@ -0,0 +1,14 @@ +#include + +void writeBinaryFile(word instrs[], char outputFile[]){ + + + FILE *fp; + + fp = fopen(outputFile, "wb"); + fwrite(instrs, 4, sizeof(instrs), fp); + fclose(fp); + +} + +void readAssemblyFile() \ No newline at end of file From fa17a7fda35e98f8284587d473ab907b0ef72290 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 31 May 2024 15:53:41 +0100 Subject: [PATCH 002/113] add inputted filename format checking --- src/fileaccess.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/fileaccess.c b/src/fileaccess.c index aec7195..e3e5df8 100644 --- a/src/fileaccess.c +++ b/src/fileaccess.c @@ -1,7 +1,22 @@ #include +#include -void writeBinaryFile(word instrs[], char outputFile[]){ +bool isValidFileFormat(char filename[], char expectedExtension[]){ + int *pointLoc = strrchr(filename, '.'); + if(pointLoc != NULL){ + if(strcmp(pointLoc, expectedExtension)==0){ + return true; + } + } + return false; +} + +int writeBinaryFile(word instrs[], char outputFile[]){ + + if (!isValidFileFormat(filename, "bin")){ + return(1); + } FILE *fp; @@ -9,6 +24,9 @@ void writeBinaryFile(word instrs[], char outputFile[]){ fwrite(instrs, 4, sizeof(instrs), fp); fclose(fp); + return(0); } -void readAssemblyFile() \ No newline at end of file +int readAssemblyFile() { + +} \ No newline at end of file From 6203e65bdccc6c5fb3d3406f0384280739ec29c9 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 31 May 2024 16:16:18 +0100 Subject: [PATCH 003/113] add assembly file reading --- src/fileaccess.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/fileaccess.c b/src/fileaccess.c index e3e5df8..3ee3831 100644 --- a/src/fileaccess.c +++ b/src/fileaccess.c @@ -1,32 +1,56 @@ #include #include -bool isValidFileFormat(char filename[], char expectedExtension[]){ +#define MAX_ASM_LINE_LENGTH 100 + +int isValidFileFormat(char filename[], char expectedExtension[]){ int *pointLoc = strrchr(filename, '.'); if(pointLoc != NULL){ if(strcmp(pointLoc, expectedExtension)==0){ - return true; + return(1); } } - return false; + return(0); } int writeBinaryFile(word instrs[], char outputFile[]){ if (!isValidFileFormat(filename, "bin")){ - return(1); + return(-1); } FILE *fp; fp = fopen(outputFile, "wb"); + + if(fp == NULL){ + return(-1); + } + fwrite(instrs, 4, sizeof(instrs), fp); fclose(fp); return(0); } -int readAssemblyFile() { +int readAssemblyFile(char inputFile[]) { + if (!isValidFileFormat(filename, "s")){ + return(1); + } + FILE *fp; + char savedLine[MAX_ASM_LINE_LENGTH]; + + fp = fopen(inputFile, "r"); + + if(fp == NULL){ + return(-1); + } + + while (fgets(savedLine, MAX_ASM_LINE_LENGTH-1, fp) != NULL) { + //pass line to parser + } + + return(0); } \ No newline at end of file From 1ff18a4fb9b2e9da595e34d02e9617a9492ab88e Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Mon, 3 Jun 2024 21:23:18 +0100 Subject: [PATCH 004/113] rewrite error exits for consistency --- src/assemblylineconversion.c | 3 +++ src/assemblylineconversion.h | 0 src/fileaccess.c | 18 +++++++++++------- 3 files changed, 14 insertions(+), 7 deletions(-) create mode 100644 src/assemblylineconversion.c create mode 100644 src/assemblylineconversion.h diff --git a/src/assemblylineconversion.c b/src/assemblylineconversion.c new file mode 100644 index 0000000..f612329 --- /dev/null +++ b/src/assemblylineconversion.c @@ -0,0 +1,3 @@ +#include +#include + diff --git a/src/assemblylineconversion.h b/src/assemblylineconversion.h new file mode 100644 index 0000000..e69de29 diff --git a/src/fileaccess.c b/src/fileaccess.c index 3ee3831..621de3c 100644 --- a/src/fileaccess.c +++ b/src/fileaccess.c @@ -3,6 +3,7 @@ #define MAX_ASM_LINE_LENGTH 100 +//validates inputted charlist as valid filename against expected extension int isValidFileFormat(char filename[], char expectedExtension[]){ int *pointLoc = strrchr(filename, '.'); @@ -14,10 +15,11 @@ int isValidFileFormat(char filename[], char expectedExtension[]){ return(0); } +//writes a list of words (list of binary instructions) to a named output file int writeBinaryFile(word instrs[], char outputFile[]){ if (!isValidFileFormat(filename, "bin")){ - return(-1); + exit(EXIT_FAILURE); } FILE *fp; @@ -25,18 +27,20 @@ int writeBinaryFile(word instrs[], char outputFile[]){ fp = fopen(outputFile, "wb"); if(fp == NULL){ - return(-1); + exit(EXIT_FAILURE); } - fwrite(instrs, 4, sizeof(instrs), fp); + fwrite(instrs, sizeof(word), sizeof(instrs), fp); fclose(fp); - return(0); + exit(EXIT_SUCCESS); } + +//reads assembly file of "inputFile" name, int readAssemblyFile(char inputFile[]) { if (!isValidFileFormat(filename, "s")){ - return(1); + exit(EXIT_FAILURE); } FILE *fp; @@ -45,12 +49,12 @@ int readAssemblyFile(char inputFile[]) { fp = fopen(inputFile, "r"); if(fp == NULL){ - return(-1); + exit(EXIT_FAILURE); } while (fgets(savedLine, MAX_ASM_LINE_LENGTH-1, fp) != NULL) { //pass line to parser } - return(0); + exit(EXIT_SUCCESS); } \ No newline at end of file From d69d3f0d88cd003eec2038b66836e9c306b50c96 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Mon, 3 Jun 2024 21:38:58 +0100 Subject: [PATCH 005/113] Requested upload to ensure no repeated code --- src/assemble.c | 2 ++ src/symboltable.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 src/symboltable.c diff --git a/src/assemble.c b/src/assemble.c index e2ad1c8..ae760c0 100755 --- a/src/assemble.c +++ b/src/assemble.c @@ -1,5 +1,7 @@ #include +#include int main(int argc, char **argv) { + return EXIT_SUCCESS; } diff --git a/src/symboltable.c b/src/symboltable.c new file mode 100644 index 0000000..38a0f7d --- /dev/null +++ b/src/symboltable.c @@ -0,0 +1,34 @@ + + +typedef struct st st; + + + +typedef struct { + const void* key; + void* value; + node* prev; + node* next; +} node; + +struct st { + node* head; + node* tail; +}; + +// add new node to the end +void st_add(st table, void* key, void* value) { + node n = {key, value, table.tail}; + (*(table.tail)).next = &n; + table.tail = &n; +} + +void* st_search(st table, void* key) { + return nodeSearch(table.head, key); +} + +nodeSearch(node* n, void* key) { + if (n == albuquerque) { + + } +} \ No newline at end of file From 43dd6be707f8e1834a1cdabdf75d68f317e9cd1c Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Mon, 3 Jun 2024 21:46:22 +0100 Subject: [PATCH 006/113] add a64instruction structs from emulator --- src/a64instruction.h | 28 +++++++++++ src/a64instruction_Branch.h | 41 +++++++++++++++ src/a64instruction_DP.h | 7 +++ src/a64instruction_DPImmediate.h | 42 ++++++++++++++++ src/a64instruction_DPRegister.h | 58 ++++++++++++++++++++++ src/a64instruction_SingleTransfer.h | 47 ++++++++++++++++++ src/a64instruction_global.h | 14 ++++++ src/assemblylineconversion.c | 3 -- src/parser.c | 11 ++++ src/{assemblylineconversion.h => parser.h} | 0 10 files changed, 248 insertions(+), 3 deletions(-) create mode 100644 src/a64instruction.h create mode 100644 src/a64instruction_Branch.h create mode 100644 src/a64instruction_DP.h create mode 100644 src/a64instruction_DPImmediate.h create mode 100644 src/a64instruction_DPRegister.h create mode 100644 src/a64instruction_SingleTransfer.h create mode 100644 src/a64instruction_global.h delete mode 100644 src/assemblylineconversion.c create mode 100644 src/parser.c rename src/{assemblylineconversion.h => parser.h} (100%) diff --git a/src/a64instruction.h b/src/a64instruction.h new file mode 100644 index 0000000..463e75a --- /dev/null +++ b/src/a64instruction.h @@ -0,0 +1,28 @@ +#ifndef __A64INSTRUCTION__ +#define __A64INSTRUCTION__ +#include "a64instruction_DPImmediate.h" +#include "a64instruction_Branch.h" +#include "a64instruction_SingleTransfer.h" + +// Define the types of instructions in subset of the AArch64 Instruction Set implemented. +// Each type is defined by the format of the instruction's operand(s). +typedef enum { + a64inst_DPIMMEDIATE, + a64inst_DPREGISTER, + a64inst_SINGLETRANSFER, + a64inst_LOADLITERAL, + a64inst_BRANCH, + a64inst_HALT +} a64inst_type; + +// Structure the holds the type and operand data of an instruction +typedef struct { + a64inst_type type; + union { + a64inst_DPImmediateData DPImmediateData; + a64inst_BranchData BranchData; + a64inst_SingleTransferData SingleTransferData; + } data; +} a64inst_instruction; + +#endif diff --git a/src/a64instruction_Branch.h b/src/a64instruction_Branch.h new file mode 100644 index 0000000..1681768 --- /dev/null +++ b/src/a64instruction_Branch.h @@ -0,0 +1,41 @@ +#include +#include "a64instruction_global.h" +#include "global.h" + +typedef enum { + a64inst_UNCONDITIONAL = 0, + a64inst_REGISTER = 1, + a64inst_CONDITIONAL = 2 +} a64inst_BranchType; + +typedef struct { + word unconditionalOffset; +} a64inst_Branch_UnconditionalData; + +typedef struct { + a64inst_regSpecifier src; +} a64inst_Branch_RegisterData; + +typedef enum { + EQ = 0, // Equal + NE = 1, // Not Equal + GE = 10, // Signed greater or equal + LT = 11, // Signed less than + GT = 12, // Signed greater than + LE = 13, // signed less than or equal + AL = 14 // Always +} a64inst_ConditionType; //a64inst_Branch_ConditionType? + +typedef struct { + a64inst_ConditionType cond; + word offset; +} a64inst_Branch_ConditionalData; + +typedef struct { + a64inst_BranchType BranchType; + union { + a64inst_Branch_UnconditionalData unconditionalData; + a64inst_Branch_RegisterData registerData; + a64inst_Branch_ConditionalData conditionalData; + } processOpData; +} a64inst_BranchData; diff --git a/src/a64instruction_DP.h b/src/a64instruction_DP.h new file mode 100644 index 0000000..113f589 --- /dev/null +++ b/src/a64instruction_DP.h @@ -0,0 +1,7 @@ +// Denotes the type of arithmetic operations supported by the architecture +typedef enum { + a64inst_ADD = 0, + a64inst_ADDS = 1, + a64inst_SUB = 2, + a64inst_SUBS = 3 +} a64inst_arithmOp; diff --git a/src/a64instruction_DPImmediate.h b/src/a64instruction_DPImmediate.h new file mode 100644 index 0000000..8b5e68c --- /dev/null +++ b/src/a64instruction_DPImmediate.h @@ -0,0 +1,42 @@ +#include +#include "a64instruction_global.h" +#include "a64instruction_DP.h" + +// Denotes the type of data processing operation +typedef enum { + a64inst_DPI_ARITHM, + a64inst_DPI_WIDEMOV +} a64inst_DPIOpType; + +// Denotes the type of wide move operations supported by the architecture +typedef enum { + a64inst_MOVN = 0, + a64inst_UNDEFINED = 1, + a64inst_MOVZ = 2, + a64inst_MOVK = 3 +} a64inst_wideMovOp; + +// Holds data specific to arithmetic immediate data processing instructions +typedef struct { + bool shiftImmediate; + uint16_t immediate; + a64inst_regSpecifier src; +} a64inst_DPImmediate_ArithmData; + +// Holds data specific to wide move immediate data processing instructions +typedef struct { + uint8_t shiftScalar; + uint16_t immediate; +} a64inst_DPImmediate_WideMovData; + +// Holds data for immediate data processing instructions +typedef struct { + a64inst_regType regType; + a64inst_DPIOpType DPIOpType; + unsigned int processOp; + union { + a64inst_DPImmediate_ArithmData arithmData; + a64inst_DPImmediate_WideMovData wideMovData; + } processOpData; + a64inst_regSpecifier dest; +} a64inst_DPImmediateData; diff --git a/src/a64instruction_DPRegister.h b/src/a64instruction_DPRegister.h new file mode 100644 index 0000000..d0252ee --- /dev/null +++ b/src/a64instruction_DPRegister.h @@ -0,0 +1,58 @@ +#include +#include "a64instruction_global.h" +#include "a64instruction_DP.h" + +// Denotes the type of data processing operation +typedef enum { + a64inst_DPR_ARITHMLOGIC, + a64inst_DPR_MULTIPLY +} a64inst_DPROpType; + +// Denotes the logical operations supported by the architecture +typedef enum { + a64inst_AND = 0, + a64inst_OR = 1, + a64inst_XOR = 2, + a64inst_AND_FLAGGED = 3 +} a64inst_logicOp; + +// Denotes the different kinds of shifts supported by the architecture +typedef enum { + a64inst_LSL = 0, + a64inst_LSR = 1, + a64inst_ASR = 2, + a64inst_ROR = 3 +} a64inst_ShiftType; + +// Holds data specific to arithmetic/logic register data processing instructions +typedef struct { + enum { + a64inst_DPR_ARITHM = 0, + a64inst_DPR_LOGIC = 1 + } type; + a64inst_ShiftType shiftType; + bool negShiftedSrc2; // Guaranteed to be 0 for arithmetic instructions +} a64inst_DPRegister_ArithmLogicData; + +// Holds data specific to multiply register data processing instructions +typedef struct { + bool negProd; + a64inst_regSpecifier summand; +} a64inst_DPRegister_MultiplyData; + +// Holds data for register data processing instructions +typedef struct { + a64inst_regType regType; + a64inst_DPROpType DPROpType; + union { + a64inst_logicOp logicOp; + a64inst_arithmOp arithmOp; + } processOpId; + a64inst_regSpecifier src2; + union { + a64inst_DPRegister_ArithmLogicData arithmLogicData; + a64inst_DPRegister_MultiplyData multiplydata; + } processOpData; + a64inst_regSpecifier src1; + a64inst_regSpecifier dest; +} a64inst_DPRegisterData; diff --git a/src/a64instruction_SingleTransfer.h b/src/a64instruction_SingleTransfer.h new file mode 100644 index 0000000..f661116 --- /dev/null +++ b/src/a64instruction_SingleTransfer.h @@ -0,0 +1,47 @@ +#include +#include "a64instruction_global.h" +#include "global.h" + +typedef enum { + a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER = 1, + a64inst_SINGLE_TRANSFER_LOAD_LITERAL = 0 +} a64inst_SingleTransferType; + +typedef enum { + a64inst_STORE, + a64inst_LOAD +} a64inst_TransferType; + +typedef enum { + a64inst_REGISTER_OFFSET = 2, + a64inst_PRE_INDEXED = 1, + a64inst_POST_INDEXED = 0, + a64inst_UNSIGNED_OFFSET = 3 +} a64inst_AddressingMode; + +typedef struct { + a64inst_TransferType transferType; + a64inst_AddressingMode addressingMode; + + union { + a64inst_regSpecifier offsetReg; + uint16_t indexedOffset; + uint16_t unsignedOffset; + } a64inst_addressingModeData; + + a64inst_regSpecifier base; +} a64inst_SingleDataTransferData; + +typedef struct { + uint32_t offset; +} a64inst_LoadLiteralData; + +typedef struct { + a64inst_SingleTransferType SingleTransferOpType; + a64inst_regType regType; + a64inst_regSpecifier target; + union { + a64inst_SingleDataTransferData singleDataTransferData; + a64inst_LoadLiteralData loadLiteralData; + } processOpData; +} a64inst_SingleTransferData; diff --git a/src/a64instruction_global.h b/src/a64instruction_global.h new file mode 100644 index 0000000..489fe06 --- /dev/null +++ b/src/a64instruction_global.h @@ -0,0 +1,14 @@ +#ifndef __A64INSTRUCTION_GLOBAL__ +#define __A64INSTRUCTION_GLOBAL__ +#include + +// Specifies the register being referred to +typedef uint8_t a64inst_regSpecifier; + +// Denotes the type of register being referred to +typedef enum { + a64inst_W = 0, + a64inst_R = 1 +} a64inst_regType; + +#endif diff --git a/src/assemblylineconversion.c b/src/assemblylineconversion.c deleted file mode 100644 index f612329..0000000 --- a/src/assemblylineconversion.c +++ /dev/null @@ -1,3 +0,0 @@ -#include -#include - diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..1572ef5 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,11 @@ +#include +#include +#include + +#include + +//takes input string, read from asm file and returns +//input as an a64 instruction +a64inst_instruction parser(char asmLine[]){ + +} \ No newline at end of file diff --git a/src/assemblylineconversion.h b/src/parser.h similarity index 100% rename from src/assemblylineconversion.h rename to src/parser.h From ba1b614fc1d4ca18284be1bb401bc45a344f3072 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Mon, 3 Jun 2024 22:02:41 +0100 Subject: [PATCH 007/113] comment code for understanding --- src/fileaccess.c | 7 +++++-- src/parser.c | 14 +++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/fileaccess.c b/src/fileaccess.c index 621de3c..c425ee7 100644 --- a/src/fileaccess.c +++ b/src/fileaccess.c @@ -37,7 +37,8 @@ int writeBinaryFile(word instrs[], char outputFile[]){ } -//reads assembly file of "inputFile" name, +//reads assembly file of "inputFile" name, and passes +//each line into a parser int readAssemblyFile(char inputFile[]) { if (!isValidFileFormat(filename, "s")){ exit(EXIT_FAILURE); @@ -53,7 +54,9 @@ int readAssemblyFile(char inputFile[]) { } while (fgets(savedLine, MAX_ASM_LINE_LENGTH-1, fp) != NULL) { - //pass line to parser + // removes newline char before saving them + savedLine[strcspn(savedLine, "\n")] = 0; + } exit(EXIT_SUCCESS); diff --git a/src/parser.c b/src/parser.c index 1572ef5..1c56c8e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,11 +1,19 @@ #include #include -#include +#include "parser.h" -#include +#include "a64instruction.h" //takes input string, read from asm file and returns //input as an a64 instruction + +//TODO: +// - use string matching to get opcode, and operands +// - check operand count +// - match opcode to a64 struct types +// - count operands and match type/values +// - generate final a64inst and return + a64inst_instruction parser(char asmLine[]){ - + } \ No newline at end of file From 036e163fe8a0ea92d5c0cc863ae08dfbb4320d3d Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Mon, 3 Jun 2024 23:07:31 +0100 Subject: [PATCH 008/113] classify asm line type, tokenise operands --- src/fileaccess.c | 4 +--- src/parser.c | 39 ++++++++++++++++++++++++++++++++++++++- src/parser.h | 1 + 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/fileaccess.c b/src/fileaccess.c index c425ee7..14bf70d 100644 --- a/src/fileaccess.c +++ b/src/fileaccess.c @@ -1,8 +1,6 @@ #include #include -#define MAX_ASM_LINE_LENGTH 100 - //validates inputted charlist as valid filename against expected extension int isValidFileFormat(char filename[], char expectedExtension[]){ int *pointLoc = strrchr(filename, '.'); @@ -45,7 +43,7 @@ int readAssemblyFile(char inputFile[]) { } FILE *fp; - char savedLine[MAX_ASM_LINE_LENGTH]; + char savedLine[sizeof(a64inst_instruction)]; fp = fopen(inputFile, "r"); diff --git a/src/parser.c b/src/parser.c index 1c56c8e..1d7c2cd 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,6 +14,43 @@ // - count operands and match type/values // - generate final a64inst and return -a64inst_instruction parser(char asmLine[]){ +char *splitOperands(char* str, int operandCount, char *operands[]){ + char *operandsDupe = strdup(str); + int operandCount = 0; + char *operand = strtok(operandsDupe, OPERAND_DELIMITER); + operands[0] = operand; + + while (operand != NULL){ + operandCount++; + operand = strtok(NULL, OPERAND_DELIMITER); + operands[operandCount] = operand; + } + return(operands); + +} + +a64inst_instruction *tokeniser(char asmLine[]){ + a64inst_instruction *instr = malloc(sizeof(a64inst_instruction)); + if (instr == NULL){ + exit(EXIT_FAILURE); + } + + //"opcode operand1, {operand2}, ..." + char *stringptr = strdup(asmLine); + + char *opcode = strtok(stringptr, " "); + char *operands = strtok(NULL, ""); + + if(opcode[0]=="."){ + //type is directive + } else if(opcode[strlen(opcode)-1]==":") { + //type is label + } else { + //type is instruction + int operandCount = 0; + const char *operandList[4]; + splitOperands(operands, &operandCount, operandList); + } + } \ No newline at end of file diff --git a/src/parser.h b/src/parser.h index e69de29..5542aca 100644 --- a/src/parser.h +++ b/src/parser.h @@ -0,0 +1 @@ +#define OPERAND_DELIMITER ", " \ No newline at end of file From cadac4e1bbcf3d0ec02629713bd16391a80243b3 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Mon, 3 Jun 2024 23:09:40 +0100 Subject: [PATCH 009/113] rename parser funcs for clarity --- src/parser.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/parser.c b/src/parser.c index 1d7c2cd..1c30fc3 100644 --- a/src/parser.c +++ b/src/parser.c @@ -8,13 +8,13 @@ //input as an a64 instruction //TODO: -// - use string matching to get opcode, and operands -// - check operand count +// - use string matching to get opcode, and operands (DONE) +// - check operand count (DONE) // - match opcode to a64 struct types // - count operands and match type/values // - generate final a64inst and return -char *splitOperands(char* str, int operandCount, char *operands[]){ +char *tokeniseOperands(char* str, int operandCount, char *operands[]){ char *operandsDupe = strdup(str); int operandCount = 0; char *operand = strtok(operandsDupe, OPERAND_DELIMITER); @@ -29,7 +29,7 @@ char *splitOperands(char* str, int operandCount, char *operands[]){ } -a64inst_instruction *tokeniser(char asmLine[]){ +a64inst_instruction *parser(char asmLine[]){ a64inst_instruction *instr = malloc(sizeof(a64inst_instruction)); if (instr == NULL){ exit(EXIT_FAILURE); @@ -49,8 +49,11 @@ a64inst_instruction *tokeniser(char asmLine[]){ //type is instruction int operandCount = 0; const char *operandList[4]; - splitOperands(operands, &operandCount, operandList); + tokeniseOperands(operands, &operandCount, operandList); } -} \ No newline at end of file + return(a64inst_instruction); + +} + From 422b0f3e62cc798c8a5bdc32ed6dcb7a67c50f83 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 4 Jun 2024 01:30:17 +0100 Subject: [PATCH 010/113] start classifying opcodes and writing skeleton for twopass assembly --- src/parser.c | 13 +++++++++++++ src/twopassassembly.c | 12 ++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 src/twopassassembly.c diff --git a/src/parser.c b/src/parser.c index 1c30fc3..4961480 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,6 +14,17 @@ // - count operands and match type/values // - generate final a64inst and return +void classifyOpcode(char* opcode, a64inst_instruction *instr){ + if(strcmp(opcode, "b") == 0 || strcmp(opcode, "br") == 0 || strncmp(opcode, "b.", strlen("b.")) == 2){ + instr->type = a64inst_BRANCH; + } else if(strcmp(opcode, "ldr") == 0 || strcmp(opcode, "str") == 0){ + //loading/storing instruction; classify operands + instr->type = a64inst_SINGLETRANSFER; + } else { + //data processing + } +} + char *tokeniseOperands(char* str, int operandCount, char *operands[]){ char *operandsDupe = strdup(str); int operandCount = 0; @@ -43,8 +54,10 @@ a64inst_instruction *parser(char asmLine[]){ if(opcode[0]=="."){ //type is directive + //define new type in a64instr struct } else if(opcode[strlen(opcode)-1]==":") { //type is label + //use symbol table to assemble } else { //type is instruction int operandCount = 0; diff --git a/src/twopassassembly.c b/src/twopassassembly.c new file mode 100644 index 0000000..24080f3 --- /dev/null +++ b/src/twopassassembly.c @@ -0,0 +1,12 @@ +//generates assembled code based on two pass assembly method + +void generateSymbolTable(a64inst_instruction instrs[], int numInstrs){ + //TODO: + //generate symbol table based on inputted assembly code and labels + for(int i=0; itype==LABEL){ + // symbol table stuff here + } + } +} From bb3218b53588b11aedf6b978ff9679632b7f4389 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 4 Jun 2024 03:07:00 +0100 Subject: [PATCH 011/113] add detail to assembly skeleton --- src/parser.c | 7 ++++--- src/twopassassembly.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/parser.c b/src/parser.c index 4961480..e944ebc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -22,6 +22,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr){ instr->type = a64inst_SINGLETRANSFER; } else { //data processing + } } @@ -52,12 +53,12 @@ a64inst_instruction *parser(char asmLine[]){ char *opcode = strtok(stringptr, " "); char *operands = strtok(NULL, ""); - if(opcode[0]=="."){ + if(strcmp(opcode, ".int") == 0){ //type is directive //define new type in a64instr struct - } else if(opcode[strlen(opcode)-1]==":") { + } else if(strcmp(opcode[strlen(opcode)-1], ":") == 0) { //type is label - //use symbol table to assemble + //add to symbol table } else { //type is instruction int operandCount = 0; diff --git a/src/twopassassembly.c b/src/twopassassembly.c index 24080f3..d2536dd 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -10,3 +10,31 @@ void generateSymbolTable(a64inst_instruction instrs[], int numInstrs){ } } } + +word assembleBranch(a64inst_instruction *instr){ + word binInstr = 0; + switch (instr->data.BranchData.BranchType) + { + case a64inst_UNCONDITIONAL: + //000101 + //25-0: sign extended simm26 + + break; + case a64inst_REGISTER: + //1101011 + //0000 + //11111 + //000000 + //9-5: address from register + //0000 + + break; + case a64inst_CONDITIONAL: + // 01010100 + // 25-5: sign extended offset + // 4-0: 0{condition} + break; + default: + break; + } +} From 13e2cc8c9d556cdb92d2f02de01980c36fbe8b8d Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 4 Jun 2024 03:35:06 +0100 Subject: [PATCH 012/113] classify branch type from opcode --- src/parser.c | 16 +++++++++++++--- src/twopassassembly.c | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/parser.c b/src/parser.c index e944ebc..0103a3d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -10,13 +10,23 @@ //TODO: // - use string matching to get opcode, and operands (DONE) // - check operand count (DONE) -// - match opcode to a64 struct types +// - match opcode to a64 struct types (PARTIALLY DONE) // - count operands and match type/values // - generate final a64inst and return void classifyOpcode(char* opcode, a64inst_instruction *instr){ - if(strcmp(opcode, "b") == 0 || strcmp(opcode, "br") == 0 || strncmp(opcode, "b.", strlen("b.")) == 2){ - instr->type = a64inst_BRANCH; + if((int isUnconditional = strcmp(opcode, "b")) == 0 || + (int isRegister = strcmp(opcode, "br")) == 0 || + strncmp(opcode, "b.", 2) == 0){ + instr->type = a64inst_BRANCH; + if(isUnconditional){ + instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; + } else if (isRegister){ + instr->data.BranchData.BranchType = a64inst_REGISTER; + } else { + instr->data.BranchData.BranchType = a64inst_CONDITIONAL; + //instr->data.branchData.processOpData.cond = {remove first two chars of opcode} + } } else if(strcmp(opcode, "ldr") == 0 || strcmp(opcode, "str") == 0){ //loading/storing instruction; classify operands instr->type = a64inst_SINGLETRANSFER; diff --git a/src/twopassassembly.c b/src/twopassassembly.c index d2536dd..f7b75f8 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -11,7 +11,7 @@ void generateSymbolTable(a64inst_instruction instrs[], int numInstrs){ } } -word assembleBranch(a64inst_instruction *instr){ +word assembleBranch(a64inst_instruction *instr, int ){ word binInstr = 0; switch (instr->data.BranchData.BranchType) { From ce0f825e1defb6efdb55240592d96f8920d93206 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 4 Jun 2024 04:24:56 +0100 Subject: [PATCH 013/113] add halt command handling --- src/parser.c | 10 ++++++++-- src/parser.h | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/parser.c b/src/parser.c index 0103a3d..98fa61a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,7 +14,7 @@ // - count operands and match type/values // - generate final a64inst and return -void classifyOpcode(char* opcode, a64inst_instruction *instr){ +void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[]){ if((int isUnconditional = strcmp(opcode, "b")) == 0 || (int isRegister = strcmp(opcode, "br")) == 0 || strncmp(opcode, "b.", 2) == 0){ @@ -29,7 +29,8 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr){ } } else if(strcmp(opcode, "ldr") == 0 || strcmp(opcode, "str") == 0){ //loading/storing instruction; classify operands - instr->type = a64inst_SINGLETRANSFER; + char *address = opcode[1]; + } else { //data processing @@ -57,6 +58,11 @@ a64inst_instruction *parser(char asmLine[]){ exit(EXIT_FAILURE); } + if(strcmp(asmLine, HALT_ASM_CMD) == 0){ + instr->type = a64inst_HALT; + return(instr); + } + //"opcode operand1, {operand2}, ..." char *stringptr = strdup(asmLine); diff --git a/src/parser.h b/src/parser.h index 5542aca..8088efa 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1 +1,2 @@ -#define OPERAND_DELIMITER ", " \ No newline at end of file +#define OPERAND_DELIMITER ", " +#define HALT_ASM_CMD "and x0, x0, x0" \ No newline at end of file From a8a1fd52a9fd651a6c8726f92a953e22e870ca5a Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 4 Jun 2024 04:31:46 +0100 Subject: [PATCH 014/113] add to twopassassembly skeleton --- src/twopassassembly.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/twopassassembly.c b/src/twopassassembly.c index f7b75f8..d6194a8 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -38,3 +38,16 @@ word assembleBranch(a64inst_instruction *instr, int ){ break; } } + +void firstPass(a64inst_instruction instrs[], int numInstrs){ + //TODO: + // -iterate over instructions, adding to symbol table + // create symbol table and map labels to addresses/lines +} + +void secondPass(a64inst_instruction instrs[], int numInstrs){ + //TODO: + // iterate over instructions again, this time replacing labels + // with values from symbol table + // after a line has had all the values replaced, assemble it and append +} \ No newline at end of file From 67a9c398322b4fa27e63e79f37966fe3ee3f780d Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Tue, 4 Jun 2024 14:02:09 +0100 Subject: [PATCH 015/113] Symbol basic functionality. --- src/symboltable.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/symboltable.c b/src/symboltable.c index 38a0f7d..fb448c1 100644 --- a/src/symboltable.c +++ b/src/symboltable.c @@ -1,4 +1,4 @@ - +#include typedef struct st st; @@ -23,12 +23,21 @@ void st_add(st table, void* key, void* value) { table.tail = &n; } +// returns the pointer to key of the specified node, or null, if it does not exist void* st_search(st table, void* key) { return nodeSearch(table.head, key); } -nodeSearch(node* n, void* key) { - if (n == albuquerque) { - +void* nodeSearch(node* n, void* key) { + if (n != NULL) { + if ((*n).key == key) { + return (*n).value; + } + else { + return nodeSearch((*n).next, key); + } + } + else { + return NULL; } } \ No newline at end of file From 0f04ac9e2271e3983d50573172ae2c1cd5a78475 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 4 Jun 2024 14:53:30 +0100 Subject: [PATCH 016/113] rename fileaccess --- src/{fileaccess.c => fileio.c} | 1 + src/parser.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) rename src/{fileaccess.c => fileio.c} (96%) diff --git a/src/fileaccess.c b/src/fileio.c similarity index 96% rename from src/fileaccess.c rename to src/fileio.c index 14bf70d..45b25e8 100644 --- a/src/fileaccess.c +++ b/src/fileio.c @@ -37,6 +37,7 @@ int writeBinaryFile(word instrs[], char outputFile[]){ //reads assembly file of "inputFile" name, and passes //each line into a parser +//TODO: allocate whole file in memory, line-by-line int readAssemblyFile(char inputFile[]) { if (!isValidFileFormat(filename, "s")){ exit(EXIT_FAILURE); diff --git a/src/parser.c b/src/parser.c index 1c30fc3..9f5f526 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,7 +14,10 @@ // - count operands and match type/values // - generate final a64inst and return -char *tokeniseOperands(char* str, int operandCount, char *operands[]){ +//takes string of operands, and reference to operandcounter +//takes input of result array +//outputs array of operands +void tokeniseOperands(char* str, int operandCount, char *operands[]){ char *operandsDupe = strdup(str); int operandCount = 0; char *operand = strtok(operandsDupe, OPERAND_DELIMITER); @@ -25,10 +28,10 @@ char *tokeniseOperands(char* str, int operandCount, char *operands[]){ operand = strtok(NULL, OPERAND_DELIMITER); operands[operandCount] = operand; } - return(operands); - } +//takes inputted assembly line and returns a +//pointer to an abstract representation of the instruction a64inst_instruction *parser(char asmLine[]){ a64inst_instruction *instr = malloc(sizeof(a64inst_instruction)); if (instr == NULL){ @@ -36,6 +39,7 @@ a64inst_instruction *parser(char asmLine[]){ } //"opcode operand1, {operand2}, ..." + //duplicated as strtok modifies the input string char *stringptr = strdup(asmLine); char *opcode = strtok(stringptr, " "); From 129bdf3954d850b27e105e7f956003ddff2bb8fd Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 5 Jun 2024 19:52:50 +0100 Subject: [PATCH 017/113] classify opcode load/store --- src/parser.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/parser.c b/src/parser.c index 98fa61a..b736d29 100644 --- a/src/parser.c +++ b/src/parser.c @@ -27,9 +27,21 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ instr->data.BranchData.BranchType = a64inst_CONDITIONAL; //instr->data.branchData.processOpData.cond = {remove first two chars of opcode} } - } else if(strcmp(opcode, "ldr") == 0 || strcmp(opcode, "str") == 0){ + } else if((int isLoad = strcmp(opcode, "ldr")) == 0 || (int isStore = strcmp(opcode, "str")) == 0){ //loading/storing instruction; classify operands - char *address = opcode[1]; + char *address = operandList[1]; + if( *address == '['){ + //type is register + instr->type = a64inst_SINGLETRANSFER; + if(isLoad == 0){ + instr->data.processOpData.singleDataTransferData.transferType = a64inst_LOAD; + } else { + instr->data.processOpData.singleDataTransferData.transferType = a64inst_STORE; + } + } else { + instr->type = a64inst_LOADLITERAL; + //instr->data.processOpData.offset = {} to be defined by symbol table + } } else { //data processing @@ -39,7 +51,6 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ char *tokeniseOperands(char* str, int operandCount, char *operands[]){ char *operandsDupe = strdup(str); - int operandCount = 0; char *operand = strtok(operandsDupe, OPERAND_DELIMITER); operands[0] = operand; @@ -71,7 +82,7 @@ a64inst_instruction *parser(char asmLine[]){ if(strcmp(opcode, ".int") == 0){ //type is directive - //define new type in a64instr struct + } else if(strcmp(opcode[strlen(opcode)-1], ":") == 0) { //type is label //add to symbol table From 1d1089634fbdfbf1c7b0d0a65285c28b9004e017 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 4 Jun 2024 04:24:56 +0100 Subject: [PATCH 018/113] add halt command handling --- src/parser.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/parser.c b/src/parser.c index b736d29..15f010a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -74,6 +74,11 @@ a64inst_instruction *parser(char asmLine[]){ return(instr); } + if(strcmp(asmLine, HALT_ASM_CMD) == 0){ + instr->type = a64inst_HALT; + return(instr); + } + //"opcode operand1, {operand2}, ..." char *stringptr = strdup(asmLine); From 48efdf8284f3dce102e4baca2b907d1add2a0893 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 5 Jun 2024 20:53:56 +0100 Subject: [PATCH 019/113] classify directprocessing instruction type --- src/parser.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index 53b8e0c..dd7a761 100644 --- a/src/parser.c +++ b/src/parser.c @@ -44,7 +44,12 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } } else { - //data processing + int numOperands = sizeof(operandList) / sizeof(operandList[0]) + if(numOperands==3){ + instr->type = a64inst_DPREGISTER; + } else { + instr->type = a64inst_DPIMMEDIATE; + } } } @@ -97,10 +102,11 @@ a64inst_instruction *parser(char asmLine[]){ int operandCount = 0; const char *operandList[4]; tokeniseOperands(operands, &operandCount, operandList); + renameAliases(&opcode, &operandList); } - return(a64inst_instruction); + return(instr); } From f28d3b4047ae6a3f48131f6adfca28853057eab0 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 5 Jun 2024 21:01:39 +0100 Subject: [PATCH 020/113] removed alias function temporarily --- src/parser.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index dd7a761..8b59239 100644 --- a/src/parser.c +++ b/src/parser.c @@ -102,7 +102,6 @@ a64inst_instruction *parser(char asmLine[]){ int operandCount = 0; const char *operandList[4]; tokeniseOperands(operands, &operandCount, operandList); - renameAliases(&opcode, &operandList); } From 8931c151f8ab0c1a489952d2a12b749332c3ebac Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 5 Jun 2024 21:02:25 +0100 Subject: [PATCH 021/113] removed duped if statement --- src/parser.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/parser.c b/src/parser.c index 8b59239..538dd9d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -79,11 +79,6 @@ a64inst_instruction *parser(char asmLine[]){ return(instr); } - if(strcmp(asmLine, HALT_ASM_CMD) == 0){ - instr->type = a64inst_HALT; - return(instr); - } - //"opcode operand1, {operand2}, ..." //duplicated as strtok modifies the input string char *stringptr = strdup(asmLine); From bb0f93953910b759985f616c05b56962b78dd975 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 5 Jun 2024 21:07:28 +0100 Subject: [PATCH 022/113] construct instr IR from label --- src/parser.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/parser.c b/src/parser.c index 538dd9d..be2f874 100644 --- a/src/parser.c +++ b/src/parser.c @@ -88,10 +88,15 @@ a64inst_instruction *parser(char asmLine[]){ if(strcmp(opcode, ".int") == 0){ //type is directive + instr->type = a64inst_DIRECTIVE; } else if(strcmp(opcode[strlen(opcode)-1], ":") == 0) { //type is label //add to symbol table + instr->type = a64inst_LABEL; + char *opcodeCpy = strdup(opcode); + char *labelData = strtok(opcodeCpy, ":"); + instr->data.label = labelData; } else { //type is instruction int operandCount = 0; From 262fd6219de30007ac878c2c1b4bda431ce13342 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 5 Jun 2024 21:10:31 +0100 Subject: [PATCH 023/113] conditional definition of parser constants --- src/parser.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser.h b/src/parser.h index 8088efa..5c3a461 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,2 +1,5 @@ +#ifndef __PARSERCONSTS__ +#define __PARSERCONSTS__ #define OPERAND_DELIMITER ", " -#define HALT_ASM_CMD "and x0, x0, x0" \ No newline at end of file +#define HALT_ASM_CMD "and x0, x0, x0" +#endif \ No newline at end of file From ce34f27fbd053d2d1ed524ba27a9b67fe98b698f Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Thu, 6 Jun 2024 12:33:09 +0100 Subject: [PATCH 024/113] data processing immediate --- src/{symboltable.c => symboltable.h} | 10 ++++- src/twopassassembly.c | 65 +++++++++++++++++++++++----- 2 files changed, 61 insertions(+), 14 deletions(-) rename src/{symboltable.c => symboltable.h} (81%) diff --git a/src/symboltable.c b/src/symboltable.h similarity index 81% rename from src/symboltable.c rename to src/symboltable.h index fb448c1..cd2037c 100644 --- a/src/symboltable.c +++ b/src/symboltable.h @@ -19,8 +19,14 @@ struct st { // add new node to the end void st_add(st table, void* key, void* value) { node n = {key, value, table.tail}; - (*(table.tail)).next = &n; - table.tail = &n; + if (table.head == NULL) { + table.head = &n; + table.tail = &n; + } + else { + (*(table.tail)).next = &n; + table.tail = &n; + } } // returns the pointer to key of the specified node, or null, if it does not exist diff --git a/src/twopassassembly.c b/src/twopassassembly.c index d6194a8..48090b9 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -1,15 +1,8 @@ +# include "global.h" +# include "a64instruction.h" +# include "symboltable.h" //generates assembled code based on two pass assembly method -void generateSymbolTable(a64inst_instruction instrs[], int numInstrs){ - //TODO: - //generate symbol table based on inputted assembly code and labels - for(int i=0; itype==LABEL){ - // symbol table stuff here - } - } -} word assembleBranch(a64inst_instruction *instr, int ){ word binInstr = 0; @@ -39,15 +32,63 @@ word assembleBranch(a64inst_instruction *instr, int ){ } } -void firstPass(a64inst_instruction instrs[], int numInstrs){ +st* firstPass(a64inst_instruction instrs[], int numInstrs){ //TODO: // -iterate over instructions, adding to symbol table // create symbol table and map labels to addresses/lines + struct st table; + for(int i=0; i Date: Thu, 6 Jun 2024 13:00:27 +0100 Subject: [PATCH 025/113] data processing register --- src/twopassassembly.c | 56 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/twopassassembly.c b/src/twopassassembly.c index 48090b9..97944ab 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -75,6 +75,59 @@ word dpi(a64inst_instruction cI) { return out; } +word dpr(a64inst_instruction cI) { + word out = 0; + a64inst_DPRegisterData data = cI.data.DPRegisterData; + // sf + int sf = data.regType; + // bits 27-25 + out += 5*(2^25); + int m = data.DPROpType; + int opc = 0; + int opr = 0; + int rm = 0; + int operand = 0; + int rn = 0; + int rd = 0; + // multiply + if (m == 1) { + //opc = 0; + opr = 8; + if (data.processOpData.multiplydata.negProd) { + operand += 32; + } + operand += data.processOpData.multiplydata.summand; + } + // arithmetic and logical + else { + // shift + opr += 2*data.processOpData.arithmLogicData.shiftType; + // arithmetic + if (data.processOpData.arithmLogicData.type == 1){ + opr += 8; + } + // logical + else { + if (data.processOpData.arithmLogicData.negShiftedSrc2) { + opr += 1; + } + } + operand += data.processOpData.arithmLogicData.shiftAmount; + } + rm += data.src1; + rn += data.src2; + rd += data.dest; + out += sf*(2^31); + out += opc * (2^29); + out += m* (2^28); + out += opr * (2^21); + out += rm * (2^16); + out += operand * 1024; + out += rn * 32; + out += rd; + return out; +} + void secondPass(a64inst_instruction instrs[], int numInstrs, st* table){ //TODO: // iterate over instructions again, this time replacing labels @@ -86,6 +139,9 @@ void secondPass(a64inst_instruction instrs[], int numInstrs, st* table){ case a64inst_DPIMMEDIATE: dpi(cI); break; + case a64inst_DPREGISTER: + dpr(cI); + break; default: break; } From 6177b2f7481a8b80d44d95bc60fb134fcc69556e Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 13:01:26 +0100 Subject: [PATCH 026/113] assemble branch instructions --- src/parser.c | 2 +- src/twopassassembly.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/parser.c b/src/parser.c index be2f874..004e5fb 100644 --- a/src/parser.c +++ b/src/parser.c @@ -10,7 +10,7 @@ //TODO: // - use string matching to get opcode, and operands (DONE) // - check operand count (DONE) -// - match opcode to a64 struct types (PARTIALLY DONE) +// - match opcode to a64 struct types (DONE) // - count operands and match type/values // - generate final a64inst and return diff --git a/src/twopassassembly.c b/src/twopassassembly.c index d6194a8..c701732 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -1,3 +1,5 @@ +#include "global.h" + //generates assembled code based on two pass assembly method void generateSymbolTable(a64inst_instruction instrs[], int numInstrs){ @@ -13,26 +15,28 @@ void generateSymbolTable(a64inst_instruction instrs[], int numInstrs){ word assembleBranch(a64inst_instruction *instr, int ){ word binInstr = 0; + binInstr += (5^28); //101 start of branch instr switch (instr->data.BranchData.BranchType) { case a64inst_UNCONDITIONAL: //000101 //25-0: sign extended simm26 - + binInstr += instr->data.processOpData.unconditionalOffset; break; case a64inst_REGISTER: - //1101011 - //0000 + //10000 //11111 //000000 //9-5: address from register //0000 - + binInstr += ((instr->processOpData.src)^5); break; case a64inst_CONDITIONAL: // 01010100 // 25-5: sign extended offset // 4-0: 0{condition} + binInstr += ((instr->processOpData.offset)^5); + binInstr += instr->processOpData.cond; break; default: break; From b93ab76b8257354baf6d3ab2987f739eb9c6411f Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 13:22:54 +0100 Subject: [PATCH 027/113] generate branch struct from operands (INCOMPLETE) --- src/parser.c | 34 ++++++++++++++++++++++++++++++++++ src/twopassassembly.c | 3 ++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 004e5fb..c3167f1 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,6 +14,39 @@ // - count operands and match type/values // - generate final a64inst and return +void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ + switch(instr->data.BranchType){ + case a64inst_UNCONDITIONAL: + //define and sign extend immediate offset + //use symbol table + break; + case a64inst_REGISTER: + char *endptr; + instr->data.BranchData.processOpData.src = strtol(operandList[0] + 1, endptr, 2) + break; + case a64inst_CONDITIONAL: + char* condition = strtok(strdup(opcode), "b."); + condition = strtok(NULL, ""); + if(strcmp(condition, "eq")==0){ + instr->data.BranchData.processOpData.cond = EQ; + } else if (strcmp(condition, "ne")==0){ + instr->data.BranchData.processOpData.cond = NE; + } else if (strcmp(condition, "ge")==0){ + instr->data.BranchData.processOpData.cond = GE; + } else if (strcmp(condition, "lt")==0){ + instr->data.BranchData.processOpData.cond = LT; + } else if (strcmp(condition, "gt")==0){ + instr->data.BranchData.processOpData.cond = GT; + } else if (strcmp(condition, "le")==0){ + instr->data.BranchData.processOpData.cond = LE; + } else if (srtcmp(condition, "al")==0){ + instr->data.BranchData.processOpData.cond = AL; + } + break; + //calculate offset from symbol table. + } +} + void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[]){ if((int isUnconditional = strcmp(opcode, "b")) == 0 || (int isRegister = strcmp(opcode, "br")) == 0 || @@ -27,6 +60,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ instr->data.BranchData.BranchType = a64inst_CONDITIONAL; //instr->data.branchData.processOpData.cond = {remove first two chars of opcode} } + generateBranchOperands(instr, opcode, operandList); } else if((int isLoad = strcmp(opcode, "ldr")) == 0 || (int isStore = strcmp(opcode, "str")) == 0){ //loading/storing instruction; classify operands char *address = operandList[1]; diff --git a/src/twopassassembly.c b/src/twopassassembly.c index c701732..5729b61 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -13,7 +13,7 @@ void generateSymbolTable(a64inst_instruction instrs[], int numInstrs){ } } -word assembleBranch(a64inst_instruction *instr, int ){ +word assembleBranch(a64inst_instruction *instr){ word binInstr = 0; binInstr += (5^28); //101 start of branch instr switch (instr->data.BranchData.BranchType) @@ -41,6 +41,7 @@ word assembleBranch(a64inst_instruction *instr, int ){ default: break; } + return binInstr; } void firstPass(a64inst_instruction instrs[], int numInstrs){ From 1440ebd702eaf7159f3014b638ff2f383176c44d Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Thu, 6 Jun 2024 13:34:14 +0100 Subject: [PATCH 028/113] single data transfer & load literal --- src/twopassassembly.c | 53 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/twopassassembly.c b/src/twopassassembly.c index 97944ab..7643223 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -128,6 +128,53 @@ word dpr(a64inst_instruction cI) { return out; } +word sts(a64inst_instruction cI) { + a64inst_SingleTransferData data = cI.data.SingleTransferData; + word out = 0; + a64inst_SingleDataTransferData data2 = data.processOpData.singleDataTransferData; + // this deals with every bit in the 31-23 range apart from sf and U + out += (512+128+64+32)*(2^23); + int sf = data.regType; + int u = 0; + int l = data2.transferType; + int offset = 0; + int xn = data2.base; + int rt = data.target; + switch (data2.addressingMode) { + // register offset + case 2: + offset += 2074 + 64*data2.a64inst_addressingModeData.offsetReg; + break; + // unsigned offset + case 3: + offset += data2.a64inst_addressingModeData.unsignedOffset; + u = 1; + break; + // pre/post indexed + default: + offset = 1 + data2.addressingMode*2 + data2.a64inst_addressingModeData.indexedOffset*4; + break; + } + out += sf*(2^30); + out += u*(2^22); + out += offset*1024; + out += xn * 32; + out += rt; + return out; +} + +word ldl(a64inst_instruction cI) { + word out = 3*(2^27); + a64inst_SingleTransferData data = cI.data.SingleTransferData; + int sf = data.regType; + int simm19 = data.processOpData.loadLiteralData.offset; + int rt = data.target; + out += sf * (2^30); + out += simm19*32; + out += rt; + return out; +} + void secondPass(a64inst_instruction instrs[], int numInstrs, st* table){ //TODO: // iterate over instructions again, this time replacing labels @@ -142,6 +189,12 @@ void secondPass(a64inst_instruction instrs[], int numInstrs, st* table){ case a64inst_DPREGISTER: dpr(cI); break; + case a64inst_SINGLETRANSFER: + sts(cI); + break; + case a64inst_LOADLITERAL: + ldl(cI); + break; default: break; } From 1011d7be714b10cd1cb3ec3bd6ab61887fd297df Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 14:14:10 +0100 Subject: [PATCH 029/113] classify load store addressing type --- src/parser.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/parser.c b/src/parser.c index c3167f1..ba00c26 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,6 +14,36 @@ // - count operands and match type/values // - generate final a64inst and return +void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ + if(strcmp(operandList[0][strlen(operandList[0])-1], "{") == 0) { + //unsigned immediate offset + instr->data.processOpData.addressingMode = a64inst_UNSIGNED_OFFSET; + } else if(strcmp(operandList[0][strlen(operandList[0])-1], "]") == 0) { + //post-indexed + instr->data.processOpData.addressingMode = a64inst_POST_INDEXED; + } else { + //check second operand to distinguish between pre-indexed and register + if(strcmp(operandList[1][0], "#")==0){ + //pre-indexed + instr->data.processOpData.addressingMode = a64inst_PRE_INDEXED; + } else { + //register + instr->data.processOpData.addressingMode = a64inst_REGISTER_OFFSET; + } + } +} + +void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[]){ + switch(instr->data.type){ + case a64inst_SINGLETRANSFER: + calcluateAddressFormat(instr, operandList); + break; + case a64inst_LOADLITERAL: + break; + + } +} + void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ switch(instr->data.BranchType){ case a64inst_UNCONDITIONAL: From 872d4224f8ca1df526e017429d31e526762494a1 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 14:28:32 +0100 Subject: [PATCH 030/113] classify register type and base register for load/store --- src/parser.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index ba00c26..cbbabd7 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,6 +14,7 @@ // - count operands and match type/values // - generate final a64inst and return +//calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ if(strcmp(operandList[0][strlen(operandList[0])-1], "{") == 0) { //unsigned immediate offset @@ -36,6 +37,14 @@ void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[]){ switch(instr->data.type){ case a64inst_SINGLETRANSFER: + if(strcmp(operandList[0][0], "x")==0){ + //x-register + instr->data.regType = 1; + } else { + instr->data.regType = 0; + } + char *endptr; + instr->processOpData.base = strtol(operandList[0][0]+1, endptr, 2); calcluateAddressFormat(instr, operandList); break; case a64inst_LOADLITERAL: @@ -97,10 +106,11 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ if( *address == '['){ //type is register instr->type = a64inst_SINGLETRANSFER; + instr->data.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; if(isLoad == 0){ - instr->data.processOpData.singleDataTransferData.transferType = a64inst_LOAD; + instr->data.processOpData.transferType = a64inst_LOAD; } else { - instr->data.processOpData.singleDataTransferData.transferType = a64inst_STORE; + instr->data.processOpData.transferType = a64inst_STORE; } } else { instr->type = a64inst_LOADLITERAL; From 70e02768b65497afb594c4fcc28a15167a34be15 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 14:30:13 +0100 Subject: [PATCH 031/113] fix struct access --- src/parser.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/parser.c b/src/parser.c index cbbabd7..0f57bb7 100644 --- a/src/parser.c +++ b/src/parser.c @@ -44,7 +44,7 @@ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *o instr->data.regType = 0; } char *endptr; - instr->processOpData.base = strtol(operandList[0][0]+1, endptr, 2); + instr->data.target = strtol(operandList[0][0]+1, endptr, 2); calcluateAddressFormat(instr, operandList); break; case a64inst_LOADLITERAL: @@ -61,25 +61,25 @@ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *oper break; case a64inst_REGISTER: char *endptr; - instr->data.BranchData.processOpData.src = strtol(operandList[0] + 1, endptr, 2) + instr->data.processOpData.src = strtol(operandList[0] + 1, endptr, 2) break; case a64inst_CONDITIONAL: char* condition = strtok(strdup(opcode), "b."); condition = strtok(NULL, ""); if(strcmp(condition, "eq")==0){ - instr->data.BranchData.processOpData.cond = EQ; + instr->data.processOpData.cond = EQ; } else if (strcmp(condition, "ne")==0){ - instr->data.BranchData.processOpData.cond = NE; + instr->data.processOpData.cond = NE; } else if (strcmp(condition, "ge")==0){ - instr->data.BranchData.processOpData.cond = GE; + instr->data.processOpData.cond = GE; } else if (strcmp(condition, "lt")==0){ - instr->data.BranchData.processOpData.cond = LT; + instr->data.processOpData.cond = LT; } else if (strcmp(condition, "gt")==0){ - instr->data.BranchData.processOpData.cond = GT; + instr->data.processOpData.cond = GT; } else if (strcmp(condition, "le")==0){ - instr->data.BranchData.processOpData.cond = LE; + instr->data.processOpData.cond = LE; } else if (srtcmp(condition, "al")==0){ - instr->data.BranchData.processOpData.cond = AL; + instr->data.processOpData.cond = AL; } break; //calculate offset from symbol table. From f1ac860d6a99725d6030fd8ca26df9ca8e9028c3 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 14:46:16 +0100 Subject: [PATCH 032/113] rewrite address format calculation as { is not in the actual asm syntax --- src/parser.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/parser.c b/src/parser.c index 0f57bb7..037188f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,22 +16,19 @@ //calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ - if(strcmp(operandList[0][strlen(operandList[0])-1], "{") == 0) { - //unsigned immediate offset - instr->data.processOpData.addressingMode = a64inst_UNSIGNED_OFFSET; + + if(strcmp(operandList[1][strlen(operandList[1])-1], "!")==0){ + instr->data.processOpData.addressingMode = a64inst_PRE_INDEXED; } else if(strcmp(operandList[0][strlen(operandList[0])-1], "]") == 0) { //post-indexed instr->data.processOpData.addressingMode = a64inst_POST_INDEXED; + } else if( (strcmp(operandList[1][strlen(operandList[1])-1], "x") == 0) + || (strcmp(operandList[1][strlen(operandList[1])-1], "w") == 0)){ + //register + instr->data.processOpData.addressingMode = a64inst_REGISTER_OFFSET; } else { - //check second operand to distinguish between pre-indexed and register - if(strcmp(operandList[1][0], "#")==0){ - //pre-indexed - instr->data.processOpData.addressingMode = a64inst_PRE_INDEXED; - } else { - //register - instr->data.processOpData.addressingMode = a64inst_REGISTER_OFFSET; - } - } + instr->data.processOpData.addressingMode = a64inst_UNSIGNED_OFFSET; + } } void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[]){ From 5413d27026a9236a1a4e9b5e6a623f0c25a62a8c Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 14:46:56 +0100 Subject: [PATCH 033/113] adjust operand counts for calculating address format --- src/parser.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser.c b/src/parser.c index 037188f..e9e05cf 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,14 +16,14 @@ //calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ - - if(strcmp(operandList[1][strlen(operandList[1])-1], "!")==0){ + + if(strcmp(operandList[2][strlen(operandList[1])-1], "!")==0){ instr->data.processOpData.addressingMode = a64inst_PRE_INDEXED; - } else if(strcmp(operandList[0][strlen(operandList[0])-1], "]") == 0) { + } else if(strcmp(operandList[1][strlen(operandList[0])-1], "]") == 0) { //post-indexed instr->data.processOpData.addressingMode = a64inst_POST_INDEXED; - } else if( (strcmp(operandList[1][strlen(operandList[1])-1], "x") == 0) - || (strcmp(operandList[1][strlen(operandList[1])-1], "w") == 0)){ + } else if( (strcmp(operandList[2][strlen(operandList[1])-1], "x") == 0) + || (strcmp(operandList[2][strlen(operandList[1])-1], "w") == 0)){ //register instr->data.processOpData.addressingMode = a64inst_REGISTER_OFFSET; } else { From da50ee27a1c6d98a42b4810bf0561a1615713e17 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Thu, 6 Jun 2024 16:40:24 +0100 Subject: [PATCH 034/113] added label, branch, halt, directive to switch statement --- src/a64instruction_Directive.h | 2 +- src/twopassassembly.c | 30 +++++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/a64instruction_Directive.h b/src/a64instruction_Directive.h index 5c70dd4..da36624 100644 --- a/src/a64instruction_Directive.h +++ b/src/a64instruction_Directive.h @@ -1,5 +1,5 @@ #include "global.h" typedef struct { - dword value; + word value; } a64inst_DirectiveData; diff --git a/src/twopassassembly.c b/src/twopassassembly.c index 7643223..4710968 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -175,26 +175,46 @@ word ldl(a64inst_instruction cI) { return out; } -void secondPass(a64inst_instruction instrs[], int numInstrs, st* table){ +void secondPass(a64inst_instruction instrs[], int numInstrs, st* table, word arr[]){ //TODO: // iterate over instructions again, this time replacing labels // with values from symbol table // after a line has had all the values replaced, assemble it and append + int index = 0; + int lbl = 0; for (int i=0; i Date: Thu, 6 Jun 2024 17:10:18 +0100 Subject: [PATCH 035/113] fix use of cpp syntax --- src/parser.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/parser.c b/src/parser.c index e9e05cf..8b98cb0 100644 --- a/src/parser.c +++ b/src/parser.c @@ -84,8 +84,13 @@ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *oper } void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[]){ - if((int isUnconditional = strcmp(opcode, "b")) == 0 || - (int isRegister = strcmp(opcode, "br")) == 0 || + int isUnconditional = strcmp(opcode, "b"); + int isRegister = strcmp(opcode, "br"); + int isLoad = strcmp(opcode, "ldr"); + int isStore = strcmp(opcode, "str"); + + if(isUnconditional == 0 || + isRegister == 0 || strncmp(opcode, "b.", 2) == 0){ instr->type = a64inst_BRANCH; if(isUnconditional){ @@ -97,7 +102,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ //instr->data.branchData.processOpData.cond = {remove first two chars of opcode} } generateBranchOperands(instr, opcode, operandList); - } else if((int isLoad = strcmp(opcode, "ldr")) == 0 || (int isStore = strcmp(opcode, "str")) == 0){ + } else if(isLoad == 0 || isStore == 0){ //loading/storing instruction; classify operands char *address = operandList[1]; if( *address == '['){ From f57e0a786f10c3cf7c0021985c2fddda12765773 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 17:14:56 +0100 Subject: [PATCH 036/113] rename assembler funcs for clarity --- src/twopassassembly.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/twopassassembly.c b/src/twopassassembly.c index cda97ec..50fe029 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -49,7 +49,7 @@ st* firstPass(a64inst_instruction instrs[], int numInstrs){ } return &table; } -word dpi(a64inst_instruction cI) { +word assembleDPI(a64inst_instruction cI) { word out = 0; a64inst_DPImmediateData data = cI.data.DPImmediateData; //sf @@ -78,7 +78,7 @@ word dpi(a64inst_instruction cI) { return out; } -word dpr(a64inst_instruction cI) { +word assembleDPR(a64inst_instruction cI) { word out = 0; a64inst_DPRegisterData data = cI.data.DPRegisterData; // sf @@ -131,7 +131,7 @@ word dpr(a64inst_instruction cI) { return out; } -word sts(a64inst_instruction cI) { +word assembleSTS(a64inst_instruction cI) { a64inst_SingleTransferData data = cI.data.SingleTransferData; word out = 0; a64inst_SingleDataTransferData data2 = data.processOpData.singleDataTransferData; @@ -166,7 +166,7 @@ word sts(a64inst_instruction cI) { return out; } -word ldl(a64inst_instruction cI) { +word assembleLDL(a64inst_instruction cI) { word out = 3*(2^27); a64inst_SingleTransferData data = cI.data.SingleTransferData; int sf = data.regType; From 1fa33798bfe7e62f878d0fc6e81e02b5be56a7d7 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 17:38:54 +0100 Subject: [PATCH 037/113] rewrite DP classification logic --- src/parser.c | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/parser.c b/src/parser.c index 8b98cb0..3296dac 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,5 +1,6 @@ #include #include +#include #include "parser.h" #include "a64instruction.h" @@ -25,23 +26,23 @@ void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ } else if( (strcmp(operandList[2][strlen(operandList[1])-1], "x") == 0) || (strcmp(operandList[2][strlen(operandList[1])-1], "w") == 0)){ //register - instr->data.processOpData.addressingMode = a64inst_REGISTER_OFFSET; + instr->data.SingleTransferData.singleDataTransferData.processOpData.addressingMode = a64inst_REGISTER_OFFSET; } else { instr->data.processOpData.addressingMode = a64inst_UNSIGNED_OFFSET; } } void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[]){ - switch(instr->data.type){ + switch(instr->type){ case a64inst_SINGLETRANSFER: if(strcmp(operandList[0][0], "x")==0){ //x-register - instr->data.regType = 1; + instr->data.SingleTransferData.regType = 1; } else { - instr->data.regType = 0; + instr->data.SingleTransferData.regType = 0; } char *endptr; - instr->data.target = strtol(operandList[0][0]+1, endptr, 2); + instr->data.SingleTransferData.target = strtol(operandList[0][0]+1, endptr, 2); calcluateAddressFormat(instr, operandList); break; case a64inst_LOADLITERAL: @@ -51,38 +52,48 @@ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *o } void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ - switch(instr->data.BranchType){ + switch(instr->data.BranchData.BranchType){ case a64inst_UNCONDITIONAL: //define and sign extend immediate offset //use symbol table break; case a64inst_REGISTER: char *endptr; - instr->data.processOpData.src = strtol(operandList[0] + 1, endptr, 2) + instr->data.BranchData.processOpData.src = strtol(operandList[0] + 1, endptr, 2) break; case a64inst_CONDITIONAL: char* condition = strtok(strdup(opcode), "b."); condition = strtok(NULL, ""); if(strcmp(condition, "eq")==0){ - instr->data.processOpData.cond = EQ; + instr->data.branchData.processOpData.cond = EQ; } else if (strcmp(condition, "ne")==0){ - instr->data.processOpData.cond = NE; + instr->data.branchData.processOpData.cond = NE; } else if (strcmp(condition, "ge")==0){ - instr->data.processOpData.cond = GE; + instr->data.branchData.processOpData.cond = GE; } else if (strcmp(condition, "lt")==0){ - instr->data.processOpData.cond = LT; + instr->data.branchData.processOpData.cond = LT; } else if (strcmp(condition, "gt")==0){ - instr->data.processOpData.cond = GT; + instr->data.branchData.processOpData.cond = GT; } else if (strcmp(condition, "le")==0){ - instr->data.processOpData.cond = LE; + instr->data.branchData.processOpData.cond = LE; } else if (srtcmp(condition, "al")==0){ - instr->data.processOpData.cond = AL; + instr->data.branchData.processOpData.cond = AL; } break; //calculate offset from symbol table. } } +int isOperandRegister(char *operand){ + return((strcmp(operand[0], "x")==0) || (strcmp(operand[0], "w")==0)); +} + +int classifyDPInst(char *operandList[]){ + return(isOperandRegister(operandList[0]) && + isOperandRegister(operandList[1]) && + isOperandRegister(operandList[2])); +} + void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[]){ int isUnconditional = strcmp(opcode, "b"); int isRegister = strcmp(opcode, "br"); @@ -108,11 +119,11 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ if( *address == '['){ //type is register instr->type = a64inst_SINGLETRANSFER; - instr->data.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; + instr->data.singleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; if(isLoad == 0){ - instr->data.processOpData.transferType = a64inst_LOAD; + instr->data.SingleTransferData.transferType = a64inst_LOAD; } else { - instr->data.processOpData.transferType = a64inst_STORE; + instr->data.SingleTransferData.processOpData.transferType = a64inst_STORE; } } else { instr->type = a64inst_LOADLITERAL; @@ -121,7 +132,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } else { int numOperands = sizeof(operandList) / sizeof(operandList[0]) - if(numOperands==3){ + if(classifyDPInst(operandList)){ instr->type = a64inst_DPREGISTER; } else { instr->type = a64inst_DPIMMEDIATE; @@ -172,7 +183,7 @@ a64inst_instruction *parser(char asmLine[]){ instr->type = a64inst_LABEL; char *opcodeCpy = strdup(opcode); char *labelData = strtok(opcodeCpy, ":"); - instr->data.label = labelData; + instr->data.labelData.label = labelData; } else { //type is instruction int operandCount = 0; From 34060c3fad86449822591cbd48e57d3b4fac5c08 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 6 Jun 2024 17:54:35 +0100 Subject: [PATCH 038/113] fix hierarchy of struct access in parser --- src/parser.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/parser.c b/src/parser.c index 3296dac..1bcf31e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,21 +14,22 @@ // - match opcode to a64 struct types (DONE) // - count operands and match type/values // - generate final a64inst and return +// - CREATE FUNC TO TIDY UP OPERANDS IN DP //calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ if(strcmp(operandList[2][strlen(operandList[1])-1], "!")==0){ - instr->data.processOpData.addressingMode = a64inst_PRE_INDEXED; + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; } else if(strcmp(operandList[1][strlen(operandList[0])-1], "]") == 0) { //post-indexed - instr->data.processOpData.addressingMode = a64inst_POST_INDEXED; + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; } else if( (strcmp(operandList[2][strlen(operandList[1])-1], "x") == 0) || (strcmp(operandList[2][strlen(operandList[1])-1], "w") == 0)){ //register - instr->data.SingleTransferData.singleDataTransferData.processOpData.addressingMode = a64inst_REGISTER_OFFSET; + instr->data.SingleTransferData.processOpData.singleDataTransferData.processOpData.addressingMode = a64inst_REGISTER_OFFSET; } else { - instr->data.processOpData.addressingMode = a64inst_UNSIGNED_OFFSET; + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; } } @@ -59,25 +60,25 @@ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *oper break; case a64inst_REGISTER: char *endptr; - instr->data.BranchData.processOpData.src = strtol(operandList[0] + 1, endptr, 2) + instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, endptr, 2) break; case a64inst_CONDITIONAL: char* condition = strtok(strdup(opcode), "b."); condition = strtok(NULL, ""); if(strcmp(condition, "eq")==0){ - instr->data.branchData.processOpData.cond = EQ; + instr->data.branchData.processOpData.conditionalData.cond = EQ; } else if (strcmp(condition, "ne")==0){ - instr->data.branchData.processOpData.cond = NE; + instr->data.branchData.processOpData.conditionalData.cond = NE; } else if (strcmp(condition, "ge")==0){ - instr->data.branchData.processOpData.cond = GE; + instr->data.branchData.processOpData.conditionalData.cond = GE; } else if (strcmp(condition, "lt")==0){ - instr->data.branchData.processOpData.cond = LT; + instr->data.branchData.processOpData.conditionalData.cond = LT; } else if (strcmp(condition, "gt")==0){ - instr->data.branchData.processOpData.cond = GT; + instr->data.branchData.processOpData.conditionalData.cond = GT; } else if (strcmp(condition, "le")==0){ - instr->data.branchData.processOpData.cond = LE; + instr->data.branchData.processOpData.conditionalData.cond = LE; } else if (srtcmp(condition, "al")==0){ - instr->data.branchData.processOpData.cond = AL; + instr->data.branchData.processOpData.conditionalData.cond = AL; } break; //calculate offset from symbol table. From 04dda33987e478800ee7feee263221ab58b97a6f Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Sun, 9 Jun 2024 22:21:30 +0100 Subject: [PATCH 039/113] calculate base register from input --- src/parser.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/parser.c b/src/parser.c index 1bcf31e..192278e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -18,14 +18,21 @@ //calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ + char *baseRegister = operandList[1]; + baseRegister++; + baseRegister++; + char *endptr; + uint8_t base = strtol(baseRegister, endptr, 10); + instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base; if(strcmp(operandList[2][strlen(operandList[1])-1], "!")==0){ instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; + } else if(strcmp(operandList[1][strlen(operandList[0])-1], "]") == 0) { //post-indexed instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; - } else if( (strcmp(operandList[2][strlen(operandList[1])-1], "x") == 0) - || (strcmp(operandList[2][strlen(operandList[1])-1], "w") == 0)){ + } else if( (strcmp(operandList[2][0], "x") == 0) + || (strcmp(operandList[2][0], "w") == 0)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.processOpData.addressingMode = a64inst_REGISTER_OFFSET; } else { @@ -43,7 +50,7 @@ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *o instr->data.SingleTransferData.regType = 0; } char *endptr; - instr->data.SingleTransferData.target = strtol(operandList[0][0]+1, endptr, 2); + instr->data.SingleTransferData.target = strtol(operandList[0][0]+1, endptr, 10); calcluateAddressFormat(instr, operandList); break; case a64inst_LOADLITERAL: @@ -60,7 +67,7 @@ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *oper break; case a64inst_REGISTER: char *endptr; - instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, endptr, 2) + instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, endptr, 10) break; case a64inst_CONDITIONAL: char* condition = strtok(strdup(opcode), "b."); From 4098ea5a5f26c8d243195219df43d376fb6a3fdb Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Sun, 9 Jun 2024 22:43:37 +0100 Subject: [PATCH 040/113] calculate offsets for different store instructions --- src/parser.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/src/parser.c b/src/parser.c index 192278e..699bdb6 100644 --- a/src/parser.c +++ b/src/parser.c @@ -17,8 +17,8 @@ // - CREATE FUNC TO TIDY UP OPERANDS IN DP //calculate offsets from string -void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ - char *baseRegister = operandList[1]; +void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){ + char *baseRegister = strdup(operandList[1]); baseRegister++; baseRegister++; char *endptr; @@ -27,20 +27,39 @@ void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[]){ if(strcmp(operandList[2][strlen(operandList[1])-1], "!")==0){ instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; - + char *offsetParam = strdup(operandList[2]); + offsetParam++; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(operandList[2], endptr, 10); } else if(strcmp(operandList[1][strlen(operandList[0])-1], "]") == 0) { //post-indexed instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; + char *offsetParam = strdup(operandList[2]); + offsetParam++; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(operandList[2], endptr, 10); } else if( (strcmp(operandList[2][0], "x") == 0) || (strcmp(operandList[2][0], "w") == 0)){ //register - instr->data.SingleTransferData.processOpData.singleDataTransferData.processOpData.addressingMode = a64inst_REGISTER_OFFSET; + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; + char *offsetRegister = strdup(operandList[2]); + offsetRegister++; + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingModeData.offsetReg = strtol(offsetRegister, endptr, 10); } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; + if(numOperands==3){ + char *offsetParam = strdup(operandList[2]); + offsetParam++; + int offset = strtol(operandList[2], endptr, 10); + if(instr->data.SingleTransferData.regType == 1){ + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; + } else { + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/4; + + } + } } } -void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[]){ +void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){ switch(instr->type){ case a64inst_SINGLETRANSFER: if(strcmp(operandList[0][0], "x")==0){ @@ -51,7 +70,7 @@ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *o } char *endptr; instr->data.SingleTransferData.target = strtol(operandList[0][0]+1, endptr, 10); - calcluateAddressFormat(instr, operandList); + calcluateAddressFormat(instr, operandList, numOperands); break; case a64inst_LOADLITERAL: break; @@ -102,7 +121,7 @@ int classifyDPInst(char *operandList[]){ isOperandRegister(operandList[2])); } -void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[]){ +void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[], int numOperands){ int isUnconditional = strcmp(opcode, "b"); int isRegister = strcmp(opcode, "br"); int isLoad = strcmp(opcode, "ldr"); @@ -149,7 +168,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } } -char *tokeniseOperands(char* str, int operandCount, char *operands[]){ +char *tokeniseOperands(char* str, int operandCount, char *operands[], int numOperands){ char *operandsDupe = strdup(str); char *operand = strtok(operandsDupe, OPERAND_DELIMITER); operands[0] = operand; @@ -159,11 +178,13 @@ char *tokeniseOperands(char* str, int operandCount, char *operands[]){ operand = strtok(NULL, OPERAND_DELIMITER); operands[operandCount] = operand; } + numOperands = operandCount+1; } //takes inputted assembly line and returns a //pointer to an abstract representation of the instruction a64inst_instruction *parser(char asmLine[]){ + int numOperands = 0; a64inst_instruction *instr = malloc(sizeof(a64inst_instruction)); if (instr == NULL){ exit(EXIT_FAILURE); @@ -196,7 +217,7 @@ a64inst_instruction *parser(char asmLine[]){ //type is instruction int operandCount = 0; const char *operandList[4]; - tokeniseOperands(operands, &operandCount, operandList); + tokeniseOperands(operands, &operandCount, operandList, &numOperands); } From 44bb327b7d13a85d5095c0265cb9dd99fda9670d Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Sun, 9 Jun 2024 22:54:27 +0100 Subject: [PATCH 041/113] begin formulating parser pipeline --- src/parser.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/parser.c b/src/parser.c index 699bdb6..40e264f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -36,8 +36,8 @@ void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], in char *offsetParam = strdup(operandList[2]); offsetParam++; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(operandList[2], endptr, 10); - } else if( (strcmp(operandList[2][0], "x") == 0) - || (strcmp(operandList[2][0], "w") == 0)){ + } else if( (isOperandRegister(operandList[2][0], "x") == 1) + || (isOperandRegister(operandList[2][0], "w") == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; char *offsetRegister = strdup(operandList[2]); @@ -53,7 +53,6 @@ void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], in instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/4; - } } } @@ -218,6 +217,27 @@ a64inst_instruction *parser(char asmLine[]){ int operandCount = 0; const char *operandList[4]; tokeniseOperands(operands, &operandCount, operandList, &numOperands); + classifyOpcode(opcode, instr, operandList, operandCount); + switch(instr->type){ + case a64inst_BRANCH: + generateBranchOperands(instr, opcode, operandList); + break; + case a64inst_SINGLETRANSFER: + generateLoadStoreOperands(instr, opcode, operandList, numOperands); + break; + case a64inst_LOADLITERAL: + generateLoadStoreOperands(instr, opcode, operandList, numOperands); + break; + case a64inst_DPREGISTER: + //generate DP operands; + break; + case a64inst_DPIMMEDIATE: + //generate DP operands; + break; + default: + printf("INVALID INSTRUCTION"); + break; + } } From d0be871e8f5bc3699a9710ea4ffaada01fc489c0 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Sun, 9 Jun 2024 23:07:45 +0100 Subject: [PATCH 042/113] generate offset operand for load literal with immediate value --- src/parser.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index 40e264f..9a1ac25 100644 --- a/src/parser.c +++ b/src/parser.c @@ -153,11 +153,19 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } } else { instr->type = a64inst_LOADLITERAL; - //instr->data.processOpData.offset = {} to be defined by symbol table + if(strcmp(operandList[0][0], "#")==0){ + //offset is immediate + char *immOffset = strdup(operandList[0]) + immOffset++; + char *endptr; + int offset = strtol(immOffset, endptr, 10); + instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; + } else { + //offset is literal, use symbol table and calculate difference + } } } else { - int numOperands = sizeof(operandList) / sizeof(operandList[0]) if(classifyDPInst(operandList)){ instr->type = a64inst_DPREGISTER; } else { From 92719b6b33159a328624e95d341bc83ada815719 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Sun, 9 Jun 2024 23:10:39 +0100 Subject: [PATCH 043/113] comments for clarity in parser --- src/parser.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 9a1ac25..c0a3caa 100644 --- a/src/parser.c +++ b/src/parser.c @@ -136,7 +136,6 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ instr->data.BranchData.BranchType = a64inst_REGISTER; } else { instr->data.BranchData.BranchType = a64inst_CONDITIONAL; - //instr->data.branchData.processOpData.cond = {remove first two chars of opcode} } generateBranchOperands(instr, opcode, operandList); } else if(isLoad == 0 || isStore == 0){ @@ -224,8 +223,11 @@ a64inst_instruction *parser(char asmLine[]){ //type is instruction int operandCount = 0; const char *operandList[4]; + //generate list of operands tokeniseOperands(operands, &operandCount, operandList, &numOperands); + //categorise instruction type from opcode and operands classifyOpcode(opcode, instr, operandList, operandCount); + //define struct values according to operands and type switch(instr->type){ case a64inst_BRANCH: generateBranchOperands(instr, opcode, operandList); From 6153db77377dc812983096fff61e9337f49fa88d Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 11 Jun 2024 17:35:23 +0100 Subject: [PATCH 044/113] fix compile issues git add . :) --- src/assemble.c | 1 + src/emulate.c | 6 +++ src/parser.c | 130 ++++++++++++++++++++++++------------------------- src/parser.h | 3 -- 4 files changed, 72 insertions(+), 68 deletions(-) mode change 100755 => 100644 src/emulate.c diff --git a/src/assemble.c b/src/assemble.c index ae760c0..aa54b4e 100755 --- a/src/assemble.c +++ b/src/assemble.c @@ -1,5 +1,6 @@ #include #include +#include "parser.c" int main(int argc, char **argv) { diff --git a/src/emulate.c b/src/emulate.c old mode 100755 new mode 100644 index 82245e9..be41f56 --- a/src/emulate.c +++ b/src/emulate.c @@ -8,6 +8,11 @@ #include "decode.h" #include "execute.h" +int main(int arg, char **argv){ + return EXIT_SUCCESS; +} + +/* extern a64inst_instruction *decode(word w); int main(int argc, char **argv) { @@ -59,3 +64,4 @@ int main(int argc, char **argv) { return EXIT_SUCCESS; } +*/ diff --git a/src/parser.c b/src/parser.c index c0a3caa..fcef57b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -12,43 +12,37 @@ // - use string matching to get opcode, and operands (DONE) // - check operand count (DONE) // - match opcode to a64 struct types (DONE) -// - count operands and match type/values -// - generate final a64inst and return +// - count operands and match type/values (DONE) +// - generate final a64inst and return (TODO: DP instrs) +// - ASK ABOUT OFFSET CALCULATION // - CREATE FUNC TO TIDY UP OPERANDS IN DP +int isOperandRegister(char *operand){ + return((strcmp(&(operand[0]), "x")==0) || (strcmp(&(operand[0]), "w")==0)); +} + //calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){ - char *baseRegister = strdup(operandList[1]); - baseRegister++; - baseRegister++; char *endptr; - uint8_t base = strtol(baseRegister, endptr, 10); + uint8_t base = strtol(&(operandList[1][2]), &endptr, 10); instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base; - if(strcmp(operandList[2][strlen(operandList[1])-1], "!")==0){ + if(strcmp(&(operandList[2][strlen(operandList[1])-1]), "!")==0){ instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; - char *offsetParam = strdup(operandList[2]); - offsetParam++; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(operandList[2], endptr, 10); - } else if(strcmp(operandList[1][strlen(operandList[0])-1], "]") == 0) { + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); + } else if(strcmp(&(operandList[1][strlen(operandList[0])-1]), "]") == 0) { //post-indexed instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; - char *offsetParam = strdup(operandList[2]); - offsetParam++; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(operandList[2], endptr, 10); - } else if( (isOperandRegister(operandList[2][0], "x") == 1) - || (isOperandRegister(operandList[2][0], "w") == 1)){ + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); + } else if( (isOperandRegister(&(operandList[2][0])) == 1) + || (isOperandRegister(&(operandList[2][0])) == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; - char *offsetRegister = strdup(operandList[2]); - offsetRegister++; - instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingModeData.offsetReg = strtol(offsetRegister, endptr, 10); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = strtol(&(operandList[2][1]), &endptr, 10); } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; if(numOperands==3){ - char *offsetParam = strdup(operandList[2]); - offsetParam++; - int offset = strtol(operandList[2], endptr, 10); + int offset = strtol(&(operandList[2][1]), &endptr, 10); if(instr->data.SingleTransferData.regType == 1){ instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; } else { @@ -61,59 +55,61 @@ void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], in void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){ switch(instr->type){ case a64inst_SINGLETRANSFER: - if(strcmp(operandList[0][0], "x")==0){ + if(strcmp(&(operandList[0][0]), "x")==0){ //x-register instr->data.SingleTransferData.regType = 1; } else { instr->data.SingleTransferData.regType = 0; } char *endptr; - instr->data.SingleTransferData.target = strtol(operandList[0][0]+1, endptr, 10); + instr->data.SingleTransferData.target = strtol(&(operandList[0][0])+1, &endptr, 10); calcluateAddressFormat(instr, operandList, numOperands); break; case a64inst_LOADLITERAL: break; + default: + break; } } void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ + char *endptr; switch(instr->data.BranchData.BranchType){ case a64inst_UNCONDITIONAL: //define and sign extend immediate offset //use symbol table + printf("unconditional"); break; case a64inst_REGISTER: - char *endptr; - instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, endptr, 10) + instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, &endptr, 10); break; case a64inst_CONDITIONAL: - char* condition = strtok(strdup(opcode), "b."); - condition = strtok(NULL, ""); - if(strcmp(condition, "eq")==0){ - instr->data.branchData.processOpData.conditionalData.cond = EQ; - } else if (strcmp(condition, "ne")==0){ - instr->data.branchData.processOpData.conditionalData.cond = NE; - } else if (strcmp(condition, "ge")==0){ - instr->data.branchData.processOpData.conditionalData.cond = GE; - } else if (strcmp(condition, "lt")==0){ - instr->data.branchData.processOpData.conditionalData.cond = LT; - } else if (strcmp(condition, "gt")==0){ - instr->data.branchData.processOpData.conditionalData.cond = GT; - } else if (strcmp(condition, "le")==0){ - instr->data.branchData.processOpData.conditionalData.cond = LE; - } else if (srtcmp(condition, "al")==0){ - instr->data.branchData.processOpData.conditionalData.cond = AL; + { + char *condition = NULL; + condition = strcpy(condition, opcode); + condition += 2; + if(strcmp(condition, "eq")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = EQ; + } else if (strcmp(condition, "ne")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = NE; + } else if (strcmp(condition, "ge")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = GE; + } else if (strcmp(condition, "lt")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = LT; + } else if (strcmp(condition, "gt")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = GT; + } else if (strcmp(condition, "le")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = LE; + } else if (strcmp(condition, "al")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = AL; + } + break; + //calculate offset from symbol table. } - break; - //calculate offset from symbol table. } } -int isOperandRegister(char *operand){ - return((strcmp(operand[0], "x")==0) || (strcmp(operand[0], "w")==0)); -} - int classifyDPInst(char *operandList[]){ return(isOperandRegister(operandList[0]) && isOperandRegister(operandList[1]) && @@ -144,20 +140,21 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ if( *address == '['){ //type is register instr->type = a64inst_SINGLETRANSFER; - instr->data.singleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; + instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; if(isLoad == 0){ - instr->data.SingleTransferData.transferType = a64inst_LOAD; + instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD; } else { - instr->data.SingleTransferData.processOpData.transferType = a64inst_STORE; + instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE; } } else { instr->type = a64inst_LOADLITERAL; - if(strcmp(operandList[0][0], "#")==0){ + if(operandList[0][0] =='#'){ //offset is immediate - char *immOffset = strdup(operandList[0]) + char *immOffset = NULL; + immOffset = strcpy(immOffset, operandList[0]); immOffset++; - char *endptr; - int offset = strtol(immOffset, endptr, 10); + char *endptr = NULL; + int offset = strtol(immOffset, &endptr, 10); instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; } else { //offset is literal, use symbol table and calculate difference @@ -174,17 +171,18 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } } -char *tokeniseOperands(char* str, int operandCount, char *operands[], int numOperands){ - char *operandsDupe = strdup(str); +void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){ + char *operandsDupe = NULL; + operandsDupe = strcpy(operandsDupe, str); char *operand = strtok(operandsDupe, OPERAND_DELIMITER); operands[0] = operand; while (operand != NULL){ - operandCount++; + *operandCount = *(operandCount)+1; operand = strtok(NULL, OPERAND_DELIMITER); - operands[operandCount] = operand; + operands[*(operandCount)] = operand; } - numOperands = operandCount+1; + *(numOperands) = *(operandCount)+1; } //takes inputted assembly line and returns a @@ -203,7 +201,8 @@ a64inst_instruction *parser(char asmLine[]){ //"opcode operand1, {operand2}, ..." //duplicated as strtok modifies the input string - char *stringptr = strdup(asmLine); + char *stringptr = NULL; + stringptr = strcpy(stringptr, asmLine); char *opcode = strtok(stringptr, " "); char *operands = strtok(NULL, ""); @@ -212,17 +211,18 @@ a64inst_instruction *parser(char asmLine[]){ //type is directive instr->type = a64inst_DIRECTIVE; - } else if(strcmp(opcode[strlen(opcode)-1], ":") == 0) { + } else if(opcode[strlen(opcode)-1]== ':') { //type is label //add to symbol table instr->type = a64inst_LABEL; - char *opcodeCpy = strdup(opcode); + char *opcodeCpy = NULL; + opcodeCpy = strcpy(opcodeCpy, opcode); char *labelData = strtok(opcodeCpy, ":"); - instr->data.labelData.label = labelData; + instr->data.LabelData.label = labelData; } else { //type is instruction int operandCount = 0; - const char *operandList[4]; + char *operandList[4]; //generate list of operands tokeniseOperands(operands, &operandCount, operandList, &numOperands); //categorise instruction type from opcode and operands diff --git a/src/parser.h b/src/parser.h index 5c3a461..e303b58 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,5 +1,2 @@ -#ifndef __PARSERCONSTS__ -#define __PARSERCONSTS__ #define OPERAND_DELIMITER ", " #define HALT_ASM_CMD "and x0, x0, x0" -#endif \ No newline at end of file From 173bdf08ec6a6e59d0e63f18c29fb3799f1c3831 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 11 Jun 2024 20:23:00 +0100 Subject: [PATCH 045/113] fix incorrect fileio.c --- src/fileio.c | 88 ++++++++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/src/fileio.c b/src/fileio.c index 1dcdd77..96e5cd3 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -1,48 +1,56 @@ -#include #include #include -#include "fileio.h" -#include "global.h" -/* Loads a binary file located at filePath to memory, taking up a block of exactly memorySize bytes, - and returns the starting address of the data. If memorySize is insufficient to store the entire file, - an appropriate error is reported. Excess memory is set to 0 bit values. */ +#define MAX_ASM_LINE_LENGTH 100 -byte *fileio_loadBin(const char *filePath, size_t memorySize) { - FILE *file = fopen(filePath, "rb"); - if (file == NULL) { - fprintf(stderr, "Couldn't open %s!\n", filePath); - exit(EXIT_FAILURE); - } +int isValidFileFormat(char filename[], char expectedExtension[]){ + int *pointLoc = strrchr(filename, '.'); - byte *fileData = malloc(memorySize); - if (fileData == NULL) { - fprintf(stderr, "Ran out of memory attempting to load %s!\n", filePath); - exit(EXIT_FAILURE); - } - - // Loop while reading from the file yields data. Only terminates if EOF is reached or ERROR occurs. - // Explicitly deal with attempting to write too much data to memory block, rather than allow segfault. - const size_t byteCount = memorySize/sizeof(byte); - int i = 0; - while (fread(fileData + i, sizeof(byte), 1, file)) { - if (i >= byteCount) { - fprintf(stderr, "Attempting to load binary %s to memory of smaller size %zu!\n", filePath, memorySize); - exit(EXIT_FAILURE); + if(pointLoc != NULL){ + if(strcmp(pointLoc, expectedExtension)==0){ + return(1); } - - i++; } - - if (ferror(file)) { - fprintf(stderr, "Encountered error attempting to read %s!\n", filePath); - exit(EXIT_FAILURE); - } - assert(fclose(file) != EOF); - - // If part of memory block was left uninitialized, initialize it to zero. - if (i < byteCount) { - memset(fileData + i, 0, (byteCount - i) * sizeof(byte)); - } - return fileData; + return(0); +} + +int writeBinaryFile(word instrs[], char outputFile[]){ + + if (!isValidFileFormat(filename, "bin")){ + return(-1); + } + + FILE *fp; + + fp = fopen(outputFile, "wb"); + + if(fp == NULL){ + return(-1); + } + + fwrite(instrs, 4, sizeof(instrs), fp); + fclose(fp); + + return(0); +} + +int readAssemblyFile(char inputFile[]) { + if (!isValidFileFormat(filename, "s")){ + return(1); + } + + FILE *fp; + char savedLine[MAX_ASM_LINE_LENGTH]; + + fp = fopen(inputFile, "r"); + + if(fp == NULL){ + return(-1); + } + + while (fgets(savedLine, MAX_ASM_LINE_LENGTH-1, fp) != NULL) { + //pass line to parser + } + + return(0); } From 647f47e39d4ecf6019bd992a37e8c286048ba9e4 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Tue, 11 Jun 2024 21:16:47 +0100 Subject: [PATCH 046/113] rewrite fileio to load file into memory --- src/fileio.c | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/src/fileio.c b/src/fileio.c index 96e5cd3..32cfdb2 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -1,7 +1,7 @@ #include #include -#define MAX_ASM_LINE_LENGTH 100 +#define MAX_ASM_LINE_LENGTH 30 int isValidFileFormat(char filename[], char expectedExtension[]){ int *pointLoc = strrchr(filename, '.'); @@ -34,23 +34,47 @@ int writeBinaryFile(word instrs[], char outputFile[]){ return(0); } -int readAssemblyFile(char inputFile[]) { +char **readAssemblyFile(char inputFile[]) { if (!isValidFileFormat(filename, "s")){ - return(1); + return(NULL); } - FILE *fp; - char savedLine[MAX_ASM_LINE_LENGTH]; + FILE *fp = fopen(inputFile, "r"); - fp = fopen(inputFile, "r"); + if (fp == NULL){ + return(NULL); + } - if(fp == NULL){ - return(-1); + int lineCount = 0; + char ch; + while ((ch = fgetc(fp)) != EOF) + { + if (ch == '\n' || ch == '\0') + { + count++; + } } - while (fgets(savedLine, MAX_ASM_LINE_LENGTH-1, fp) != NULL) { - //pass line to parser + char **heap = malloc(sizeof(char *) * count); + + rewind(fp); + + for( int i=0; i Date: Tue, 11 Jun 2024 21:23:26 +0100 Subject: [PATCH 047/113] fix syntax errors in fileio.c --- src/assemble.c | 2 +- src/fileio.c | 20 ++++++++++---------- src/twopassassembly.c | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/assemble.c b/src/assemble.c index aa54b4e..4ed2733 100755 --- a/src/assemble.c +++ b/src/assemble.c @@ -1,8 +1,8 @@ #include #include #include "parser.c" +#include "fileio.c" int main(int argc, char **argv) { - return EXIT_SUCCESS; } diff --git a/src/fileio.c b/src/fileio.c index 32cfdb2..85fd8d1 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -4,7 +4,7 @@ #define MAX_ASM_LINE_LENGTH 30 int isValidFileFormat(char filename[], char expectedExtension[]){ - int *pointLoc = strrchr(filename, '.'); + char *pointLoc = strrchr(filename, '.'); if(pointLoc != NULL){ if(strcmp(pointLoc, expectedExtension)==0){ @@ -14,9 +14,9 @@ int isValidFileFormat(char filename[], char expectedExtension[]){ return(0); } -int writeBinaryFile(word instrs[], char outputFile[]){ +int writeBinaryFile(word instrs[], char outputFile[], int numInstrs){ - if (!isValidFileFormat(filename, "bin")){ + if (!isValidFileFormat(outputFile, "bin")){ return(-1); } @@ -28,14 +28,14 @@ int writeBinaryFile(word instrs[], char outputFile[]){ return(-1); } - fwrite(instrs, 4, sizeof(instrs), fp); + fwrite(instrs, 4, sizeof(word) * numInstrs, fp); fclose(fp); return(0); } char **readAssemblyFile(char inputFile[]) { - if (!isValidFileFormat(filename, "s")){ + if (!isValidFileFormat(inputFile, "s")){ return(NULL); } @@ -51,24 +51,24 @@ char **readAssemblyFile(char inputFile[]) { { if (ch == '\n' || ch == '\0') { - count++; + lineCount++; } } - char **heap = malloc(sizeof(char *) * count); + char **heap = malloc(sizeof(char *) * lineCount); rewind(fp); - for( int i=0; i Date: Tue, 11 Jun 2024 23:02:26 +0100 Subject: [PATCH 048/113] Added all instruction in subfolder a64instruction --- src/{ => a64instruction}/a64instruction.h | 0 src/{ => a64instruction}/a64instruction_Branch.h | 2 +- src/{ => a64instruction}/a64instruction_DP.h | 0 src/{ => a64instruction}/a64instruction_DPImmediate.h | 0 src/{ => a64instruction}/a64instruction_DPRegister.h | 0 src/{ => a64instruction}/a64instruction_Directive.h | 2 +- src/{ => a64instruction}/a64instruction_Label.h | 0 src/{ => a64instruction}/a64instruction_SingleTransfer.h | 2 +- src/{ => a64instruction}/a64instruction_global.h | 0 src/assemble.c | 4 ++-- src/decode.h | 2 +- src/emulate.c | 2 +- src/execute.h | 2 +- src/parser.c | 2 +- src/twopassassembly.c | 2 +- 15 files changed, 10 insertions(+), 10 deletions(-) rename src/{ => a64instruction}/a64instruction.h (100%) rename src/{ => a64instruction}/a64instruction_Branch.h (97%) rename src/{ => a64instruction}/a64instruction_DP.h (100%) rename src/{ => a64instruction}/a64instruction_DPImmediate.h (100%) rename src/{ => a64instruction}/a64instruction_DPRegister.h (100%) rename src/{ => a64instruction}/a64instruction_Directive.h (71%) rename src/{ => a64instruction}/a64instruction_Label.h (100%) rename src/{ => a64instruction}/a64instruction_SingleTransfer.h (98%) rename src/{ => a64instruction}/a64instruction_global.h (100%) diff --git a/src/a64instruction.h b/src/a64instruction/a64instruction.h similarity index 100% rename from src/a64instruction.h rename to src/a64instruction/a64instruction.h diff --git a/src/a64instruction_Branch.h b/src/a64instruction/a64instruction_Branch.h similarity index 97% rename from src/a64instruction_Branch.h rename to src/a64instruction/a64instruction_Branch.h index b732d6d..2e525bb 100644 --- a/src/a64instruction_Branch.h +++ b/src/a64instruction/a64instruction_Branch.h @@ -1,6 +1,6 @@ #include #include "a64instruction_global.h" -#include "global.h" +#include "../global.h" typedef enum { a64inst_UNCONDITIONAL = 0, diff --git a/src/a64instruction_DP.h b/src/a64instruction/a64instruction_DP.h similarity index 100% rename from src/a64instruction_DP.h rename to src/a64instruction/a64instruction_DP.h diff --git a/src/a64instruction_DPImmediate.h b/src/a64instruction/a64instruction_DPImmediate.h similarity index 100% rename from src/a64instruction_DPImmediate.h rename to src/a64instruction/a64instruction_DPImmediate.h diff --git a/src/a64instruction_DPRegister.h b/src/a64instruction/a64instruction_DPRegister.h similarity index 100% rename from src/a64instruction_DPRegister.h rename to src/a64instruction/a64instruction_DPRegister.h diff --git a/src/a64instruction_Directive.h b/src/a64instruction/a64instruction_Directive.h similarity index 71% rename from src/a64instruction_Directive.h rename to src/a64instruction/a64instruction_Directive.h index da36624..5506b80 100644 --- a/src/a64instruction_Directive.h +++ b/src/a64instruction/a64instruction_Directive.h @@ -1,4 +1,4 @@ -#include "global.h" +#include "../global.h" typedef struct { word value; diff --git a/src/a64instruction_Label.h b/src/a64instruction/a64instruction_Label.h similarity index 100% rename from src/a64instruction_Label.h rename to src/a64instruction/a64instruction_Label.h diff --git a/src/a64instruction_SingleTransfer.h b/src/a64instruction/a64instruction_SingleTransfer.h similarity index 98% rename from src/a64instruction_SingleTransfer.h rename to src/a64instruction/a64instruction_SingleTransfer.h index f661116..cbd7529 100644 --- a/src/a64instruction_SingleTransfer.h +++ b/src/a64instruction/a64instruction_SingleTransfer.h @@ -1,6 +1,6 @@ #include #include "a64instruction_global.h" -#include "global.h" +#include "../global.h" typedef enum { a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER = 1, diff --git a/src/a64instruction_global.h b/src/a64instruction/a64instruction_global.h similarity index 100% rename from src/a64instruction_global.h rename to src/a64instruction/a64instruction_global.h diff --git a/src/assemble.c b/src/assemble.c index 4ed2733..856a097 100755 --- a/src/assemble.c +++ b/src/assemble.c @@ -1,7 +1,7 @@ #include #include -#include "parser.c" -#include "fileio.c" +#include "parser.h" +#include "fileio.h" int main(int argc, char **argv) { return EXIT_SUCCESS; diff --git a/src/decode.h b/src/decode.h index 132130e..d509ae1 100644 --- a/src/decode.h +++ b/src/decode.h @@ -1,5 +1,5 @@ #include "global.h" -#include "a64instruction.h" +#include "a64instruction/a64instruction.h" #define HALT_WORD 0x8a000000 diff --git a/src/emulate.c b/src/emulate.c index be41f56..4c7f42e 100644 --- a/src/emulate.c +++ b/src/emulate.c @@ -1,6 +1,6 @@ #include #include -#include "a64instruction.h" +#include "a64instruction/a64instruction.h" #include "emulator.h" #include "fileio.h" #include "global.h" diff --git a/src/execute.h b/src/execute.h index fcf39ec..8b691e6 100644 --- a/src/execute.h +++ b/src/execute.h @@ -1,6 +1,6 @@ #ifndef __EXECUTE__ #define __EXECUTE__ -#include "a64instruction.h" +#include "a64instruction/a64instruction.h" #include "emulator.h" void execute(Machine *state, a64inst_instruction *inst); diff --git a/src/parser.c b/src/parser.c index fcef57b..807f591 100644 --- a/src/parser.c +++ b/src/parser.c @@ -3,7 +3,7 @@ #include #include "parser.h" -#include "a64instruction.h" +#include "a64instruction/a64instruction.h" //takes input string, read from asm file and returns //input as an a64 instruction diff --git a/src/twopassassembly.c b/src/twopassassembly.c index be94e5c..7f13f95 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -1,5 +1,5 @@ #include "global.h" -#include "a64instruction.h" +#include "a64instruction/a64instruction.h" #include "symboltable.h" //generates assembled code based on two pass assembly method From 31c1ae90f7109947460c64023c889e5d6616f02d Mon Sep 17 00:00:00 2001 From: sBubshait Date: Tue, 11 Jun 2024 23:13:23 +0100 Subject: [PATCH 049/113] Attempt to make code compile, fix syntax errors --- src/parser.c | 1 + src/symboltable.c | 48 +++++++++++++++++++++++++++++++++++++++++++ src/twopassassembly.c | 20 +++++++++--------- 3 files changed, 59 insertions(+), 10 deletions(-) create mode 100644 src/symboltable.c diff --git a/src/parser.c b/src/parser.c index 807f591..8cb6609 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,4 +1,5 @@ #include +#include #include #include #include "parser.h" diff --git a/src/symboltable.c b/src/symboltable.c new file mode 100644 index 0000000..a4c99e1 --- /dev/null +++ b/src/symboltable.c @@ -0,0 +1,48 @@ +#include + +typedef struct st st; +typedef struct node node; // forward declaration + +typedef struct node { + const void* key; + void* value; + node* prev; + node* next; +} node; + +struct st { + node* head; + node* tail; +}; + +// add new node to the end +void st_add(st table, void* key, void* value) { + node n = {key, value, table.tail}; + if (table.head == NULL) { + table.head = &n; + table.tail = &n; + } + else { + (*(table.tail)).next = &n; + table.tail = &n; + } +} + +void* nodeSearch(node* n, void* key) { + if (n != NULL) { + if ((*n).key == key) { + return (*n).value; + } + else { + return nodeSearch((*n).next, key); + } + } + else { + return NULL; + } +} + +// returns the pointer to key of the specified node, or null, if it does not exist +void* st_search(st table, void* key) { + return nodeSearch(table.head, key); +} diff --git a/src/twopassassembly.c b/src/twopassassembly.c index 7f13f95..b5dccae 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -1,6 +1,6 @@ #include "global.h" #include "a64instruction/a64instruction.h" -#include "symboltable.h" +#include "symboltable.c" //generates assembled code based on two pass assembly method @@ -12,7 +12,7 @@ word assembleBranch(a64inst_instruction *instr){ case a64inst_UNCONDITIONAL: //000101 //25-0: sign extended simm26 - binInstr += instr->data.processOpData.unconditionalOffset; + binInstr += instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset; break; case a64inst_REGISTER: //10000 @@ -20,14 +20,14 @@ word assembleBranch(a64inst_instruction *instr){ //000000 //9-5: address from register //0000 - binInstr += ((instr->processOpData.src)^5); + binInstr += ((instr->data.BranchData.processOpData.registerData.src)^5); break; case a64inst_CONDITIONAL: // 01010100 // 25-5: sign extended offset // 4-0: 0{condition} - binInstr += ((instr->processOpData.offset)^5); - binInstr += instr->processOpData.cond; + binInstr += ((instr->data.BranchData.processOpData.conditionalData.offset)^5); + binInstr += instr->data.BranchData.processOpData.conditionalData.cond; break; default: break; @@ -49,7 +49,7 @@ st* firstPass(a64inst_instruction instrs[], int numInstrs){ } return &table; } -word assembleDPI(a64inst_instruction cI) { +word dpi(a64inst_instruction cI) { word out = 0; a64inst_DPImmediateData data = cI.data.DPImmediateData; //sf @@ -78,7 +78,7 @@ word assembleDPI(a64inst_instruction cI) { return out; } -word assembleDPR(a64inst_instruction cI) { +word dpr(a64inst_instruction cI) { word out = 0; a64inst_DPRegisterData data = cI.data.DPRegisterData; // sf @@ -131,7 +131,7 @@ word assembleDPR(a64inst_instruction cI) { return out; } -word assembleSTS(a64inst_instruction cI) { +word sts(a64inst_instruction cI) { a64inst_SingleTransferData data = cI.data.SingleTransferData; word out = 0; a64inst_SingleDataTransferData data2 = data.processOpData.singleDataTransferData; @@ -166,7 +166,7 @@ word assembleSTS(a64inst_instruction cI) { return out; } -word assembleLDL(a64inst_instruction cI) { +word ldl(a64inst_instruction cI) { word out = 3*(2^27); a64inst_SingleTransferData data = cI.data.SingleTransferData; int sf = data.regType; @@ -216,7 +216,7 @@ void secondPass(a64inst_instruction instrs[], int numInstrs, st* table, word arr lbl++; break; case a64inst_BRANCH: - arr[index] = assembleBranch(&cI, table, lbl); + arr[index] = assembleBranch(&cI); index++; default: break; From 999f36facd4dd95bacf730a7b9b7357381126669 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Tue, 11 Jun 2024 23:19:04 +0100 Subject: [PATCH 050/113] Fix bugs to make code compile. Code now compiles --- src/symboltable.h | 49 ----------------- src/twopassassembly.c | 121 +++++++++++++++++++++--------------------- 2 files changed, 60 insertions(+), 110 deletions(-) delete mode 100644 src/symboltable.h diff --git a/src/symboltable.h b/src/symboltable.h deleted file mode 100644 index cd2037c..0000000 --- a/src/symboltable.h +++ /dev/null @@ -1,49 +0,0 @@ -#include - -typedef struct st st; - - - -typedef struct { - const void* key; - void* value; - node* prev; - node* next; -} node; - -struct st { - node* head; - node* tail; -}; - -// add new node to the end -void st_add(st table, void* key, void* value) { - node n = {key, value, table.tail}; - if (table.head == NULL) { - table.head = &n; - table.tail = &n; - } - else { - (*(table.tail)).next = &n; - table.tail = &n; - } -} - -// returns the pointer to key of the specified node, or null, if it does not exist -void* st_search(st table, void* key) { - return nodeSearch(table.head, key); -} - -void* nodeSearch(node* n, void* key) { - if (n != NULL) { - if ((*n).key == key) { - return (*n).value; - } - else { - return nodeSearch((*n).next, key); - } - } - else { - return NULL; - } -} \ No newline at end of file diff --git a/src/twopassassembly.c b/src/twopassassembly.c index b5dccae..e6899d5 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -1,32 +1,33 @@ #include "global.h" #include "a64instruction/a64instruction.h" #include "symboltable.c" -//generates assembled code based on two pass assembly method +#include +#include +// Generates assembled code based on the two-pass assembly method -word assembleBranch(a64inst_instruction *instr){ +word assembleBranch(a64inst_instruction *instr) { word binInstr = 0; - binInstr += (5^28); //101 start of branch instr - switch (instr->data.BranchData.BranchType) - { + binInstr += (5 << 28); // 101 start of branch instr + switch (instr->data.BranchData.BranchType) { case a64inst_UNCONDITIONAL: - //000101 - //25-0: sign extended simm26 + // 000101 + // 25-0: sign extended simm26 binInstr += instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset; break; case a64inst_REGISTER: - //10000 - //11111 - //000000 - //9-5: address from register - //0000 - binInstr += ((instr->data.BranchData.processOpData.registerData.src)^5); + // 10000 + // 11111 + // 000000 + // 9-5: address from register + // 0000 + binInstr += ((instr->data.BranchData.processOpData.registerData.src) << 5); break; case a64inst_CONDITIONAL: - // 01010100 + // 01010100 // 25-5: sign extended offset // 4-0: 0{condition} - binInstr += ((instr->data.BranchData.processOpData.conditionalData.offset)^5); + binInstr += ((instr->data.BranchData.processOpData.conditionalData.offset) << 5); binInstr += instr->data.BranchData.processOpData.conditionalData.cond; break; default: @@ -35,43 +36,43 @@ word assembleBranch(a64inst_instruction *instr){ return binInstr; } -st* firstPass(a64inst_instruction instrs[], int numInstrs){ - //TODO: +st* firstPass(a64inst_instruction instrs[], int numInstrs) { + // TODO: // -iterate over instructions, adding to symbol table // create symbol table and map labels to addresses/lines - struct st table; - for(int i=0; i Date: Tue, 11 Jun 2024 23:46:40 +0100 Subject: [PATCH 051/113] Add overall assemble structure --- src/assemble.c | 26 ++++++++++++++++++++++++++ src/fileio.c | 2 ++ src/fileio.h | 4 +++- src/parser.c | 22 ++++++++++++++++------ src/parser.h | 4 ++++ src/twopassassembly.c | 5 +++-- 6 files changed, 54 insertions(+), 9 deletions(-) diff --git a/src/assemble.c b/src/assemble.c index 856a097..967b02e 100755 --- a/src/assemble.c +++ b/src/assemble.c @@ -1,8 +1,34 @@ #include #include +#include "a64instruction/a64instruction.h" #include "parser.h" #include "fileio.h" +#include "parser.h" +#include "twopassassembly.c" int main(int argc, char **argv) { + // Check the arguments + if (argc < 3) { + fprintf(stderr, "Error: A source file and an object output file are required. Syntax: ./assemble "); + return EXIT_FAILURE; + } + + // Load the source file into memory + char **source = readAssemblyFile(argv[1]); + + // Parse the source file + a64inst_instruction *instructions = parse(source); + + // First Pass: Create the symbol table + st *table = firstPass(instructions, 1000); // 1000 is just a temp fix. + + // Second Pass: Assemble the instructions + word *binary = secondPass(instructions, 1000, table); // 1000 is just a temp fix. + + // Write the binary to the output file + writeBinaryFile(binary, argv[2], 1000); // 1000 is just a temp fix. + + /* TODO: FREE MEMORY!! */ + return EXIT_SUCCESS; } diff --git a/src/fileio.c b/src/fileio.c index 85fd8d1..8be2e38 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -1,5 +1,7 @@ #include #include +#include "global.h" +#include "fileio.h" #define MAX_ASM_LINE_LENGTH 30 diff --git a/src/fileio.h b/src/fileio.h index a2d4262..88e9cca 100644 --- a/src/fileio.h +++ b/src/fileio.h @@ -5,5 +5,7 @@ #define EXIT_FAILURE 1 -extern byte *fileio_loadBin(const char *filePath, size_t memorySize); +char **readAssemblyFile(char inputFile[]); +int writeBinaryFile(word instrs[], char outputFile[], int numInstrs); + #endif diff --git a/src/parser.c b/src/parser.c index 8cb6609..a66b6f3 100644 --- a/src/parser.c +++ b/src/parser.c @@ -3,7 +3,6 @@ #include #include #include "parser.h" - #include "a64instruction/a64instruction.h" //takes input string, read from asm file and returns @@ -188,16 +187,15 @@ void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOp //takes inputted assembly line and returns a //pointer to an abstract representation of the instruction -a64inst_instruction *parser(char asmLine[]){ +void parser_instruction(char asmLine[], a64inst_instruction *instr) { int numOperands = 0; - a64inst_instruction *instr = malloc(sizeof(a64inst_instruction)); if (instr == NULL){ exit(EXIT_FAILURE); } if(strcmp(asmLine, HALT_ASM_CMD) == 0){ instr->type = a64inst_HALT; - return(instr); + return; } //"opcode operand1, {operand2}, ..." @@ -252,7 +250,19 @@ a64inst_instruction *parser(char asmLine[]){ } - return(instr); - } +// Takes an array of strings, each string representing an assembly instruction. +// Returns an array of a64inst_instruction pointers, each representing an instruction. +// Note. The array of strings must be NULL-terminated???? +a64inst_instruction *parse(char **asmLines) { + a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * 1000); + + int i = 0; + while (asmLines[i] != NULL) { + parser_instruction(asmLines[i], &instructions[i]); + i++; + } + + return instructions; +} diff --git a/src/parser.h b/src/parser.h index e303b58..1f7ab70 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,2 +1,6 @@ +#include "a64instruction/a64instruction.h" + #define OPERAND_DELIMITER ", " #define HALT_ASM_CMD "and x0, x0, x0" + +a64inst_instruction *parse(char **asmLines); diff --git a/src/twopassassembly.c b/src/twopassassembly.c index e6899d5..b4ecdec 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -178,11 +178,12 @@ word ldl(a64inst_instruction cI) { return out; } -void secondPass(a64inst_instruction instrs[], int numInstrs, st* table, word arr[]) { +word *secondPass(a64inst_instruction instrs[], int numInstrs, st* table) { // TODO: // iterate over instructions again, this time replacing labels // with values from symbol table // after a line has had all the values replaced, assemble it and append + word *arr = (word*)malloc(sizeof(word) * numInstrs); int index = 0; for (int i = 0; i < numInstrs; i++) { a64inst_instruction cI = instrs[i]; @@ -221,5 +222,5 @@ void secondPass(a64inst_instruction instrs[], int numInstrs, st* table, word arr break; } } - return; + return arr; } From 17d31a74e38bc73c860ee858363875b72461e364 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Tue, 11 Jun 2024 23:49:40 +0100 Subject: [PATCH 052/113] Add a working Makefile for testing purposes --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 7106e44..2fe219c 100755 --- a/src/Makefile +++ b/src/Makefile @@ -9,7 +9,7 @@ CFLAGS ?= -std=c17 -g\ all: assemble emulate -assemble: assemble.o +assemble: assemble.o parser.o fileio.o emulate: emulate.o clean: From 269a150926f36b17c60bdb5111288f2c412cee72 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Wed, 12 Jun 2024 00:49:25 +0100 Subject: [PATCH 053/113] Fix types, signatures, and arguments. --- src/assemble.c | 11 +- src/execute.c | 448 ------------------------------------------------- src/parser.c | 15 +- src/parser.h | 2 +- 4 files changed, 15 insertions(+), 461 deletions(-) delete mode 100644 src/execute.c diff --git a/src/assemble.c b/src/assemble.c index 967b02e..ef1b06d 100755 --- a/src/assemble.c +++ b/src/assemble.c @@ -14,19 +14,20 @@ int main(int argc, char **argv) { } // Load the source file into memory - char **source = readAssemblyFile(argv[1]); + int lineCount = countLines(argv[1]); + char **source = readAssemblyFile(argv[1], lineCount); // Parse the source file - a64inst_instruction *instructions = parse(source); + a64inst_instruction *instructions = parse(source, lineCount); // First Pass: Create the symbol table - st *table = firstPass(instructions, 1000); // 1000 is just a temp fix. + st *table = firstPass(instructions, lineCount); // Second Pass: Assemble the instructions - word *binary = secondPass(instructions, 1000, table); // 1000 is just a temp fix. + word *binary = secondPass(instructions, lineCount, table); // 1000 is just a temp fix. // Write the binary to the output file - writeBinaryFile(binary, argv[2], 1000); // 1000 is just a temp fix. + writeBinaryFile(binary, argv[2], lineCount); // 1000 is just a temp fix. /* TODO: FREE MEMORY!! */ diff --git a/src/execute.c b/src/execute.c deleted file mode 100644 index fc436af..0000000 --- a/src/execute.c +++ /dev/null @@ -1,448 +0,0 @@ -#include -#include -#include "execute.h" -#include "print.h" - -// Defines the maximum value that can be held in a register -#define MAX_REG_VAL ((1 << DWORD_BITS) - 1) - -// The number of bits to shift the immediate value in an arithmetic immediate data processing -// instruction if the shift flag is enabled. -#define DPI_ARITHM_SHIFT 12 - -// The number of bits to shift the immediate value in a wide move immediate data processing -// instruction if the shift flag is enabled. -#define DPI_WIDEMOV_SHIFT 16 - -// Prototypes -void execute_SDT(Machine *state, a64inst_instruction *inst); -void execute_Branch(Machine *state, a64inst_instruction *inst); -void executeMultiply(Machine *state, a64inst_instruction *inst); - -// Return maximum of two dwords -static dword max(dword a, dword b) { - return a > b ? a : b; -} - -// Truncate a given value to the size of a word or dword depending on the register type -static dword truncateValue(dword value, a64inst_regType regType) { - if (regType == a64inst_X) { - return value; - } else { - return (word)value; - //return value & (dword)(((dword)1 << WORD_BITS) - 1); - } -} - -// Sign extend a given value to a 64-bit signed integer given the number of bits -static int64_t signExtend(dword value, unsigned int n) { - if (n == 0 || n >= 64) { - // If n_bits is 0 or greater than or equal to 64, return the value as is - return (int64_t)value; - } - - uint64_t sign_bit_mask = (uint64_t)1 << (n - 1); - - // Mask to isolate the n-bit value - uint64_t n_bit_mask = (sign_bit_mask << 1) - 1; - - // Check if the sign bit is set - if (value & sign_bit_mask) { - // Sign bit is set, extend the sign - return (int64_t)(value | ~n_bit_mask); - } else { - // Sign bit is not set, return the value as is - return (int64_t)(value & n_bit_mask); - } -} - -// Read from processor register, ensuring that a valid register specifier is given -// and accounting for the case where the zero register is accessed. Truncate -// the 32 most significant bits stored in the R register when reading W register. -static dword readRegister(Machine *state, a64inst_regSpecifier reg, a64inst_regType regType) { - assert(reg <= REGISTER_COUNT); - if (reg == ZERO_REGISTER) { - return 0; - } else { - return truncateValue(state->registers[reg], regType); - } -} - -// TODO: - -// Write to a processor register, ensuring that a valid register specifier is given -// and truncating the value being written when it can't fit in the specified register -static void writeRegister(Machine *state, a64inst_regSpecifier reg, a64inst_regType regType, dword value) { - assert(reg <= REGISTER_COUNT); - if (reg != ZERO_REGISTER) { - state->registers[reg] = truncateValue(value, regType); - } -} - -// Returns the position of the MSB of the given register type -inline static dword getMSBPos(a64inst_regType regType) { - return (regType ? DWORD_BITS : WORD_BITS) - 1; -} - -// Returns the MSB of the given value assuming it's of the size stored in the given register type -inline static uint8_t getMSB(dword value, a64inst_regType regType) { - return value >> getMSBPos(regType); -} - -// Updates N and Z condition codes given the machine and a result value -static void updateCondNZ(Machine *state, dword result, a64inst_regType regType) { - state->conditionCodes.Negative = getMSB(result, regType); - state->conditionCodes.Zero = result == 0; -} - -// Execute a data processing immediate instruction -static void executeDPImmediate(Machine *state, a64inst_instruction *inst) { - assert(inst->type == a64inst_DPIMMEDIATE); - - a64inst_regType regType = inst->data.DPImmediateData.regType; - a64inst_regSpecifier dest = inst->data.DPImmediateData.dest; - switch(inst->data.DPImmediateData.DPIOpType) { - - // Execute an arithmetic immediate data processing instruction - case a64inst_DPI_ARITHM:; - - // If shift flag is enabled, logical left shift by the number of bits specified by the architecture - dword arithmImm = inst->data.DPImmediateData.processOpData.arithmData.immediate; - dword srcVal = state->registers[inst->data.DPImmediateData.processOpData.arithmData.src]; - if (inst->data.DPImmediateData.processOpData.arithmData.shiftImmediate) { - arithmImm = truncateValue(arithmImm << DPI_ARITHM_SHIFT, regType); - } - - switch(inst->data.DPImmediateData.processOp) { - - dword result; - case(a64inst_ADDS): - result = srcVal + arithmImm; - writeRegister(state, dest, regType, result); - - updateCondNZ(state, result, regType); - state->conditionCodes.Overflow = max(srcVal, arithmImm) > result; - state->conditionCodes.Carry = state->conditionCodes.Overflow; - break; - - case(a64inst_ADD): - writeRegister(state, dest, regType, srcVal + arithmImm); - break; - - case(a64inst_SUBS): - result = srcVal - arithmImm; - writeRegister(state, dest, regType, result); - - updateCondNZ(state, result, regType); - state->conditionCodes.Overflow = srcVal < result; - state->conditionCodes.Carry = state->conditionCodes.Overflow; - break; - - case(a64inst_SUB): - writeRegister(state, dest, regType, srcVal - arithmImm); - break; - - // Unknown opcode detected! - default: - fprintf(stderr, "Unknown opcode detected in a DPI arithmetic instruction!\n"); - break; - } - break; - - // Execute a wide move immediate data processing instruction - case a64inst_DPI_WIDEMOV:; - uint8_t shiftScalar = inst->data.DPImmediateData.processOpData.wideMovData.shiftScalar; - dword wideMovImm = inst->data.DPImmediateData.processOpData.wideMovData.immediate; - - // NOTE: Not checking that shiftScalar has valid value for 32bit registers. Possibly add explicit error. - //printf("%x\n", wideMovImm << (shiftScalar * DPI_WIDEMOV_SHIFT) & ); - wideMovImm = truncateValue(wideMovImm << (shiftScalar * DPI_WIDEMOV_SHIFT), regType); - switch(inst->data.DPImmediateData.processOp) { - - case(a64inst_MOVN): - writeRegister(state, dest, regType, ~wideMovImm); - break; - - case(a64inst_MOVZ): - writeRegister(state, dest, regType, wideMovImm); - break; - - case(a64inst_MOVK):; - dword result = readRegister(state, dest, regType); - result = (result & ~(((1lu << DPI_WIDEMOV_SHIFT) - 1) << shiftScalar * DPI_WIDEMOV_SHIFT)) | wideMovImm; - writeRegister(state, dest, regType, result); - break; - - default: - fprintf(stderr, "Unknown opcode detected in a DPI wide move instruction!\n"); - break; - } - break; - - // Unknown instruction detected! - default: - fprintf(stderr, "Attempting to execute instruction with unknown DPI operand type!\n"); - break; - } -} - -// Execute a data processing register instruction -static void executeDPRegister(Machine *state, a64inst_instruction *inst) { - assert(inst->type == a64inst_DPREGISTER); - - a64inst_regType regType = inst->data.DPRegisterData.regType; - a64inst_regSpecifier dest = inst->data.DPRegisterData.dest; - dword src1Val = readRegister(state, inst->data.DPRegisterData.src1, regType); - dword src2Val = readRegister(state, inst->data.DPRegisterData.src2, regType); - - switch(inst->data.DPRegisterData.DPROpType) { - - // Execute an arithmetic or logic register data processing instruction - case a64inst_DPR_ARITHMLOGIC:; - - // Apply shift to value held in second register - a64inst_DPRegister_ArithmLogicData *arithmLogicData = &inst->data.DPRegisterData.processOpData.arithmLogicData; - uint8_t shiftAmount = arithmLogicData->shiftAmount; - switch(arithmLogicData->shiftType) { - - case a64inst_LSL: - src2Val = truncateValue(src2Val << shiftAmount, regType); - break; - - case a64inst_LSR: - src2Val = truncateValue(src2Val >> shiftAmount, regType); - break; - - case a64inst_ASR: - if (regType == a64inst_X) { - src2Val = truncateValue((int64_t)src2Val >> shiftAmount, regType); - } else { - src2Val = truncateValue((int32_t)src2Val >> shiftAmount, regType); - } - break; - - case a64inst_ROR: - if (arithmLogicData->type != a64inst_DPR_LOGIC) { - fprintf(stderr, "Attempting to perform ROR shift on non-logic register data processing instruction!\n"); - } - src2Val = truncateValue(src2Val >> shiftAmount | src2Val << (getMSBPos(regType) - shiftAmount), regType); - break; - - default: - fprintf(stderr, "Attempting to execute arithmetic/logic register data processing instruction with invalid shift type!\n"); - break; - } - - // Negate second operand if negShiftedSrc2 flag is enabled - if (arithmLogicData->negShiftedSrc2) { - src2Val = truncateValue(~src2Val, regType); - } - - dword result; - switch(arithmLogicData->type) { - - case a64inst_DPR_ARITHM: - switch(inst->data.DPRegisterData.processOp) { - - case(a64inst_ADDS): - result = src1Val + src2Val; - writeRegister(state, dest, regType, result); - - updateCondNZ(state, result, regType); - state->conditionCodes.Overflow = max(src1Val, src2Val) > result; - state->conditionCodes.Carry = state->conditionCodes.Overflow; - break; - - case(a64inst_ADD): - writeRegister(state, dest, regType, src1Val + src2Val); - break; - - case(a64inst_SUBS): - result = src1Val - src2Val; - writeRegister(state, dest, regType, result); - - updateCondNZ(state, result, regType); - state->conditionCodes.Overflow = getMSB(src1Val, regType) != getMSB(src2Val, regType) && getMSB(src1Val, regType) != getMSB(result, regType); - state->conditionCodes.Carry = src1Val >= src2Val; - break; - - case(a64inst_SUB): - writeRegister(state, dest, regType, src1Val - src2Val); - break; - - // Unknown opcode detected! - default: - fprintf(stderr, "Unknown opcode detected in a DPI arithmetic instruction!\n"); - break; - } - break; - - case a64inst_DPR_LOGIC: - switch(inst->data.DPRegisterData.processOp) { - - case a64inst_AND: - writeRegister(state, dest, regType, src1Val & src2Val); - break; - - case a64inst_OR: - writeRegister(state, dest, regType, src1Val | src2Val); - break; - - case a64inst_XOR: - writeRegister(state, dest, regType, src1Val ^ src2Val); - break; - - case a64inst_AND_FLAGGED:; - result = src1Val & src2Val; - writeRegister(state, dest, regType, result); - state->conditionCodes.Overflow = 0; - state->conditionCodes.Carry = 0; - updateCondNZ(state, result, regType); - break; - } - break; - - default: - fprintf(stderr, "Attempting to execute an instruction with an unknown DPR arithmetic or logic subtype!\n"); - break; - } - break; - - // Execute a multiply register data processing instruction - case a64inst_DPR_MULTIPLY: - break; - - // Unknown instruction detected! - default: - fprintf(stderr, "Attempting to execute instruction with unknown DPR operand type!\n"); - break; - } -} - -void execute(Machine *state, a64inst_instruction *inst) { - - switch (inst->type) { - - // Halt the program - case a64inst_HALT: - break; - - // Execute a data processing immediate instruction - case a64inst_DPIMMEDIATE: - executeDPImmediate(state, inst); - break; - - // Execute a branch instruction - case a64inst_BRANCH: - execute_Branch(state, inst); - break; - - // Execute a data processing register instruction - case a64inst_DPREGISTER: - if (inst->data.DPRegisterData.DPROpType == a64inst_DPR_MULTIPLY) - executeMultiply(state, inst); - else - executeDPRegister(state, inst); - break; - - case a64inst_SINGLETRANSFER: - execute_SDT(state, inst); - break; - - // Unknown instruction - default: - break; - } - -} - -void execute_SDT(Machine *state, a64inst_instruction *inst) { - word address; - bool isLoad; - if (inst->data.SingleTransferData.SingleTransferOpType == a64inst_SINGLE_TRANSFER_LOAD_LITERAL) { - // Load Literal - isLoad = true; - address = state->pc + inst->data.SingleTransferData.processOpData.loadLiteralData.offset * 4; - } else { - address = state->registers[inst->data.SingleTransferData.processOpData.singleDataTransferData.base]; - isLoad = inst->data.SingleTransferData.processOpData.singleDataTransferData.transferType == a64inst_LOAD; - switch (inst->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode) { - case a64inst_UNSIGNED_OFFSET: - address += inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset * (inst->data.SingleTransferData.regType == a64inst_W ? 4 : 8); - break; - case a64inst_REGISTER_OFFSET: - address += state->registers[inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg]; - break; - case a64inst_PRE_INDEXED: - address += inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset; - state->registers[inst->data.SingleTransferData.processOpData.singleDataTransferData.base] = address; - break; - case a64inst_POST_INDEXED: - state->registers[inst->data.SingleTransferData.processOpData.singleDataTransferData.base] = address + inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset; - break; - } - } - - if (isLoad) { - if (inst->data.SingleTransferData.regType == a64inst_W) { - // 32 bit access - state->registers[inst->data.SingleTransferData.target] = readWord(state->memory, address); - } else { - state->registers[inst->data.SingleTransferData.target] = readDoubleWord(state->memory, address); - } - } else { - *(word *)(state->memory + address) = state->registers[inst->data.SingleTransferData.target]; - - // Update base register if post indexed - if (inst->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode == a64inst_POST_INDEXED) { - writeRegister(state, inst->data.SingleTransferData.processOpData.singleDataTransferData.base, inst->data.SingleTransferData.regType == a64inst_W, address + inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset); - } - } - -} - -static bool isConditionMet(Machine* state, a64inst_ConditionType cond) { - switch(cond) { - case EQ: - return state->conditionCodes.Zero; - case NE: - return !state->conditionCodes.Zero; - case GE: - return state->conditionCodes.Negative == state->conditionCodes.Overflow; - case LT: - return state->conditionCodes.Negative != state->conditionCodes.Overflow; - case GT: - return !state->conditionCodes.Zero && (state->conditionCodes.Negative == state->conditionCodes.Overflow); - case LE: - return state->conditionCodes.Zero || (state->conditionCodes.Negative != state->conditionCodes.Overflow); - case AL: - return true; - default: - fprintf(stderr, "Unknown condition specified!\n"); - exit(1); - } -} - -void execute_Branch(Machine *state, a64inst_instruction *inst) { - switch (inst->data.BranchData.BranchType) { - case a64inst_UNCONDITIONAL: - state->pc += signExtend(inst->data.BranchData.processOpData.unconditionalData.unconditionalOffset * 4, 26); - break; - - case a64inst_REGISTER: - state->pc = state->registers[inst->data.BranchData.processOpData.registerData.src]; - break; - - case a64inst_CONDITIONAL: - if (isConditionMet(state, inst->data.BranchData.processOpData.conditionalData.cond)) { - state->pc += signExtend(inst->data.BranchData.processOpData.conditionalData.offset * 4, 19); - } - break; - } -} - -void executeMultiply(Machine *state, a64inst_instruction *inst) { - dword product = state->registers[inst->data.DPRegisterData.src1] * state->registers[inst->data.DPRegisterData.src2]; - dword sum = readRegister(state, inst->data.DPRegisterData.processOpData.multiplydata.summand, inst->data.DPRegisterData.regType) + (inst->data.DPRegisterData.processOpData.multiplydata.negProd ? -product : product); - writeRegister(state, inst->data.DPRegisterData.dest, inst->data.DPRegisterData.regType, sum); -} diff --git a/src/parser.c b/src/parser.c index a66b6f3..31b5ffc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -172,8 +173,9 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){ - char *operandsDupe = NULL; - operandsDupe = strcpy(operandsDupe, str); + assert(str != NULL); + char operandsDupe[strlen(str)+1]; + strcpy(operandsDupe, str); char *operand = strtok(operandsDupe, OPERAND_DELIMITER); operands[0] = operand; @@ -200,8 +202,8 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { //"opcode operand1, {operand2}, ..." //duplicated as strtok modifies the input string - char *stringptr = NULL; - stringptr = strcpy(stringptr, asmLine); + char stringptr[strlen(asmLine) + 1]; + strcpy(stringptr, asmLine); char *opcode = strtok(stringptr, " "); char *operands = strtok(NULL, ""); @@ -254,9 +256,8 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { // Takes an array of strings, each string representing an assembly instruction. // Returns an array of a64inst_instruction pointers, each representing an instruction. -// Note. The array of strings must be NULL-terminated???? -a64inst_instruction *parse(char **asmLines) { - a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * 1000); +a64inst_instruction *parse(char **asmLines, int lineCount) { + a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); int i = 0; while (asmLines[i] != NULL) { diff --git a/src/parser.h b/src/parser.h index 1f7ab70..2d7f382 100644 --- a/src/parser.h +++ b/src/parser.h @@ -3,4 +3,4 @@ #define OPERAND_DELIMITER ", " #define HALT_ASM_CMD "and x0, x0, x0" -a64inst_instruction *parse(char **asmLines); +a64inst_instruction *parse(char **asmLines, int lineCount); From 53ab6a2bf6e6313dda08822b24aaad40272a57be Mon Sep 17 00:00:00 2001 From: sBubshait Date: Wed, 12 Jun 2024 00:50:00 +0100 Subject: [PATCH 054/113] Update fileio, add countLine, rewrite read and write file funcs --- src/fileio.c | 110 +++++++++++++++++++++++++++------------------------ src/fileio.h | 6 ++- 2 files changed, 63 insertions(+), 53 deletions(-) diff --git a/src/fileio.c b/src/fileio.c index 8be2e38..f2f47fd 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -1,9 +1,8 @@ -#include #include #include "global.h" #include "fileio.h" -#define MAX_ASM_LINE_LENGTH 30 +#define MAX_ASM_LINE_LENGTH 300 int isValidFileFormat(char filename[], char expectedExtension[]){ char *pointLoc = strrchr(filename, '.'); @@ -16,67 +15,76 @@ int isValidFileFormat(char filename[], char expectedExtension[]){ return(0); } -int writeBinaryFile(word instrs[], char outputFile[], int numInstrs){ - - if (!isValidFileFormat(outputFile, "bin")){ - return(-1); +void writeBinaryFile(word instrs[], char outputFile[], int numInstrs) { + FILE *fp = fopen(outputFile, "wb"); + if (fp == NULL) { + fprintf(stderr, "Error: Could not open file %s\n", outputFile); + exit(EXIT_FAILURE); } - - FILE *fp; - fp = fopen(outputFile, "wb"); - - if(fp == NULL){ - return(-1); - } - - fwrite(instrs, 4, sizeof(word) * numInstrs, fp); + fwrite(instrs, sizeof(word), numInstrs, fp); fclose(fp); - - return(0); } -char **readAssemblyFile(char inputFile[]) { - if (!isValidFileFormat(inputFile, "s")){ - return(NULL); +int countLines(char *filename) { + FILE *file = fopen(filename, "r"); + if (file == NULL) { + fprintf(stderr, "Error: Could not read file %s\n", filename); + exit(EXIT_FAILURE); } - FILE *fp = fopen(inputFile, "r"); + int count = 0; + char c; - if (fp == NULL){ - return(NULL); - } - - int lineCount = 0; - char ch; - while ((ch = fgetc(fp)) != EOF) - { - if (ch == '\n' || ch == '\0') - { - lineCount++; + while ((c = fgetc(file)) != EOF) { + if (c == '\n') { + count++; } } - - char **heap = malloc(sizeof(char *) * lineCount); - rewind(fp); - - for( int i=0; i #include #include "global.h" #define EXIT_FAILURE 1 -char **readAssemblyFile(char inputFile[]); -int writeBinaryFile(word instrs[], char outputFile[], int numInstrs); +char **readAssemblyFile(char filename[], int lineCount); +void writeBinaryFile(word instrs[], char outputFile[], int numInstrs); +int countLines(char *filename); #endif From f32304afb7b6deec3b81354f84ced54ac47a3976 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 12 Jun 2024 16:24:42 +0100 Subject: [PATCH 055/113] create helper function to generate number from operand --- src/parser.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/parser.c b/src/parser.c index 31b5ffc..97eef8d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -18,6 +18,14 @@ // - ASK ABOUT OFFSET CALCULATION // - CREATE FUNC TO TIDY UP OPERANDS IN DP +int getOperandNumber(char *operand){ + char *operandCpy = strcpy(operandCpy, operand); + operandCpy++; + char **endptr; + int number = strtol(operandCpy, endptr, 10); + return number; +} + int isOperandRegister(char *operand){ return((strcmp(&(operand[0]), "x")==0) || (strcmp(&(operand[0]), "w")==0)); } @@ -161,6 +169,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ //offset is literal, use symbol table and calculate difference } } + generateLoadStoreOperands(instr, opcode, operandList, numOperands); } else { if(classifyDPInst(operandList)){ From c6ff7e1c4e4405e10076a0335cb713250df0217f Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 12 Jun 2024 16:29:29 +0100 Subject: [PATCH 056/113] comment getOperandNumber --- src/parser.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/parser.c b/src/parser.c index 97eef8d..cd7c9ed 100644 --- a/src/parser.c +++ b/src/parser.c @@ -18,6 +18,8 @@ // - ASK ABOUT OFFSET CALCULATION // - CREATE FUNC TO TIDY UP OPERANDS IN DP +//takes inputted char array and returns the integer of the operand, skipping the first character +//e.g. for a passed "R32", it skips the 'R' and returns 32 int getOperandNumber(char *operand){ char *operandCpy = strcpy(operandCpy, operand); operandCpy++; @@ -159,11 +161,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ instr->type = a64inst_LOADLITERAL; if(operandList[0][0] =='#'){ //offset is immediate - char *immOffset = NULL; - immOffset = strcpy(immOffset, operandList[0]); - immOffset++; - char *endptr = NULL; - int offset = strtol(immOffset, &endptr, 10); + int offset = getOperandNumber(operandList[0]); instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; } else { //offset is literal, use symbol table and calculate difference From 28b38f4a80b70c41e45fd46fdad886f1a17ed037 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Wed, 12 Jun 2024 16:31:09 +0100 Subject: [PATCH 057/113] small update, sync helper function --- src/parser.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/parser.c b/src/parser.c index 97eef8d..75df4a9 100644 --- a/src/parser.c +++ b/src/parser.c @@ -176,6 +176,25 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ instr->type = a64inst_DPREGISTER; } else { instr->type = a64inst_DPIMMEDIATE; + a64inst_DPImmediateData data = instr->data.DPImmediateData; + char t = operandList[0][0]; + char *immOffset = NULL; + immOffset = strcpy(immOffset, operandList[0]); + immOffset++; + char *endptr = NULL; + int reg = strtol(immOffset, &endptr, 10); + data.dest=reg; + if (t == 'w') { + data.regType=0; + } + else { + data.regType=1; + } + // add + if (opcode[1] == 'd') { + data.DPIOpType = 0; + + } } } From f4fd71a33014d02ab4b29acd91666f00d9b19f35 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 12 Jun 2024 16:46:59 +0100 Subject: [PATCH 058/113] fix syntax error with getOperandNumber --- src/parser.c | 7 ++++--- test.sh | 0 2 files changed, 4 insertions(+), 3 deletions(-) mode change 100644 => 100755 test.sh diff --git a/src/parser.c b/src/parser.c index cd7c9ed..ba29ccf 100644 --- a/src/parser.c +++ b/src/parser.c @@ -21,9 +21,10 @@ //takes inputted char array and returns the integer of the operand, skipping the first character //e.g. for a passed "R32", it skips the 'R' and returns 32 int getOperandNumber(char *operand){ - char *operandCpy = strcpy(operandCpy, operand); + char *operandCpy = NULL; + strcpy(operandCpy, operand); operandCpy++; - char **endptr; + char **endptr = NULL; int number = strtol(operandCpy, endptr, 10); return number; } @@ -74,7 +75,6 @@ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *o } char *endptr; instr->data.SingleTransferData.target = strtol(&(operandList[0][0])+1, &endptr, 10); - calcluateAddressFormat(instr, operandList, numOperands); break; case a64inst_LOADLITERAL: break; @@ -242,6 +242,7 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { break; case a64inst_SINGLETRANSFER: generateLoadStoreOperands(instr, opcode, operandList, numOperands); + calcluateAddressFormat(instr, operandList, numOperands); break; case a64inst_LOADLITERAL: generateLoadStoreOperands(instr, opcode, operandList, numOperands); diff --git a/test.sh b/test.sh old mode 100644 new mode 100755 From 9f92eb476672ffe3ab2b1edc178ef9095814a597 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 12 Jun 2024 16:54:38 +0100 Subject: [PATCH 059/113] fix tokeniseOperands param order --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index ba29ccf..64c748f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -213,7 +213,7 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { strcpy(stringptr, asmLine); char *opcode = strtok(stringptr, " "); - char *operands = strtok(NULL, ""); + char *operands = strtok(stringptr, ""); if(strcmp(opcode, ".int") == 0){ //type is directive From 3290896f6e975caf1e25091ead0bac252ec5909f Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Wed, 12 Jun 2024 16:55:30 +0100 Subject: [PATCH 060/113] add, adds, cmn --- src/parser.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/parser.c b/src/parser.c index 75df4a9..a11766e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -178,11 +178,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ instr->type = a64inst_DPIMMEDIATE; a64inst_DPImmediateData data = instr->data.DPImmediateData; char t = operandList[0][0]; - char *immOffset = NULL; - immOffset = strcpy(immOffset, operandList[0]); - immOffset++; - char *endptr = NULL; - int reg = strtol(immOffset, &endptr, 10); + int reg = getOperandNumber(operandList[0]); data.dest=reg; if (t == 'w') { data.regType=0; @@ -190,10 +186,25 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ else { data.regType=1; } - // add + // add, adds if (opcode[1] == 'd') { data.DPIOpType = 0; - + data.processOpData.arithmData.src = getOperandNumber(operandList[1]); + data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); + if (opcode[-1] == 's') { + data.processOpData.arithmData.shiftImmediate = true; + + } + else { + data.processOpData.arithmData.shiftImmediate = false; + } + } + // cmn + else if (opcode == "cmn") { + data.DPIOpType = 0; + data.processOpData.arithmData.src = ZERO_REGISTER; + data.processOpData.arithmData.immediate = getOperandNumber(operandList[1]); + data.processOpData.arithmData.shiftImmediate = true; } } From 53f5b05210c8e8e349c6a9f56355c0772487e549 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 12 Jun 2024 17:02:52 +0100 Subject: [PATCH 061/113] rewrite uses of strcpy w/ S --- src/parser.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/parser.c b/src/parser.c index 64c748f..4010764 100644 --- a/src/parser.c +++ b/src/parser.c @@ -21,9 +21,8 @@ //takes inputted char array and returns the integer of the operand, skipping the first character //e.g. for a passed "R32", it skips the 'R' and returns 32 int getOperandNumber(char *operand){ - char *operandCpy = NULL; - strcpy(operandCpy, operand); - operandCpy++; + char operandCpy[strlen(operand)]; + strcpy(operandCpy, operand+1); char **endptr = NULL; int number = strtol(operandCpy, endptr, 10); return number; @@ -97,9 +96,8 @@ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *oper break; case a64inst_CONDITIONAL: { - char *condition = NULL; - condition = strcpy(condition, opcode); - condition += 2; + char condition[strlen(opcode)+1]; + strcpy(condition, opcode+2); if(strcmp(condition, "eq")==0){ instr->data.BranchData.processOpData.conditionalData.cond = EQ; } else if (strcmp(condition, "ne")==0){ @@ -223,8 +221,8 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { //type is label //add to symbol table instr->type = a64inst_LABEL; - char *opcodeCpy = NULL; - opcodeCpy = strcpy(opcodeCpy, opcode); + char opcodeCpy[strlen(opcode)+1]; + strcpy(opcodeCpy, opcode); char *labelData = strtok(opcodeCpy, ":"); instr->data.LabelData.label = labelData; } else { From f5dabe26b2d7a7449e1f95d0af1503d4ebb8fb92 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:06:20 +0100 Subject: [PATCH 062/113] DPI arithmetic --- src/parser.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index a11766e..0cdccb5 100644 --- a/src/parser.c +++ b/src/parser.c @@ -186,6 +186,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ else { data.regType=1; } + // arithmetic // add, adds if (opcode[1] == 'd') { data.DPIOpType = 0; @@ -193,10 +194,11 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); if (opcode[-1] == 's') { data.processOpData.arithmData.shiftImmediate = true; - + data.processOp = 1; } else { data.processOpData.arithmData.shiftImmediate = false; + data.processOp = 0; } } // cmn @@ -205,7 +207,31 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ data.processOpData.arithmData.src = ZERO_REGISTER; data.processOpData.arithmData.immediate = getOperandNumber(operandList[1]); data.processOpData.arithmData.shiftImmediate = true; + data.processOp = 1; } + // sub, subs + else if (opcode[0] == 's') { + data.DPIOpType = 0; + data.processOpData.arithmData.src = getOperandNumber(operandList[1]); + data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); + if (opcode[-1] == 's') { + data.processOpData.arithmData.shiftImmediate = true; + data.processOp = 3; + } + else { + data.processOpData.arithmData.shiftImmediate = false; + data.processOp = 2; + } + } + // cmp + else if (opcode == 'cmp') { + data.DPIOpType = 0; + data.processOpData.arithmData.src = ZERO_REGISTER; + data.processOpData.arithmData.immediate = getOperandNumber(operandList[1]); + data.processOpData.arithmData.shiftImmediate = true; + data.processOp = 3; + } + // wide move } } From 31b5174b20ffc36f76495cea2f7b36bb0a36c60e Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:13:48 +0100 Subject: [PATCH 063/113] corrections to arithmetic DPI --- src/parser.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/parser.c b/src/parser.c index 0cdccb5..c5bc66f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -178,8 +178,6 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ instr->type = a64inst_DPIMMEDIATE; a64inst_DPImmediateData data = instr->data.DPImmediateData; char t = operandList[0][0]; - int reg = getOperandNumber(operandList[0]); - data.dest=reg; if (t == 'w') { data.regType=0; } @@ -190,6 +188,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ // add, adds if (opcode[1] == 'd') { data.DPIOpType = 0; + data.dest = getOperandNumber(operandList[0]); data.processOpData.arithmData.src = getOperandNumber(operandList[1]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); if (opcode[-1] == 's') { @@ -204,7 +203,8 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ // cmn else if (opcode == "cmn") { data.DPIOpType = 0; - data.processOpData.arithmData.src = ZERO_REGISTER; + data.dest = ZERO_REGISTER; + data.processOpData.arithmData.src = getOperandNumber(operandList[0]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[1]); data.processOpData.arithmData.shiftImmediate = true; data.processOp = 1; @@ -212,6 +212,7 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ // sub, subs else if (opcode[0] == 's') { data.DPIOpType = 0; + data.dest = getOperandNumber(operandList[0]); data.processOpData.arithmData.src = getOperandNumber(operandList[1]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); if (opcode[-1] == 's') { @@ -226,11 +227,27 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ // cmp else if (opcode == 'cmp') { data.DPIOpType = 0; - data.processOpData.arithmData.src = ZERO_REGISTER; + data.dest = ZERO_REGISTER; + data.processOpData.arithmData.src = getOperandNumber(operandList[0]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[1]); data.processOpData.arithmData.shiftImmediate = true; data.processOp = 3; } + // neg, negs + else if (opcode[0] == 'n') { + data.DPIOpType = 0; + data.dest = getOperandNumber(operandList[1]); + data.processOpData.arithmData.src = ZERO_REGISTER; + data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); + if (opcode[-1] == 's') { + data.processOpData.arithmData.shiftImmediate = true; + data.processOp = 3; + } + else { + data.processOpData.arithmData.shiftImmediate = false; + data.processOp = 2; + } + } // wide move } From 18df8e0fe4c051b32de95f0d12260c7837aa5709 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:26:49 +0100 Subject: [PATCH 064/113] DPI wide move --- src/parser.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/parser.c b/src/parser.c index c5bc66f..2d71001 100644 --- a/src/parser.c +++ b/src/parser.c @@ -249,6 +249,28 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } } // wide move + else { + data.DPIOpType = 1; + data.dest = getOperandNumber(operandList[0]); + switch (opcode[3]) { + case 'k': + data.processOp = 3; + break; + case 'n': + data.processOp = 0; + break; + case 'z': + data.processOp = 2; + break; + default: + data.processOp = 1; + break; + } + data.processOpData.wideMovData.immediate = getOperandNumber(operandList[1]); + if (numOperands == 3){ + data.processOpData.wideMovData.shiftScalar = getOperandNumber(strtok(operandList[2], ' ')[1]); + } + } } } From 06b18706ed5a8ea68de68ce0ce87cdcf7b8bbf45 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 12 Jun 2024 17:34:14 +0100 Subject: [PATCH 065/113] rewrite opcode/operand splitting logic --- src/parser.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/parser.c b/src/parser.c index 4010764..432e926 100644 --- a/src/parser.c +++ b/src/parser.c @@ -142,7 +142,6 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } else { instr->data.BranchData.BranchType = a64inst_CONDITIONAL; } - generateBranchOperands(instr, opcode, operandList); } else if(isLoad == 0 || isStore == 0){ //loading/storing instruction; classify operands char *address = operandList[1]; @@ -165,7 +164,6 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ //offset is literal, use symbol table and calculate difference } } - generateLoadStoreOperands(instr, opcode, operandList, numOperands); } else { if(classifyDPInst(operandList)){ @@ -209,9 +207,13 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { //duplicated as strtok modifies the input string char stringptr[strlen(asmLine) + 1]; strcpy(stringptr, asmLine); - - char *opcode = strtok(stringptr, " "); - char *operands = strtok(stringptr, ""); + char *token; + token = strtok(stringptr, " "); + char opcode[strlen(token)+1]; + strcpy(opcode, token); + token = strtok(NULL, ""); + char operands[strlen(token)+1]; + strcpy(operands, token); if(strcmp(opcode, ".int") == 0){ //type is directive @@ -228,7 +230,7 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { } else { //type is instruction int operandCount = 0; - char *operandList[4]; + char *operandList[5]; //generate list of operands tokeniseOperands(operands, &operandCount, operandList, &numOperands); //categorise instruction type from opcode and operands From b8f3ded0af912a46caf19a0798df61ced4e51647 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:39:37 +0100 Subject: [PATCH 066/113] DPR multiplication --- src/parser.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/parser.c b/src/parser.c index 2d71001..9f3cc21 100644 --- a/src/parser.c +++ b/src/parser.c @@ -174,6 +174,49 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } else { if(classifyDPInst(operandList)){ instr->type = a64inst_DPREGISTER; + a64inst_DPRegisterData data = instr->data.DPRegisterData; + char t = operandList[0][0]; + if (t == 'w') { + data.regType=0; + } + else { + data.regType=1; + } + // multiply + // mul, mneg, madd, msub + if (opcode[0] == 'm') { + data.DPROpType = 1; + data.dest = getOperandNumber(operandList[0]); + data.src1 = getOperandNumber(operandList[1]); + data.src2 = getOperandNumber(operandList[2]); + switch (opcode[1]) { + // madd + case 'a': + data.processOpData.multiplydata.summand = getOperandNumber(operandList[3]); + data.processOpData.multiplydata.negProd = false; + break; + // mneg + case 'n': + data.processOpData.multiplydata.summand = ZERO_REGISTER; + data.processOpData.multiplydata.negProd = true; + break; + // msub + case 's': + data.processOpData.multiplydata.summand = getOperandNumber(operandList[3]); + data.processOpData.multiplydata.negProd = true; + break; + // mul + default: + data.processOpData.multiplydata.summand = ZERO_REGISTER; + data.processOpData.multiplydata.negProd = false; + break; + } + } + // arithmlogic + else { + + } + } else { instr->type = a64inst_DPIMMEDIATE; a64inst_DPImmediateData data = instr->data.DPImmediateData; From fab4047d22907f8c63c19dcb4f447617bc5284a7 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Wed, 12 Jun 2024 17:48:23 +0100 Subject: [PATCH 067/113] Update fileio, skip new lines --- src/fileio.c | 9 ++++++++- src/parser.c | 7 ++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/fileio.c b/src/fileio.c index f2f47fd..cd4fcc6 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -35,11 +35,13 @@ int countLines(char *filename) { int count = 0; char c; + char prevC = '\n'; while ((c = fgetc(file)) != EOF) { - if (c == '\n') { + if (c == '\n' && prevC != '\n') { count++; } + prevC = c; } return count; @@ -71,6 +73,11 @@ char **readAssemblyFile(char filename[], int lineCount) { exit(EXIT_FAILURE); } + if (*buffer == '\n') { + // Skip empty lines. + continue; + } + lines[currentLine] = malloc(strlen(buffer) + 1); if (lines[currentLine] == NULL) { fprintf(stderr, "Error: Could not allocate memory to store the assembly line"); diff --git a/src/parser.c b/src/parser.c index ba29ccf..39e0e3b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -213,7 +213,8 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { strcpy(stringptr, asmLine); char *opcode = strtok(stringptr, " "); - char *operands = strtok(NULL, ""); + char *operands = strtok(stringptr, ""); + if(strcmp(opcode, ".int") == 0){ //type is directive @@ -223,8 +224,8 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { //type is label //add to symbol table instr->type = a64inst_LABEL; - char *opcodeCpy = NULL; - opcodeCpy = strcpy(opcodeCpy, opcode); + char opcodeCpy[strlen(opcode)]; + strcpy(opcodeCpy, opcode); char *labelData = strtok(opcodeCpy, ":"); instr->data.LabelData.label = labelData; } else { From 51283891bdd42188bfde8ffe3b96494a630b1062 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Wed, 12 Jun 2024 18:28:49 +0100 Subject: [PATCH 068/113] DPR logic --- src/parser.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 141 insertions(+), 5 deletions(-) diff --git a/src/parser.c b/src/parser.c index 9f3cc21..27932c1 100644 --- a/src/parser.c +++ b/src/parser.c @@ -182,13 +182,14 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ else { data.regType=1; } + data.dest = getOperandNumber(operandList[0]); + data.src1 = getOperandNumber(operandList[1]); + data.src2 = getOperandNumber(operandList[2]); // multiply // mul, mneg, madd, msub if (opcode[0] == 'm') { data.DPROpType = 1; - data.dest = getOperandNumber(operandList[0]); - data.src1 = getOperandNumber(operandList[1]); - data.src2 = getOperandNumber(operandList[2]); + switch (opcode[1]) { // madd case 'a': @@ -214,9 +215,144 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } // arithmlogic else { - + data.DPROpType = 0; + data.processOpData.arithmLogicData.negShiftedSrc2 = 0; + // logical + data.processOpData.arithmLogicData.type = 0; + // three special cases + if (opcode == 'tst') { + data.dest = ZERO_REGISTER; + data.src1 = getOperandNumber(operandList[0]); + data.src2 = getOperandNumber(operandList[1]); + data.processOp = 3; + if (strlen(operandList) == 3) { + char *split[] = strtok(operandList[2], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; + } + } + } + else if (opcode == 'mvn') { + data.dest = getOperandNumber(operandList[0]); + data.src1 = ZERO_REGISTER; + data.src2 = getOperandNumber(operandList[1]); + data.processOp = 1; + data.processOpData.arithmLogicData.negShiftedSrc2 = 1; + if (strlen(operandList) == 3) { + char *split[] = strtok(operandList[2], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; + } + } + } + else if (opcode == 'mov') { + data.dest = getOperandNumber(operandList[0]); + data.src1 = ZERO_REGISTER; + data.src2 = getOperandNumber(operandList[1]); + data.processOp = 1; + } + else { + // handles shifts + if (strlen(operandList) == 4) { + char *split[] = strtok(operandList[3], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; + } + } + switch (opcode[0]) { + // and, ands + case 'a': + // ands + if (strlen(opcode) == 4) { + data.processOp = 3; + } + // and + else { + data.processOp = 0; + } + break; + // bic, bics + case 'b': + data.processOpData.arithmLogicData.negShiftedSrc2 = 1; + // bics + if (strlen(opcode) == 4) { + data.processOp = 3; + } + // bic + else { + data.processOp = 0; + } + break; + // orr, orn + case 'o': + data.processOp = 1; + // orn + if (opcode[2] == 'n') { + data.processOpData.arithmLogicData.negShiftedSrc2 = 1; + } + break; + // eor, eon + default: + data.processOp = 2; + // eon + if (opcode[2] == 'n') { + data.processOpData.arithmLogicData.negShiftedSrc2 = 1; + } + break; + } + } } - } else { instr->type = a64inst_DPIMMEDIATE; a64inst_DPImmediateData data = instr->data.DPImmediateData; From 27148301175ed20d3ab52766a2ef83169bbaffaa Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Wed, 12 Jun 2024 18:58:54 +0100 Subject: [PATCH 069/113] DPR arithmetic --- src/parser.c | 421 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 307 insertions(+), 114 deletions(-) diff --git a/src/parser.c b/src/parser.c index 27932c1..594d477 100644 --- a/src/parser.c +++ b/src/parser.c @@ -184,12 +184,11 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } data.dest = getOperandNumber(operandList[0]); data.src1 = getOperandNumber(operandList[1]); - data.src2 = getOperandNumber(operandList[2]); // multiply // mul, mneg, madd, msub if (opcode[0] == 'm') { data.DPROpType = 1; - + data.src2 = getOperandNumber(operandList[2]); switch (opcode[1]) { // madd case 'a': @@ -216,76 +215,19 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ // arithmlogic else { data.DPROpType = 0; + // overridden when neccesary data.processOpData.arithmLogicData.negShiftedSrc2 = 0; - // logical - data.processOpData.arithmLogicData.type = 0; - // three special cases - if (opcode == 'tst') { - data.dest = ZERO_REGISTER; - data.src1 = getOperandNumber(operandList[0]); - data.src2 = getOperandNumber(operandList[1]); - data.processOp = 3; - if (strlen(operandList) == 3) { - char *split[] = strtok(operandList[2], ' '); - switch (split[1][0]) { - case 'L': - // LSR - if (split[1][2] == 'R') { - data.processOpData.arithmLogicData.shiftType = 1; - } - // LSL - else { - data.processOpData.arithmLogicData.shiftType = 0; - } - break; - // ROR - case 'R': - data.processOpData.arithmLogicData.shiftType = 3; - break; - // ASR - default: - data.processOpData.arithmLogicData.shiftType = 2; - break; - } + // arithmetic + // add, adds + if (opcode[1] == 'd') { + data.processOpData.arithmLogicData.type = 1; + data.src2 = getOperandNumber(operandList[2]); + if (strlen(opcode) == 4) { + data.processOp = 1; } - } - else if (opcode == 'mvn') { - data.dest = getOperandNumber(operandList[0]); - data.src1 = ZERO_REGISTER; - data.src2 = getOperandNumber(operandList[1]); - data.processOp = 1; - data.processOpData.arithmLogicData.negShiftedSrc2 = 1; - if (strlen(operandList) == 3) { - char *split[] = strtok(operandList[2], ' '); - switch (split[1][0]) { - case 'L': - // LSR - if (split[1][2] == 'R') { - data.processOpData.arithmLogicData.shiftType = 1; - } - // LSL - else { - data.processOpData.arithmLogicData.shiftType = 0; - } - break; - // ROR - case 'R': - data.processOpData.arithmLogicData.shiftType = 3; - break; - // ASR - default: - data.processOpData.arithmLogicData.shiftType = 2; - break; - } + else { + data.processOp = 0; } - } - else if (opcode == 'mov') { - data.dest = getOperandNumber(operandList[0]); - data.src1 = ZERO_REGISTER; - data.src2 = getOperandNumber(operandList[1]); - data.processOp = 1; - } - else { // handles shifts if (strlen(operandList) == 4) { char *split[] = strtok(operandList[3], ' '); @@ -310,46 +252,278 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ break; } } - switch (opcode[0]) { - // and, ands - case 'a': - // ands - if (strlen(opcode) == 4) { - data.processOp = 3; + } + // cmn + else if (opcode == 'cmn') { + data.dest = ZERO_REGISTER; + data.src1 = getOperandNumber(operandList[0]); + data.src2 = getOperandNumber(operandList[1]); + data.processOpData.arithmLogicData.type = 1; + data.processOp = 1; + // handles shifts + if (strlen(operandList) == 3) { + char *split[] = strtok(operandList[2], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; + } + } + } + // sub, subs + else if (opcode[2] == 'b') { + data.src2 = getOperandNumber(operandList[2]); + data.processOpData.arithmLogicData.type = 1; + if (strlen(opcode) == 4) { + data.processOp = 3; + } + else { + data.processOp = 2; + } + // handles shifts + if (strlen(operandList) == 4) { + char *split[] = strtok(operandList[3], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; + } + } + } + // cmp + else if (opcode == 'cmp') { + data.dest = ZERO_REGISTER; + data.src1 = getOperandNumber(operandList[0]); + data.src2 = getOperandNumber(operandList[1]); + data.processOpData.arithmLogicData.type = 1; + data.processOp = 3; + // handles shifts + if (strlen(operandList) == 3) { + char *split[] = strtok(operandList[2], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; + } + } + } + // neg, negs + else if (opcode[0] == 'n') { + data.src1 = ZERO_REGISTER; + data.src2 = getOperandNumber(operandList[1]); + data.processOpData.arithmLogicData.type = 1; + if (strlen(opcode) == 4) { + data.processOp = 3; + } + else { + data.processOp = 2; + } + // handles shifts + if (strlen(operandList) == 3) { + char *split[] = strtok(operandList[2], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; + } + } + } + else { + // logical + data.processOpData.arithmLogicData.type = 0; + // three special cases + if (opcode == 'tst') { + data.dest = ZERO_REGISTER; + data.src1 = getOperandNumber(operandList[0]); + data.src2 = getOperandNumber(operandList[1]); + data.processOp = 3; + if (strlen(operandList) == 3) { + char *split[] = strtok(operandList[2], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; } - // and - else { - data.processOp = 0; + } + } + else if (opcode == 'mvn') { + data.dest = getOperandNumber(operandList[0]); + data.src1 = ZERO_REGISTER; + data.src2 = getOperandNumber(operandList[1]); + data.processOp = 1; + data.processOpData.arithmLogicData.negShiftedSrc2 = 1; + if (strlen(operandList) == 3) { + char *split[] = strtok(operandList[2], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; } - break; - // bic, bics - case 'b': - data.processOpData.arithmLogicData.negShiftedSrc2 = 1; - // bics - if (strlen(opcode) == 4) { - data.processOp = 3; + } + } + else if (opcode == 'mov') { + data.dest = getOperandNumber(operandList[0]); + data.src1 = ZERO_REGISTER; + data.src2 = getOperandNumber(operandList[1]); + data.processOp = 1; + } + else { + data.src2 = getOperandNumber(operandList[2]); + // handles shifts + if (strlen(operandList) == 4) { + char *split[] = strtok(operandList[3], ' '); + switch (split[1][0]) { + case 'L': + // LSR + if (split[1][2] == 'R') { + data.processOpData.arithmLogicData.shiftType = 1; + } + // LSL + else { + data.processOpData.arithmLogicData.shiftType = 0; + } + break; + // ROR + case 'R': + data.processOpData.arithmLogicData.shiftType = 3; + break; + // ASR + default: + data.processOpData.arithmLogicData.shiftType = 2; + break; } - // bic - else { - data.processOp = 0; - } - break; - // orr, orn - case 'o': - data.processOp = 1; - // orn - if (opcode[2] == 'n') { + } + switch (opcode[0]) { + // and, ands + case 'a': + // ands + if (strlen(opcode) == 4) { + data.processOp = 3; + } + // and + else { + data.processOp = 0; + } + break; + // bic, bics + case 'b': data.processOpData.arithmLogicData.negShiftedSrc2 = 1; - } - break; - // eor, eon - default: - data.processOp = 2; - // eon - if (opcode[2] == 'n') { - data.processOpData.arithmLogicData.negShiftedSrc2 = 1; - } - break; + // bics + if (strlen(opcode) == 4) { + data.processOp = 3; + } + // bic + else { + data.processOp = 0; + } + break; + // orr, orn + case 'o': + data.processOp = 1; + // orn + if (opcode[2] == 'n') { + data.processOpData.arithmLogicData.negShiftedSrc2 = 1; + } + break; + // eor, eon + default: + data.processOp = 2; + // eon + if (opcode[2] == 'n') { + data.processOpData.arithmLogicData.negShiftedSrc2 = 1; + } + break; + } } } } @@ -364,18 +538,23 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ data.regType=1; } // arithmetic + // can be overwritten later + data.processOpData.arithmData.shiftImmediate = false; // add, adds if (opcode[1] == 'd') { data.DPIOpType = 0; data.dest = getOperandNumber(operandList[0]); data.processOpData.arithmData.src = getOperandNumber(operandList[1]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); - if (opcode[-1] == 's') { - data.processOpData.arithmData.shiftImmediate = true; + if (strlen(operandList) == 4) { + if (strlen(operandList[3]) == 8) { + data.processOpData.arithmData.shiftImmediate = true; + } + } + if (strlen(opcode) == 4) { data.processOp = 1; } else { - data.processOpData.arithmData.shiftImmediate = false; data.processOp = 0; } } @@ -385,8 +564,12 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ data.dest = ZERO_REGISTER; data.processOpData.arithmData.src = getOperandNumber(operandList[0]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[1]); - data.processOpData.arithmData.shiftImmediate = true; data.processOp = 1; + if (strlen(operandList) == 3) { + if (strlen(operandList[2]) == 8) { + data.processOpData.arithmData.shiftImmediate = true; + } + } } // sub, subs else if (opcode[0] == 's') { @@ -394,12 +577,15 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ data.dest = getOperandNumber(operandList[0]); data.processOpData.arithmData.src = getOperandNumber(operandList[1]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); + if (strlen(operandList) == 4) { + if (strlen(operandList[3]) == 8) { + data.processOpData.arithmData.shiftImmediate = true; + } + } if (opcode[-1] == 's') { - data.processOpData.arithmData.shiftImmediate = true; data.processOp = 3; } else { - data.processOpData.arithmData.shiftImmediate = false; data.processOp = 2; } } @@ -409,8 +595,12 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ data.dest = ZERO_REGISTER; data.processOpData.arithmData.src = getOperandNumber(operandList[0]); data.processOpData.arithmData.immediate = getOperandNumber(operandList[1]); - data.processOpData.arithmData.shiftImmediate = true; data.processOp = 3; + if (strlen(operandList) == 3) { + if (strlen(operandList[2]) == 8) { + data.processOpData.arithmData.shiftImmediate = true; + } + } } // neg, negs else if (opcode[0] == 'n') { @@ -418,12 +608,15 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ data.dest = getOperandNumber(operandList[1]); data.processOpData.arithmData.src = ZERO_REGISTER; data.processOpData.arithmData.immediate = getOperandNumber(operandList[2]); + if (strlen(operandList) == 3) { + if (strlen(operandList[2]) == 8) { + data.processOpData.arithmData.shiftImmediate = true; + } + } if (opcode[-1] == 's') { - data.processOpData.arithmData.shiftImmediate = true; data.processOp = 3; } else { - data.processOpData.arithmData.shiftImmediate = false; data.processOp = 2; } } From 344f455be3150374e84e28d9a2fe6dd15c833a47 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 12 Jun 2024 19:25:05 +0100 Subject: [PATCH 070/113] fix loadreg struct construction --- src/parser.c | 44 ++++++++++++++++++++++++------------------- src/twopassassembly.c | 6 +++--- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/parser.c b/src/parser.c index 432e926..682373d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -28,25 +28,28 @@ int getOperandNumber(char *operand){ return number; } -int isOperandRegister(char *operand){ - return((strcmp(&(operand[0]), "x")==0) || (strcmp(&(operand[0]), "w")==0)); +int isOperandRegister(char regStartChar){ + return((regStartChar == 'x') || (regStartChar == 'w')); } //calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){ char *endptr; - uint8_t base = strtol(&(operandList[1][2]), &endptr, 10); + char baseRegParam[strlen(operandList[1])]; + strcpy(baseRegParam, operandList[1]); + char *startptr = &baseRegParam[1]; + int base = getOperandNumber(startptr); instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base; - if(strcmp(&(operandList[2][strlen(operandList[1])-1]), "!")==0){ + if(operandList[2][strlen(operandList[2])-1] == '!'){ instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); - } else if(strcmp(&(operandList[1][strlen(operandList[0])-1]), "]") == 0) { + } else if(operandList[1][strlen(operandList[1])-1] == ']') { //post-indexed instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); - } else if( (isOperandRegister(&(operandList[2][0])) == 1) - || (isOperandRegister(&(operandList[2][0])) == 1)){ + } else if( (isOperandRegister(operandList[1][0]) == 1) + || (isOperandRegister(operandList[2][0]) == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = strtol(&(operandList[2][1]), &endptr, 10); @@ -65,16 +68,16 @@ void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], in void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){ switch(instr->type){ - case a64inst_SINGLETRANSFER: - if(strcmp(&(operandList[0][0]), "x")==0){ + case a64inst_SINGLETRANSFER: { + if(operandList[0][0] == 'x'){ //x-register instr->data.SingleTransferData.regType = 1; } else { instr->data.SingleTransferData.regType = 0; } - char *endptr; - instr->data.SingleTransferData.target = strtol(&(operandList[0][0])+1, &endptr, 10); + instr->data.SingleTransferData.target = getOperandNumber(operandList[0]); break; + } case a64inst_LOADLITERAL: break; default: @@ -120,9 +123,9 @@ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *oper } int classifyDPInst(char *operandList[]){ - return(isOperandRegister(operandList[0]) && - isOperandRegister(operandList[1]) && - isOperandRegister(operandList[2])); + return(isOperandRegister(operandList[0][0]) && + isOperandRegister(operandList[1][0]) && + isOperandRegister(operandList[2][0])); } void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[], int numOperands){ @@ -144,15 +147,14 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ } } else if(isLoad == 0 || isStore == 0){ //loading/storing instruction; classify operands - char *address = operandList[1]; - if( *address == '['){ + if( operandList[1][0] == '['){ //type is register instr->type = a64inst_SINGLETRANSFER; instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; if(isLoad == 0){ - instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD; - } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE; + } else { + instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD; } } else { instr->type = a64inst_LOADLITERAL; @@ -177,7 +179,8 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){ assert(str != NULL); - char operandsDupe[strlen(str)+1]; + char *operandsDupe = malloc(strlen(str)+1); + assert(operandsDupe != NULL); strcpy(operandsDupe, str); char *operand = strtok(operandsDupe, OPERAND_DELIMITER); operands[0] = operand; @@ -193,6 +196,7 @@ void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOp //takes inputted assembly line and returns a //pointer to an abstract representation of the instruction void parser_instruction(char asmLine[], a64inst_instruction *instr) { + printf("%s", asmLine); int numOperands = 0; if (instr == NULL){ exit(EXIT_FAILURE); @@ -205,6 +209,7 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { //"opcode operand1, {operand2}, ..." //duplicated as strtok modifies the input string + char stringptr[strlen(asmLine) + 1]; strcpy(stringptr, asmLine); char *token; @@ -236,6 +241,7 @@ void parser_instruction(char asmLine[], a64inst_instruction *instr) { //categorise instruction type from opcode and operands classifyOpcode(opcode, instr, operandList, operandCount); //define struct values according to operands and type + printf("got to here"); switch(instr->type){ case a64inst_BRANCH: generateBranchOperands(instr, opcode, operandList); diff --git a/src/twopassassembly.c b/src/twopassassembly.c index b4ecdec..36942b4 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -145,11 +145,11 @@ word sts(a64inst_instruction cI) { int rt = data.target; switch (data2.addressingMode) { // register offset - case 2: - offset += 2074 + 64 * data2.a64inst_addressingModeData.offsetReg; + case a64inst_REGISTER_OFFSET: + offset += 2080 + 64 * data2.a64inst_addressingModeData.offsetReg; break; // unsigned offset - case 3: + case a64inst_UNSIGNED_OFFSET: offset += data2.a64inst_addressingModeData.unsignedOffset; u = 1; break; From c52de918db2d1f0c842055b95624dae63da4fad5 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 12 Jun 2024 20:22:08 +0100 Subject: [PATCH 071/113] build struct for loadstore instructions --- src/parser.c | 57 +++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/src/parser.c b/src/parser.c index 682373d..8696230 100644 --- a/src/parser.c +++ b/src/parser.c @@ -34,34 +34,32 @@ int isOperandRegister(char regStartChar){ //calculate offsets from string void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){ - char *endptr; char baseRegParam[strlen(operandList[1])]; strcpy(baseRegParam, operandList[1]); char *startptr = &baseRegParam[1]; int base = getOperandNumber(startptr); instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base; - if(operandList[2][strlen(operandList[2])-1] == '!'){ + if(operandList[2][strlen(operandList[2])-2] == '!'){ instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); - } else if(operandList[1][strlen(operandList[1])-1] == ']') { + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operandList[2]); + } else if(operandList[1][strlen(operandList[1])-2] == ']') { //post-indexed + char immOffset[strlen(operandList[2])+1]; + strcpy(immOffset, operandList[2]); instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = strtol(&(operandList[2][1]), &endptr, 10); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(immOffset); } else if( (isOperandRegister(operandList[1][0]) == 1) || (isOperandRegister(operandList[2][0]) == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = strtol(&(operandList[2][1]), &endptr, 10); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operandList[2]); } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; if(numOperands==3){ - int offset = strtol(&(operandList[2][1]), &endptr, 10); - if(instr->data.SingleTransferData.regType == 1){ - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; - } else { - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/4; - } + int offset = getOperandNumber(operandList[2]); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; + //NEED TO SCALE IMMEDIATE VALUE BASED ON REGISTER TYPE IN ASSEMBLER } } } @@ -79,6 +77,19 @@ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *o break; } case a64inst_LOADLITERAL: + if(operandList[0][0] == 'x') { + instr->data.SingleTransferData.regType = 1; + } else { + instr->data.SingleTransferData.regType = 0; + } + instr->data.SingleTransferData.target = getOperandNumber(operandList[0]); + if(operandList[1][0] =='#'){ + //offset is immediate + int offset = getOperandNumber(operandList[1]); + instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; + } else { + //offset is literal, use symbol table and calculate difference + } break; default: break; @@ -87,7 +98,6 @@ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *o } void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ - char *endptr; switch(instr->data.BranchData.BranchType){ case a64inst_UNCONDITIONAL: //define and sign extend immediate offset @@ -95,7 +105,7 @@ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *oper printf("unconditional"); break; case a64inst_REGISTER: - instr->data.BranchData.processOpData.registerData.src = strtol(operandList[0] + 1, &endptr, 10); + instr->data.BranchData.processOpData.registerData.src = getOperandNumber(operandList[0]); break; case a64inst_CONDITIONAL: { @@ -138,10 +148,10 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ isRegister == 0 || strncmp(opcode, "b.", 2) == 0){ instr->type = a64inst_BRANCH; - if(isUnconditional){ - instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; - } else if (isRegister){ + if(isRegister == 0){ instr->data.BranchData.BranchType = a64inst_REGISTER; + } else if (isUnconditional == 0){ + instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; } else { instr->data.BranchData.BranchType = a64inst_CONDITIONAL; } @@ -152,19 +162,12 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[ instr->type = a64inst_SINGLETRANSFER; instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; if(isLoad == 0){ - instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE; - } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD; + } else { + instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE; } } else { instr->type = a64inst_LOADLITERAL; - if(operandList[0][0] =='#'){ - //offset is immediate - int offset = getOperandNumber(operandList[0]); - instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; - } else { - //offset is literal, use symbol table and calculate difference - } } } else { @@ -190,7 +193,7 @@ void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOp operand = strtok(NULL, OPERAND_DELIMITER); operands[*(operandCount)] = operand; } - *(numOperands) = *(operandCount)+1; + *(numOperands) = *(operandCount); } //takes inputted assembly line and returns a From 6ddf18be9600484236f2e5213b31d67c4267aba6 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Wed, 12 Jun 2024 20:40:38 +0100 Subject: [PATCH 072/113] fix halt command encoding --- src/assemble.c | 2 +- src/parser.h | 2 +- src/twopassassembly.c | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/assemble.c b/src/assemble.c index ef1b06d..0a8687d 100755 --- a/src/assemble.c +++ b/src/assemble.c @@ -19,7 +19,7 @@ int main(int argc, char **argv) { // Parse the source file a64inst_instruction *instructions = parse(source, lineCount); - + // First Pass: Create the symbol table st *table = firstPass(instructions, lineCount); diff --git a/src/parser.h b/src/parser.h index 2d7f382..81885af 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,6 +1,6 @@ #include "a64instruction/a64instruction.h" #define OPERAND_DELIMITER ", " -#define HALT_ASM_CMD "and x0, x0, x0" +#define HALT_ASM_CMD "and x0, x0, x0\n" a64inst_instruction *parse(char **asmLines, int lineCount); diff --git a/src/twopassassembly.c b/src/twopassassembly.c index 36942b4..34e379d 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -4,6 +4,8 @@ #include #include +#define HALT_BINARY 2315255808 + // Generates assembled code based on the two-pass assembly method word assembleBranch(a64inst_instruction *instr) { @@ -209,7 +211,7 @@ word *secondPass(a64inst_instruction instrs[], int numInstrs, st* table) { index++; break; case a64inst_HALT: - arr[index] = 69U * (1 << 25); + arr[index] = HALT_BINARY; index++; break; case a64inst_LABEL: From 38e5cd06faed911efb6bc354fa46ff70b198a5f3 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 17:23:30 +0100 Subject: [PATCH 073/113] Add tokeniser.c to make parsing easier --- src/add_imm_sh.s | 3 ++ src/tokeniser.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 src/add_imm_sh.s create mode 100644 src/tokeniser.c diff --git a/src/add_imm_sh.s b/src/add_imm_sh.s new file mode 100644 index 0000000..8271daa --- /dev/null +++ b/src/add_imm_sh.s @@ -0,0 +1,3 @@ +add x0, x0, #1, lsl #12 + +and x0, x0, x0 diff --git a/src/tokeniser.c b/src/tokeniser.c new file mode 100644 index 0000000..1bb2fe5 --- /dev/null +++ b/src/tokeniser.c @@ -0,0 +1,103 @@ +// Tokeniser.c +#include +#include +#include +#include +#include + +#define MAX_TOKEN_COUNT 5 +#define MAX_OPERAND_COUNT 4 +#define OPERAND_DELIMITER ", " + +char **tokenise(char *line, int *numTokens) { + char **tokens = malloc(MAX_TOKEN_COUNT * sizeof(char *));\ + if (!tokens) { + fprintf(stderr, "Memory allocation failed\n"); + exit(EXIT_FAILURE); + } + + *numTokens = 0; + char *token = strtok(line, " "); + assert(token != NULL); + + tokens[(*numTokens)++] = token; + + char *operandStart = strtok(NULL, ""); + assert(operandStart != NULL); + + bool inBracket = false; + char *currentToken = operandStart; + + for (char *c = operandStart; *c != '\0'; ++c) { + if (*c == '[' || *c == '{') { + inBracket = true; + } else if (*c == ']' || *c == '}') { + inBracket = false; + } + + + if (*c == ',' && !inBracket) { + *c = '\0'; + tokens[(*numTokens)++] = currentToken; + currentToken = c + 2; // Skip ", " + } + } + + if (*currentToken != '\0') { + tokens[*numTokens] = currentToken; + + if (tokens[*numTokens][strlen(tokens[*numTokens]) - 1] == '\n') { + tokens[*numTokens][strlen(tokens[*numTokens]) - 1] = '\0'; + } + + (*numTokens)++; + } + + return tokens; +} + +char **tokeniseOperands(char *line, int *numTokens) { + char **tokens = malloc(MAX_OPERAND_COUNT * sizeof(char *)); + if (!tokens) { + fprintf(stderr, "Memory allocation failed\n"); + exit(EXIT_FAILURE); + } + + if (*line == '[') { + line++; // skip '[' + line[strlen(line) - 1] = '\0'; // remove ']' + } else if (*line == '{') { + line++; // skip '{' + line[strlen(line) - 1] = '\0'; // remove '}' + } + + *numTokens = 0; + bool inBracket = false; + char *currentToken = line; + + for (char *c = line; *c != '\0'; ++c) { + if (*c == '[' || *c == '{') { + inBracket = true; + } else if (*c == ']' || *c == '}') { + inBracket = false; + } + + if (*c == ',' && !inBracket) { + *c = '\0'; + tokens[(*numTokens)++] = currentToken; + currentToken = c + 2; // Skip ", " + } + } + + if (*currentToken != '\0') { + tokens[*numTokens] = currentToken; + + if (tokens[*numTokens][strlen(tokens[*numTokens]) - 1] == '\n') { + tokens[*numTokens][strlen(tokens[*numTokens]) - 1] = '\0'; + } + + (*numTokens)++; + } + + return tokens; +} From 995c6d02fa8e2023623bf4dd50debcd036a7775c Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 17:24:09 +0100 Subject: [PATCH 074/113] Rewrite the parser for better structure, Add DPI parsing --- src/parser.c | 421 ++++++++++++++++++++++++++++----------------------- 1 file changed, 233 insertions(+), 188 deletions(-) diff --git a/src/parser.c b/src/parser.c index 8696230..793bb68 100644 --- a/src/parser.c +++ b/src/parser.c @@ -5,18 +5,159 @@ #include #include "parser.h" #include "a64instruction/a64instruction.h" +#include "tokeniser.c" -//takes input string, read from asm file and returns -//input as an a64 instruction +/** Prototypes */ +void parse_instruction(char asmLine[], a64inst_instruction *instr); +static char *duplicateString(char *str); +void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands); +void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); +void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands); +void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); + +/** Constants */ +static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"}; +static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"}; +static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"}; +static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; +static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; + +a64inst_instruction *parse(char **asmLines, int lineCount) { + a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); + + int i = 0; + while (asmLines[i] != NULL) { + parse_instruction(asmLines[i], &instructions[i]); + i++; + } + + return instructions; +} + +static char *duplicateString(char *str) { + char *newStr = malloc(strlen(str) + 1); + strcpy(newStr, str); + return newStr; +} + +static bool isStringIn(char *str, const char *arr[], int arrSize) { + for (int i = 0; i < arrSize; i++) { + if (strcmp(str, arr[i]) == 0) { + return true; + } + } + return false; +} + +// If more than one occurance, return the last index +static int indexStringIn(char *str, const char *arr[], int arrSize) { + for (int i = arrSize - 1; i >= 0; i--) { + if (strcmp(str, arr[i]) == 0) { + return i; + } + } + return -1; +} + +int isOperandRegister(char regStartChar) { + return((regStartChar == 'x') || (regStartChar == 'w')); +} + +int classifyDPInst(char *operandList[]){ + return(isOperandRegister(operandList[1][0]) && + isOperandRegister(operandList[2][0]) && + isOperandRegister(operandList[3][0])); +} + +void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int tokensCount){ + + if (isStringIn(opcode, BRANCH_OPCODES, 9)) { + instr->type = a64inst_BRANCH; + + if (strcmp(opcode, "br") == 0) { + instr->data.BranchData.BranchType = a64inst_REGISTER; + } else if (strcmp(opcode, "b") == 0) { + instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; + } else { + instr->data.BranchData.BranchType = a64inst_CONDITIONAL; + } + + } else if (isStringIn(opcode, SINGLE_TRANSFER_OPCODES, 2)) { + instr->type = a64inst_SINGLETRANSFER; + if (*tokens[2] == '[') { + instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; + instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0; + + } else { + instr->type = a64inst_LOADLITERAL; + } + } else if (classifyDPInst(tokens)) { + instr->type = a64inst_DPREGISTER; + } else { + instr->type = a64inst_DPIMMEDIATE; + } + +} + + +void parse_instruction(char asmLine[], a64inst_instruction *instr) { + if (instr == NULL){ + exit(EXIT_FAILURE); + } + + if(strcmp(asmLine, HALT_ASM_CMD) == 0){ + instr->type = a64inst_HALT; + return; + } + + char *asmLineCopy = duplicateString(asmLine); + int tokensCount = 0; + char **tokens = tokenise(asmLineCopy, &tokensCount); + char *opcode = tokens[0]; + + if(strcmp(opcode, ".int") == 0){ + // Directive + instr->type = a64inst_DIRECTIVE; + + } else if(opcode[strlen(opcode)-1]== ':') { + // Label + instr->type = a64inst_LABEL; + opcode[strlen(opcode) - 1] = '\0'; // Remove the colon + instr->data.LabelData.label = opcode; + + } else { + // Instruction + classifyOpcode(opcode, instr, tokens, tokensCount); + + switch(instr->type){ + case a64inst_BRANCH: + parseBranch(instr, opcode, tokens); + break; + + case a64inst_SINGLETRANSFER: + parseSingleTransfer(instr, opcode, tokens, tokensCount); + calcluateAddressFormat(instr, tokens, tokensCount); + break; + case a64inst_LOADLITERAL: + parseSingleTransfer(instr, opcode, tokens, tokensCount); + break; + case a64inst_DPREGISTER: + //generate DP operands; + break; + case a64inst_DPIMMEDIATE: + parseDPImmediate(instr, tokens, tokensCount); + break; + default: + printf("Error: Invalid Instruction\n"); + break; + } + + } + + /* TODO: FREE MEMORY! */ + +} -//TODO: -// - use string matching to get opcode, and operands (DONE) -// - check operand count (DONE) -// - match opcode to a64 struct types (DONE) -// - count operands and match type/values (DONE) -// - generate final a64inst and return (TODO: DP instrs) -// - ASK ABOUT OFFSET CALCULATION -// - CREATE FUNC TO TIDY UP OPERANDS IN DP //takes inputted char array and returns the integer of the operand, skipping the first character //e.g. for a passed "R32", it skips the 'R' and returns 32 @@ -28,64 +169,61 @@ int getOperandNumber(char *operand){ return number; } -int isOperandRegister(char regStartChar){ - return((regStartChar == 'x') || (regStartChar == 'w')); -} -//calculate offsets from string -void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands){ - char baseRegParam[strlen(operandList[1])]; - strcpy(baseRegParam, operandList[1]); - char *startptr = &baseRegParam[1]; - int base = getOperandNumber(startptr); - instr->data.SingleTransferData.processOpData.singleDataTransferData.base = base; +void calcluateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) { + assert(*tokens[2] == '['); - if(operandList[2][strlen(operandList[2])-2] == '!'){ + int operandCount = 0; + char **operands = tokeniseOperands(tokens[2], &operandCount); + + int baseRegister = getOperandNumber(operands[0]); + + instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister; + + if(operands[1][strlen(operands[1])-1] == '!') { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operandList[2]); - } else if(operandList[1][strlen(operandList[1])-2] == ']') { - //post-indexed - char immOffset[strlen(operandList[2])+1]; - strcpy(immOffset, operandList[2]); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]); + + } else if(operands[1][strlen(operands[1])-1] == ']') { + // POST_INDEXED instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(immOffset); - } else if( (isOperandRegister(operandList[1][0]) == 1) - || (isOperandRegister(operandList[2][0]) == 1)){ + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]); + + } else if( (isOperandRegister(*operands[0]) == 1) + && (isOperandRegister(*operands[1]) == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operandList[2]); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); + } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; - if(numOperands==3){ - int offset = getOperandNumber(operandList[2]); + if(operandCount > 1){ + int offset = getOperandNumber(operands[1]); instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; //NEED TO SCALE IMMEDIATE VALUE BASED ON REGISTER TYPE IN ASSEMBLER } } } -void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands){ +static int parseRegisterType(char *operand) { + return operand[0] == 'x'; +} + +void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) { + switch(instr->type){ - case a64inst_SINGLETRANSFER: { - if(operandList[0][0] == 'x'){ - //x-register - instr->data.SingleTransferData.regType = 1; - } else { - instr->data.SingleTransferData.regType = 0; - } - instr->data.SingleTransferData.target = getOperandNumber(operandList[0]); + case a64inst_SINGLETRANSFER: + instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]); + instr->data.SingleTransferData.target = getOperandNumber(tokens[1]); break; - } + case a64inst_LOADLITERAL: - if(operandList[0][0] == 'x') { - instr->data.SingleTransferData.regType = 1; - } else { - instr->data.SingleTransferData.regType = 0; - } - instr->data.SingleTransferData.target = getOperandNumber(operandList[0]); - if(operandList[1][0] =='#'){ + instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]); + instr->data.SingleTransferData.target = getOperandNumber(tokens[1]); + + if(*tokens[2] =='#'){ //offset is immediate - int offset = getOperandNumber(operandList[1]); + int offset = getOperandNumber(tokens[1]); instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; } else { //offset is literal, use symbol table and calculate difference @@ -97,7 +235,7 @@ void generateLoadStoreOperands(a64inst_instruction *instr, char *opcode, char *o } } -void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *operandList[]){ +void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) { switch(instr->data.BranchData.BranchType){ case a64inst_UNCONDITIONAL: //define and sign extend immediate offset @@ -132,155 +270,62 @@ void generateBranchOperands(a64inst_instruction *instr, char* opcode, char *oper } } -int classifyDPInst(char *operandList[]){ - return(isOperandRegister(operandList[0][0]) && - isOperandRegister(operandList[1][0]) && - isOperandRegister(operandList[2][0])); -} +void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) { + a64inst_DPImmediateData *data = &inst->data.DPImmediateData; + data->dest = getOperandNumber(tokens[1]); + data->regType = parseRegisterType(tokens[1]); -void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[], int numOperands){ - int isUnconditional = strcmp(opcode, "b"); - int isRegister = strcmp(opcode, "br"); - int isLoad = strcmp(opcode, "ldr"); - int isStore = strcmp(opcode, "str"); - - if(isUnconditional == 0 || - isRegister == 0 || - strncmp(opcode, "b.", 2) == 0){ - instr->type = a64inst_BRANCH; - if(isRegister == 0){ - instr->data.BranchData.BranchType = a64inst_REGISTER; - } else if (isUnconditional == 0){ - instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; - } else { - instr->data.BranchData.BranchType = a64inst_CONDITIONAL; - } - } else if(isLoad == 0 || isStore == 0){ - //loading/storing instruction; classify operands - if( operandList[1][0] == '['){ - //type is register - instr->type = a64inst_SINGLETRANSFER; - instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; - if(isLoad == 0){ - instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_LOAD; - } else { - instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = a64inst_STORE; - } - } else { - instr->type = a64inst_LOADLITERAL; + if (isStringIn(tokens[0], WIDE_MOV_OPCODES, 3)) { + data->DPIOpType = a64inst_DPI_WIDEMOV; + data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 3); + data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]); + if (tokensCount >= 3) { + data->processOpData.wideMovData.shiftScalar = getOperandNumber(tokens[3]); } } else { - if(classifyDPInst(operandList)){ - instr->type = a64inst_DPREGISTER; - } else { - instr->type = a64inst_DPIMMEDIATE; + data->DPIOpType = a64inst_DPI_ARITHM; + data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4); + data->processOpData.arithmData.src = getOperandNumber(tokens[2]); + data->processOpData.arithmData.immediate = getOperandNumber(tokens[3]); + + if (tokensCount >= 5) { + int shiftAmount = getOperandNumber(tokens[4]); + if (shiftAmount > 0) { + data->processOpData.arithmData.shiftImmediate = true; + } } } } -void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){ - assert(str != NULL); - char *operandsDupe = malloc(strlen(str)+1); - assert(operandsDupe != NULL); - strcpy(operandsDupe, str); - char *operand = strtok(operandsDupe, OPERAND_DELIMITER); - operands[0] = operand; +void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) { + a64inst_DPRegisterData *data = &inst->data.DPRegisterData; + data->dest = getOperandNumber(tokens[1]); + data->regType = parseRegisterType(tokens[1]); + data->src1 = getOperandNumber(tokens[2]); + data->src2 = getOperandNumber(tokens[3]); - while (operand != NULL){ - *operandCount = *(operandCount)+1; - operand = strtok(NULL, OPERAND_DELIMITER); - operands[*(operandCount)] = operand; - } - *(numOperands) = *(operandCount); -} + if (isStringIn(tokens[0], MULTIPLY_OPCODES, 4)) { + // Multiply + data->DPROpType = a64inst_DPR_MULTIPLY; + if (tokensCount >= 5) { + data->processOpData.multiplydata.summand = getOperandNumber(tokens[4]); + data->processOpData.multiplydata.negProd = strcmp(tokens[4], "mneg") == 0; + } -//takes inputted assembly line and returns a -//pointer to an abstract representation of the instruction -void parser_instruction(char asmLine[], a64inst_instruction *instr) { - printf("%s", asmLine); - int numOperands = 0; - if (instr == NULL){ - exit(EXIT_FAILURE); - } - - if(strcmp(asmLine, HALT_ASM_CMD) == 0){ - instr->type = a64inst_HALT; - return; - } - - //"opcode operand1, {operand2}, ..." - //duplicated as strtok modifies the input string - - char stringptr[strlen(asmLine) + 1]; - strcpy(stringptr, asmLine); - char *token; - token = strtok(stringptr, " "); - char opcode[strlen(token)+1]; - strcpy(opcode, token); - token = strtok(NULL, ""); - char operands[strlen(token)+1]; - strcpy(operands, token); - - if(strcmp(opcode, ".int") == 0){ - //type is directive - instr->type = a64inst_DIRECTIVE; - - } else if(opcode[strlen(opcode)-1]== ':') { - //type is label - //add to symbol table - instr->type = a64inst_LABEL; - char opcodeCpy[strlen(opcode)+1]; - strcpy(opcodeCpy, opcode); - char *labelData = strtok(opcodeCpy, ":"); - instr->data.LabelData.label = labelData; } else { - //type is instruction - int operandCount = 0; - char *operandList[5]; - //generate list of operands - tokeniseOperands(operands, &operandCount, operandList, &numOperands); - //categorise instruction type from opcode and operands - classifyOpcode(opcode, instr, operandList, operandCount); - //define struct values according to operands and type - printf("got to here"); - switch(instr->type){ - case a64inst_BRANCH: - generateBranchOperands(instr, opcode, operandList); - break; - case a64inst_SINGLETRANSFER: - generateLoadStoreOperands(instr, opcode, operandList, numOperands); - calcluateAddressFormat(instr, operandList, numOperands); - break; - case a64inst_LOADLITERAL: - generateLoadStoreOperands(instr, opcode, operandList, numOperands); - break; - case a64inst_DPREGISTER: - //generate DP operands; - break; - case a64inst_DPIMMEDIATE: - //generate DP operands; - break; - default: - printf("INVALID INSTRUCTION"); - break; - } + // Arithmetic/Logic + data->DPROpType = a64inst_DPR_ARITHMLOGIC; + if (isStringIn(tokens[0], ARITHMETIC_OPCODES, 4)) { + // Arithmetic + data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4); + data->processOpData.arithmLogicData.type = 1; + + } else { + // Logic + + } } - -} - -// Takes an array of strings, each string representing an assembly instruction. -// Returns an array of a64inst_instruction pointers, each representing an instruction. -a64inst_instruction *parse(char **asmLines, int lineCount) { - a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); - - int i = 0; - while (asmLines[i] != NULL) { - parser_instruction(asmLines[i], &instructions[i]); - i++; - } - - return instructions; } From 31fa1392e1bfdae281844f20632452a9b1a47799 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 17:25:17 +0100 Subject: [PATCH 075/113] Restructring the assembling into binary, add helper funcs --- src/twopassassembly.c | 154 ++++++++++++++++++++++++------------------ 1 file changed, 88 insertions(+), 66 deletions(-) diff --git a/src/twopassassembly.c b/src/twopassassembly.c index 34e379d..e9fcf2e 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -1,3 +1,4 @@ +#include #include "global.h" #include "a64instruction/a64instruction.h" #include "symboltable.c" @@ -6,36 +7,57 @@ #define HALT_BINARY 2315255808 -// Generates assembled code based on the two-pass assembly method +// Temp helper function to print binary representation of a word +// static void printBinary(word number) { +// for (int i = 31; i >= 0; i--) { +// putchar((number & (1 << i)) ? '1' : '0'); +// } +// putchar('\n'); +// } +// write the provided value to the bits in the range [lsb, msb) {inclusive, exclusive} to the word. +// Does not modify any other bits in the word. +void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) { + // Ensure LSB and MSB are within range of word size, and in the correct order + assert(lsb < msb && msb <= 32); + + // Create a mask with 1s in the range [lsb, msb) and 0s elsewhere + word mask = 0; + for (uint8_t i = lsb; i < msb; i++) { + mask |= 1 << i; + } + + // Clear the bits in the range [lsb, msb) in the word + *wrd &= ~mask; + + // Set the bits in the range [lsb, msb) to the value + *wrd |= (value << lsb) & mask; +} + + +// Generates assembled code based on the two-pass assembly method word assembleBranch(a64inst_instruction *instr) { - word binInstr = 0; - binInstr += (5 << 28); // 101 start of branch instr + word wrd = 0; + switch (instr->data.BranchData.BranchType) { case a64inst_UNCONDITIONAL: - // 000101 - // 25-0: sign extended simm26 - binInstr += instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset; + setBits(&wrd, 26, 30, 0x5); + setBits(&wrd, 25, 0, instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset); break; + case a64inst_REGISTER: - // 10000 - // 11111 - // 000000 - // 9-5: address from register - // 0000 - binInstr += ((instr->data.BranchData.processOpData.registerData.src) << 5); + setBits(&wrd, 16, 32, 0xD61F); + setBits(&wrd, 5, 10, instr->data.BranchData.processOpData.registerData.src); break; + case a64inst_CONDITIONAL: - // 01010100 - // 25-5: sign extended offset - // 4-0: 0{condition} - binInstr += ((instr->data.BranchData.processOpData.conditionalData.offset) << 5); - binInstr += instr->data.BranchData.processOpData.conditionalData.cond; - break; - default: + setBits(&wrd, 26, 32, 0x15); + setBits(&wrd, 5, 24, instr->data.BranchData.processOpData.conditionalData.offset); + setBits(&wrd, 0, 4, instr->data.BranchData.processOpData.conditionalData.cond); break; } - return binInstr; + + return wrd; } st* firstPass(a64inst_instruction instrs[], int numInstrs) { @@ -53,32 +75,30 @@ st* firstPass(a64inst_instruction instrs[], int numInstrs) { } word dpi(a64inst_instruction cI) { - word out = 0; + word wrd = 0; + a64inst_DPImmediateData data = cI.data.DPImmediateData; - // sf - out += data.regType * (1 << 31); - out += data.processOp * (1 << 29); - out += 1 << 28; - // if arithmetic + + setBits(&wrd, 31, 32, data.regType); // sf + setBits(&wrd, 29, 31, data.processOp); // opc + setBits(&wrd, 28, 29, 0x1); // constant value + setBits(&wrd, 0, 5, data.dest); // rd + if (data.DPIOpType == a64inst_DPI_ARITHM) { - out += 1 << 24; - // shift - if (data.processOpData.arithmData.shiftImmediate) { - out += 1 << 22; - } - out += data.processOpData.arithmData.immediate * (1 << 10); - out += data.processOpData.arithmData.src * (1 << 5); + setBits(&wrd, 23, 26, 0x2); //opi + setBits(&wrd, 5, 10, data.processOpData.arithmData.src); // rn + setBits(&wrd, 22, 23, data.processOpData.arithmData.shiftImmediate); // sh + setBits(&wrd, 10, 22, data.processOpData.arithmData.immediate); // imm12 } // if wide move else { - out += 5 * (1 << 23); - // hw - out += data.processOpData.wideMovData.shiftScalar * (1 << 21); - out += data.processOpData.wideMovData.immediate * (1 << 5); + setBits(&wrd, 23, 26, 0x5); //opi + // TODO: Check the following line, is it shiftScalar?: + setBits(&wrd, 21, 23, data.processOpData.wideMovData.shiftScalar); // hw + setBits(&wrd, 5, 21, data.processOpData.wideMovData.immediate); // imm16 } - // destination register - out += data.dest; - return out; + + return wrd; } word dpr(a64inst_instruction cI) { @@ -135,49 +155,51 @@ word dpr(a64inst_instruction cI) { } word sts(a64inst_instruction cI) { + word wrd = 0; + a64inst_SingleTransferData data = cI.data.SingleTransferData; - word out = 0; a64inst_SingleDataTransferData data2 = data.processOpData.singleDataTransferData; - // this deals with every bit in the 31-23 range apart from sf and U - out += (512 + 128 + 64 + 32U) * (1 << 23); - int sf = data.regType; - int u = 0; - int offset = 0; - int xn = data2.base; - int rt = data.target; + + setBits(&wrd, 22, 32, 0x2E0); + setBits(&wrd, 30, 31, data.regType); + setBits(&wrd, 24, 25, data2.addressingMode == a64inst_UNSIGNED_OFFSET); + setBits(&wrd, 22, 23, data2.transferType); + setBits(&wrd, 5, 10, data2.base); + setBits(&wrd, 0, 5, data.target); + switch (data2.addressingMode) { // register offset case a64inst_REGISTER_OFFSET: - offset += 2080 + 64 * data2.a64inst_addressingModeData.offsetReg; + setBits(&wrd, 21, 22, 1); + setBits(&wrd, 10, 16, 0x1A); + setBits(&wrd, 16, 21, data2.a64inst_addressingModeData.offsetReg); break; // unsigned offset case a64inst_UNSIGNED_OFFSET: - offset += data2.a64inst_addressingModeData.unsignedOffset; - u = 1; + setBits(&wrd, 10, 22, data2.a64inst_addressingModeData.unsignedOffset); break; // pre/post indexed default: - offset = 1 + data2.addressingMode * 2 + data2.a64inst_addressingModeData.indexedOffset * 4; + setBits(&wrd, 21, 22, 0); + setBits(&wrd, 11, 12, data2.addressingMode == a64inst_PRE_INDEXED); + setBits(&wrd, 10, 11, 1); + setBits(&wrd, 12, 21, data2.a64inst_addressingModeData.indexedOffset); break; } - out += sf * (1 << 30); - out += u * (1 << 22); - out += offset * 1024; - out += xn * 32; - out += rt; - return out; + + return wrd; } word ldl(a64inst_instruction cI) { - word out = 3 * (1 << 27); + word wrd = 0; + a64inst_SingleTransferData data = cI.data.SingleTransferData; - int sf = data.regType; - int simm19 = data.processOpData.loadLiteralData.offset; - int rt = data.target; - out += sf * (1 << 30); - out += simm19 * 32; - out += rt; - return out; + setBits(&wrd, 24, 32, 0x18); + setBits(&wrd, 30, 31, data.regType); + setBits(&wrd, 5, 24, data.processOpData.loadLiteralData.offset); + setBits(&wrd, 0, 5, data.target); + + return wrd; } word *secondPass(a64inst_instruction instrs[], int numInstrs, st* table) { From 873c0b60cb3039c6964bb64f848d83411e3ccd2a Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 13 Jun 2024 18:37:36 +0100 Subject: [PATCH 076/113] add hex number handling to getoperandnumebrs --- src/parser.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 793bb68..74ac067 100644 --- a/src/parser.c +++ b/src/parser.c @@ -21,6 +21,7 @@ static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"}; static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"}; static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; +static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"}; a64inst_instruction *parse(char **asmLines, int lineCount) { a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); @@ -165,7 +166,14 @@ int getOperandNumber(char *operand){ char operandCpy[strlen(operand)]; strcpy(operandCpy, operand+1); char **endptr = NULL; - int number = strtol(operandCpy, endptr, 10); + int number; + if(strncmp(operandCpy, "0x", 2)==0){ + //hex value + strcpy(operandCpy, operand+3); + number = strtol(operandCpy, endptr, 16); + } else { + number = strtol(operandCpy, endptr, 10); + } return number; } From cea959062190be3e6eb6697b19db6ab548a3ae9a Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 18:40:17 +0100 Subject: [PATCH 077/113] Update Encoding DP Register for readability --- src/twopassassembly.c | 78 +++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 48 deletions(-) diff --git a/src/twopassassembly.c b/src/twopassassembly.c index e9fcf2e..0a03c7e 100644 --- a/src/twopassassembly.c +++ b/src/twopassassembly.c @@ -102,56 +102,38 @@ word dpi(a64inst_instruction cI) { } word dpr(a64inst_instruction cI) { - word out = 0; + word wrd = 0; + a64inst_DPRegisterData data = cI.data.DPRegisterData; - // sf - int sf = data.regType; - // bits 27-25 - out += 5 * (1 << 25); - int m = data.DPROpType; - int opc = 0; - int opr = 0; - int rm = 0; - int operand = 0; - int rn = 0; - int rd = 0; - // multiply - if (m == 1) { - // opc = 0; - opr = 8; - if (data.processOpData.multiplydata.negProd) { - operand += 32; - } - operand += data.processOpData.multiplydata.summand; + setBits(&wrd, 31, 32, data.regType); // sf + setBits(&wrd, 29, 31, data.processOp); // opc + setBits(&wrd, 28, 28, data.DPROpType); // M + setBits(&wrd, 25 ,28, 0x5); + setBits(&wrd, 16, 21, data.src2); // src2 + setBits(&wrd, 5, 10, data.src1); // src1 + setBits(&wrd, 0, 5, data.dest); // src2 + + if (data.DPROpType == a64inst_DPR_MULTIPLY) { + setBits(&wrd, 21, 31, 0xD8); + setBits(&wrd, 15, 16, data.processOpData.multiplydata.negProd); + setBits(&wrd, 10, 15, data.processOpData.multiplydata.summand); + + } else { + // Arithmetic Logic Instruction + setBits(&wrd, 22, 24, data.processOpData.arithmLogicData.shiftType); + setBits(&wrd, 10, 16, data.processOpData.arithmLogicData.shiftAmount); + + if (data.processOpData.arithmLogicData.type == a64inst_DPR_ARITHM) { + // Arithmetic + setBits(&wrd, 24, 25, 0x1); // bit 24 + } else { + setBits(&wrd, 21, 22, data.processOpData.arithmLogicData.negShiftedSrc2); + } + } - // arithmetic and logical - else { - // shift - opr += 2 * data.processOpData.arithmLogicData.shiftType; - // arithmetic - if (data.processOpData.arithmLogicData.type == 1) { - opr += 8; - } - // logical - else { - if (data.processOpData.arithmLogicData.negShiftedSrc2) { - opr += 1; - } - } - operand += data.processOpData.arithmLogicData.shiftAmount; - } - rm += data.src1; - rn += data.src2; - rd += data.dest; - out += sf * (1 << 31); - out += opc * (1 << 29); - out += m * (1 << 28); - out += opr * (1 << 21); - out += rm * (1 << 16); - out += operand * 1024; - out += rn * 32; - out += rd; - return out; + + return wrd; + } word sts(a64inst_instruction cI) { From 664f8e64786998c0dbc60c764b893083043873af Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 13 Jun 2024 19:01:39 +0100 Subject: [PATCH 078/113] build struct for DPRegister arithmetic --- src/parser.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/parser.c b/src/parser.c index 74ac067..c332bd6 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,6 +14,7 @@ void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operand void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands); void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); +void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount); /** Constants */ static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"}; @@ -144,6 +145,7 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { break; case a64inst_DPREGISTER: //generate DP operands; + parseDPRegister(instr, tokens, tokensCount); break; case a64inst_DPIMMEDIATE: parseDPImmediate(instr, tokens, tokensCount); @@ -330,6 +332,13 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) // Arithmetic data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4); data->processOpData.arithmLogicData.type = 1; + if(tokensCount == 5) { + //has a shift + int numTokens = 0; + char **shiftOperands = tokenise(tokens[4], &numTokens); + data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); + data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]); + } } else { // Logic From 38951db9c83eef0765c0691eaf880eb5189fd371 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 19:04:49 +0100 Subject: [PATCH 079/113] Restructure encode into a separate module --- src/assemble.c | 6 +- src/encode.c | 224 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 227 insertions(+), 3 deletions(-) mode change 100755 => 100644 src/assemble.c create mode 100644 src/encode.c diff --git a/src/assemble.c b/src/assemble.c old mode 100755 new mode 100644 index 0a8687d..d0321a4 --- a/src/assemble.c +++ b/src/assemble.c @@ -23,11 +23,11 @@ int main(int argc, char **argv) { // First Pass: Create the symbol table st *table = firstPass(instructions, lineCount); - // Second Pass: Assemble the instructions - word *binary = secondPass(instructions, lineCount, table); // 1000 is just a temp fix. + // Second Pass: Encode the instructions into binary + word *binary = encode(instructions, lineCount, table); // Write the binary to the output file - writeBinaryFile(binary, argv[2], lineCount); // 1000 is just a temp fix. + writeBinaryFile(binary, argv[2], lineCount); /* TODO: FREE MEMORY!! */ diff --git a/src/encode.c b/src/encode.c new file mode 100644 index 0000000..8fc716d --- /dev/null +++ b/src/encode.c @@ -0,0 +1,224 @@ +#include +#include "global.h" +#include "a64instruction/a64instruction.h" +#include "symboltable.c" +#include +#include + +#define HALT_BINARY 2315255808 + +// write the provided value to the bits in the range [lsb, msb) {inclusive, exclusive} to the word. +// Does not modify any other bits in the word. +void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) { + // Ensure LSB and MSB are within range of word size, and in the correct order + assert(lsb < msb && msb <= 32); + + // Create a mask with 1s in the range [lsb, msb) and 0s elsewhere + word mask = 0; + for (uint8_t i = lsb; i < msb; i++) { + mask |= 1 << i; + } + + // Clear the bits in the range [lsb, msb) in the word + *wrd &= ~mask; + + // Set the bits in the range [lsb, msb) to the value + *wrd |= (value << lsb) & mask; +} + + +// Generates assembled code based on the two-pass assembly method +word encodeBranch(a64inst_instruction *instr) { + word wrd = 0; + + switch (instr->data.BranchData.BranchType) { + case a64inst_UNCONDITIONAL: + setBits(&wrd, 26, 30, 0x5); + setBits(&wrd, 25, 0, instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset); + break; + + case a64inst_REGISTER: + setBits(&wrd, 16, 32, 0xD61F); + setBits(&wrd, 5, 10, instr->data.BranchData.processOpData.registerData.src); + break; + + case a64inst_CONDITIONAL: + setBits(&wrd, 26, 32, 0x15); + setBits(&wrd, 5, 24, instr->data.BranchData.processOpData.conditionalData.offset); + setBits(&wrd, 0, 4, instr->data.BranchData.processOpData.conditionalData.cond); + break; + } + + return wrd; +} + +st* firstPass(a64inst_instruction instrs[], int numInstrs) { + // TODO: + // -iterate over instructions, adding to symbol table + // create symbol table and map labels to addresses/lines + st *table = (st*)malloc(sizeof(st)); + for (int i = 0; i < numInstrs; i++) { + // discuss defining a LABEL type + if (instrs[i].type == a64inst_LABEL) { + st_add(*table, &(instrs[i].data.LabelData.label), &i); + } + } + return table; +} + +word encodeDPImmediate(a64inst_instruction inst) { + word wrd = 0; + + a64inst_DPImmediateData data = inst.data.DPImmediateData; + + setBits(&wrd, 31, 32, data.regType); // sf + setBits(&wrd, 29, 31, data.processOp); // opc + setBits(&wrd, 28, 29, 0x1); // constant value + setBits(&wrd, 0, 5, data.dest); // rd + + if (data.DPIOpType == a64inst_DPI_ARITHM) { + setBits(&wrd, 23, 26, 0x2); //opi + setBits(&wrd, 5, 10, data.processOpData.arithmData.src); // rn + setBits(&wrd, 22, 23, data.processOpData.arithmData.shiftImmediate); // sh + setBits(&wrd, 10, 22, data.processOpData.arithmData.immediate); // imm12 + } + // if wide move + else { + setBits(&wrd, 23, 26, 0x5); //opi + // TODO: Check the following line, is it shiftScalar?: + setBits(&wrd, 21, 23, data.processOpData.wideMovData.shiftScalar); // hw + setBits(&wrd, 5, 21, data.processOpData.wideMovData.immediate); // imm16 + } + + return wrd; +} + +word encodeDPRegister(a64inst_instruction inst) { + word wrd = 0; + + a64inst_DPRegisterData data = inst.data.DPRegisterData; + setBits(&wrd, 31, 32, data.regType); // sf + setBits(&wrd, 29, 31, data.processOp); // opc + setBits(&wrd, 28, 28, data.DPROpType); // M + setBits(&wrd, 25 ,28, 0x5); + setBits(&wrd, 16, 21, data.src2); // src2 + setBits(&wrd, 5, 10, data.src1); // src1 + setBits(&wrd, 0, 5, data.dest); // src2 + + if (data.DPROpType == a64inst_DPR_MULTIPLY) { + setBits(&wrd, 21, 31, 0xD8); + setBits(&wrd, 15, 16, data.processOpData.multiplydata.negProd); + setBits(&wrd, 10, 15, data.processOpData.multiplydata.summand); + + } else { + // Arithmetic Logic Instruction + setBits(&wrd, 22, 24, data.processOpData.arithmLogicData.shiftType); + setBits(&wrd, 10, 16, data.processOpData.arithmLogicData.shiftAmount); + + if (data.processOpData.arithmLogicData.type == a64inst_DPR_ARITHM) { + // Arithmetic + setBits(&wrd, 24, 25, 0x1); // bit 24 + } else { + setBits(&wrd, 21, 22, data.processOpData.arithmLogicData.negShiftedSrc2); + } + + } + + return wrd; + +} + +word encodeSingleDataTransfer(a64inst_instruction inst) { + word wrd = 0; + + a64inst_SingleTransferData data = inst.data.SingleTransferData; + a64inst_SingleDataTransferData data2 = data.processOpData.singleDataTransferData; + + setBits(&wrd, 22, 32, 0x2E0); + setBits(&wrd, 30, 31, data.regType); + setBits(&wrd, 24, 25, data2.addressingMode == a64inst_UNSIGNED_OFFSET); + setBits(&wrd, 22, 23, data2.transferType); + setBits(&wrd, 5, 10, data2.base); + setBits(&wrd, 0, 5, data.target); + + switch (data2.addressingMode) { + // register offset + case a64inst_REGISTER_OFFSET: + setBits(&wrd, 21, 22, 1); + setBits(&wrd, 10, 16, 0x1A); + setBits(&wrd, 16, 21, data2.a64inst_addressingModeData.offsetReg); + break; + // unsigned offset + case a64inst_UNSIGNED_OFFSET: + setBits(&wrd, 10, 22, data2.a64inst_addressingModeData.unsignedOffset); + break; + // pre/post indexed + default: + setBits(&wrd, 21, 22, 0); + setBits(&wrd, 11, 12, data2.addressingMode == a64inst_PRE_INDEXED); + setBits(&wrd, 10, 11, 1); + setBits(&wrd, 12, 21, data2.a64inst_addressingModeData.indexedOffset); + break; + } + + return wrd; +} + +word encodeLoadLiteral(a64inst_instruction cI) { + word wrd = 0; + + a64inst_SingleTransferData data = cI.data.SingleTransferData; + setBits(&wrd, 24, 32, 0x18); + setBits(&wrd, 30, 31, data.regType); + setBits(&wrd, 5, 24, data.processOpData.loadLiteralData.offset); + setBits(&wrd, 0, 5, data.target); + + return wrd; +} + +word *encode(a64inst_instruction insts[], int instCount, st* table) { + // TODO: + // iterate over instructions again, this time replacing labels + // with values from symbol table + // after a line has had all the values replaced, assemble it and append + word *arr = (word*)malloc(sizeof(word) * instCount); + int index = 0; + for (int i = 0; i < instCount; i++) { + a64inst_instruction inst = insts[i]; + switch (inst.type) { + case a64inst_DPIMMEDIATE: + arr[index] = encodeDPImmediate(inst); + index++; + break; + case a64inst_DPREGISTER: + arr[index] = encodeDPRegister(inst); + index++; + break; + case a64inst_SINGLETRANSFER: + arr[index] = encodeSingleDataTransfer(inst); + index++; + break; + case a64inst_LOADLITERAL: + arr[index] = encodeLoadLiteral(inst); + index++; + break; + case a64inst_DIRECTIVE: + arr[index] = inst.data.DirectiveData.value; + index++; + break; + case a64inst_HALT: + arr[index] = HALT_BINARY; + index++; + break; + case a64inst_LABEL: + // Labels are handled in the first pass and used for addressing. + break; + case a64inst_BRANCH: + arr[index] = encodeBranch(&inst); + index++; + default: + break; + } + } + return arr; +} From ba41986b7b79e8ad3d683573107b04a2f2e91109 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 19:06:05 +0100 Subject: [PATCH 080/113] Fix Bug in the naming of the file --- src/assemble.c | 2 +- src/twopassassembly.c | 232 ------------------------------------------ 2 files changed, 1 insertion(+), 233 deletions(-) delete mode 100644 src/twopassassembly.c diff --git a/src/assemble.c b/src/assemble.c index d0321a4..42302b3 100644 --- a/src/assemble.c +++ b/src/assemble.c @@ -4,7 +4,7 @@ #include "parser.h" #include "fileio.h" #include "parser.h" -#include "twopassassembly.c" +#include "encode.c" int main(int argc, char **argv) { // Check the arguments diff --git a/src/twopassassembly.c b/src/twopassassembly.c deleted file mode 100644 index 0a03c7e..0000000 --- a/src/twopassassembly.c +++ /dev/null @@ -1,232 +0,0 @@ -#include -#include "global.h" -#include "a64instruction/a64instruction.h" -#include "symboltable.c" -#include -#include - -#define HALT_BINARY 2315255808 - -// Temp helper function to print binary representation of a word -// static void printBinary(word number) { -// for (int i = 31; i >= 0; i--) { -// putchar((number & (1 << i)) ? '1' : '0'); -// } -// putchar('\n'); -// } - -// write the provided value to the bits in the range [lsb, msb) {inclusive, exclusive} to the word. -// Does not modify any other bits in the word. -void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) { - // Ensure LSB and MSB are within range of word size, and in the correct order - assert(lsb < msb && msb <= 32); - - // Create a mask with 1s in the range [lsb, msb) and 0s elsewhere - word mask = 0; - for (uint8_t i = lsb; i < msb; i++) { - mask |= 1 << i; - } - - // Clear the bits in the range [lsb, msb) in the word - *wrd &= ~mask; - - // Set the bits in the range [lsb, msb) to the value - *wrd |= (value << lsb) & mask; -} - - -// Generates assembled code based on the two-pass assembly method -word assembleBranch(a64inst_instruction *instr) { - word wrd = 0; - - switch (instr->data.BranchData.BranchType) { - case a64inst_UNCONDITIONAL: - setBits(&wrd, 26, 30, 0x5); - setBits(&wrd, 25, 0, instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset); - break; - - case a64inst_REGISTER: - setBits(&wrd, 16, 32, 0xD61F); - setBits(&wrd, 5, 10, instr->data.BranchData.processOpData.registerData.src); - break; - - case a64inst_CONDITIONAL: - setBits(&wrd, 26, 32, 0x15); - setBits(&wrd, 5, 24, instr->data.BranchData.processOpData.conditionalData.offset); - setBits(&wrd, 0, 4, instr->data.BranchData.processOpData.conditionalData.cond); - break; - } - - return wrd; -} - -st* firstPass(a64inst_instruction instrs[], int numInstrs) { - // TODO: - // -iterate over instructions, adding to symbol table - // create symbol table and map labels to addresses/lines - st *table = (st*)malloc(sizeof(st)); - for (int i = 0; i < numInstrs; i++) { - // discuss defining a LABEL type - if (instrs[i].type == a64inst_LABEL) { - st_add(*table, &(instrs[i].data.LabelData.label), &i); - } - } - return table; -} - -word dpi(a64inst_instruction cI) { - word wrd = 0; - - a64inst_DPImmediateData data = cI.data.DPImmediateData; - - setBits(&wrd, 31, 32, data.regType); // sf - setBits(&wrd, 29, 31, data.processOp); // opc - setBits(&wrd, 28, 29, 0x1); // constant value - setBits(&wrd, 0, 5, data.dest); // rd - - if (data.DPIOpType == a64inst_DPI_ARITHM) { - setBits(&wrd, 23, 26, 0x2); //opi - setBits(&wrd, 5, 10, data.processOpData.arithmData.src); // rn - setBits(&wrd, 22, 23, data.processOpData.arithmData.shiftImmediate); // sh - setBits(&wrd, 10, 22, data.processOpData.arithmData.immediate); // imm12 - } - // if wide move - else { - setBits(&wrd, 23, 26, 0x5); //opi - // TODO: Check the following line, is it shiftScalar?: - setBits(&wrd, 21, 23, data.processOpData.wideMovData.shiftScalar); // hw - setBits(&wrd, 5, 21, data.processOpData.wideMovData.immediate); // imm16 - } - - return wrd; -} - -word dpr(a64inst_instruction cI) { - word wrd = 0; - - a64inst_DPRegisterData data = cI.data.DPRegisterData; - setBits(&wrd, 31, 32, data.regType); // sf - setBits(&wrd, 29, 31, data.processOp); // opc - setBits(&wrd, 28, 28, data.DPROpType); // M - setBits(&wrd, 25 ,28, 0x5); - setBits(&wrd, 16, 21, data.src2); // src2 - setBits(&wrd, 5, 10, data.src1); // src1 - setBits(&wrd, 0, 5, data.dest); // src2 - - if (data.DPROpType == a64inst_DPR_MULTIPLY) { - setBits(&wrd, 21, 31, 0xD8); - setBits(&wrd, 15, 16, data.processOpData.multiplydata.negProd); - setBits(&wrd, 10, 15, data.processOpData.multiplydata.summand); - - } else { - // Arithmetic Logic Instruction - setBits(&wrd, 22, 24, data.processOpData.arithmLogicData.shiftType); - setBits(&wrd, 10, 16, data.processOpData.arithmLogicData.shiftAmount); - - if (data.processOpData.arithmLogicData.type == a64inst_DPR_ARITHM) { - // Arithmetic - setBits(&wrd, 24, 25, 0x1); // bit 24 - } else { - setBits(&wrd, 21, 22, data.processOpData.arithmLogicData.negShiftedSrc2); - } - - } - - return wrd; - -} - -word sts(a64inst_instruction cI) { - word wrd = 0; - - a64inst_SingleTransferData data = cI.data.SingleTransferData; - a64inst_SingleDataTransferData data2 = data.processOpData.singleDataTransferData; - - setBits(&wrd, 22, 32, 0x2E0); - setBits(&wrd, 30, 31, data.regType); - setBits(&wrd, 24, 25, data2.addressingMode == a64inst_UNSIGNED_OFFSET); - setBits(&wrd, 22, 23, data2.transferType); - setBits(&wrd, 5, 10, data2.base); - setBits(&wrd, 0, 5, data.target); - - switch (data2.addressingMode) { - // register offset - case a64inst_REGISTER_OFFSET: - setBits(&wrd, 21, 22, 1); - setBits(&wrd, 10, 16, 0x1A); - setBits(&wrd, 16, 21, data2.a64inst_addressingModeData.offsetReg); - break; - // unsigned offset - case a64inst_UNSIGNED_OFFSET: - setBits(&wrd, 10, 22, data2.a64inst_addressingModeData.unsignedOffset); - break; - // pre/post indexed - default: - setBits(&wrd, 21, 22, 0); - setBits(&wrd, 11, 12, data2.addressingMode == a64inst_PRE_INDEXED); - setBits(&wrd, 10, 11, 1); - setBits(&wrd, 12, 21, data2.a64inst_addressingModeData.indexedOffset); - break; - } - - return wrd; -} - -word ldl(a64inst_instruction cI) { - word wrd = 0; - - a64inst_SingleTransferData data = cI.data.SingleTransferData; - setBits(&wrd, 24, 32, 0x18); - setBits(&wrd, 30, 31, data.regType); - setBits(&wrd, 5, 24, data.processOpData.loadLiteralData.offset); - setBits(&wrd, 0, 5, data.target); - - return wrd; -} - -word *secondPass(a64inst_instruction instrs[], int numInstrs, st* table) { - // TODO: - // iterate over instructions again, this time replacing labels - // with values from symbol table - // after a line has had all the values replaced, assemble it and append - word *arr = (word*)malloc(sizeof(word) * numInstrs); - int index = 0; - for (int i = 0; i < numInstrs; i++) { - a64inst_instruction cI = instrs[i]; - switch (cI.type) { - case a64inst_DPIMMEDIATE: - arr[index] = dpi(cI); - index++; - break; - case a64inst_DPREGISTER: - arr[index] = dpr(cI); - index++; - break; - case a64inst_SINGLETRANSFER: - arr[index] = sts(cI); - index++; - break; - case a64inst_LOADLITERAL: - arr[index] = ldl(cI); - index++; - break; - case a64inst_DIRECTIVE: - arr[index] = cI.data.DirectiveData.value; - index++; - break; - case a64inst_HALT: - arr[index] = HALT_BINARY; - index++; - break; - case a64inst_LABEL: - // Labels are handled in the first pass and used for addressing. - break; - case a64inst_BRANCH: - arr[index] = assembleBranch(&cI); - index++; - default: - break; - } - } - return arr; -} From ded57032cec5556e3be588f09caacc27e55930e2 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 19:08:25 +0100 Subject: [PATCH 081/113] Update encode for a small bug in the numbering --- src/add_imm_sh.s | 3 --- src/encode.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 src/add_imm_sh.s diff --git a/src/add_imm_sh.s b/src/add_imm_sh.s deleted file mode 100644 index 8271daa..0000000 --- a/src/add_imm_sh.s +++ /dev/null @@ -1,3 +0,0 @@ -add x0, x0, #1, lsl #12 - -and x0, x0, x0 diff --git a/src/encode.c b/src/encode.c index 8fc716d..1ab0d6a 100644 --- a/src/encode.c +++ b/src/encode.c @@ -99,7 +99,7 @@ word encodeDPRegister(a64inst_instruction inst) { a64inst_DPRegisterData data = inst.data.DPRegisterData; setBits(&wrd, 31, 32, data.regType); // sf setBits(&wrd, 29, 31, data.processOp); // opc - setBits(&wrd, 28, 28, data.DPROpType); // M + setBits(&wrd, 28, 29, data.DPROpType); // M setBits(&wrd, 25 ,28, 0x5); setBits(&wrd, 16, 21, data.src2); // src2 setBits(&wrd, 5, 10, data.src1); // src1 From fb15dc90c65cfe80f8f6d9731508c7d303dc8d9c Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 19:20:02 +0100 Subject: [PATCH 082/113] Fix Bug in Parser for an error checking --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index c332bd6..33c7685 100644 --- a/src/parser.c +++ b/src/parser.c @@ -289,7 +289,7 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount data->DPIOpType = a64inst_DPI_WIDEMOV; data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 3); data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]); - if (tokensCount >= 3) { + if (tokensCount >= 4) { data->processOpData.wideMovData.shiftScalar = getOperandNumber(tokens[3]); } From 16fb04b7377d7ffd64497f76a576bdfad544cc44 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 13 Jun 2024 19:25:22 +0100 Subject: [PATCH 083/113] fix tokenising of dpimmediate shifts --- src/parser.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 33c7685..c775646 100644 --- a/src/parser.c +++ b/src/parser.c @@ -300,7 +300,9 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount data->processOpData.arithmData.immediate = getOperandNumber(tokens[3]); if (tokensCount >= 5) { - int shiftAmount = getOperandNumber(tokens[4]); + int numTokens = 0; + char **shiftOperands = tokenise(tokens[4], &numTokens); + int shiftAmount = getOperandNumber(shiftOperands[1]); if (shiftAmount > 0) { data->processOpData.arithmData.shiftImmediate = true; } From ed6ff9ba920485bee93d89d5705af6e7e4aae2b4 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 19:26:40 +0100 Subject: [PATCH 084/113] Update the tokenizer to skip whitespaces --- src/tokeniser.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/tokeniser.c b/src/tokeniser.c index 1bb2fe5..7cec970 100644 --- a/src/tokeniser.c +++ b/src/tokeniser.c @@ -39,7 +39,10 @@ char **tokenise(char *line, int *numTokens) { if (*c == ',' && !inBracket) { *c = '\0'; tokens[(*numTokens)++] = currentToken; - currentToken = c + 2; // Skip ", " + currentToken = c + 1; + while (*currentToken == ' ') { + currentToken++; + } } } @@ -85,7 +88,10 @@ char **tokeniseOperands(char *line, int *numTokens) { if (*c == ',' && !inBracket) { *c = '\0'; tokens[(*numTokens)++] = currentToken; - currentToken = c + 2; // Skip ", " + currentToken = c + 1; + while (*currentToken == ' ') { + currentToken++; + } } } From 9a92efc3f34570be7d91ee6e25823d0a02acff6b Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Thu, 13 Jun 2024 19:52:53 +0100 Subject: [PATCH 085/113] changes to wide move --- src/parser.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index c775646..e358cb8 100644 --- a/src/parser.c +++ b/src/parser.c @@ -287,10 +287,15 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount if (isStringIn(tokens[0], WIDE_MOV_OPCODES, 3)) { data->DPIOpType = a64inst_DPI_WIDEMOV; - data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 3); + data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 4); data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]); if (tokensCount >= 4) { - data->processOpData.wideMovData.shiftScalar = getOperandNumber(tokens[3]); + int numTokens = 0; + char **shiftOperands = tokenise(tokens[3], &numTokens); + int shiftAmount = getOperandNumber(shiftOperands[1]); + if (shiftAmount > 0) { + data->processOpData.wideMovData.shiftScalar = 12; + } } } else { From 228bfec612b2f4202633807dcb88a9267da4d187 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Thu, 13 Jun 2024 19:57:26 +0100 Subject: [PATCH 086/113] more changes to wide moves --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index e358cb8..6083cef 100644 --- a/src/parser.c +++ b/src/parser.c @@ -285,7 +285,7 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount data->dest = getOperandNumber(tokens[1]); data->regType = parseRegisterType(tokens[1]); - if (isStringIn(tokens[0], WIDE_MOV_OPCODES, 3)) { + if (isStringIn(tokens[0], WIDE_MOV_OPCODES, 4)) { data->DPIOpType = a64inst_DPI_WIDEMOV; data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 4); data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]); From 6bd993b29e3ca614f82a6d3e1a6be202dc69421f Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 13 Jun 2024 20:00:55 +0100 Subject: [PATCH 087/113] fix dpregister to construct IR --- src/parser.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 6083cef..16fbdc1 100644 --- a/src/parser.c +++ b/src/parser.c @@ -23,6 +23,7 @@ static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"}; static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"}; +static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"}; a64inst_instruction *parse(char **asmLines, int lineCount) { a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); @@ -349,7 +350,52 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) } else { // Logic - + int opcodeCategory = indexStringIn(tokens[0], LOGIC_OPCODES, 8); + switch(opcodeCategory/2){ + case 0: + //and + if((tokens[0][strlen(tokens[0])-1]) == 's'){ + data->processOp = 3; + } else { + data->processOp = 0; + } + data->processOpData.arithmLogicData.negShiftedSrc2 = 0; + break; + case 1: + //negated AND + if((tokens[0][strlen(tokens[0])-1]) == 's'){ + data->processOp = 3; + } else { + data->processOp = 0; + } + data->processOpData.arithmLogicData.negShiftedSrc2 = 1; + break; + case 2: + //XOR + data->processOp = 2; + if(opcodeCategory==4){ + data->processOpData.arithmLogicData.negShiftedSrc2 = 0; + } else { + data->processOpData.arithmLogicData.negShiftedSrc2 = 1; + } + break; + case 3: + //OR + data->processOp = 1; + if(opcodeCategory==6){ + data->processOpData.arithmLogicData.negShiftedSrc2 = 0; + } else { + data->processOpData.arithmLogicData.negShiftedSrc2 = 1; + } + break; + } + if(tokensCount == 5) { + //has a shift + int numTokens = 0; + char **shiftOperands = tokenise(tokens[4], &numTokens); + data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); + data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]); + } } } } From 04df4ea7950b5cb79b29ca58a043d04480c6906c Mon Sep 17 00:00:00 2001 From: sBubshait Date: Thu, 13 Jun 2024 20:06:37 +0100 Subject: [PATCH 088/113] Update Parser to fix bug in parsing multiply --- src/parser.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/parser.c b/src/parser.c index 16fbdc1..283f5db 100644 --- a/src/parser.c +++ b/src/parser.c @@ -5,6 +5,7 @@ #include #include "parser.h" #include "a64instruction/a64instruction.h" +#include "global.h" #include "tokeniser.c" /** Prototypes */ @@ -330,6 +331,9 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) if (tokensCount >= 5) { data->processOpData.multiplydata.summand = getOperandNumber(tokens[4]); data->processOpData.multiplydata.negProd = strcmp(tokens[4], "mneg") == 0; + }else { + data->processOpData.multiplydata.summand = ZERO_REGISTER; + data->processOpData.multiplydata.negProd = false; } } else { From 3104da7f30b5bd83a92e6845753eb5ed083f29b8 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Thu, 13 Jun 2024 20:24:55 +0100 Subject: [PATCH 089/113] dpr multiply --- src/parser.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/parser.c b/src/parser.c index 283f5db..9c1dacb 100644 --- a/src/parser.c +++ b/src/parser.c @@ -25,6 +25,7 @@ static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"}; static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"}; +//static const char *ALIASES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov", "mul", "mneg"}; a64inst_instruction *parse(char **asmLines, int lineCount) { a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); @@ -329,13 +330,13 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) // Multiply data->DPROpType = a64inst_DPR_MULTIPLY; if (tokensCount >= 5) { - data->processOpData.multiplydata.summand = getOperandNumber(tokens[4]); - data->processOpData.multiplydata.negProd = strcmp(tokens[4], "mneg") == 0; - }else { - data->processOpData.multiplydata.summand = ZERO_REGISTER; - data->processOpData.multiplydata.negProd = false; + data->processOpData.multiplydata.summand = getOperandNumber(tokens[4]); + data->processOpData.multiplydata.negProd = strcmp(tokens[0], "msub") == 0; + } + else { + data->processOpData.multiplydata.summand = ZERO_REGISTER; + data->processOpData.multiplydata.negProd = strcmp(tokens[0], "mneg") == 0; } - } else { // Arithmetic/Logic data->DPROpType = a64inst_DPR_ARITHMLOGIC; From f5d35ce2465f3eacfa287f2418ffdf05a8a54e23 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 13 Jun 2024 20:36:00 +0100 Subject: [PATCH 090/113] fix classification logic for str/ldr --- src/parser.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/parser.c b/src/parser.c index 9c1dacb..5f9f652 100644 --- a/src/parser.c +++ b/src/parser.c @@ -13,7 +13,7 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr); static char *duplicateString(char *str); void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands); void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); -void calcluateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands); +void calculateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands); void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount); @@ -141,7 +141,7 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { case a64inst_SINGLETRANSFER: parseSingleTransfer(instr, opcode, tokens, tokensCount); - calcluateAddressFormat(instr, tokens, tokensCount); + calculateAddressFormat(instr, tokens, tokensCount); break; case a64inst_LOADLITERAL: parseSingleTransfer(instr, opcode, tokens, tokensCount); @@ -183,31 +183,32 @@ int getOperandNumber(char *operand){ } -void calcluateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) { +void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) { assert(*tokens[2] == '['); int operandCount = 0; + char unsplitString[strlen(tokens[2])]; + strcpy(unsplitString, tokens[2]); char **operands = tokeniseOperands(tokens[2], &operandCount); int baseRegister = getOperandNumber(operands[0]); instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister; - if(operands[1][strlen(operands[1])-1] == '!') { + if(unsplitString[strlen(unsplitString)-1] == '!') { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]); - - } else if(operands[1][strlen(operands[1])-1] == ']') { - // POST_INDEXED - instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]); - + } else if( (isOperandRegister(*operands[0]) == 1) && (isOperandRegister(*operands[1]) == 1)){ //register instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); - + } else if(unsplitString[strlen(unsplitString)-1] == ']') { + // POST_INDEXED + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]); + } else { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; if(operandCount > 1){ From 9507180dc90705454e03e21ddebaf79b85031e5a Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Thu, 13 Jun 2024 20:43:57 +0100 Subject: [PATCH 091/113] fix str/ldr classification logic to avoid incorrect array indexing --- src/parser.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/parser.c b/src/parser.c index 5f9f652..92afb0a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -199,11 +199,13 @@ void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tok instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]); - } else if( (isOperandRegister(*operands[0]) == 1) - && (isOperandRegister(*operands[1]) == 1)){ - //register - instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); + } else if(operandCount==2) { + if((isOperandRegister(*operands[0]) == 1) + && (isOperandRegister(*operands[1]) == 1)){ + //register + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); + } } else if(unsplitString[strlen(unsplitString)-1] == ']') { // POST_INDEXED instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; From 0815d5b6f6ffcc8098625af485082229f18eafc1 Mon Sep 17 00:00:00 2001 From: GDBWNV <93523315+GDBWNV@users.noreply.github.com> Date: Thu, 13 Jun 2024 21:07:58 +0100 Subject: [PATCH 092/113] fix for widemove --- src/encode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encode.c b/src/encode.c index 1ab0d6a..4f6d508 100644 --- a/src/encode.c +++ b/src/encode.c @@ -86,7 +86,7 @@ word encodeDPImmediate(a64inst_instruction inst) { else { setBits(&wrd, 23, 26, 0x5); //opi // TODO: Check the following line, is it shiftScalar?: - setBits(&wrd, 21, 23, data.processOpData.wideMovData.shiftScalar); // hw + setBits(&wrd, 21, 23, (data.processOpData.wideMovData.shiftScalar == 12)); // hw setBits(&wrd, 5, 21, data.processOpData.wideMovData.immediate); // imm16 } From f3e1c1f1500ed8bb2e8615687c6835013f2f00eb Mon Sep 17 00:00:00 2001 From: sBubshait Date: Fri, 14 Jun 2024 02:00:10 +0100 Subject: [PATCH 093/113] Add handling of all aliases. Add Shift Utility Function --- src/parser.c | 86 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/src/parser.c b/src/parser.c index 92afb0a..e8bc85f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -25,7 +25,8 @@ static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"}; static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"}; -//static const char *ALIASES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov", "mul", "mneg"}; +static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"}; +static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"}; a64inst_instruction *parse(char **asmLines, int lineCount) { a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); @@ -64,6 +65,25 @@ static int indexStringIn(char *str, const char *arr[], int arrSize) { return -1; } +typedef struct { + int type; + int immediate; +} ShiftData; + +static ShiftData *parseShift(char *shift) { + char buffer[100]; + strcpy(buffer, shift); + char *shiftType = strtok(buffer, " "); + char *shiftAmount = strtok(NULL, " "); + ShiftData *data = malloc(sizeof(ShiftData)); + data->type = indexStringIn(shiftType, SHIFT_TYPE_OPCODES, 4); + while (*shiftAmount == ' ' || *shiftAmount == '#') { + shiftAmount++; + } + data->immediate = atoi(shiftAmount); + return data; +} + int isOperandRegister(char regStartChar) { return((regStartChar == 'x') || (regStartChar == 'w')); } @@ -74,7 +94,52 @@ int classifyDPInst(char *operandList[]){ isOperandRegister(operandList[3][0])); } -void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int tokensCount){ +void classifyAlias(char *opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) { + + int aliasIndex = indexStringIn(opcode, ALIAS_OPCODES, 9); + if (aliasIndex != -1) { + // The instruction is one of the aliases, convert into the target. + char *opcode = ALIAS_TARGET_OPCODES[aliasIndex]; + + // To correctly encode the zero register, which is either w31 or x31. + char *zeroReg = malloc(5 * sizeof(char)); + *zeroReg = *tokens[1]; + strcat(zeroReg, "31"); + + switch(aliasIndex) { + case 0: // cmp -> subs rzr, rn, + case 1: // cmn -> adds rzr, rn, + case 4: // tst -> ands rzr, rn, + // Convert from [instr] REG, to [instr] RZR, REG, + tokens[0] = opcode; + tokens[4] = tokens[3]; + tokens[3] = tokens[2]; + tokens[2] = tokens[1]; + tokens[1] = zeroReg; + (*tokensCount)++; + break; + + case 2: // neg -> subs rd, rzr, + case 3: // negs -> subs rd, rzr, + case 5: // mvn -> orn rd, rzr, + case 6: // mov -> orr rd, rzr, rm + tokens[0] = opcode; + tokens[4] = tokens[3]; + tokens[3] = tokens[2]; + tokens[2] = zeroReg; + (*tokensCount)++; + break; + + default: + break; + } + + } +} + +void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount){ + + classifyAlias(opcode, instr, tokens, tokensCount); if (isStringIn(opcode, BRANCH_OPCODES, 9)) { instr->type = a64inst_BRANCH; @@ -132,7 +197,7 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { } else { // Instruction - classifyOpcode(opcode, instr, tokens, tokensCount); + classifyOpcode(opcode, instr, tokens, &tokensCount); switch(instr->type){ case a64inst_BRANCH: @@ -296,12 +361,8 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 4); data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]); if (tokensCount >= 4) { - int numTokens = 0; - char **shiftOperands = tokenise(tokens[3], &numTokens); - int shiftAmount = getOperandNumber(shiftOperands[1]); - if (shiftAmount > 0) { - data->processOpData.wideMovData.shiftScalar = 12; - } + ShiftData shData = *parseShift(tokens[3]); + data->processOpData.wideMovData.shiftScalar = shData.immediate; } } else { @@ -311,10 +372,8 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount data->processOpData.arithmData.immediate = getOperandNumber(tokens[3]); if (tokensCount >= 5) { - int numTokens = 0; - char **shiftOperands = tokenise(tokens[4], &numTokens); - int shiftAmount = getOperandNumber(shiftOperands[1]); - if (shiftAmount > 0) { + ShiftData shData = *parseShift(tokens[4]); + if (shData.immediate > 0) { data->processOpData.arithmData.shiftImmediate = true; } } @@ -340,6 +399,7 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) data->processOpData.multiplydata.summand = ZERO_REGISTER; data->processOpData.multiplydata.negProd = strcmp(tokens[0], "mneg") == 0; } + } else { // Arithmetic/Logic data->DPROpType = a64inst_DPR_ARITHMLOGIC; From d1815c0cce2bfc5e28b47f63a82e41b343ea2b31 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Fri, 14 Jun 2024 02:01:40 +0100 Subject: [PATCH 094/113] Fix Bug in handling wide moves shift scalar --- src/encode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/encode.c b/src/encode.c index 4f6d508..df51d4b 100644 --- a/src/encode.c +++ b/src/encode.c @@ -85,8 +85,8 @@ word encodeDPImmediate(a64inst_instruction inst) { // if wide move else { setBits(&wrd, 23, 26, 0x5); //opi - // TODO: Check the following line, is it shiftScalar?: - setBits(&wrd, 21, 23, (data.processOpData.wideMovData.shiftScalar == 12)); // hw + uint8_t hw = data.processOpData.wideMovData.shiftScalar / 16; + setBits(&wrd, 21, 23, hw); // hw setBits(&wrd, 5, 21, data.processOpData.wideMovData.immediate); // imm16 } From 6aa93c79ae258d175c38e6da8fd27b3b0e84786c Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 16:42:42 +0100 Subject: [PATCH 095/113] fix indexing error with parsing branch operands --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index e8bc85f..395d18c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -324,7 +324,7 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) printf("unconditional"); break; case a64inst_REGISTER: - instr->data.BranchData.processOpData.registerData.src = getOperandNumber(operandList[0]); + instr->data.BranchData.processOpData.registerData.src = getOperandNumber(operandList[1]); break; case a64inst_CONDITIONAL: { From 16e52a9421bd67ba04762e4d0f53f28aa93707eb Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 16:52:48 +0100 Subject: [PATCH 096/113] fix directive value parsing --- src/parser.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/parser.c b/src/parser.c index 395d18c..4f2f71c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -88,6 +88,26 @@ int isOperandRegister(char regStartChar) { return((regStartChar == 'x') || (regStartChar == 'w')); } +//takes inputted char array and returns the integer of the operand, skipping the first character +//e.g. for a passed "R32", it skips the 'R' and returns 32 +int getOperandNumber(char *operand){ + char operandCpy[strlen(operand)]; + strcpy(operandCpy, operand+1); + char **endptr = NULL; + int number; + if((strncmp(operandCpy, "0x", 2)==0)) { + //hex value + strcpy(operandCpy, operand+3); + number = strtol(operandCpy, endptr, 16); + } else if(operandCpy[0]=='x'){ + strcpy(operandCpy, operand+2); + number = strtol(operandCpy, endptr, 16); + } else { + number = strtol(operandCpy, endptr, 10); + } + return number; +} + int classifyDPInst(char *operandList[]){ return(isOperandRegister(operandList[1][0]) && isOperandRegister(operandList[2][0]) && @@ -188,6 +208,7 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { if(strcmp(opcode, ".int") == 0){ // Directive instr->type = a64inst_DIRECTIVE; + instr->data.DirectiveData.value = getOperandNumber(tokens[1]); } else if(opcode[strlen(opcode)-1]== ':') { // Label @@ -230,22 +251,7 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { } -//takes inputted char array and returns the integer of the operand, skipping the first character -//e.g. for a passed "R32", it skips the 'R' and returns 32 -int getOperandNumber(char *operand){ - char operandCpy[strlen(operand)]; - strcpy(operandCpy, operand+1); - char **endptr = NULL; - int number; - if(strncmp(operandCpy, "0x", 2)==0){ - //hex value - strcpy(operandCpy, operand+3); - number = strtol(operandCpy, endptr, 16); - } else { - number = strtol(operandCpy, endptr, 10); - } - return number; -} + void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) { From fd165aae472bb46eede569c4d75e3bcfeb56d795 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 19:55:32 +0100 Subject: [PATCH 097/113] create helper function to handle label offsets --- src/encode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/encode.c b/src/encode.c index df51d4b..22173dc 100644 --- a/src/encode.c +++ b/src/encode.c @@ -26,9 +26,15 @@ void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) { *wrd |= (value << lsb) & mask; } +int calculateLabelOffset(char *label, int wordArrIndex, symboltable *st){ + int labelAddress = st.get(label); + int currInstrAddr = wordArrIndex * 4; + int totalOffset = labelAddress - currInstrAddr; + return totalOffset; +} // Generates assembled code based on the two-pass assembly method -word encodeBranch(a64inst_instruction *instr) { +word encodeBranch(a64inst_instruction *instr, int wordArrIndex) { word wrd = 0; switch (instr->data.BranchData.BranchType) { @@ -214,7 +220,7 @@ word *encode(a64inst_instruction insts[], int instCount, st* table) { // Labels are handled in the first pass and used for addressing. break; case a64inst_BRANCH: - arr[index] = encodeBranch(&inst); + arr[index] = encodeBranch(&inst, index); index++; default: break; From 59871d7a0eae7602af8af9bb73e3856147357390 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Fri, 14 Jun 2024 20:04:08 +0100 Subject: [PATCH 098/113] Restructre first pass, Reimplement symboltable --- src/Makefile | 2 +- src/assemble.c | 18 ++++- src/decode.c | 169 ---------------------------------------------- src/decode.h | 96 -------------------------- src/emulate.c | 67 ------------------ src/emulator.h | 32 --------- src/encode.c | 35 ++-------- src/execute.h | 7 -- src/print.c | 58 ---------------- src/print.h | 12 ---- src/symboltable.c | 102 +++++++++++++++++----------- src/symboltable.h | 27 ++++++++ src/tokeniser.c | 32 +++++++-- 13 files changed, 142 insertions(+), 515 deletions(-) delete mode 100644 src/decode.c delete mode 100644 src/decode.h delete mode 100644 src/emulate.c delete mode 100644 src/emulator.h delete mode 100644 src/execute.h delete mode 100644 src/print.c delete mode 100644 src/print.h create mode 100644 src/symboltable.h diff --git a/src/Makefile b/src/Makefile index 2fe219c..071143f 100755 --- a/src/Makefile +++ b/src/Makefile @@ -7,7 +7,7 @@ CFLAGS ?= -std=c17 -g\ .PHONY: all clean -all: assemble emulate +all: assemble assemble: assemble.o parser.o fileio.o emulate: emulate.o diff --git a/src/assemble.c b/src/assemble.c index 42302b3..c6b19fd 100644 --- a/src/assemble.c +++ b/src/assemble.c @@ -6,6 +6,8 @@ #include "parser.h" #include "encode.c" +static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount); + int main(int argc, char **argv) { // Check the arguments if (argc < 3) { @@ -21,7 +23,7 @@ int main(int argc, char **argv) { a64inst_instruction *instructions = parse(source, lineCount); // First Pass: Create the symbol table - st *table = firstPass(instructions, lineCount); + symbol_table *table = firstPass(instructions, lineCount); // Second Pass: Encode the instructions into binary word *binary = encode(instructions, lineCount, table); @@ -33,3 +35,17 @@ int main(int argc, char **argv) { return EXIT_SUCCESS; } + +static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount) { + symbol_table *table = st_init(); + int labelCount = 0; + + for (int i = 0; i < lineCount; i++) { + a64inst_instruction inst = instructions[i]; + if (inst.type == a64inst_LABEL) { + st_insert(table, inst.data.LabelData.label, 4 * (i - (labelCount++))); + } + } + + return table; +} diff --git a/src/decode.c b/src/decode.c deleted file mode 100644 index ce69c35..0000000 --- a/src/decode.c +++ /dev/null @@ -1,169 +0,0 @@ -#include -#include -#include -#include "decode.h" -#include "emulator.h" - -// Retrieve the bits between positions 'lsb' (inclusive) and 'msb' (exclusive) from a given word -// as a new zero-extended word. -static word getBits(word wrd, uint8_t lsb, uint8_t msb) { - - // Ensure LSB and MSB are within range of word size, and in the correct order - assert(lsb < msb && msb <= WORD_BITS); - - wrd &= ((dword) 1 << msb) - 1; - return wrd >> lsb; -} - -// Given a binary word, return its internal representation as an a64instruction struct encoding the same -// information. -a64inst_instruction *decode(word wrd) { - - a64inst_instruction *inst = malloc(sizeof(a64inst_instruction)); - if (inst == NULL) { - fprintf(stderr, "Ran out of memory while attempting to decode an instruction!\n"); - exit(1); - } - - word typeId = getBits(wrd, TYPE_ID_LSB, TYPE_ID_MSB); - // Halt interpretation - if (wrd == HALT_WORD) { - inst->type = a64inst_HALT; - - // Data Processing Immediate interpretation - } else if (typeId == DP_IMM_ID) { - inst->type = a64inst_DPIMMEDIATE; - inst->data.DPImmediateData.regType = getBits(wrd, DP_WIDTH_LSB, DP_WIDTH_MSB); - inst->data.DPImmediateData.processOp = getBits(wrd, DP_OP_LSB, DP_OP_MSB); - inst->data.DPImmediateData.dest = getBits(wrd, DP_DEST_LSB, DP_DEST_MSB); - - switch(getBits(wrd, DP_IMM_OPTYPE_LSB, DP_IMM_OPTYPE_MSB)) { - - case DP_IMM_OPTYPE_ARITHM: - inst->data.DPImmediateData.DPIOpType = a64inst_DPI_ARITHM; - inst->data.DPImmediateData.processOpData.arithmData.shiftImmediate = getBits(wrd, DP_IMM_ARITHM_SHIFTFLAG_LSB, DP_IMM_ARITHM_SHIFTFLAG_MSB); - inst->data.DPImmediateData.processOpData.arithmData.immediate = getBits(wrd, DP_IMM_ARITHM_IMMVAL_LSB, DP_IMM_ARITHM_IMMVAL_MSB); - inst->data.DPImmediateData.processOpData.arithmData.src = getBits(wrd, DP_IMM_ARITHM_DEST_LSB, DP_IMM_ARITHM_DEST_MSB); - break; - - case DP_IMM_OPTYPE_WIDEMOV: - inst->data.DPImmediateData.DPIOpType = a64inst_DPI_WIDEMOV; - inst->data.DPImmediateData.processOpData.wideMovData.shiftScalar = getBits(wrd, DP_IMM_WIDEMOV_SHIFTSCALAR_LSB, DP_IMM_WIDEMOV_SHIFTSCALAR_MSB); - inst->data.DPImmediateData.processOpData.wideMovData.immediate = getBits(wrd, DP_IMM_WIDEMOV_IMMVAL_LSB, DP_IMM_WIDEMOV_IMMVAL_MSB); - break; - - default: - fprintf(stderr, "Unknown immediate data processing operation type found!\n"); - exit(1); - break; - } - - } else if (typeId == BRANCH_ID) { - inst->type = a64inst_BRANCH; - word branchTypeFlag = getBits(wrd, BRANCH_TYPE_LSB, BRANCH_TYPE_MSB); - - inst->data.BranchData.BranchType = branchTypeFlag; - - switch (branchTypeFlag) { - case a64inst_UNCONDITIONAL: - inst->data.BranchData.processOpData.unconditionalData.unconditionalOffset = getBits(wrd, BRANCH_UNCONDITIONAL_OFFSET_LSB, BRANCH_UNCONDITIONAL_OFFSET_MSB); - break; - - case a64inst_CONDITIONAL: - inst->data.BranchData.processOpData.conditionalData.offset = getBits(wrd, BRANCH_CONDITIONAL_OFFSET_LSB, BRANCH_CONDITIONAL_OFFSET_MSB); - - word conditionFlag = getBits(wrd, BRANCH_CONDITIONAL_COND_LSB, BRANCH_CONDITIONAL_COND_MSB); - - if(conditionFlag <= 1 || (conditionFlag >= 10 && conditionFlag <= 14)) { - inst->data.BranchData.processOpData.conditionalData.cond = conditionFlag; - } else { - fprintf(stderr, "Unknown condition detected!\n"); - exit(1); - } - - break; - - case a64inst_REGISTER: - inst->data.BranchData.processOpData.registerData.src = getBits(wrd, BRANCH_REGISTER_SRC_LSB, BRANCH_REGISTER_SRC_MSB); - break; - - default: - fprintf(stderr, "Undefined branch type detected!\n"); - exit(1); - break; - } - - // TODO: Some minor code duplication between DPR and DPI data interpretation - // Data Processing Register interpretation - } else if (getBits(wrd, DP_REG_LSB, DP_REG_MSB) == 1) { - inst->type = a64inst_DPREGISTER; - inst->data.DPRegisterData.regType = getBits(wrd, DP_WIDTH_LSB, DP_WIDTH_MSB); - inst->data.DPRegisterData.processOp = getBits(wrd, DP_OP_LSB, DP_OP_MSB); - inst->data.DPRegisterData.dest = getBits(wrd, DP_DEST_LSB, DP_DEST_MSB); - inst->data.DPRegisterData.src1 = getBits(wrd, DP_REG_SRC1_LSB, DP_REG_SRC1_MSB); - inst->data.DPRegisterData.src2 = getBits(wrd, DP_REG_SRC2_LSB, DP_REG_SRC2_MSB); - inst->data.DPRegisterData.DPROpType = getBits(wrd, DP_REG_OPTYPE_LSB, DP_REG_OPTYPE_MSB); - - a64inst_DPRegister_ArithmLogicData *arithmLogicData = &inst->data.DPRegisterData.processOpData.arithmLogicData; - - arithmLogicData->type = getBits(wrd, DP_REG_ARITHMLOGIC_ARITHMFLAG_LSB, DP_REG_ARITHMLOGIC_ARITHMFLAG_MSB); - arithmLogicData->shiftType = getBits(wrd, DP_REG_ARITHMLOGIC_SHIFTTYPE_LSB, DP_REG_ARITHMLOGIC_SHIFTTYPE_MSB); - arithmLogicData->negShiftedSrc2 = getBits(wrd, DP_REG_ARITHMLOGIC_NEGSRC2FLAG_LSB, DP_REG_ARITHMLOGIC_NEGSRC2FLAG_MSB); - - switch(inst->data.DPRegisterData.DPROpType) { - - case a64inst_DPR_ARITHMLOGIC: - if (arithmLogicData->type == a64inst_DPR_ARITHM && (arithmLogicData->negShiftedSrc2 || arithmLogicData->shiftType == a64inst_ROR)) { - fprintf(stderr, "Attempting to decode arithmetic DPR instruction with invalid format!\n"); - } - arithmLogicData->shiftAmount = getBits(wrd, DP_REG_ARITHMLOGIC_SHIFTAMOUNT_LSB, DP_REG_ARITHMLOGIC_SHIFTAMOUNT_MSB); - break; - - case a64inst_DPR_MULTIPLY:; - if (!(inst->data.DPRegisterData.processOp == DP_REG_MULTIPLY_PROCESSOP && - arithmLogicData->type == DP_REG_MULTIPLY_ARITHMFLAG && - arithmLogicData->shiftType == DP_REG_MULTIPLY_SHIFTTYPE && - arithmLogicData->negShiftedSrc2 == DP_REG_MULTIPLY_NEGSRC2FLAG)) { - fprintf(stderr, "Attempting to decode multiply DPR instruction with invalid format!\n"); - } - inst->data.DPRegisterData.processOpData.multiplydata.summand = getBits(wrd, DP_REG_MULTIPLY_SUMMAND_LSB, DP_REG_MULTIPLY_SUMMAND_MSB); - inst->data.DPRegisterData.processOpData.multiplydata.negProd = getBits(wrd, DP_REG_MULTIPLY_NEGPROD_LSB, DP_REG_MULTIPLY_NEGPROD_MSB); - break; - } - - } else { - // Load and Store, or unknown - // Ignore unknown for now - inst->type = a64inst_SINGLETRANSFER; - inst->data.SingleTransferData.regType = getBits(wrd, SDT_REGTYPE_FLAG_LSB, SDT_REGTYPE_FLAG_MSB); - inst->data.SingleTransferData.target = getBits(wrd, SDT_TARGET_REG_LSB, SDT_TARGET_REG_MSB); - - // TODO: Assert that the instruction is a Single Transfer indeed. - - if(getBits(wrd, SDT_OPTYPE_FLAG_LSB, SDT_OPTYPE_FLAG_MSB) == a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER) { - // Single Data Transfer - inst->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; - inst->data.SingleTransferData.processOpData.singleDataTransferData.transferType = getBits(wrd, SDT_TRANSFER_TYPE_LSB, SDT_TRANSFER_TYPE_MSB); - inst->data.SingleTransferData.processOpData.singleDataTransferData.base = getBits(wrd, SDT_BASE_REG_LSB, SDT_BASE_REG_MSB); - if (getBits(wrd, SDT_UNSIGNED_FLAG_LSB, SDT_UNSIGNED_FLAG_MSB) == 1) { - // Unsigned offset - inst->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; - inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = getBits(wrd, SDT_OFFSET_LSB, SDT_OFFSET_MSB); - } else if (getBits(wrd, SDT_REGISTER_FLAG_LSB, SDT_REGISTER_FLAG_MSB) == 1) { - // Register Offset - inst->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; - inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getBits(wrd, SDT_REGISTER_REG_LSB, SDT_REGISTER_REG_MSB); - } else { - // Pre-Indexed or Post-Indexed - inst->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = getBits(wrd, SDT_INDEXED_ADDRMODE_LSB, SDT_INDEXED_ADDRMODE_MSB); - inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getBits(wrd, SDT_INDEXED_OFFSET_LSB, SDT_INDEXED_OFFSET_MSB); - } - } else { - // Load Literal - inst->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_LOAD_LITERAL; - inst->data.SingleTransferData.processOpData.loadLiteralData.offset = getBits(wrd, SDT_LOAD_LITERAL_OFFSET_LSB, SDT_LOAD_LITERAL_OFFSET_MSB); - } - } - - return inst; -} diff --git a/src/decode.h b/src/decode.h deleted file mode 100644 index d509ae1..0000000 --- a/src/decode.h +++ /dev/null @@ -1,96 +0,0 @@ -#include "global.h" -#include "a64instruction/a64instruction.h" - -#define HALT_WORD 0x8a000000 - -#define TYPE_ID_LSB 26 -#define TYPE_ID_MSB 29 -#define DP_IMM_ID 4 -#define BRANCH_ID 5 - -#define DP_REG_LSB 25 -#define DP_REG_MSB 26 - -#define DP_WIDTH_LSB 31 -#define DP_WIDTH_MSB 32 -#define DP_OP_LSB 29 -#define DP_OP_MSB 31 -#define DP_DEST_LSB 0 -#define DP_DEST_MSB 5 - -#define DP_IMM_OPTYPE_LSB 23 -#define DP_IMM_OPTYPE_MSB 26 -#define DP_IMM_OPTYPE_ARITHM 2 -#define DP_IMM_OPTYPE_WIDEMOV 5 -#define DP_IMM_ARITHM_SHIFTFLAG_LSB 22 -#define DP_IMM_ARITHM_SHIFTFLAG_MSB 23 -#define DP_IMM_ARITHM_IMMVAL_LSB 10 -#define DP_IMM_ARITHM_IMMVAL_MSB 22 -#define DP_IMM_ARITHM_DEST_LSB 5 -#define DP_IMM_ARITHM_DEST_MSB 10 -#define DP_IMM_WIDEMOV_SHIFTSCALAR_LSB 21 -#define DP_IMM_WIDEMOV_SHIFTSCALAR_MSB 23 -#define DP_IMM_WIDEMOV_IMMVAL_LSB 5 -#define DP_IMM_WIDEMOV_IMMVAL_MSB 21 - -#define DP_REG_SRC1_LSB 5 -#define DP_REG_SRC1_MSB 10 -#define DP_REG_SRC2_LSB 16 -#define DP_REG_SRC2_MSB 21 -#define DP_REG_OPTYPE_LSB 28 -#define DP_REG_OPTYPE_MSB 29 -#define DP_REG_ARITHMLOGIC_ARITHMFLAG_LSB 24 -#define DP_REG_ARITHMLOGIC_ARITHMFLAG_MSB 25 -#define DP_REG_ARITHMLOGIC_SHIFTTYPE_LSB 22 -#define DP_REG_ARITHMLOGIC_SHIFTTYPE_MSB 24 -#define DP_REG_ARITHMLOGIC_NEGSRC2FLAG_LSB 21 -#define DP_REG_ARITHMLOGIC_NEGSRC2FLAG_MSB 22 -#define DP_REG_ARITHMLOGIC_SHIFTAMOUNT_LSB 10 -#define DP_REG_ARITHMLOGIC_SHIFTAMOUNT_MSB 16 -#define DP_REG_MULTIPLY_SUMMAND_LSB 10 -#define DP_REG_MULTIPLY_SUMMAND_MSB 15 -#define DP_REG_MULTIPLY_NEGPROD_LSB 15 -#define DP_REG_MULTIPLY_NEGPROD_MSB 16 -// Defines the values for fields used for arithmetic/logic DPR instructions -// that are necessary to indicate a multiplication instruction -#define DP_REG_MULTIPLY_PROCESSOP 0 -#define DP_REG_MULTIPLY_ARITHMFLAG 1 -#define DP_REG_MULTIPLY_SHIFTTYPE 0 -#define DP_REG_MULTIPLY_NEGSRC2FLAG 0 - -#define SDT_OPTYPE_FLAG_LSB 31 -#define SDT_OPTYPE_FLAG_MSB 32 -#define SDT_REGTYPE_FLAG_LSB 30 -#define SDT_REGTYPE_FLAG_MSB 31 -#define SDT_TARGET_REG_LSB 0 -#define SDT_TARGET_REG_MSB 5 - -#define SDT_BASE_REG_LSB 5 -#define SDT_BASE_REG_MSB 10 -#define SDT_OFFSET_LSB 10 -#define SDT_OFFSET_MSB 22 -#define SDT_TRANSFER_TYPE_LSB 22 -#define SDT_TRANSFER_TYPE_MSB 23 -#define SDT_UNSIGNED_FLAG_LSB 24 -#define SDT_UNSIGNED_FLAG_MSB 25 -#define SDT_REGISTER_FLAG_LSB 21 -#define SDT_REGISTER_FLAG_MSB 22 -#define SDT_REGISTER_REG_LSB 16 -#define SDT_REGISTER_REG_MSB 21 -#define SDT_INDEXED_ADDRMODE_LSB 11 -#define SDT_INDEXED_ADDRMODE_MSB 12 -#define SDT_INDEXED_OFFSET_LSB 12 -#define SDT_INDEXED_OFFSET_MSB 21 -#define SDT_LOAD_LITERAL_OFFSET_LSB 5 -#define SDT_LOAD_LITERAL_OFFSET_MSB 24 - -#define BRANCH_TYPE_LSB 30 -#define BRANCH_TYPE_MSB 32 -#define BRANCH_UNCONDITIONAL_OFFSET_LSB 0 -#define BRANCH_UNCONDITIONAL_OFFSET_MSB 26 -#define BRANCH_REGISTER_SRC_LSB 5 -#define BRANCH_REGISTER_SRC_MSB 10 -#define BRANCH_CONDITIONAL_COND_LSB 0 -#define BRANCH_CONDITIONAL_COND_MSB 4 -#define BRANCH_CONDITIONAL_OFFSET_LSB 5 -#define BRANCH_CONDITIONAL_OFFSET_MSB 24 diff --git a/src/emulate.c b/src/emulate.c deleted file mode 100644 index 4c7f42e..0000000 --- a/src/emulate.c +++ /dev/null @@ -1,67 +0,0 @@ -#include -#include -#include "a64instruction/a64instruction.h" -#include "emulator.h" -#include "fileio.h" -#include "global.h" -#include "print.h" -#include "decode.h" -#include "execute.h" - -int main(int arg, char **argv){ - return EXIT_SUCCESS; -} - -/* -extern a64inst_instruction *decode(word w); - -int main(int argc, char **argv) { - - // Check the arguments - if (argc == 1) { - fprintf(stderr, "Error: An object file is required. Syntax: ./emulate []"); - return EXIT_FAILURE; - } - - FILE *out = stdout; - if (argc > 2) { - out = fopen(argv[2], "w"); - if (out == NULL) { - fprintf(stderr, "Error: Could not open file %s\n", argv[2]); - return EXIT_FAILURE; - } - } - - // Initialising the machine state - Machine state = {0}; - state.memory = fileio_loadBin(argv[1], MEMORY_SIZE); - state.conditionCodes = (PState){0, 1, 0, 0}; - state.pc = 0x0; - - - // Fetch-decode-execute cycle - word wrd; - a64inst_instruction *inst; - do { - - // Step 1: Fetch instruction at PC's address - wrd = readWord(state.memory, state.pc); - - // Step 2: Decode instruction to internal representation - inst = decode(wrd); - - // Step 3: Update processor state to reflect executing the instruction, and increment PC - execute(&state, inst); - - if (inst->type != a64inst_BRANCH) - state.pc += sizeof(word); - } while (inst->type != a64inst_HALT); - - state.pc -= sizeof(word); - - printState(&state, out); - free(state.memory); - - return EXIT_SUCCESS; -} -*/ diff --git a/src/emulator.h b/src/emulator.h deleted file mode 100644 index facbfd7..0000000 --- a/src/emulator.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef __EMULATOR__ -#define __EMULATOR__ -#include "global.h" -#include - -/************************************ - * DEFINITIONS - ************************************/ - -#define BYTE_BITS 8 -#define WORD_BITS (BYTE_BITS * sizeof(word)) -#define DWORD_BITS (BYTE_BITS * sizeof(dword)) - -/************************************ - * STRUCTS - ************************************/ - -typedef struct { - bool Negative; - bool Zero; - bool Carry; - bool Overflow; -} PState; - -typedef struct { - dword registers[REGISTER_COUNT]; - dword pc; - byte *memory; - PState conditionCodes; -} Machine; - -#endif diff --git a/src/encode.c b/src/encode.c index 22173dc..847585c 100644 --- a/src/encode.c +++ b/src/encode.c @@ -3,7 +3,6 @@ #include "a64instruction/a64instruction.h" #include "symboltable.c" #include -#include #define HALT_BINARY 2315255808 @@ -26,21 +25,19 @@ void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) { *wrd |= (value << lsb) & mask; } -int calculateLabelOffset(char *label, int wordArrIndex, symboltable *st){ - int labelAddress = st.get(label); - int currInstrAddr = wordArrIndex * 4; - int totalOffset = labelAddress - currInstrAddr; - return totalOffset; +static int getLabelOffset(symbol_table* table, char* label, int currentIndex) { + address target = st_get(table, label); + return target - (currentIndex * 4); } // Generates assembled code based on the two-pass assembly method -word encodeBranch(a64inst_instruction *instr, int wordArrIndex) { +word encodeBranch(a64inst_instruction *instr) { word wrd = 0; switch (instr->data.BranchData.BranchType) { case a64inst_UNCONDITIONAL: setBits(&wrd, 26, 30, 0x5); - setBits(&wrd, 25, 0, instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset); + setBits(&wrd, 0, 25, instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset); break; case a64inst_REGISTER: @@ -58,20 +55,6 @@ word encodeBranch(a64inst_instruction *instr, int wordArrIndex) { return wrd; } -st* firstPass(a64inst_instruction instrs[], int numInstrs) { - // TODO: - // -iterate over instructions, adding to symbol table - // create symbol table and map labels to addresses/lines - st *table = (st*)malloc(sizeof(st)); - for (int i = 0; i < numInstrs; i++) { - // discuss defining a LABEL type - if (instrs[i].type == a64inst_LABEL) { - st_add(*table, &(instrs[i].data.LabelData.label), &i); - } - } - return table; -} - word encodeDPImmediate(a64inst_instruction inst) { word wrd = 0; @@ -182,11 +165,7 @@ word encodeLoadLiteral(a64inst_instruction cI) { return wrd; } -word *encode(a64inst_instruction insts[], int instCount, st* table) { - // TODO: - // iterate over instructions again, this time replacing labels - // with values from symbol table - // after a line has had all the values replaced, assemble it and append +word *encode(a64inst_instruction insts[], int instCount, symbol_table* table) { word *arr = (word*)malloc(sizeof(word) * instCount); int index = 0; for (int i = 0; i < instCount; i++) { @@ -220,7 +199,7 @@ word *encode(a64inst_instruction insts[], int instCount, st* table) { // Labels are handled in the first pass and used for addressing. break; case a64inst_BRANCH: - arr[index] = encodeBranch(&inst, index); + arr[index] = encodeBranch(&inst); index++; default: break; diff --git a/src/execute.h b/src/execute.h deleted file mode 100644 index 8b691e6..0000000 --- a/src/execute.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __EXECUTE__ -#define __EXECUTE__ -#include "a64instruction/a64instruction.h" -#include "emulator.h" - -void execute(Machine *state, a64inst_instruction *inst); -#endif diff --git a/src/print.c b/src/print.c deleted file mode 100644 index 1d1d4a2..0000000 --- a/src/print.c +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include -#include -#include -#include "print.h" -#include "emulator.h" - -#define UNSET_CONDITION_CODE_CHAR '-' - -// Prints the current machine state into the provided stream -void printState(Machine *state, FILE *stream) { - printRegisters(state, stream); - printMemory(state, stream); -} - -// Prints the current machine registers into the provided stream -void printRegisters(Machine *state, FILE *stream) { - fprintf(stream, "Registers:\n"); - for (int i = 0; i < REGISTER_COUNT; i++) { - fprintf(stream, "X%02d\t= %016" PRIx64 "\n", i, state->registers[i]); - } - fprintf(stream, "PC\t= %016" PRIx64 "\n", state->pc); - fprintf(stream, "PSTATE\t: %c%c%c%c", state->conditionCodes.Negative ? 'N' : UNSET_CONDITION_CODE_CHAR, - state->conditionCodes.Zero ? 'Z' : UNSET_CONDITION_CODE_CHAR, - state->conditionCodes.Carry ? 'C' : UNSET_CONDITION_CODE_CHAR, - state->conditionCodes.Overflow ? 'V' : UNSET_CONDITION_CODE_CHAR); -} - -// Returns the word starting at the provided address -word readWord(byte *memory, uint32_t address) { - word result = 0; - int bytesPerWord = WORD_BITS / BYTE_BITS - 1; - for (int i = 0; i <= bytesPerWord; i++) - result |= (word) memory[address + i] << (BYTE_BITS * i); - return result; -} - -// Returns the double word starting at the provided address -dword readDoubleWord(byte *memory, uint32_t address) { - dword result = 0; - int bytesPerDword = DWORD_BITS / BYTE_BITS - 1; - for (int i = 0; i <= bytesPerDword; i++) - result |= (dword) memory[address + i] << (BYTE_BITS * i); - return result; -} - -// Prints all non-zero memory locations into the provided stream -void printMemory(Machine *state, FILE *stream) { - fprintf(stream, "\nNon-zero memory:\n"); - - // print memory 4 byte aligned - for (int addr = 0; addr < MEMORY_SIZE; addr+= 4) { - word data = readWord(state->memory, addr); - if (data != 0) { - fprintf(stream, "0x%08x: %08x\n", addr, data); - } - } -} diff --git a/src/print.h b/src/print.h deleted file mode 100644 index 404e947..0000000 --- a/src/print.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __PRINT__ -#define __PRINT__ -#include -#include "emulator.h" - -word readWord(byte *memory, uint32_t address); -dword readDoubleWord(byte *memory, uint32_t address); -void printState(Machine *state, FILE *stream); -void printRegisters(Machine *state, FILE *stream); -void printMemory(Machine *state, FILE *stream); - -#endif diff --git a/src/symboltable.c b/src/symboltable.c index a4c99e1..e93c84a 100644 --- a/src/symboltable.c +++ b/src/symboltable.c @@ -1,48 +1,74 @@ #include +#include +#include +#include "symboltable.h" -typedef struct st st; -typedef struct node node; // forward declaration - -typedef struct node { - const void* key; - void* value; - node* prev; - node* next; -} node; - -struct st { - node* head; - node* tail; -}; - -// add new node to the end -void st_add(st table, void* key, void* value) { - node n = {key, value, table.tail}; - if (table.head == NULL) { - table.head = &n; - table.tail = &n; +symbol_table *st_init(void) { + symbol_table *st = malloc(sizeof(symbol_table)); + if (st == NULL) { + fprintf(stderr, "Failed to allocate memory for symbol table\n"); + exit(EXIT_FAILURE); } - else { - (*(table.tail)).next = &n; - table.tail = &n; - } + + st->table = malloc(INITIAL_CAPACITY * sizeof(symbol_table_map)); + if (st->table == NULL) { + fprintf(stderr, "Failed to allocate memory for table\n"); + exit(EXIT_FAILURE); + } + + st->size = 0; + st->capacity = INITIAL_CAPACITY; + + return st; } -void* nodeSearch(node* n, void* key) { - if (n != NULL) { - if ((*n).key == key) { - return (*n).value; - } - else { - return nodeSearch((*n).next, key); +/* Grows the symbol table by a factor of GROWTH_FACTOR *only if the table is full*. + */ +static void grow(symbol_table *st) { + if (st->size == st->capacity) { + st->capacity *= GROWTH_FACTOR; + st->table = realloc(st->table, st->capacity * sizeof(symbol_table_map)); + if (st->table == NULL) { + fprintf(stderr, "Failed to reallocate memory for table\n"); + exit(EXIT_FAILURE); } } - else { - return NULL; - } } -// returns the pointer to key of the specified node, or null, if it does not exist -void* st_search(st table, void* key) { - return nodeSearch(table.head, key); +void st_insert(symbol_table *st, char *label, address addr) { + // If full, grow the table + grow(st); + + // Insert the new entry to the end of the table + symbol_table_map *entry = &st->table[st->size]; + entry->label = label; + entry->address = addr; + + st->size++; +} + +bool st_contains(symbol_table *st, char *label) { + for (int i = 0; i < st->size; i++) { + if (strcmp(st->table[i].label, label) == 0) { + return true; + } + } + + return false; +} + +address st_get(symbol_table *st, char *label) { + for (int i = 0; i < st->size; i++) { + if (strcmp(st->table[i].label, label) == 0) { + return st->table[i].address; + } + } + + fprintf(stderr, "Label %s not found in symbol table\n", label); + exit(EXIT_FAILURE); +} + +void st_free(symbol_table *st) { + free(st->table); + free(st); } diff --git a/src/symboltable.h b/src/symboltable.h new file mode 100644 index 0000000..ba8b21c --- /dev/null +++ b/src/symboltable.h @@ -0,0 +1,27 @@ +#include +#include +#include + +#define INITIAL_CAPACITY 5 +#define GROWTH_FACTOR 2 + +typedef uint32_t address; + +typedef struct { + char *label; + address address; +} symbol_table_map; + +typedef struct { + symbol_table_map* table; + int size; + int capacity; +} symbol_table; + +symbol_table *st_init(void); + +void st_insert(symbol_table *st, char *label, address addr); + +bool st_contains(symbol_table *st, char *label); + +address st_get(symbol_table *st, char *label); diff --git a/src/tokeniser.c b/src/tokeniser.c index 7cec970..6e37d1a 100644 --- a/src/tokeniser.c +++ b/src/tokeniser.c @@ -1,5 +1,6 @@ // Tokeniser.c #include +#include #include #include #include @@ -9,6 +10,25 @@ #define MAX_OPERAND_COUNT 4 #define OPERAND_DELIMITER ", " +char *trim(char *str) { + while (isspace(*str)) { + str++; + } + + if (*str == '\0') { + return str; + } + + char *end = str + strlen(str) - 1; + while (end > str && isspace(*end)) { + end--; + } + + end[1] = '\0'; + + return str; +} + char **tokenise(char *line, int *numTokens) { char **tokens = malloc(MAX_TOKEN_COUNT * sizeof(char *));\ if (!tokens) { @@ -16,6 +36,8 @@ char **tokenise(char *line, int *numTokens) { exit(EXIT_FAILURE); } + line = trim(line); + *numTokens = 0; char *token = strtok(line, " "); assert(token != NULL); @@ -23,7 +45,10 @@ char **tokenise(char *line, int *numTokens) { tokens[(*numTokens)++] = token; char *operandStart = strtok(NULL, ""); - assert(operandStart != NULL); + if (operandStart == NULL) { + // No operands. Return the instruction token. + return tokens; + } bool inBracket = false; char *currentToken = operandStart; @@ -48,11 +73,6 @@ char **tokenise(char *line, int *numTokens) { if (*currentToken != '\0') { tokens[*numTokens] = currentToken; - - if (tokens[*numTokens][strlen(tokens[*numTokens]) - 1] == '\n') { - tokens[*numTokens][strlen(tokens[*numTokens]) - 1] = '\0'; - } - (*numTokens)++; } From 5bb68d95e1c42767b974b9a430d2656dd5d2e6c1 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 20:09:14 +0100 Subject: [PATCH 099/113] adjust branch struct to hold label data --- src/a64instruction/a64instruction_Branch.h | 2 ++ src/parser.c | 38 +++++++++------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/a64instruction/a64instruction_Branch.h b/src/a64instruction/a64instruction_Branch.h index 2e525bb..b8a23d5 100644 --- a/src/a64instruction/a64instruction_Branch.h +++ b/src/a64instruction/a64instruction_Branch.h @@ -10,6 +10,7 @@ typedef enum { typedef struct { word unconditionalOffset; + char* label; } a64inst_Branch_UnconditionalData; typedef struct { @@ -29,6 +30,7 @@ typedef enum { typedef struct { a64inst_ConditionType cond; word offset; + char* label; } a64inst_Branch_ConditionalData; typedef struct { diff --git a/src/parser.c b/src/parser.c index 4f2f71c..395d18c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -88,26 +88,6 @@ int isOperandRegister(char regStartChar) { return((regStartChar == 'x') || (regStartChar == 'w')); } -//takes inputted char array and returns the integer of the operand, skipping the first character -//e.g. for a passed "R32", it skips the 'R' and returns 32 -int getOperandNumber(char *operand){ - char operandCpy[strlen(operand)]; - strcpy(operandCpy, operand+1); - char **endptr = NULL; - int number; - if((strncmp(operandCpy, "0x", 2)==0)) { - //hex value - strcpy(operandCpy, operand+3); - number = strtol(operandCpy, endptr, 16); - } else if(operandCpy[0]=='x'){ - strcpy(operandCpy, operand+2); - number = strtol(operandCpy, endptr, 16); - } else { - number = strtol(operandCpy, endptr, 10); - } - return number; -} - int classifyDPInst(char *operandList[]){ return(isOperandRegister(operandList[1][0]) && isOperandRegister(operandList[2][0]) && @@ -208,7 +188,6 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { if(strcmp(opcode, ".int") == 0){ // Directive instr->type = a64inst_DIRECTIVE; - instr->data.DirectiveData.value = getOperandNumber(tokens[1]); } else if(opcode[strlen(opcode)-1]== ':') { // Label @@ -251,7 +230,22 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { } - +//takes inputted char array and returns the integer of the operand, skipping the first character +//e.g. for a passed "R32", it skips the 'R' and returns 32 +int getOperandNumber(char *operand){ + char operandCpy[strlen(operand)]; + strcpy(operandCpy, operand+1); + char **endptr = NULL; + int number; + if(strncmp(operandCpy, "0x", 2)==0){ + //hex value + strcpy(operandCpy, operand+3); + number = strtol(operandCpy, endptr, 16); + } else { + number = strtol(operandCpy, endptr, 10); + } + return number; +} void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) { From 004ba24b5a89852c4454312e91596cd245c6dd2e Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 20:18:35 +0100 Subject: [PATCH 100/113] parse branch labels and modify load literal struct --- src/a64instruction/a64instruction_SingleTransfer.h | 1 + src/encode.c | 3 +++ src/parser.c | 5 ++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/a64instruction/a64instruction_SingleTransfer.h b/src/a64instruction/a64instruction_SingleTransfer.h index cbd7529..a8b134e 100644 --- a/src/a64instruction/a64instruction_SingleTransfer.h +++ b/src/a64instruction/a64instruction_SingleTransfer.h @@ -34,6 +34,7 @@ typedef struct { typedef struct { uint32_t offset; + char* label; } a64inst_LoadLiteralData; typedef struct { diff --git a/src/encode.c b/src/encode.c index 847585c..1f7ff7a 100644 --- a/src/encode.c +++ b/src/encode.c @@ -25,10 +25,13 @@ void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) { *wrd |= (value << lsb) & mask; } +/* static int getLabelOffset(symbol_table* table, char* label, int currentIndex) { address target = st_get(table, label); return target - (currentIndex * 4); } +*/ + // Generates assembled code based on the two-pass assembly method word encodeBranch(a64inst_instruction *instr) { diff --git a/src/parser.c b/src/parser.c index 395d18c..14b4f62 100644 --- a/src/parser.c +++ b/src/parser.c @@ -322,6 +322,7 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) //define and sign extend immediate offset //use symbol table printf("unconditional"); + instr->data.BranchData.processOpData.unconditionalData.label = operandList[1]; break; case a64inst_REGISTER: instr->data.BranchData.processOpData.registerData.src = getOperandNumber(operandList[1]); @@ -345,8 +346,10 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) } else if (strcmp(condition, "al")==0){ instr->data.BranchData.processOpData.conditionalData.cond = AL; } + instr->data.BranchData.processOpData.unconditionalData.label = operandList[1]; + break; - //calculate offset from symbol table. + } } } From 4139d7a083f784dddf4f3ea13b4edb5fc825fd94 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 20:19:49 +0100 Subject: [PATCH 101/113] parse loadliteral into modified struct --- src/parser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.c b/src/parser.c index 14b4f62..130795f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -307,6 +307,7 @@ void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[ int offset = getOperandNumber(tokens[1]); instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; } else { + instr->data.SingleTransferData.processOpData.loadLiteralData.label = tokens[1]; //offset is literal, use symbol table and calculate difference } break; From 37a9cf0400aa2cc6615221ad0549bd10a21c65d8 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Fri, 14 Jun 2024 20:20:54 +0100 Subject: [PATCH 102/113] Update encode to handle branches with sign extend --- src/encode.c | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/src/encode.c b/src/encode.c index 1f7ff7a..2f7cc0f 100644 --- a/src/encode.c +++ b/src/encode.c @@ -25,22 +25,41 @@ void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) { *wrd |= (value << lsb) & mask; } -/* -static int getLabelOffset(symbol_table* table, char* label, int currentIndex) { - address target = st_get(table, label); - return target - (currentIndex * 4); -} -*/ +// Sign extend a given value to a 64-bit signed integer given the number of bits +int64_t signExtend(dword value, unsigned int n) { + if (n == 0 || n >= 64) { + // If n_bits is 0 or greater than or equal to 64, return the value as is + return (int64_t)value; + } + + uint64_t sign_bit_mask = (uint64_t)1 << (n - 1); + + // Mask to isolate the n-bit value + uint64_t n_bit_mask = (sign_bit_mask << 1) - 1; + // Check if the sign bit is set + if (value & sign_bit_mask) { + // Sign bit is set, extend the sign + return (int64_t)(value | ~n_bit_mask); + } else { + // Sign bit is not set, return the value as is + return (int64_t)(value & n_bit_mask); + } +} + +static int getLabelOffset(symbol_table* table, char* label, int currentIndex, int n_bits) { + address target = st_get(table, label); + return signExtend((unsigned int) (target - (currentIndex * 4)), n_bits); +} // Generates assembled code based on the two-pass assembly method -word encodeBranch(a64inst_instruction *instr) { +word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) { word wrd = 0; switch (instr->data.BranchData.BranchType) { case a64inst_UNCONDITIONAL: - setBits(&wrd, 26, 30, 0x5); - setBits(&wrd, 0, 25, instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset); + setBits(&wrd, 26, 29, 0x5); + setBits(&wrd, 0, 25, getLabelOffset(st, instr->data.BranchData.processOpData.unconditionalData.label, index, 26)); break; case a64inst_REGISTER: @@ -50,7 +69,7 @@ word encodeBranch(a64inst_instruction *instr) { case a64inst_CONDITIONAL: setBits(&wrd, 26, 32, 0x15); - setBits(&wrd, 5, 24, instr->data.BranchData.processOpData.conditionalData.offset); + setBits(&wrd, 5, 24, getLabelOffset(st, instr->data.BranchData.processOpData.conditionalData.label, index, 19)); setBits(&wrd, 0, 4, instr->data.BranchData.processOpData.conditionalData.cond); break; } @@ -168,7 +187,7 @@ word encodeLoadLiteral(a64inst_instruction cI) { return wrd; } -word *encode(a64inst_instruction insts[], int instCount, symbol_table* table) { +word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) { word *arr = (word*)malloc(sizeof(word) * instCount); int index = 0; for (int i = 0; i < instCount; i++) { @@ -202,7 +221,7 @@ word *encode(a64inst_instruction insts[], int instCount, symbol_table* table) { // Labels are handled in the first pass and used for addressing. break; case a64inst_BRANCH: - arr[index] = encodeBranch(&inst); + arr[index] = encodeBranch(&inst, index, st); index++; default: break; From 3b3cda2d26d6defa0a13ca6d6b709a372e7fe678 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Fri, 14 Jun 2024 20:31:08 +0100 Subject: [PATCH 103/113] Fix branch handling, not to multiply by 4 --- src/assemble.c | 2 +- src/encode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/assemble.c b/src/assemble.c index c6b19fd..59a22d2 100644 --- a/src/assemble.c +++ b/src/assemble.c @@ -43,7 +43,7 @@ static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount) for (int i = 0; i < lineCount; i++) { a64inst_instruction inst = instructions[i]; if (inst.type == a64inst_LABEL) { - st_insert(table, inst.data.LabelData.label, 4 * (i - (labelCount++))); + st_insert(table, inst.data.LabelData.label, (i - (labelCount++))); } } diff --git a/src/encode.c b/src/encode.c index 2f7cc0f..13fdf17 100644 --- a/src/encode.c +++ b/src/encode.c @@ -49,7 +49,7 @@ int64_t signExtend(dword value, unsigned int n) { static int getLabelOffset(symbol_table* table, char* label, int currentIndex, int n_bits) { address target = st_get(table, label); - return signExtend((unsigned int) (target - (currentIndex * 4)), n_bits); + return signExtend((unsigned int) (target - currentIndex), n_bits); } // Generates assembled code based on the two-pass assembly method From 19b98a3c5127f807b89bf9c5ba75f074a1217b59 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 20:34:44 +0100 Subject: [PATCH 104/113] handle directive parsing --- src/parser.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/parser.c b/src/parser.c index 130795f..08d85fc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -169,6 +169,25 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], in } +//takes inputted char array and returns the integer of the operand, skipping the first character +//e.g. for a passed "R32", it skips the 'R' and returns 32 +int getOperandNumber(char *operand){ + char operandCpy[strlen(operand)]; + strcpy(operandCpy, operand+1); + char **endptr = NULL; + int number; + if(strncmp(operandCpy, "0x", 2)==0){ + //hex value + strcpy(operandCpy, operand+3); + number = strtol(operandCpy, endptr, 16); + } else if(operandCpy[0] == 'x'){ + number = strtol(operandCpy+1, endptr, 16); + } else { + number = strtol(operandCpy, endptr, 10); + } + return number; +} + void parse_instruction(char asmLine[], a64inst_instruction *instr) { if (instr == NULL){ @@ -188,6 +207,7 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { if(strcmp(opcode, ".int") == 0){ // Directive instr->type = a64inst_DIRECTIVE; + instr->data.DirectiveData.value = getOperandNumber(tokens[1]); } else if(opcode[strlen(opcode)-1]== ':') { // Label @@ -230,22 +250,6 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { } -//takes inputted char array and returns the integer of the operand, skipping the first character -//e.g. for a passed "R32", it skips the 'R' and returns 32 -int getOperandNumber(char *operand){ - char operandCpy[strlen(operand)]; - strcpy(operandCpy, operand+1); - char **endptr = NULL; - int number; - if(strncmp(operandCpy, "0x", 2)==0){ - //hex value - strcpy(operandCpy, operand+3); - number = strtol(operandCpy, endptr, 16); - } else { - number = strtol(operandCpy, endptr, 10); - } - return number; -} void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) { From 3838227a09d5e6b6b7eb56b48734c879e99f03b4 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 20:45:07 +0100 Subject: [PATCH 105/113] fix ldr parsing --- src/encode.c | 8 +++++--- src/parser.c | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/encode.c b/src/encode.c index 13fdf17..46b84c2 100644 --- a/src/encode.c +++ b/src/encode.c @@ -175,13 +175,15 @@ word encodeSingleDataTransfer(a64inst_instruction inst) { return wrd; } -word encodeLoadLiteral(a64inst_instruction cI) { +word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) { word wrd = 0; a64inst_SingleTransferData data = cI.data.SingleTransferData; setBits(&wrd, 24, 32, 0x18); setBits(&wrd, 30, 31, data.regType); - setBits(&wrd, 5, 24, data.processOpData.loadLiteralData.offset); + char *label = data.processOpData.loadLiteralData.label; + int offset = getLabelOffset(st, label, arrIndex, 19); + setBits(&wrd, 5, 24, offset); setBits(&wrd, 0, 5, data.target); return wrd; @@ -206,7 +208,7 @@ word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) { index++; break; case a64inst_LOADLITERAL: - arr[index] = encodeLoadLiteral(inst); + arr[index] = encodeLoadLiteral(inst, index, st); index++; break; case a64inst_DIRECTIVE: diff --git a/src/parser.c b/src/parser.c index 08d85fc..4cb939c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -275,7 +275,7 @@ void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tok instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); } - } else if(unsplitString[strlen(unsplitString)-1] == ']') { + } else if(unsplitString[strlen(unsplitString)-1] == ']' && operandCount == 2) { // POST_INDEXED instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]); @@ -311,7 +311,7 @@ void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[ int offset = getOperandNumber(tokens[1]); instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; } else { - instr->data.SingleTransferData.processOpData.loadLiteralData.label = tokens[1]; + instr->data.SingleTransferData.processOpData.loadLiteralData.label = tokens[2]; //offset is literal, use symbol table and calculate difference } break; From 58371a4fa4d8ce7b9e7404c410f76c9292d25596 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Fri, 14 Jun 2024 20:46:35 +0100 Subject: [PATCH 106/113] Update parser to correctly handle the zero register --- src/parser.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/parser.c b/src/parser.c index 4cb939c..d518e0c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -25,6 +25,7 @@ static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"}; static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"}; +static const char *ZERO_REGISTER_ALIAS[] = {"xzr", "wzr"}; static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"}; static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"}; @@ -172,6 +173,10 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], in //takes inputted char array and returns the integer of the operand, skipping the first character //e.g. for a passed "R32", it skips the 'R' and returns 32 int getOperandNumber(char *operand){ + if (isStringIn(operand, ZERO_REGISTER_ALIAS, 2)) { + return ZERO_REGISTER; + } + char operandCpy[strlen(operand)]; strcpy(operandCpy, operand+1); char **endptr = NULL; From f41c7cc749adaf0685731638d3fb230f6abd5f1c Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 20:51:08 +0100 Subject: [PATCH 107/113] rewrite address format logic --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index d518e0c..daa2519 100644 --- a/src/parser.c +++ b/src/parser.c @@ -280,7 +280,7 @@ void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tok instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); } - } else if(unsplitString[strlen(unsplitString)-1] == ']' && operandCount == 2) { + } else if(unsplitString[strlen(unsplitString)-1] == ']') { // POST_INDEXED instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]); From ffa798b2663d944ae9a9e2dcae4d33807c2e45e8 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Fri, 14 Jun 2024 21:01:51 +0100 Subject: [PATCH 108/113] Update parser to correctly classify branch types --- src/parser.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/parser.c b/src/parser.c index daa2519..2ffc104 100644 --- a/src/parser.c +++ b/src/parser.c @@ -269,30 +269,29 @@ void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tok instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister; - if(unsplitString[strlen(unsplitString)-1] == '!') { - instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]); - - } else if(operandCount==2) { - if((isOperandRegister(*operands[0]) == 1) - && (isOperandRegister(*operands[1]) == 1)){ - //register - instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); - } - } else if(unsplitString[strlen(unsplitString)-1] == ']') { - // POST_INDEXED + if (tokenCount >= 4) { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]); - } else { + } else if(unsplitString[strlen(unsplitString)-1] == '!') { + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]); + + } else if (operandCount == 1 || (!isOperandRegister(*operands[1]))) { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; if(operandCount > 1){ int offset = getOperandNumber(operands[1]); instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; //NEED TO SCALE IMMEDIATE VALUE BASED ON REGISTER TYPE IN ASSEMBLER } - } + } else { + if((isOperandRegister(*operands[0]) == 1) + && (isOperandRegister(*operands[1]) == 1)){ + //register + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); + } + } } static int parseRegisterType(char *operand) { From 8bea0d6a6dbf8ef2716327b9772357c007a569dd Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 21:15:19 +0100 Subject: [PATCH 109/113] fix directive operand parsing --- src/parser.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index 2ffc104..b2ed421 100644 --- a/src/parser.c +++ b/src/parser.c @@ -212,8 +212,15 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { if(strcmp(opcode, ".int") == 0){ // Directive instr->type = a64inst_DIRECTIVE; - instr->data.DirectiveData.value = getOperandNumber(tokens[1]); - + char *intValue = tokens[1]; + char *endptr; + if(strncmp(intValue, "0x", 2) == 0) { + intValue += 2; + instr->data.DirectiveData.value = strtol(intValue, &endptr, 16); + } else { + instr->data.DirectiveData.value = strtol(tokens[1], &endptr, 10); + } + } else if(opcode[strlen(opcode)-1]== ':') { // Label instr->type = a64inst_LABEL; From e5b3f19bec68abe1cd9b2a9ec536eab5cac70a65 Mon Sep 17 00:00:00 2001 From: EDiasAlberto Date: Fri, 14 Jun 2024 21:17:26 +0100 Subject: [PATCH 110/113] restructure directive parsing into independent function --- src/parser.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/parser.c b/src/parser.c index b2ed421..03db487 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,6 +16,7 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); void calculateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands); void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount); +void parseDirective(a64inst_instruction *inst, char *tokens[]); /** Constants */ static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"}; @@ -212,14 +213,8 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { if(strcmp(opcode, ".int") == 0){ // Directive instr->type = a64inst_DIRECTIVE; - char *intValue = tokens[1]; - char *endptr; - if(strncmp(intValue, "0x", 2) == 0) { - intValue += 2; - instr->data.DirectiveData.value = strtol(intValue, &endptr, 16); - } else { - instr->data.DirectiveData.value = strtol(tokens[1], &endptr, 10); - } + parseDirective(instr, tokens); + } else if(opcode[strlen(opcode)-1]== ':') { // Label @@ -305,6 +300,18 @@ static int parseRegisterType(char *operand) { return operand[0] == 'x'; } +void parseDirective(a64inst_instruction *instr, char *tokens[]) { + char *intValue = tokens[1]; + char *endptr; + if(strncmp(intValue, "0x", 2) == 0) { + intValue += 2; + instr->data.DirectiveData.value = strtol(intValue, &endptr, 16); + } else { + instr->data.DirectiveData.value = strtol(tokens[1], &endptr, 10); + } +} + + void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) { switch(instr->type){ From 3501ac93aaf0e99f297290c53b43bd834b333585 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Fri, 14 Jun 2024 21:22:11 +0100 Subject: [PATCH 111/113] Fix Bug in handling zero register in aliases --- src/parser.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 03db487..2985b4c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -104,8 +104,10 @@ void classifyAlias(char *opcode, a64inst_instruction *instr, char *tokens[], int char *opcode = ALIAS_TARGET_OPCODES[aliasIndex]; // To correctly encode the zero register, which is either w31 or x31. + char *start_zeroReg = tokens[1]; + while (isspace(*start_zeroReg)) start_zeroReg++; char *zeroReg = malloc(5 * sizeof(char)); - *zeroReg = *tokens[1]; + *zeroReg = *start_zeroReg; strcat(zeroReg, "31"); switch(aliasIndex) { From 6de1915dbefbeb8aefd7db8c76a961bc5d8228b9 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Sat, 15 Jun 2024 01:50:56 +0100 Subject: [PATCH 112/113] Restructure overall assembler. Add string_util and Docs --- src/Makefile | 2 +- src/assemble.c | 12 +- src/encode.c | 21 ++- src/parser.c | 447 ++++++++++++++++++++-------------------------- src/parser.h | 17 +- src/string_util.c | 173 ++++++++++++++++++ src/string_util.h | 64 +++++++ src/symboltable.c | 8 + src/symboltable.h | 49 ++++- src/tokeniser.c | 101 ++++------- src/tokeniser.h | 26 +++ 11 files changed, 587 insertions(+), 333 deletions(-) create mode 100644 src/string_util.c create mode 100644 src/string_util.h create mode 100644 src/tokeniser.h diff --git a/src/Makefile b/src/Makefile index 071143f..150b23c 100755 --- a/src/Makefile +++ b/src/Makefile @@ -9,7 +9,7 @@ CFLAGS ?= -std=c17 -g\ all: assemble -assemble: assemble.o parser.o fileio.o +assemble: assemble.o parser.o fileio.o tokeniser.o string_util.o emulate: emulate.o clean: diff --git a/src/assemble.c b/src/assemble.c index 59a22d2..9b2484c 100644 --- a/src/assemble.c +++ b/src/assemble.c @@ -1,3 +1,9 @@ +/** @file assemble.c + * @brief The main file for the ARMv8 assembler. Reads an assembly file and outputs the binary file. + * + * @author Saleh Bubshait + */ + #include #include #include "a64instruction/a64instruction.h" @@ -31,11 +37,13 @@ int main(int argc, char **argv) { // Write the binary to the output file writeBinaryFile(binary, argv[2], lineCount); - /* TODO: FREE MEMORY!! */ - return EXIT_SUCCESS; } +/** The first pass of the assembler. Creates the symbol table. Adds all labels + * and the address of the instruction following the label to the symbol table. + * Returns the final symbol table. + */ static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount) { symbol_table *table = st_init(); int labelCount = 0; diff --git a/src/encode.c b/src/encode.c index 46b84c2..bdb89f4 100644 --- a/src/encode.c +++ b/src/encode.c @@ -1,3 +1,12 @@ +/** @file encode.c + * @brief A function to encode the internal representation of ARMv8 + * instructions, a64inst_instruction, into binary. + * + * @author Ethan Dias Alberto + * @author George Niedringhaus + * @author Saleh Bubshait + */ + #include #include "global.h" #include "a64instruction/a64instruction.h" @@ -53,7 +62,7 @@ static int getLabelOffset(symbol_table* table, char* label, int currentIndex, in } // Generates assembled code based on the two-pass assembly method -word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) { +static word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) { word wrd = 0; switch (instr->data.BranchData.BranchType) { @@ -77,7 +86,7 @@ word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) { return wrd; } -word encodeDPImmediate(a64inst_instruction inst) { +static word encodeDPImmediate(a64inst_instruction inst) { word wrd = 0; a64inst_DPImmediateData data = inst.data.DPImmediateData; @@ -104,7 +113,7 @@ word encodeDPImmediate(a64inst_instruction inst) { return wrd; } -word encodeDPRegister(a64inst_instruction inst) { +static word encodeDPRegister(a64inst_instruction inst) { word wrd = 0; a64inst_DPRegisterData data = inst.data.DPRegisterData; @@ -139,7 +148,7 @@ word encodeDPRegister(a64inst_instruction inst) { } -word encodeSingleDataTransfer(a64inst_instruction inst) { +static word encodeSingleDataTransfer(a64inst_instruction inst) { word wrd = 0; a64inst_SingleTransferData data = inst.data.SingleTransferData; @@ -175,7 +184,7 @@ word encodeSingleDataTransfer(a64inst_instruction inst) { return wrd; } -word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) { +static word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) { word wrd = 0; a64inst_SingleTransferData data = cI.data.SingleTransferData; @@ -189,7 +198,7 @@ word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) { return wrd; } -word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) { +static word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) { word *arr = (word*)malloc(sizeof(word) * instCount); int index = 0; for (int i = 0; i < instCount; i++) { diff --git a/src/parser.c b/src/parser.c index 2985b4c..474652e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,24 +1,50 @@ +/** @file parser.c + * @brief Functions to parse ARMv8 assembly lines into an array of a special + * internal representation of instructions, a64inst_instruction. + * @author Ethan Dias Alberto + * @author George Niedringhaus + * @author Saleh Bubshait + */ + #include #include #include #include +#include #include #include "parser.h" #include "a64instruction/a64instruction.h" #include "global.h" -#include "tokeniser.c" +#include "tokeniser.h" +#include "string_util.h" -/** Prototypes */ -void parse_instruction(char asmLine[], a64inst_instruction *instr); -static char *duplicateString(char *str); -void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands); -void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); -void calculateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands); -void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); -void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount); -void parseDirective(a64inst_instruction *inst, char *tokens[]); +/************************************ + * STRUCTS + ************************************/ + +typedef struct { + int type; + int immediate; +} ShiftData; + +/************************************ + * PROTOTYPES + ************************************/ + +static void parse_instruction(char asmLine[], a64inst_instruction *instr); +static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands); +static void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); +static void parseAddressingMode(a64inst_instruction *instr, char *operandList[], int numOperands); +static void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); +static void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount); +static void parseDirective(a64inst_instruction *inst, char *tokens[]); +static ShiftData *parseShift(char *shift); +static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount); + +/************************************ + * CONSTANTS + ************************************/ -/** Constants */ static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"}; static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"}; static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"}; @@ -26,9 +52,11 @@ static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"}; static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"}; -static const char *ZERO_REGISTER_ALIAS[] = {"xzr", "wzr"}; -static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"}; -static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"}; + + +/************************************ + * FUNCTIONS + ************************************/ a64inst_instruction *parse(char **asmLines, int lineCount) { a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); @@ -38,180 +66,33 @@ a64inst_instruction *parse(char **asmLines, int lineCount) { parse_instruction(asmLines[i], &instructions[i]); i++; } - + return instructions; } -static char *duplicateString(char *str) { - char *newStr = malloc(strlen(str) + 1); - strcpy(newStr, str); - return newStr; -} - -static bool isStringIn(char *str, const char *arr[], int arrSize) { - for (int i = 0; i < arrSize; i++) { - if (strcmp(str, arr[i]) == 0) { - return true; - } - } - return false; -} - -// If more than one occurance, return the last index -static int indexStringIn(char *str, const char *arr[], int arrSize) { - for (int i = arrSize - 1; i >= 0; i--) { - if (strcmp(str, arr[i]) == 0) { - return i; - } - } - return -1; -} - -typedef struct { - int type; - int immediate; -} ShiftData; - -static ShiftData *parseShift(char *shift) { - char buffer[100]; - strcpy(buffer, shift); - char *shiftType = strtok(buffer, " "); - char *shiftAmount = strtok(NULL, " "); - ShiftData *data = malloc(sizeof(ShiftData)); - data->type = indexStringIn(shiftType, SHIFT_TYPE_OPCODES, 4); - while (*shiftAmount == ' ' || *shiftAmount == '#') { - shiftAmount++; - } - data->immediate = atoi(shiftAmount); - return data; -} - -int isOperandRegister(char regStartChar) { - return((regStartChar == 'x') || (regStartChar == 'w')); -} - -int classifyDPInst(char *operandList[]){ - return(isOperandRegister(operandList[1][0]) && - isOperandRegister(operandList[2][0]) && - isOperandRegister(operandList[3][0])); -} - -void classifyAlias(char *opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) { - - int aliasIndex = indexStringIn(opcode, ALIAS_OPCODES, 9); - if (aliasIndex != -1) { - // The instruction is one of the aliases, convert into the target. - char *opcode = ALIAS_TARGET_OPCODES[aliasIndex]; - - // To correctly encode the zero register, which is either w31 or x31. - char *start_zeroReg = tokens[1]; - while (isspace(*start_zeroReg)) start_zeroReg++; - char *zeroReg = malloc(5 * sizeof(char)); - *zeroReg = *start_zeroReg; - strcat(zeroReg, "31"); - - switch(aliasIndex) { - case 0: // cmp -> subs rzr, rn, - case 1: // cmn -> adds rzr, rn, - case 4: // tst -> ands rzr, rn, - // Convert from [instr] REG, to [instr] RZR, REG, - tokens[0] = opcode; - tokens[4] = tokens[3]; - tokens[3] = tokens[2]; - tokens[2] = tokens[1]; - tokens[1] = zeroReg; - (*tokensCount)++; - break; - - case 2: // neg -> subs rd, rzr, - case 3: // negs -> subs rd, rzr, - case 5: // mvn -> orn rd, rzr, - case 6: // mov -> orr rd, rzr, rm - tokens[0] = opcode; - tokens[4] = tokens[3]; - tokens[3] = tokens[2]; - tokens[2] = zeroReg; - (*tokensCount)++; - break; - - default: - break; - } - - } -} - -void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount){ - - classifyAlias(opcode, instr, tokens, tokensCount); - - if (isStringIn(opcode, BRANCH_OPCODES, 9)) { - instr->type = a64inst_BRANCH; - - if (strcmp(opcode, "br") == 0) { - instr->data.BranchData.BranchType = a64inst_REGISTER; - } else if (strcmp(opcode, "b") == 0) { - instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; - } else { - instr->data.BranchData.BranchType = a64inst_CONDITIONAL; - } - - } else if (isStringIn(opcode, SINGLE_TRANSFER_OPCODES, 2)) { - instr->type = a64inst_SINGLETRANSFER; - if (*tokens[2] == '[') { - instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; - instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0; - - } else { - instr->type = a64inst_LOADLITERAL; - } - } else if (classifyDPInst(tokens)) { - instr->type = a64inst_DPREGISTER; - } else { - instr->type = a64inst_DPIMMEDIATE; - } - -} - -//takes inputted char array and returns the integer of the operand, skipping the first character -//e.g. for a passed "R32", it skips the 'R' and returns 32 -int getOperandNumber(char *operand){ - if (isStringIn(operand, ZERO_REGISTER_ALIAS, 2)) { - return ZERO_REGISTER; - } - - char operandCpy[strlen(operand)]; - strcpy(operandCpy, operand+1); - char **endptr = NULL; - int number; - if(strncmp(operandCpy, "0x", 2)==0){ - //hex value - strcpy(operandCpy, operand+3); - number = strtol(operandCpy, endptr, 16); - } else if(operandCpy[0] == 'x'){ - number = strtol(operandCpy+1, endptr, 16); - } else { - number = strtol(operandCpy, endptr, 10); - } - return number; -} - - -void parse_instruction(char asmLine[], a64inst_instruction *instr) { +/** Parses a single ARMv8 assembly line into an a64inst_instruction. + */ +static void parse_instruction(char asmLine[], a64inst_instruction *instr) { if (instr == NULL){ exit(EXIT_FAILURE); } - if(strcmp(asmLine, HALT_ASM_CMD) == 0){ - instr->type = a64inst_HALT; - return; - } - char *asmLineCopy = duplicateString(asmLine); int tokensCount = 0; char **tokens = tokenise(asmLineCopy, &tokensCount); char *opcode = tokens[0]; + // Check if the instruction is the halt instruction, "and x0, x0, x0". + if (tokensCount == 4 && strcmp(opcode, "and") == 0 + && getRegister(tokens[1]) == 0 + && getRegister(tokens[2]) == 0 + && getRegister(tokens[3]) == 0) { + + instr->type = a64inst_HALT; + return; + } + + if(strcmp(opcode, ".int") == 0){ // Directive instr->type = a64inst_DIRECTIVE; @@ -226,6 +107,8 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { } else { // Instruction + + // Classify the opcode into the correct instruction type. classifyOpcode(opcode, instr, tokens, &tokensCount); switch(instr->type){ @@ -235,74 +118,32 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) { case a64inst_SINGLETRANSFER: parseSingleTransfer(instr, opcode, tokens, tokensCount); - calculateAddressFormat(instr, tokens, tokensCount); + parseAddressingMode(instr, tokens, tokensCount); break; + case a64inst_LOADLITERAL: parseSingleTransfer(instr, opcode, tokens, tokensCount); break; + case a64inst_DPREGISTER: //generate DP operands; parseDPRegister(instr, tokens, tokensCount); break; + case a64inst_DPIMMEDIATE: parseDPImmediate(instr, tokens, tokensCount); break; + default: - printf("Error: Invalid Instruction\n"); + printf("Error: Invalid Instruction, '%s'\n", opcode); break; + } } - - /* TODO: FREE MEMORY! */ - } - - - -void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) { - assert(*tokens[2] == '['); - - int operandCount = 0; - char unsplitString[strlen(tokens[2])]; - strcpy(unsplitString, tokens[2]); - char **operands = tokeniseOperands(tokens[2], &operandCount); - - int baseRegister = getOperandNumber(operands[0]); - - instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister; - - if (tokenCount >= 4) { - instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]); - - } else if(unsplitString[strlen(unsplitString)-1] == '!') { - instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]); - - } else if (operandCount == 1 || (!isOperandRegister(*operands[1]))) { - instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; - if(operandCount > 1){ - int offset = getOperandNumber(operands[1]); - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; - //NEED TO SCALE IMMEDIATE VALUE BASED ON REGISTER TYPE IN ASSEMBLER - } - } else { - if((isOperandRegister(*operands[0]) == 1) - && (isOperandRegister(*operands[1]) == 1)){ - //register - instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; - instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]); - } - } -} - -static int parseRegisterType(char *operand) { - return operand[0] == 'x'; -} - -void parseDirective(a64inst_instruction *instr, char *tokens[]) { +static void parseDirective(a64inst_instruction *instr, char *tokens[]) { char *intValue = tokens[1]; char *endptr; if(strncmp(intValue, "0x", 2) == 0) { @@ -314,27 +155,28 @@ void parseDirective(a64inst_instruction *instr, char *tokens[]) { } -void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) { +static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) { switch(instr->type){ case a64inst_SINGLETRANSFER: - instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]); - instr->data.SingleTransferData.target = getOperandNumber(tokens[1]); + instr->data.SingleTransferData.regType = getRegisterType(tokens[1]); + instr->data.SingleTransferData.target = getRegister(tokens[1]); break; case a64inst_LOADLITERAL: - instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]); - instr->data.SingleTransferData.target = getOperandNumber(tokens[1]); + instr->data.SingleTransferData.regType = getRegisterType(tokens[1]); + instr->data.SingleTransferData.target = getRegister(tokens[1]); if(*tokens[2] =='#'){ //offset is immediate - int offset = getOperandNumber(tokens[1]); - instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset; + instr->data.SingleTransferData.processOpData.loadLiteralData.offset = getImmediate(tokens[2]);; } else { + //offset is label instr->data.SingleTransferData.processOpData.loadLiteralData.label = tokens[2]; - //offset is literal, use symbol table and calculate difference } + break; + default: break; @@ -350,7 +192,7 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) instr->data.BranchData.processOpData.unconditionalData.label = operandList[1]; break; case a64inst_REGISTER: - instr->data.BranchData.processOpData.registerData.src = getOperandNumber(operandList[1]); + instr->data.BranchData.processOpData.registerData.src = getRegister(operandList[1]); break; case a64inst_CONDITIONAL: { @@ -381,13 +223,13 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) { a64inst_DPImmediateData *data = &inst->data.DPImmediateData; - data->dest = getOperandNumber(tokens[1]); - data->regType = parseRegisterType(tokens[1]); + data->dest = getRegister(tokens[1]); + data->regType = getRegisterType(tokens[1]); - if (isStringIn(tokens[0], WIDE_MOV_OPCODES, 4)) { + if (containsString(tokens[0], WIDE_MOV_OPCODES, 4)) { data->DPIOpType = a64inst_DPI_WIDEMOV; - data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 4); - data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]); + data->processOp = lastIndexOfString(tokens[0], WIDE_MOV_OPCODES, 4); + data->processOpData.wideMovData.immediate = getImmediate(tokens[2]); if (tokensCount >= 4) { ShiftData shData = *parseShift(tokens[3]); data->processOpData.wideMovData.shiftScalar = shData.immediate; @@ -395,9 +237,9 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount } else { data->DPIOpType = a64inst_DPI_ARITHM; - data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4); - data->processOpData.arithmData.src = getOperandNumber(tokens[2]); - data->processOpData.arithmData.immediate = getOperandNumber(tokens[3]); + data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4); + data->processOpData.arithmData.src = getRegister(tokens[2]); + data->processOpData.arithmData.immediate = getImmediate(tokens[3]); if (tokensCount >= 5) { ShiftData shData = *parseShift(tokens[4]); @@ -411,16 +253,16 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) { a64inst_DPRegisterData *data = &inst->data.DPRegisterData; - data->dest = getOperandNumber(tokens[1]); - data->regType = parseRegisterType(tokens[1]); - data->src1 = getOperandNumber(tokens[2]); - data->src2 = getOperandNumber(tokens[3]); + data->dest = getRegister(tokens[1]); + data->regType = getRegisterType(tokens[1]); + data->src1 = getRegister(tokens[2]); + data->src2 = getRegister(tokens[3]); - if (isStringIn(tokens[0], MULTIPLY_OPCODES, 4)) { + if (containsString(tokens[0], MULTIPLY_OPCODES, 4)) { // Multiply data->DPROpType = a64inst_DPR_MULTIPLY; if (tokensCount >= 5) { - data->processOpData.multiplydata.summand = getOperandNumber(tokens[4]); + data->processOpData.multiplydata.summand = getRegister(tokens[4]); data->processOpData.multiplydata.negProd = strcmp(tokens[0], "msub") == 0; } else { @@ -432,21 +274,21 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) // Arithmetic/Logic data->DPROpType = a64inst_DPR_ARITHMLOGIC; - if (isStringIn(tokens[0], ARITHMETIC_OPCODES, 4)) { + if (containsString(tokens[0], ARITHMETIC_OPCODES, 4)) { // Arithmetic - data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4); + data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4); data->processOpData.arithmLogicData.type = 1; if(tokensCount == 5) { //has a shift int numTokens = 0; char **shiftOperands = tokenise(tokens[4], &numTokens); - data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); - data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]); + data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); + data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]); } } else { // Logic - int opcodeCategory = indexStringIn(tokens[0], LOGIC_OPCODES, 8); + int opcodeCategory = lastIndexOfString(tokens[0], LOGIC_OPCODES, 8); switch(opcodeCategory/2){ case 0: //and @@ -489,9 +331,102 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) //has a shift int numTokens = 0; char **shiftOperands = tokenise(tokens[4], &numTokens); - data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); - data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]); + data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); + data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]); } } } } + +/** Classifies the given opcode into the correct instruction type. + * Modifies instr to reflect the classification. + */ +static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) { + + // First, if the opcode is an alias, convert it to the target instruction. + translateAlias(opcode, tokens, tokensCount); + + if (containsString(opcode, BRANCH_OPCODES, 9)) { + instr->type = a64inst_BRANCH; + + if (strcmp(opcode, "br") == 0) { + instr->data.BranchData.BranchType = a64inst_REGISTER; + } else if (strcmp(opcode, "b") == 0) { + instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; + } else { + instr->data.BranchData.BranchType = a64inst_CONDITIONAL; + } + + } else if (containsString(opcode, SINGLE_TRANSFER_OPCODES, 2)) { + instr->type = a64inst_SINGLETRANSFER; + if (*tokens[2] == '[') { + instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; + instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0; + + } else { + instr->type = a64inst_LOADLITERAL; + } + + // DP Instruction. + // DP Register if the third operand is a register. + } else if (*tokensCount >= 4 && isRegister(tokens[3])) { + instr->type = a64inst_DPREGISTER; + } else { + instr->type = a64inst_DPIMMEDIATE; + } + +} + +/** Parses a shift string into a ShiftData struct. + */ +static ShiftData *parseShift(char *shift) { + char buffer[20]; + strcpy(buffer, shift); + + char *shiftType = strtok(buffer, " "); + char *shiftAmount = strtok(NULL, " "); + + ShiftData *data = malloc(sizeof(ShiftData)); + + data->type = lastIndexOfString(shiftType, SHIFT_TYPE_OPCODES, 4); + + SKIP_WHITESPACE(shiftAmount); + data->immediate = getImmediate(shiftAmount); + return data; +} + +/** Parses the addressing mode of a single transfer instruction. (Not load literal) + */ +static void parseAddressingMode(a64inst_instruction *instr, char *tokens[], int tokenCount) { + assert(*tokens[2] == '['); + + int operandCount = 0; + char *unsplitString = duplicateString(tokens[2]); + char **operands = tokeniseOperands(tokens[2], &operandCount); + + int baseRegister = getRegister(operands[0]); + + instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister; + + if (tokenCount >= 4) { + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(tokens[3]); + + } else if(unsplitString[strlen(unsplitString)-1] == '!') { + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(operands[1]); + + } else if (operandCount == 1 || (!isRegister(operands[1]))) { + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; + if(operandCount > 1){ + int offset = getImmediate(operands[1]); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; + } + } else { + if((isRegister(operands[0]) == 1) + && (isRegister(operands[1]) == 1)){ + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getRegister(operands[1]); + } + } +} diff --git a/src/parser.h b/src/parser.h index 81885af..23b76c6 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,6 +1,17 @@ +/** @file parser.h + * @brief A function to parse ARMv8 assembly lines into an array of a special + * internal representation of instructions, a64inst_instruction. + * + * @author Ethan Dias Alberto + * @author Saleh Bubshait + */ + #include "a64instruction/a64instruction.h" -#define OPERAND_DELIMITER ", " -#define HALT_ASM_CMD "and x0, x0, x0\n" - +/** @brief Parses a list of ARMv8 assembly lines into an array of a64inst_instruction. + * + * @param asmLines An array of strings, each string is an ARMv8 assembly line. + * @param lineCount The number of lines in the asmLines array. + * @return An array of a64inst_instruction representing the parsed instructions. + */ a64inst_instruction *parse(char **asmLines, int lineCount); diff --git a/src/string_util.c b/src/string_util.c new file mode 100644 index 0000000..8b7aaa0 --- /dev/null +++ b/src/string_util.c @@ -0,0 +1,173 @@ +/** @file string_util.c + * @brief This file contains the implementation of some string processing + * utility functions used in the assembler. + * + * @author Saleh Bubshait + */ + +#include +#include +#include +#include +#include "string_util.h" +#include "global.h" + +/************************************ + * CONSTANTS + ************************************/ + +static const char *SPECIAL_REGISTERS[] = {"sp", "xzr", "wzr"}; +static const char *ZERO_REGISTER_ALIAS[] = {"xzr", "wzr"}; +static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"}; +static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"}; + +/************************************ + * FUNCTIONS + ************************************/ + +char *trim(char *str) { + // Skip leading whitespace + while (isspace(*str)) { + str++; + } + + // If the string is all whitespace + if (*str == '\0') { + return str; + } + + // Skip trailing whitespace + char *end = str + strlen(str) - 1; + while (end > str && isspace(*end)) { + end--; + } + end[1] = '\0'; + + return str; +} + +bool containsString(char *str, const char *arr[], int arrSize) { + for (int i = 0; i < arrSize; i++) { + if (strcmp(str, arr[i]) == 0) { + return true; + } + } + return false; +} + +int lastIndexOfString(char *str, const char *arr[], int arrSize) { + for (int i = arrSize - 1; i >= 0; i--) { + if (strcmp(str, arr[i]) == 0) { + return i; + } + } + return -1; +} + +char *duplicateString(char *str) { + char *newStr = malloc(strlen(str) + 1); + strcpy(newStr, str); + return newStr; +} + +bool isRegister(char *str) { + SKIP_WHITESPACE(str); + if (str == NULL) + return false; + + if (containsString(str, SPECIAL_REGISTERS, 3)) + return true; + + return tolower(str[0]) == 'x' || tolower(str[0]) == 'w'; +} + +int getRegister(char *str) { + SKIP_WHITESPACE(str); + if (containsString(str, ZERO_REGISTER_ALIAS, 2)) { + return ZERO_REGISTER; + } + + return strtol(str + 1, NULL, 10); +} + +int getImmediate(char *str) { + SKIP_WHITESPACE(str); + if (strlen(str) < 2) { + return 0; + } + + if (str[0] != '#') + return 0; + + str++; // skip # + + if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 3) == 0) { + // Hex + return strtol(str + 2, NULL, 16); + } else { + // Decimal + return strtol(str, NULL, 10); + } + + return 0; +} + +int getRegisterType(char *str) { + SKIP_WHITESPACE(str); + + return tolower(str[0]) == 'x'; +} + + +/** @brief Translates an alias instruction into its target instruction. + * Note: This function modifies the input tokens array and the tokensCount. + * Assumes there is enough space in the tokens array to add the new tokens. + * + * @param opcode The opcode of the instruction. + * @param tokens The tokens of the instruction. + * @param tokensCount The number of tokens in the instruction. + */ +void translateAlias(char *opcode, char *tokens[], int *tokensCount) { + + int aliasIndex = lastIndexOfString(opcode, ALIAS_OPCODES, 9); + if (aliasIndex == -1) + return; + + // The instruction is one of the aliases, convert into the target. + char *targetOpcode = ALIAS_TARGET_OPCODES[aliasIndex]; + + // To correctly encode the zero register, which is either w31 or x31. + char *zeroReg = malloc(5 * sizeof(char)); + *zeroReg = *tokens[1]; + strcat(zeroReg, "31"); + + switch(aliasIndex) { + case 0: // cmp -> subs rzr, rn, + case 1: // cmn -> adds rzr, rn, + case 4: // tst -> ands rzr, rn, + // Convert from [instr] reg, to [instr] rzr, reg, + tokens[0] = targetOpcode; + tokens[4] = tokens[3]; + tokens[3] = tokens[2]; + tokens[2] = tokens[1]; + tokens[1] = zeroReg; + (*tokensCount)++; + break; + + case 2: // neg -> subs rd, rzr, + case 3: // negs -> subs rd, rzr, + case 5: // mvn -> orn rd, rzr, + case 6: // mov -> orr rd, rzr, rm + tokens[0] = targetOpcode; + tokens[4] = tokens[3]; + tokens[3] = tokens[2]; + tokens[2] = zeroReg; + (*tokensCount)++; + break; + + default: + // Note, the multiply instructions are handled separately. + // See DPReg parsing. + break; + } +} diff --git a/src/string_util.h b/src/string_util.h new file mode 100644 index 0000000..c9bca35 --- /dev/null +++ b/src/string_util.h @@ -0,0 +1,64 @@ +/** @file string_util.h + * @brief This file contains the implementation of some string processing + * utility functions used in the assembler. + * + * @author Saleh Bubshait + */ + +/** @brief Skips whitespace characters in a string. + * @param ptr A pointer to the string to skip whitespace in. + */ +#define SKIP_WHITESPACE(ptr) do { while (isspace(*ptr)) { ptr++; } } while (0) + +/** @brief Removes leading and trailing whitespace from a string. + * Note. This function modifies the input string. + * @param str The string to trim. + * @return A pointer to the first non-whitespace character in the string. + */ +char *trim(char *str); + +/** @brief Checks if a string is in an array of strings. + * + * @param str The string to check. + * @param arr The array of strings to check against. + * @param arrSize The size of the array. + * @return True if the string is in the array, false otherwise. + */ +bool containsString(char *str, const char *arr[], int arrSize); + +/** @brief Finds the last index of a string in an array of strings. + * Note: If multiple occurances of the string exist, the index of the last + * occurance is returned! + * + * @param str The string to find. + * @param arr The array of strings to search. + * @param arrSize The size of the array. + * @return The index of the last occurrence of the string in the array, or -1 if not found. + */ +int lastIndexOfString(char *str, const char *arr[], int arrSize); + +/** @brief Duplicates a string. + * Note: The caller is responsible for freeing the returned string. + * + * @param str The string to duplicate. + * @return A pointer to the duplicated string. + */ +char *duplicateString(char *str); + +/** @brief Checks if a string represents an ARMv8 register. + * A string is considered a register if it is: + * - A general purpose register (x0-x30 or w0-w30) + * - A special register (sp, xzr, wzr) + * + * @param str The string to check. + * @return True if the string is a register, false otherwise. + */ +bool isRegister(char *str); + +int getRegister(char *str); + +int getImmediate(char *str); + +int getRegisterType(char *str); + +void translateAlias(char *opcode, char *tokens[], int *tokensCount); diff --git a/src/symboltable.c b/src/symboltable.c index e93c84a..50db150 100644 --- a/src/symboltable.c +++ b/src/symboltable.c @@ -1,3 +1,11 @@ +/** @file symboltable.c + * @brief An Abstract Data Type (ADT) for a symbol table, an array of + * label-address pairs. Labels are strings and addresses are unsigned integers. + * (uint32_t). The symbol table is implemented as a dynamic array. + * + * @author Saleh Bubshait + */ + #include #include #include diff --git a/src/symboltable.h b/src/symboltable.h index ba8b21c..ca1912d 100644 --- a/src/symboltable.h +++ b/src/symboltable.h @@ -1,3 +1,11 @@ +/** @file symboltable.h + * @brief An Abstract Data Type (ADT) for a symbol table, an array of + * label-address pairs. Labels are strings and addresses are unsigned integers. + * (uint32_t). The symbol table is implemented as a dynamic array. + * + * @author Saleh Bubshait + */ + #include #include #include @@ -7,21 +15,56 @@ typedef uint32_t address; +/** An entry in the symbol table, a label-address pair. + */ typedef struct { char *label; address address; } symbol_table_map; +/** The symbol table ADT. + */ typedef struct { - symbol_table_map* table; - int size; - int capacity; + symbol_table_map* table; // entries + int size; // number of entries + int capacity; // size of the table. capacity >= size } symbol_table; +/** @brief Initializes a new symbol table. + * + * @return A pointer to the new symbol table. + */ symbol_table *st_init(void); +/** @brief Inserts a new label-address pair to the symbol table. + * Grows the table if it is full. If the label already exists in the table, + * another entry with the same label is inserted (for performance). + * + * @param st A pointer to the target symbol table. + * @param label The label to insert. + * @param addr The address to insert. + */ void st_insert(symbol_table *st, char *label, address addr); +/** @brief Checks if a label exists in the symbol table. + * + * @param st A pointer to the target symbol table. + * @param label The label to check. + * @return True if the label exists in the table, false otherwise. + */ bool st_contains(symbol_table *st, char *label); +/** @brief Gets the address of a label in the symbol table. + * st_contains should be called before calling this function! + * + * @param st A pointer to the target symbol table. + * @param label The label to get the address of. + * @return The address of the label in the table. + */ address st_get(symbol_table *st, char *label); + +/** @brief Frees the memory allocated for the symbol table. + * + * @param st A pointer to the target symbol table. + */ +void st_free(symbol_table *st); diff --git a/src/tokeniser.c b/src/tokeniser.c index 6e37d1a..3b907ac 100644 --- a/src/tokeniser.c +++ b/src/tokeniser.c @@ -1,33 +1,23 @@ -// Tokeniser.c +/** @file tokeniser.c + * @brief Functions to tokenise lines of assembly and operand strings. + * + * @author Saleh Bubshait + */ + #include #include #include #include #include #include +#include "tokeniser.h" +#include "string_util.h" -#define MAX_TOKEN_COUNT 5 -#define MAX_OPERAND_COUNT 4 +#define MAX_TOKEN_COUNT 6 +#define MAX_OPERAND_COUNT 5 #define OPERAND_DELIMITER ", " - -char *trim(char *str) { - while (isspace(*str)) { - str++; - } - - if (*str == '\0') { - return str; - } - - char *end = str + strlen(str) - 1; - while (end > str && isspace(*end)) { - end--; - } - - end[1] = '\0'; - - return str; -} +#define OPEN_BRACKET '[' +#define CLOSE_BRACKET ']' char **tokenise(char *line, int *numTokens) { char **tokens = malloc(MAX_TOKEN_COUNT * sizeof(char *));\ @@ -46,36 +36,22 @@ char **tokenise(char *line, int *numTokens) { char *operandStart = strtok(NULL, ""); if (operandStart == NULL) { - // No operands. Return the instruction token. + // No operands. Return the first (opcode) token. return tokens; } - bool inBracket = false; - char *currentToken = operandStart; + SKIP_WHITESPACE(operandStart); + + // Use tokeniseOperands to tokenise the operands + int operandTokensCount = 0; + char **operandTokens = tokeniseOperands(operandStart, &operandTokensCount); - for (char *c = operandStart; *c != '\0'; ++c) { - if (*c == '[' || *c == '{') { - inBracket = true; - } else if (*c == ']' || *c == '}') { - inBracket = false; - } - - - if (*c == ',' && !inBracket) { - *c = '\0'; - tokens[(*numTokens)++] = currentToken; - currentToken = c + 1; - while (*currentToken == ' ') { - currentToken++; - } - } - } - - if (*currentToken != '\0') { - tokens[*numTokens] = currentToken; - (*numTokens)++; + for (int i = 0; i < operandTokensCount; i++) { + tokens[(*numTokens)++] = operandTokens[i]; } + + free(operandTokens); return tokens; } @@ -86,42 +62,43 @@ char **tokeniseOperands(char *line, int *numTokens) { exit(EXIT_FAILURE); } - if (*line == '[') { + SKIP_WHITESPACE(line); + + // Remove leading and trailing brackets if they exist + if (*line == OPEN_BRACKET) { line++; // skip '[' - line[strlen(line) - 1] = '\0'; // remove ']' - } else if (*line == '{') { - line++; // skip '{' - line[strlen(line) - 1] = '\0'; // remove '}' + char *end = line + strlen(line) - 1; + while (end > line && *end != CLOSE_BRACKET) { + end--; + } + if (*end == CLOSE_BRACKET) { + *end = '\0'; + } } + line = trim(line); + *numTokens = 0; bool inBracket = false; char *currentToken = line; for (char *c = line; *c != '\0'; ++c) { - if (*c == '[' || *c == '{') { + if (*c == '[') { inBracket = true; - } else if (*c == ']' || *c == '}') { + } else if (*c == ']') { inBracket = false; } if (*c == ',' && !inBracket) { *c = '\0'; tokens[(*numTokens)++] = currentToken; - currentToken = c + 1; - while (*currentToken == ' ') { - currentToken++; - } + currentToken = c + 1; // skip the comma + SKIP_WHITESPACE(currentToken); } } if (*currentToken != '\0') { tokens[*numTokens] = currentToken; - - if (tokens[*numTokens][strlen(tokens[*numTokens]) - 1] == '\n') { - tokens[*numTokens][strlen(tokens[*numTokens]) - 1] = '\0'; - } - (*numTokens)++; } diff --git a/src/tokeniser.h b/src/tokeniser.h new file mode 100644 index 0000000..05b30fa --- /dev/null +++ b/src/tokeniser.h @@ -0,0 +1,26 @@ +/** @file tokeniser.h + * @brief Functions to tokenise lines of assembly and operand strings. + * + * @author Saleh Bubshait + */ + +/** @brief Tokenises a line of assembly code. The first two tokens are separated + * by a space, and the rest are separated by commas. + * e.g., "add x1, x2, x3" -> ["add", "x1", "x2", "x3"]. Handles and skips any + * whitespaces, e.g., " add x1, x2,#4 " -> ["add", "x1", "x2", "#4"]. + * @param line The line to tokenise. + * @param numTokens A pointer to an integer to store the number of tokens. + * @return An array of strings containing the tokens. + */ +char **tokenise(char *line, int *numTokens); + +/** @brief Tokenises the operands of an instruction. The operands are separated + * by commas. Handles and skips any whitespaces, e.g., "x1, x2, #4" -> ["x1", "x2", "#4"]. + * If the line starts with a bracket, it is removed and the closing bracket. + * Note. It also removes anything after the brackets, for example: + * "[x1, x2, #4]!" -> ["x1", "x2", "#4"]. + * @param line The line to tokenise. + * @param numTokens A pointer to an integer to store the number of tokens. + * @return An array of strings containing the tokens. + */ +char **tokeniseOperands(char *line, int *numTokens); From bdeafcbcc6bf2996c9834dc3519b370444d7a453 Mon Sep 17 00:00:00 2001 From: sBubshait Date: Sat, 15 Jun 2024 03:03:02 +0100 Subject: [PATCH 113/113] Update the assembler file structure into subfolders --- src/assemble.c | 8 ++--- src/{ => assembler}/encode.c | 50 +++---------------------------- src/assembler/encode.h | 21 +++++++++++++ src/{ => assembler}/parser.c | 5 ++-- src/{ => assembler}/parser.h | 2 +- src/{ => assembler}/string_util.c | 2 +- src/{ => assembler}/string_util.h | 0 src/{ => assembler}/symboltable.c | 2 +- src/{ => assembler}/symboltable.h | 5 ++++ src/{ => assembler}/tokeniser.c | 0 src/{ => assembler}/tokeniser.h | 0 src/util/binary_util.c | 43 ++++++++++++++++++++++++++ src/util/binary_util.h | 17 +++++++++++ src/{ => util}/fileio.c | 2 +- src/{ => util}/fileio.h | 2 +- 15 files changed, 102 insertions(+), 57 deletions(-) rename src/{ => assembler}/encode.c (80%) create mode 100644 src/assembler/encode.h rename src/{ => assembler}/parser.c (99%) rename src/{ => assembler}/parser.h (92%) rename src/{ => assembler}/string_util.c (99%) rename src/{ => assembler}/string_util.h (100%) rename src/{ => assembler}/symboltable.c (99%) rename src/{ => assembler}/symboltable.h (97%) rename src/{ => assembler}/tokeniser.c (100%) rename src/{ => assembler}/tokeniser.h (100%) create mode 100644 src/util/binary_util.c create mode 100644 src/util/binary_util.h rename src/{ => util}/fileio.c (99%) rename src/{ => util}/fileio.h (92%) diff --git a/src/assemble.c b/src/assemble.c index 9b2484c..e159720 100644 --- a/src/assemble.c +++ b/src/assemble.c @@ -7,10 +7,10 @@ #include #include #include "a64instruction/a64instruction.h" -#include "parser.h" -#include "fileio.h" -#include "parser.h" -#include "encode.c" +#include "assembler/parser.h" +#include "util/fileio.h" +#include "assembler/encode.h" +#include "assembler/symboltable.h" static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount); diff --git a/src/encode.c b/src/assembler/encode.c similarity index 80% rename from src/encode.c rename to src/assembler/encode.c index bdb89f4..ef7c498 100644 --- a/src/encode.c +++ b/src/assembler/encode.c @@ -7,55 +7,13 @@ * @author Saleh Bubshait */ -#include -#include "global.h" -#include "a64instruction/a64instruction.h" -#include "symboltable.c" +#include "symboltable.h" #include +#include "../util/binary_util.h" +#include "encode.h" #define HALT_BINARY 2315255808 -// write the provided value to the bits in the range [lsb, msb) {inclusive, exclusive} to the word. -// Does not modify any other bits in the word. -void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) { - // Ensure LSB and MSB are within range of word size, and in the correct order - assert(lsb < msb && msb <= 32); - - // Create a mask with 1s in the range [lsb, msb) and 0s elsewhere - word mask = 0; - for (uint8_t i = lsb; i < msb; i++) { - mask |= 1 << i; - } - - // Clear the bits in the range [lsb, msb) in the word - *wrd &= ~mask; - - // Set the bits in the range [lsb, msb) to the value - *wrd |= (value << lsb) & mask; -} - -// Sign extend a given value to a 64-bit signed integer given the number of bits -int64_t signExtend(dword value, unsigned int n) { - if (n == 0 || n >= 64) { - // If n_bits is 0 or greater than or equal to 64, return the value as is - return (int64_t)value; - } - - uint64_t sign_bit_mask = (uint64_t)1 << (n - 1); - - // Mask to isolate the n-bit value - uint64_t n_bit_mask = (sign_bit_mask << 1) - 1; - - // Check if the sign bit is set - if (value & sign_bit_mask) { - // Sign bit is set, extend the sign - return (int64_t)(value | ~n_bit_mask); - } else { - // Sign bit is not set, return the value as is - return (int64_t)(value & n_bit_mask); - } -} - static int getLabelOffset(symbol_table* table, char* label, int currentIndex, int n_bits) { address target = st_get(table, label); return signExtend((unsigned int) (target - currentIndex), n_bits); @@ -198,7 +156,7 @@ static word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table return wrd; } -static word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) { +word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) { word *arr = (word*)malloc(sizeof(word) * instCount); int index = 0; for (int i = 0; i < instCount; i++) { diff --git a/src/assembler/encode.h b/src/assembler/encode.h new file mode 100644 index 0000000..1ac8a82 --- /dev/null +++ b/src/assembler/encode.h @@ -0,0 +1,21 @@ +/** @file encode.h + * @brief A function to encode the internal representation of ARMv8 + * instructions, a64inst_instruction, into binary. + * + * @author Saleh Bubshait + */ + +#include "../global.h" +#include "../a64instruction/a64instruction.h" +#include "symboltable.h" + +/** @brief Encodes the internal representation of ARMv8 instructions into binary. + * The symbol table is used to resolve labels in branch instructions. Assumes + * that the instructions are in the same order as they appear in the source file. + * + * @param insts An array of a64inst_instruction to encode. + * @param instCount The number of instructions in the array. + * @param st The symbol table to use for label resolution. + * @return An array of words representing the binary encoding of the instructions. + */ +word *encode(a64inst_instruction insts[], int instCount, symbol_table* st); diff --git a/src/parser.c b/src/assembler/parser.c similarity index 99% rename from src/parser.c rename to src/assembler/parser.c index 474652e..b997cf3 100644 --- a/src/parser.c +++ b/src/assembler/parser.c @@ -1,6 +1,7 @@ /** @file parser.c * @brief Functions to parse ARMv8 assembly lines into an array of a special * internal representation of instructions, a64inst_instruction. + * * @author Ethan Dias Alberto * @author George Niedringhaus * @author Saleh Bubshait @@ -13,8 +14,8 @@ #include #include #include "parser.h" -#include "a64instruction/a64instruction.h" -#include "global.h" +#include "../a64instruction/a64instruction.h" +#include "../global.h" #include "tokeniser.h" #include "string_util.h" diff --git a/src/parser.h b/src/assembler/parser.h similarity index 92% rename from src/parser.h rename to src/assembler/parser.h index 23b76c6..12d5f7b 100644 --- a/src/parser.h +++ b/src/assembler/parser.h @@ -6,7 +6,7 @@ * @author Saleh Bubshait */ -#include "a64instruction/a64instruction.h" +#include "../a64instruction/a64instruction.h" /** @brief Parses a list of ARMv8 assembly lines into an array of a64inst_instruction. * diff --git a/src/string_util.c b/src/assembler/string_util.c similarity index 99% rename from src/string_util.c rename to src/assembler/string_util.c index 8b7aaa0..e519ef4 100644 --- a/src/string_util.c +++ b/src/assembler/string_util.c @@ -10,7 +10,7 @@ #include #include #include "string_util.h" -#include "global.h" +#include "../global.h" /************************************ * CONSTANTS diff --git a/src/string_util.h b/src/assembler/string_util.h similarity index 100% rename from src/string_util.h rename to src/assembler/string_util.h diff --git a/src/symboltable.c b/src/assembler/symboltable.c similarity index 99% rename from src/symboltable.c rename to src/assembler/symboltable.c index 50db150..9ccd3d6 100644 --- a/src/symboltable.c +++ b/src/assembler/symboltable.c @@ -5,7 +5,7 @@ * * @author Saleh Bubshait */ - + #include #include #include diff --git a/src/symboltable.h b/src/assembler/symboltable.h similarity index 97% rename from src/symboltable.h rename to src/assembler/symboltable.h index ca1912d..12c99b8 100644 --- a/src/symboltable.h +++ b/src/assembler/symboltable.h @@ -6,6 +6,9 @@ * @author Saleh Bubshait */ +#ifndef __SYMBOLTABLE__ +#define __SYMBOLTABLE__ + #include #include #include @@ -68,3 +71,5 @@ address st_get(symbol_table *st, char *label); * @param st A pointer to the target symbol table. */ void st_free(symbol_table *st); + +#endif diff --git a/src/tokeniser.c b/src/assembler/tokeniser.c similarity index 100% rename from src/tokeniser.c rename to src/assembler/tokeniser.c diff --git a/src/tokeniser.h b/src/assembler/tokeniser.h similarity index 100% rename from src/tokeniser.h rename to src/assembler/tokeniser.h diff --git a/src/util/binary_util.c b/src/util/binary_util.c new file mode 100644 index 0000000..af85377 --- /dev/null +++ b/src/util/binary_util.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include "binary_util.h" + +void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) { + // Ensure LSB and MSB are within range of word size, and in the correct order + assert(lsb < msb && msb <= 32); + + // Create a mask with 1s in the range [lsb, msb) and 0s elsewhere + word mask = 0; + for (uint8_t i = lsb; i < msb; i++) { + mask |= 1 << i; + } + + // Clear the bits in the range [lsb, msb) in the word + *wrd &= ~mask; + + // Set the bits in the range [lsb, msb) to the value + *wrd |= (value << lsb) & mask; +} + +// Sign extend a given value to a 64-bit signed integer given the number of bits +int64_t signExtend(dword value, unsigned int n) { + if (n == 0 || n >= 64) { + // If n_bits is 0 or greater than or equal to 64, return the value as is + return (int64_t)value; + } + + uint64_t sign_bit_mask = (uint64_t)1 << (n - 1); + + // Mask to isolate the n-bit value + uint64_t n_bit_mask = (sign_bit_mask << 1) - 1; + + // Check if the sign bit is set + if (value & sign_bit_mask) { + // Sign bit is set, extend the sign + return (int64_t)(value | ~n_bit_mask); + } else { + // Sign bit is not set, return the value as is + return (int64_t)(value & n_bit_mask); + } +} diff --git a/src/util/binary_util.h b/src/util/binary_util.h new file mode 100644 index 0000000..348dda2 --- /dev/null +++ b/src/util/binary_util.h @@ -0,0 +1,17 @@ +/** + */ + +#include "../global.h" + +/** @brief Sets a range of bits of a word (32-bit unsigned integer) to a value. + * The range is inclusive of the lsb and exclusive of the msb. The value should + * fit within the range. + * + * @param wrd A pointer to the word to set bits in. + * @param lsb The least significant bit of the range to set, inclusive. + * @param msb The most significant bit of the range to set, exclusive. + * @param value The value to set the bits to. + */ +void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value); + +int64_t signExtend(dword value, unsigned int n); diff --git a/src/fileio.c b/src/util/fileio.c similarity index 99% rename from src/fileio.c rename to src/util/fileio.c index cd4fcc6..597cbcf 100644 --- a/src/fileio.c +++ b/src/util/fileio.c @@ -1,5 +1,5 @@ #include -#include "global.h" +#include "../global.h" #include "fileio.h" #define MAX_ASM_LINE_LENGTH 300 diff --git a/src/fileio.h b/src/util/fileio.h similarity index 92% rename from src/fileio.h rename to src/util/fileio.h index 3a509ca..b3c9cbc 100644 --- a/src/fileio.h +++ b/src/util/fileio.h @@ -2,7 +2,7 @@ #define __FILEIO__ #include #include -#include "global.h" +#include "../global.h" #define EXIT_FAILURE 1