From 6de1915dbefbeb8aefd7db8c76a961bc5d8228b9 Mon Sep 17 00:00:00 2001
From: sBubshait <s.bobshit@gmail.com>
Date: Sat, 15 Jun 2024 01:50:56 +0100
Subject: [PATCH] Restructure overall assembler. Add string_util and Docs

---
 src/Makefile      |   2 +-
 src/assemble.c    |  12 +-
 src/encode.c      |  21 ++-
 src/parser.c      | 447 ++++++++++++++++++++--------------------------
 src/parser.h      |  17 +-
 src/string_util.c | 173 ++++++++++++++++++
 src/string_util.h |  64 +++++++
 src/symboltable.c |   8 +
 src/symboltable.h |  49 ++++-
 src/tokeniser.c   | 101 ++++-------
 src/tokeniser.h   |  26 +++
 11 files changed, 587 insertions(+), 333 deletions(-)
 create mode 100644 src/string_util.c
 create mode 100644 src/string_util.h
 create mode 100644 src/tokeniser.h

diff --git a/src/Makefile b/src/Makefile
index 071143f..150b23c 100755
--- a/src/Makefile
+++ b/src/Makefile
@@ -9,7 +9,7 @@ CFLAGS  ?= -std=c17 -g\
 
 all: assemble
 
-assemble: assemble.o parser.o fileio.o
+assemble: assemble.o parser.o fileio.o tokeniser.o string_util.o
 emulate: emulate.o
 
 clean:
diff --git a/src/assemble.c b/src/assemble.c
index 59a22d2..9b2484c 100644
--- a/src/assemble.c
+++ b/src/assemble.c
@@ -1,3 +1,9 @@
+/** @file assemble.c
+ *  @brief The main file for the ARMv8 assembler. Reads an assembly file and outputs the binary file.
+ *
+ *  @author Saleh Bubshait
+ */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include "a64instruction/a64instruction.h"
@@ -31,11 +37,13 @@ int main(int argc, char **argv) {
   // Write the binary to the output file
   writeBinaryFile(binary, argv[2], lineCount);
 
-  /* TODO: FREE MEMORY!! */
-
   return EXIT_SUCCESS;
 }
 
+/** The first pass of the assembler. Creates the symbol table. Adds all labels
+ *  and the address of the instruction following the label to the symbol table.
+ *  Returns the final symbol table.
+ */
 static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount) {
   symbol_table *table = st_init();
   int labelCount = 0;
diff --git a/src/encode.c b/src/encode.c
index 46b84c2..bdb89f4 100644
--- a/src/encode.c
+++ b/src/encode.c
@@ -1,3 +1,12 @@
+/** @file encode.c
+ *  @brief A function to encode the internal representation of ARMv8
+ *  instructions, a64inst_instruction, into binary.
+ *
+ *  @author Ethan Dias Alberto
+ *  @author George Niedringhaus
+ *  @author Saleh Bubshait
+ */
+
 #include <assert.h>
 #include "global.h"
 #include "a64instruction/a64instruction.h"
@@ -53,7 +62,7 @@ static int getLabelOffset(symbol_table* table, char* label, int currentIndex, in
 }
 
 // Generates assembled code based on the two-pass assembly method
-word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) {
+static word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) {
     word wrd = 0;
 
     switch (instr->data.BranchData.BranchType) {
@@ -77,7 +86,7 @@ word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) {
     return wrd;
 }
 
-word encodeDPImmediate(a64inst_instruction inst) {
+static word encodeDPImmediate(a64inst_instruction inst) {
     word wrd = 0;
 
     a64inst_DPImmediateData data = inst.data.DPImmediateData;
@@ -104,7 +113,7 @@ word encodeDPImmediate(a64inst_instruction inst) {
     return wrd;
 }
 
-word encodeDPRegister(a64inst_instruction inst) {
+static word encodeDPRegister(a64inst_instruction inst) {
     word wrd = 0;
 
     a64inst_DPRegisterData data = inst.data.DPRegisterData;
@@ -139,7 +148,7 @@ word encodeDPRegister(a64inst_instruction inst) {
     
 }
 
-word encodeSingleDataTransfer(a64inst_instruction inst) {
+static word encodeSingleDataTransfer(a64inst_instruction inst) {
     word wrd = 0;
 
     a64inst_SingleTransferData data = inst.data.SingleTransferData;
@@ -175,7 +184,7 @@ word encodeSingleDataTransfer(a64inst_instruction inst) {
     return wrd;
 }
 
-word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) {
+static word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) {
     word wrd = 0;
 
     a64inst_SingleTransferData data = cI.data.SingleTransferData;
@@ -189,7 +198,7 @@ word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) {
     return wrd;
 }
 
-word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) {
+static word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) {
     word *arr = (word*)malloc(sizeof(word) * instCount);
     int index = 0;
     for (int i = 0; i < instCount; i++) {
diff --git a/src/parser.c b/src/parser.c
index 2985b4c..474652e 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,24 +1,50 @@
+/** @file parser.c
+ *  @brief Functions to parse ARMv8 assembly lines into an array of a special
+ *  internal representation of instructions, a64inst_instruction.
+ *  @author Ethan Dias Alberto
+ *  @author George Niedringhaus
+ *  @author Saleh Bubshait
+ */
+
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <ctype.h>
 #include <stdbool.h>
 #include "parser.h"
 #include "a64instruction/a64instruction.h"
 #include "global.h"
-#include "tokeniser.c"
+#include "tokeniser.h"
+#include "string_util.h"
 
-/** Prototypes */
-void parse_instruction(char asmLine[], a64inst_instruction *instr);
-static char *duplicateString(char *str);
-void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands);
-void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]);
-void calculateAddressFormat(a64inst_instruction *instr,  char *operandList[], int numOperands);
-void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount);
-void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount);
-void parseDirective(a64inst_instruction *inst, char *tokens[]);
+/************************************
+ * STRUCTS
+ ************************************/
+
+typedef struct {
+    int type;       // Index of the shift mnemonic in SHIFT_TYPE_OPCODES (-1 if not found); set by parseShift
+    int immediate;  // Shift amount parsed from the shift operand; set by parseShift
+} ShiftData;
+
+/************************************
+ * PROTOTYPES
+ ************************************/
+
+static void parse_instruction(char asmLine[], a64inst_instruction *instr);
+static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands);
+static void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]);
+static void parseAddressingMode(a64inst_instruction *instr,  char *operandList[], int numOperands);
+static void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount);
+static void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount);
+static void parseDirective(a64inst_instruction *inst, char *tokens[]);
+static ShiftData *parseShift(char *shift);
+static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount);
+
+/************************************
+ * CONSTANTS
+ ************************************/
 
-/** Constants */
 static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"};
 static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"};
 static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"};
@@ -26,9 +52,11 @@ static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"};
 static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"};
 static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"};
 static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"};
-static const char *ZERO_REGISTER_ALIAS[] = {"xzr", "wzr"};
-static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"};
-static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"};
+
+
+/************************************
+ * FUNCTIONS
+ ************************************/
 
 a64inst_instruction *parse(char **asmLines, int lineCount) {
     a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount);
@@ -38,180 +66,33 @@ a64inst_instruction *parse(char **asmLines, int lineCount) {
         parse_instruction(asmLines[i], &instructions[i]);
         i++;
     }
-
+    
     return instructions;
 }
 
-static char *duplicateString(char *str) {
-    char *newStr = malloc(strlen(str) + 1);
-    strcpy(newStr, str);
-    return newStr;
-}
-
-static bool isStringIn(char *str, const char *arr[], int arrSize) {
-    for (int i = 0; i < arrSize; i++) {
-        if (strcmp(str, arr[i]) == 0) {
-            return true;
-        }
-    }
-    return false;
-}
-
-// If more than one occurance, return the last index
-static int indexStringIn(char *str, const char *arr[], int arrSize) {
-    for (int i = arrSize - 1; i >= 0; i--) {
-        if (strcmp(str, arr[i]) == 0) {
-            return i;
-        }
-    }
-    return -1;
-}
-
-typedef struct {
-    int type;
-    int immediate;
-} ShiftData;
-
-static ShiftData *parseShift(char *shift) {
-    char buffer[100];
-    strcpy(buffer, shift);
-    char *shiftType = strtok(buffer, " ");
-    char *shiftAmount = strtok(NULL, " ");
-    ShiftData *data = malloc(sizeof(ShiftData));
-    data->type = indexStringIn(shiftType, SHIFT_TYPE_OPCODES, 4);
-    while (*shiftAmount == ' ' || *shiftAmount == '#') {
-        shiftAmount++;
-    }
-    data->immediate = atoi(shiftAmount);
-    return data;
-}
-
-int isOperandRegister(char regStartChar) {
-    return((regStartChar == 'x') || (regStartChar == 'w'));
-}
-
-int classifyDPInst(char *operandList[]){
-    return(isOperandRegister(operandList[1][0]) &&
-    isOperandRegister(operandList[2][0]) &&
-    isOperandRegister(operandList[3][0]));
-}
-
-void classifyAlias(char *opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) {
-
-    int aliasIndex = indexStringIn(opcode, ALIAS_OPCODES, 9);
-    if (aliasIndex != -1) {
-        // The instruction is one of the aliases, convert into the target.
-        char *opcode = ALIAS_TARGET_OPCODES[aliasIndex];
-
-        // To correctly encode the zero register, which is either w31 or x31.
-        char *start_zeroReg = tokens[1];
-        while (isspace(*start_zeroReg)) start_zeroReg++;
-        char *zeroReg = malloc(5 * sizeof(char));
-        *zeroReg = *start_zeroReg;
-        strcat(zeroReg, "31");
-
-        switch(aliasIndex) {
-            case 0: // cmp -> subs rzr, rn, <op2>
-            case 1: // cmn -> adds rzr, rn, <op2>
-            case 4: // tst -> ands rzr, rn, <op2>
-            // Convert from [instr] REG, <op2> to [instr] RZR, REG, <op2>
-            tokens[0] = opcode;
-            tokens[4] = tokens[3];
-            tokens[3] = tokens[2];
-            tokens[2] = tokens[1];
-            tokens[1] = zeroReg;
-            (*tokensCount)++;
-            break;
-
-            case 2: // neg -> subs rd, rzr, <op2>
-            case 3: // negs -> subs rd, rzr, <op2>
-            case 5: // mvn  -> orn rd, rzr, <op2>
-            case 6: // mov -> orr rd, rzr, rm
-            tokens[0] = opcode;
-            tokens[4] = tokens[3];
-            tokens[3] = tokens[2];
-            tokens[2] = zeroReg;
-            (*tokensCount)++;
-            break;
-
-            default:
-            break;
-        }
-
-    }
-}
-
-void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount){
-
-    classifyAlias(opcode, instr, tokens, tokensCount);
-
-    if (isStringIn(opcode, BRANCH_OPCODES, 9)) {
-        instr->type = a64inst_BRANCH;
-
-        if (strcmp(opcode, "br") == 0) {
-            instr->data.BranchData.BranchType = a64inst_REGISTER;
-        } else if (strcmp(opcode, "b") == 0) {
-            instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL;
-        } else {
-            instr->data.BranchData.BranchType = a64inst_CONDITIONAL;
-        }
-    
-    } else if (isStringIn(opcode, SINGLE_TRANSFER_OPCODES, 2)) {
-        instr->type = a64inst_SINGLETRANSFER;
-        if (*tokens[2] == '[') {
-            instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
-            instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0;
-
-        } else {
-            instr->type = a64inst_LOADLITERAL;
-        }
-    } else if (classifyDPInst(tokens)) {
-        instr->type = a64inst_DPREGISTER;
-    } else {
-        instr->type = a64inst_DPIMMEDIATE;
-    }
-
-}
-
-//takes inputted char array and returns the integer of the operand, skipping the first character
-//e.g. for a passed "R32", it skips the 'R' and returns 32
-int getOperandNumber(char *operand){
-    if (isStringIn(operand, ZERO_REGISTER_ALIAS, 2)) {
-        return ZERO_REGISTER;
-    }
-
-    char operandCpy[strlen(operand)];
-    strcpy(operandCpy, operand+1);
-    char **endptr = NULL;
-    int number;
-    if(strncmp(operandCpy, "0x", 2)==0){
-        //hex value
-        strcpy(operandCpy, operand+3);
-        number = strtol(operandCpy, endptr, 16);
-    } else if(operandCpy[0] == 'x'){
-        number = strtol(operandCpy+1, endptr, 16);
-    } else {
-        number = strtol(operandCpy, endptr, 10);
-    }
-    return number;
-}
-
-
-void parse_instruction(char asmLine[], a64inst_instruction *instr) {
+/** Parses a single ARMv8 assembly line into an a64inst_instruction.
+ */
+static void parse_instruction(char asmLine[], a64inst_instruction *instr) {
     if (instr == NULL){
         exit(EXIT_FAILURE);
     }
 
-    if(strcmp(asmLine, HALT_ASM_CMD) == 0){
-        instr->type = a64inst_HALT;
-        return;
-    }
-
     char *asmLineCopy = duplicateString(asmLine);
     int tokensCount = 0;
     char **tokens = tokenise(asmLineCopy, &tokensCount);
     char *opcode = tokens[0];
 
+    // Check if the instruction is the halt instruction, "and x0, x0, x0".
+    if (tokensCount == 4 && strcmp(opcode, "and") == 0 
+        && getRegister(tokens[1]) == 0 
+        && getRegister(tokens[2]) == 0 
+        && getRegister(tokens[3]) == 0) {
+
+        instr->type = a64inst_HALT;
+        return;
+    }
+
+
     if(strcmp(opcode, ".int") == 0){
         // Directive
         instr->type = a64inst_DIRECTIVE;
@@ -226,6 +107,8 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) {
         
     } else {
         // Instruction
+
+        // Classify the opcode into the correct instruction type.
         classifyOpcode(opcode, instr, tokens, &tokensCount);
 
         switch(instr->type){
@@ -235,74 +118,32 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) {
 
             case a64inst_SINGLETRANSFER:
                 parseSingleTransfer(instr, opcode, tokens, tokensCount);
-                calculateAddressFormat(instr, tokens, tokensCount);
+                parseAddressingMode(instr, tokens, tokensCount);
                 break;
+
             case a64inst_LOADLITERAL:
                 parseSingleTransfer(instr, opcode, tokens, tokensCount);
                 break;
+
             case a64inst_DPREGISTER:
                 //generate DP operands;
                 parseDPRegister(instr, tokens, tokensCount);
                 break;
+
             case a64inst_DPIMMEDIATE:
                 parseDPImmediate(instr, tokens, tokensCount);
                 break;
+
             default:   
-                printf("Error: Invalid Instruction\n");
+                printf("Error: Invalid Instruction, '%s'\n", opcode);
                 break;
+
         }
         
     }
-
-    /* TODO: FREE MEMORY! */
-
 }
 
-
-
-
-void calculateAddressFormat(a64inst_instruction *instr,  char *tokens[], int tokenCount) {
-    assert(*tokens[2] == '[');
-
-    int operandCount = 0;
-    char unsplitString[strlen(tokens[2])];
-    strcpy(unsplitString, tokens[2]);
-    char **operands = tokeniseOperands(tokens[2], &operandCount);
-
-    int baseRegister = getOperandNumber(operands[0]);
-    
-    instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister;
-
-    if (tokenCount >= 4) {
-        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED;
-        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]);
-
-    } else if(unsplitString[strlen(unsplitString)-1] == '!') {
-        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED;
-        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]);
-
-    } else if (operandCount == 1 || (!isOperandRegister(*operands[1]))) {
-        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
-        if(operandCount > 1){
-            int offset = getOperandNumber(operands[1]);
-            instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8;
-            //NEED TO SCALE IMMEDIATE VALUE BASED ON REGISTER TYPE IN ASSEMBLER
-        }
-    } else {
-        if((isOperandRegister(*operands[0]) == 1)
-            && (isOperandRegister(*operands[1]) == 1)){
-                //register
-                instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET;
-                instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]);
-        }
-    }
-}
-
-static int parseRegisterType(char *operand) {
-    return operand[0] == 'x';
-}
-
-void parseDirective(a64inst_instruction *instr, char *tokens[]) {
+static void parseDirective(a64inst_instruction *instr, char *tokens[]) {
     char *intValue = tokens[1];
     char *endptr;
     if(strncmp(intValue, "0x", 2) == 0) {
@@ -314,27 +155,28 @@ void parseDirective(a64inst_instruction *instr, char *tokens[]) {
 }
 
 
-void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) {
+static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) {
 
     switch(instr->type){
         case a64inst_SINGLETRANSFER:
-            instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]);
-            instr->data.SingleTransferData.target = getOperandNumber(tokens[1]);
+            instr->data.SingleTransferData.regType = getRegisterType(tokens[1]);
+            instr->data.SingleTransferData.target = getRegister(tokens[1]);
             break;
 
         case a64inst_LOADLITERAL:
-            instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]);
-            instr->data.SingleTransferData.target = getOperandNumber(tokens[1]);
+            instr->data.SingleTransferData.regType = getRegisterType(tokens[1]);
+            instr->data.SingleTransferData.target = getRegister(tokens[1]);
 
             if(*tokens[2] =='#'){
                 //offset is immediate
-                int offset = getOperandNumber(tokens[1]);
-                instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset;
+                instr->data.SingleTransferData.processOpData.loadLiteralData.offset =  getImmediate(tokens[2]);;
             } else {
+                //offset is label
                 instr->data.SingleTransferData.processOpData.loadLiteralData.label = tokens[2];
-                //offset is literal, use symbol table and calculate difference
             }
+
             break;
+            
         default:
             break;
         
@@ -350,7 +192,7 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[])
             instr->data.BranchData.processOpData.unconditionalData.label = operandList[1];
             break;
         case a64inst_REGISTER:
-            instr->data.BranchData.processOpData.registerData.src = getOperandNumber(operandList[1]);
+            instr->data.BranchData.processOpData.registerData.src = getRegister(operandList[1]);
             break;
         case a64inst_CONDITIONAL:
             {
@@ -381,13 +223,13 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[])
 
 void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) {
     a64inst_DPImmediateData *data = &inst->data.DPImmediateData;
-    data->dest = getOperandNumber(tokens[1]);
-    data->regType = parseRegisterType(tokens[1]);
+    data->dest = getRegister(tokens[1]);
+    data->regType = getRegisterType(tokens[1]);
 
-    if (isStringIn(tokens[0], WIDE_MOV_OPCODES, 4)) {
+    if (containsString(tokens[0], WIDE_MOV_OPCODES, 4)) {
         data->DPIOpType = a64inst_DPI_WIDEMOV;
-        data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 4); 
-        data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]);
+        data->processOp = lastIndexOfString(tokens[0], WIDE_MOV_OPCODES, 4); 
+        data->processOpData.wideMovData.immediate = getImmediate(tokens[2]);
         if (tokensCount >= 4) {
             ShiftData shData = *parseShift(tokens[3]);
             data->processOpData.wideMovData.shiftScalar = shData.immediate;
@@ -395,9 +237,9 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount
 
     } else {
         data->DPIOpType = a64inst_DPI_ARITHM;
-        data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4);
-        data->processOpData.arithmData.src = getOperandNumber(tokens[2]);
-        data->processOpData.arithmData.immediate = getOperandNumber(tokens[3]);
+        data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4);
+        data->processOpData.arithmData.src = getRegister(tokens[2]);
+        data->processOpData.arithmData.immediate = getImmediate(tokens[3]);
         
         if (tokensCount >= 5) {
             ShiftData shData = *parseShift(tokens[4]);
@@ -411,16 +253,16 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount
 
 void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) {
     a64inst_DPRegisterData *data = &inst->data.DPRegisterData;
-    data->dest = getOperandNumber(tokens[1]);
-    data->regType = parseRegisterType(tokens[1]);
-    data->src1 = getOperandNumber(tokens[2]);
-    data->src2 = getOperandNumber(tokens[3]);
+    data->dest = getRegister(tokens[1]);
+    data->regType = getRegisterType(tokens[1]);
+    data->src1 = getRegister(tokens[2]);
+    data->src2 = getRegister(tokens[3]);
 
-    if (isStringIn(tokens[0], MULTIPLY_OPCODES, 4)) {
+    if (containsString(tokens[0], MULTIPLY_OPCODES, 4)) {
         // Multiply
         data->DPROpType = a64inst_DPR_MULTIPLY;
         if (tokensCount >= 5) {
-        data->processOpData.multiplydata.summand = getOperandNumber(tokens[4]);
+        data->processOpData.multiplydata.summand = getRegister(tokens[4]);
         data->processOpData.multiplydata.negProd = strcmp(tokens[0], "msub") == 0;
         }
         else {
@@ -432,21 +274,21 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount)
         // Arithmetic/Logic
         data->DPROpType = a64inst_DPR_ARITHMLOGIC;
         
-        if (isStringIn(tokens[0], ARITHMETIC_OPCODES, 4)) {
+        if (containsString(tokens[0], ARITHMETIC_OPCODES, 4)) {
             // Arithmetic
-            data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4);
+            data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4);
             data->processOpData.arithmLogicData.type = 1;
             if(tokensCount == 5) {
                 //has a shift
                 int numTokens = 0;
                 char **shiftOperands = tokenise(tokens[4], &numTokens);
-                data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
-                data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]);
+                data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
+                data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]);
             }
 
         } else {
             // Logic
-            int opcodeCategory = indexStringIn(tokens[0], LOGIC_OPCODES, 8);
+            int opcodeCategory = lastIndexOfString(tokens[0], LOGIC_OPCODES, 8);
             switch(opcodeCategory/2){
                 case 0:
                     //and
@@ -489,9 +331,102 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount)
                 //has a shift
                 int numTokens = 0;
                 char **shiftOperands = tokenise(tokens[4], &numTokens);
-                data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
-                data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]);
+                data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
+                data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]);
             }
         }
     }
 }
+
+/** Works out which kind of instruction the opcode denotes and stores the
+ *  result in instr->type, along with the branch or transfer sub-type.
+ *  tokens and tokensCount are handed to translateAlias before classifying.
+ */
+static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) {
+
+    // Alias opcodes are converted to their target instruction up front.
+    translateAlias(opcode, tokens, tokensCount);
+
+    // Branches: "br" is a register branch, "b" is unconditional and every
+    // other branch opcode is conditional.
+    if (containsString(opcode, BRANCH_OPCODES, 9)) {
+        instr->type = a64inst_BRANCH;
+        instr->data.BranchData.BranchType =
+            strcmp(opcode, "br") == 0 ? a64inst_REGISTER
+            : strcmp(opcode, "b") == 0 ? a64inst_UNCONDITIONAL
+            : a64inst_CONDITIONAL;
+        return;
+    }
+
+    // Loads and stores: a second operand starting with '[' is a single data
+    // transfer; anything else is classified as a load literal.
+    if (containsString(opcode, SINGLE_TRANSFER_OPCODES, 2)) {
+        if (*tokens[2] != '[') {
+            instr->type = a64inst_LOADLITERAL;
+            return;
+        }
+        instr->type = a64inst_SINGLETRANSFER;
+        instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
+        instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0;
+        return;
+    }
+
+    // Data processing: register form when the third operand is a register,
+    // immediate form otherwise.
+    instr->type = (*tokensCount >= 4 && isRegister(tokens[3]))
+        ? a64inst_DPREGISTER : a64inst_DPIMMEDIATE;
+}
+
+/** Parses a shift string such as "lsl #12" into a malloc'd ShiftData that the
+ *  caller owns. A missing shift type gives type -1; a missing amount gives 0. */
+static ShiftData *parseShift(char *shift) {
+    char buffer[20];
+    // Bounded copy: over-long input is truncated instead of overflowing buffer.
+    snprintf(buffer, sizeof buffer, "%s", shift);
+    char *shiftType = strtok(buffer, " ");
+    char *shiftAmount = strtok(NULL, " ");
+
+    ShiftData *data = malloc(sizeof *data);
+    if (data == NULL) exit(EXIT_FAILURE);
+    // strtok returns NULL when a field is absent, so neither is used unchecked.
+    data->type = shiftType ? lastIndexOfString(shiftType, SHIFT_TYPE_OPCODES, 4) : -1;
+    if (shiftAmount != NULL) { SKIP_WHITESPACE(shiftAmount); }
+    data->immediate = shiftAmount ? getImmediate(shiftAmount) : 0;
+    return data;
+}
+
+/** Parses the addressing mode of a single transfer instruction (not load
+ *  literal) from tokens[2] and, when present, tokens[3], and stores it in instr.
+ */
+static void parseAddressingMode(a64inst_instruction *instr,  char *tokens[], int tokenCount) {
+    assert(*tokens[2] == '[');
+
+    // Note the trailing '!' before tokens[2] is tokenised. Reading it in place
+    // replaces a duplicated string that was allocated here and never freed.
+    bool preIndexed = tokens[2][strlen(tokens[2]) - 1] == '!';
+
+    int operandCount = 0;
+    char **operands = tokeniseOperands(tokens[2], &operandCount);
+
+    instr->data.SingleTransferData.processOpData.singleDataTransferData.base = getRegister(operands[0]);
+
+    if (tokenCount >= 4) {
+        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED;
+        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(tokens[3]);
+
+    } else if (preIndexed) {
+        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED;
+        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(operands[1]);
+
+    } else if (operandCount == 1 || !isRegister(operands[1])) {
+        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
+        if (operandCount > 1) {
+            // NOTE(review): the offset is always divided by 8; confirm for w registers.
+            int offset = getImmediate(operands[1]);
+            instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8;
+        }
+    } else if (isRegister(operands[0]) && isRegister(operands[1])) {
+        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET;
+        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getRegister(operands[1]);
+    }
+}
diff --git a/src/parser.h b/src/parser.h
index 81885af..23b76c6 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -1,6 +1,17 @@
+/** @file parser.h
+ *  @brief A function to parse ARMv8 assembly lines into an array of a special
+ *  internal representation of instructions, a64inst_instruction.
+ *
+ *  @author Ethan Dias Alberto 
+ *  @author Saleh Bubshait
+ */
+
 #include "a64instruction/a64instruction.h"
 
-#define OPERAND_DELIMITER ", "
-#define HALT_ASM_CMD "and x0, x0, x0\n"
-
+/** @brief Parses a list of ARMv8 assembly lines into an array of a64inst_instruction.
+ * 
+ * @param asmLines An array of strings, each string is an ARMv8 assembly line.
+ * @param lineCount The number of lines in the asmLines array.
+ * @return An array of a64inst_instruction representing the parsed instructions.
+ */
 a64inst_instruction *parse(char **asmLines, int lineCount);
diff --git a/src/string_util.c b/src/string_util.c
new file mode 100644
index 0000000..8b7aaa0
--- /dev/null
+++ b/src/string_util.c
@@ -0,0 +1,173 @@
+/** @file string_util.c
+ *  @brief This file contains the implementation of some string processing
+ *  utility functions used in the assembler.
+ *
+ *  @author Saleh Bubshait
+ */
+
+#include <string.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include "string_util.h"
+#include "global.h"
+
+/************************************
+ * CONSTANTS
+ ************************************/
+
+static const char *SPECIAL_REGISTERS[] = {"sp", "xzr", "wzr"}; // names isRegister accepts by exact match
+static const char *ZERO_REGISTER_ALIAS[] = {"xzr", "wzr"}; // names getRegister maps to ZERO_REGISTER
+static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"}; // 7 aliases; index i pairs with ALIAS_TARGET_OPCODES[i]
+static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"}; // non-const: translateAlias stores these into a char *tokens[] array
+
+/************************************
+ * FUNCTIONS
+ ************************************/
+
+char *trim(char *str) {
+    // Skip leading whitespace; the cast keeps negative char values out of isspace().
+    while (isspace((unsigned char)*str)) {
+        str++;
+    }
+
+    // Empty or all-whitespace input: hand back the (now empty) tail.
+    if (*str == '\0') {
+        return str;
+    }
+
+    // Walk back from the last character over any trailing whitespace.
+    char *end = str + strlen(str) - 1;
+    while (end > str && isspace((unsigned char)*end)) {
+        end--;
+    }
+    end[1] = '\0'; // terminate right after the last non-whitespace character
+
+    return str;
+}
+
+bool containsString(char *str, const char *arr[], int arrSize) {
+    // Linear scan: advance until an entry equals str or the array runs out.
+    int idx = 0;
+    while (idx < arrSize && strcmp(str, arr[idx]) != 0) {
+        idx++;
+    }
+    return idx < arrSize;
+}
+
+int lastIndexOfString(char *str, const char *arr[], int arrSize) {
+    // Walk backwards so that, among duplicates, the highest index wins.
+    int idx = arrSize - 1;
+    while (idx >= 0 && strcmp(str, arr[idx]) != 0) {
+        idx--;
+    }
+    return idx >= 0 ? idx : -1;
+}
+
+char *duplicateString(char *str) {
+    char *newStr = malloc(strlen(str) + 1); // +1 for the terminator
+    if (newStr == NULL) { return NULL; } // out of memory: report it rather than copy through NULL
+    return strcpy(newStr, str); // strcpy returns its destination; the caller frees it
+}
+
+bool isRegister(char *str) {
+    if (str == NULL) // must come first: SKIP_WHITESPACE dereferences str
+        return false;
+    SKIP_WHITESPACE(str);
+
+    if (containsString(str, SPECIAL_REGISTERS, 3)) // sp, xzr, wzr
+        return true;
+
+    return tolower((unsigned char)str[0]) == 'x' || tolower((unsigned char)str[0]) == 'w';
+}
+
+int getRegister(char *str) {
+    SKIP_WHITESPACE(str);
+
+    // xzr / wzr carry no digits to parse; they map straight to the zero register.
+    bool namesZeroRegister = containsString(str, ZERO_REGISTER_ALIAS, 2);
+
+    return namesZeroRegister ? ZERO_REGISTER : strtol(str + 1, NULL, 10);
+}
+
+int getImmediate(char *str) {
+    // Parses "#<decimal>" or "#0x<hex>" (leading whitespace allowed).
+    SKIP_WHITESPACE(str);
+
+    // An immediate needs '#' plus at least one more character; otherwise yield 0.
+    if (strlen(str) < 2 || str[0] != '#') {
+        return 0;
+    }
+
+    str++; // skip #
+
+    // Compare exactly two characters for either prefix: a length of 3 would
+    // include the terminator of "0X" and so only match the bare string "0X".
+    if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 2) == 0) {
+        // Hex: parse the digits that follow the prefix.
+        return strtol(str + 2, NULL, 16);
+    }
+
+    // Decimal
+    return strtol(str, NULL, 10);
+}
+
+int getRegisterType(char *str) {
+    SKIP_WHITESPACE(str);
+    // 1 when the name starts with 'x' or 'X', 0 for anything else.
+    const int firstChar = tolower(*str);
+    return firstChar == 'x';
+}
+
+
+/** @brief Translates an alias instruction into its target instruction.
+ * Note: This function modifies the input tokens array and the tokensCount.
+ * Assumes there is enough space in the tokens array to add one new token. The
+ * inserted zero-register token is heap-allocated and is never freed here.
+ * @param opcode The opcode of the instruction.
+ * @param tokens The tokens of the instruction.
+ * @param tokensCount The number of tokens in the instruction.
+ */
+void translateAlias(char *opcode, char *tokens[], int *tokensCount) {
+    int aliasCount = (int)(sizeof(ALIAS_OPCODES) / sizeof(ALIAS_OPCODES[0]));
+    int aliasIndex = lastIndexOfString(opcode, ALIAS_OPCODES, aliasCount);
+    if (aliasIndex == -1 || *tokensCount < 2)
+        return; // not an alias, or no operand to take the register width from
+
+    // The instruction is one of the aliases, convert into the target.
+    char *targetOpcode = ALIAS_TARGET_OPCODES[aliasIndex];
+
+    // Build "w31"/"x31": the zero register with the first operand's width.
+    char *zeroReg = malloc(sizeof("x31"));
+    if (zeroReg == NULL) exit(EXIT_FAILURE); // cannot translate without it
+    zeroReg[0] = *tokens[1];
+    strcpy(zeroReg + 1, "31"); // also writes the terminator
+
+    switch(aliasIndex) {
+        case 0: // cmp -> subs rzr, rn, <op2>
+        case 1: // cmn -> adds rzr, rn, <op2>
+        case 4: // tst -> ands rzr, rn, <op2>
+        // Convert from [instr] reg, <op2> to [instr] rzr, reg, <op2>
+        tokens[0] = targetOpcode;
+        tokens[4] = tokens[3]; // shift every operand one slot to the right
+        tokens[3] = tokens[2];
+        tokens[2] = tokens[1];
+        tokens[1] = zeroReg;
+        (*tokensCount)++;
+        break;
+
+        case 2: // neg -> sub rd, rzr, <op2>
+        case 3: // negs -> subs rd, rzr, <op2>
+        case 5: // mvn  -> orn rd, rzr, <op2>
+        case 6: // mov -> orr rd, rzr, rm
+        tokens[0] = targetOpcode;
+        tokens[4] = tokens[3]; // rd stays put; shift the rest right
+        tokens[3] = tokens[2];
+        tokens[2] = zeroReg;
+        (*tokensCount)++;
+        break;
+
+        default:
+        // Note, the multiply instructions are handled separately (see DPReg parsing).
+        break;
+    }
+}
diff --git a/src/string_util.h b/src/string_util.h
new file mode 100644
index 0000000..c9bca35
--- /dev/null
+++ b/src/string_util.h
@@ -0,0 +1,64 @@
+/** @file string_util.h
+ *  @brief This file contains the declarations of some string processing
+ *  utility functions used in the assembler.
+ *
+ *  @author Saleh Bubshait
+ */
+
+/** @brief Advances a char pointer past any leading whitespace characters.
+ *  @param ptr A modifiable char * lvalue; it is advanced in place and evaluated more than once.
+ */
+#define SKIP_WHITESPACE(ptr) do { while (isspace((unsigned char)*(ptr))) { (ptr)++; } } while (0)
+
+/** @brief Removes leading and trailing whitespace from a string.
+ *  Note. This function modifies the input string.
+ *  @param str The string to trim.
+ *  @return A pointer to the first non-whitespace character in the string.
+ */
+char *trim(char *str);
+
+/** @brief Checks if a string is in an array of strings.
+ *
+ *  @param str The string to check.
+ *  @param arr The array of strings to check against.
+ *  @param arrSize The size of the array.
+ *  @return True if the string is in the array, false otherwise.
+ */
+bool containsString(char *str, const char *arr[], int arrSize);
+
+/** @brief Finds the last index of a string in an array of strings.
+ *  Note: If multiple occurrences of the string exist, the index of the last
+ *  occurrence is returned!
+ *
+ *  @param str The string to find.
+ *  @param arr The array of strings to search.
+ *  @param arrSize The size of the array.
+ *  @return The index of the last occurrence of the string in the array, or -1 if not found.
+ */
+int lastIndexOfString(char *str, const char *arr[], int arrSize);
+
+/** @brief Duplicates a string.
+ *  Note: The caller is responsible for freeing the returned string.
+ *
+ *  @param str The string to duplicate.
+ *  @return A pointer to the duplicated string.
+ */
+char *duplicateString(char *str);
+
+/** @brief Checks if a string represents an ARMv8 register.
+ *  A string is considered a register if it is:
+ *  - A general purpose register (x0-x30 or w0-w30)
+ *  - A special register (sp, xzr, wzr)
+ *
+ *  @param str The string to check.
+ *  @return True if the string is a register, false otherwise.
+ */
+bool isRegister(char *str);
+/** @brief After skipping leading whitespace, returns ZERO_REGISTER for "xzr"/"wzr"; otherwise the decimal number following the first character. */
+int getRegister(char *str);
+/** @brief Parses "#<decimal>" or "#0x<hex>" (leading whitespace allowed); returns 0 if str is shorter than 2 characters or does not start with '#'. */
+int getImmediate(char *str);
+/** @brief Returns 1 if str, after leading whitespace, starts with 'x' or 'X', and 0 otherwise. */
+int getRegisterType(char *str);
+/** @brief Rewrites the tokens of an alias instruction (e.g. cmp, mov) in place into its target instruction, updating *tokensCount. */
+void translateAlias(char *opcode, char *tokens[], int *tokensCount);
diff --git a/src/symboltable.c b/src/symboltable.c
index e93c84a..50db150 100644
--- a/src/symboltable.c
+++ b/src/symboltable.c
@@ -1,3 +1,11 @@
+/** @file symboltable.c
+ *  @brief An Abstract Data Type (ADT) for a symbol table, an array of 
+ *  label-address pairs. Labels are strings and addresses are unsigned integers
+ *  (uint32_t). The symbol table is implemented as a dynamic array.
+ *
+ *  @author Saleh Bubshait
+ */
+ 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/src/symboltable.h b/src/symboltable.h
index ba8b21c..ca1912d 100644
--- a/src/symboltable.h
+++ b/src/symboltable.h
@@ -1,3 +1,11 @@
+/** @file symboltable.h
+ *  @brief An Abstract Data Type (ADT) for a symbol table, an array of 
+ *  label-address pairs. Labels are strings and addresses are unsigned integers
+ *  (uint32_t). The symbol table is implemented as a dynamic array.
+ *
+ *  @author Saleh Bubshait
+ */
+
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -7,21 +15,56 @@
 
 typedef uint32_t address;
 
+/** An entry in the symbol table, a label-address pair.
+ */
 typedef struct {
     char *label;
     address address;
 } symbol_table_map;
 
+/** The symbol table ADT.
+ */
 typedef struct {
-    symbol_table_map* table;
-    int size;
-    int capacity;
+    symbol_table_map* table; // entries
+    int size; // number of entries
+    int capacity; // size of the table. capacity >= size
 } symbol_table;
 
+/** @brief Initializes a new symbol table.
+ *
+ *  @return A pointer to the new symbol table.
+ */
 symbol_table *st_init(void);
 
+/** @brief Inserts a new label-address pair to the symbol table.
+ *  Grows the table if it is full. If the label already exists in the table,
+ *  another entry with the same label is inserted (for performance).
+ *
+ *  @param st A pointer to the target symbol table.
+ *  @param label The label to insert.
+ *  @param addr The address to insert.
+ */
 void st_insert(symbol_table *st, char *label, address addr);
 
+/** @brief Checks if a label exists in the symbol table.
+ *
+ *  @param st A pointer to the target symbol table.
+ *  @param label The label to check.
+ *  @return True if the label exists in the table, false otherwise.
+ */
 bool st_contains(symbol_table *st, char *label);
 
+/** @brief Gets the address of a label in the symbol table.
+ *  st_contains should be called before calling this function!
+ *
+ *  @param st A pointer to the target symbol table.
+ *  @param label The label to get the address of.
+ *  @return The address of the label in the table.
+ */
 address st_get(symbol_table *st, char *label);
+
+/** @brief Frees the memory allocated for the symbol table.
+ *
+ *  @param st A pointer to the target symbol table.
+ */
+void st_free(symbol_table *st);
diff --git a/src/tokeniser.c b/src/tokeniser.c
index 6e37d1a..3b907ac 100644
--- a/src/tokeniser.c
+++ b/src/tokeniser.c
@@ -1,33 +1,23 @@
-// Tokeniser.c
+/** @file tokeniser.c
+ *  @brief Functions to tokenise lines of assembly and operand strings.
+ *
+ *  @author Saleh Bubshait
+ */
+
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdbool.h>
+#include "tokeniser.h"
+#include "string_util.h"
 
-#define MAX_TOKEN_COUNT 5
-#define MAX_OPERAND_COUNT 4
+#define MAX_TOKEN_COUNT 6
+#define MAX_OPERAND_COUNT 5
 #define OPERAND_DELIMITER ", "
-
-char *trim(char *str) {
-    while (isspace(*str)) {
-        str++;
-    }
-
-    if (*str == '\0') {
-        return str;
-    }
-
-    char *end = str + strlen(str) - 1;
-    while (end > str && isspace(*end)) {
-        end--;
-    }
-
-    end[1] = '\0';
-
-    return str;
-}
+#define OPEN_BRACKET '['
+#define CLOSE_BRACKET ']'
 
 char **tokenise(char *line, int *numTokens) {
     char **tokens = malloc(MAX_TOKEN_COUNT * sizeof(char *));\
@@ -46,36 +36,22 @@ char **tokenise(char *line, int *numTokens) {
 
     char *operandStart = strtok(NULL, "");
     if (operandStart == NULL) {
-        // No operands. Return the instruction token.
+        // No operands. Return the first (opcode) token.
         return tokens;
     }
 
-    bool inBracket = false;
-    char *currentToken = operandStart;
+    SKIP_WHITESPACE(operandStart);
+    
+    // Use tokeniseOperands to tokenise the operands
+    int operandTokensCount = 0;
+    char **operandTokens = tokeniseOperands(operandStart, &operandTokensCount);
 
-    for (char *c = operandStart; *c != '\0'; ++c) {
-        if (*c == '[' || *c == '{') {
-            inBracket = true;
-        } else if (*c == ']' || *c == '}') {
-            inBracket = false;
-        }
-
-
-        if (*c == ',' && !inBracket) {
-            *c = '\0';
-            tokens[(*numTokens)++] = currentToken;
-            currentToken = c + 1;
-            while (*currentToken == ' ') {
-                currentToken++;
-            }
-        }
-    }
-
-    if (*currentToken != '\0') {
-        tokens[*numTokens] = currentToken;
-        (*numTokens)++;
+    for (int i = 0; i < operandTokensCount; i++) {
+        tokens[(*numTokens)++] = operandTokens[i];
     }
 
+    
+    free(operandTokens);
     return tokens;
 }
 
@@ -86,42 +62,43 @@ char **tokeniseOperands(char *line, int *numTokens) {
         exit(EXIT_FAILURE);
     }
 
-    if (*line == '[') {
+    SKIP_WHITESPACE(line);
+
+    // Remove leading and trailing brackets if they exist
+    if (*line == OPEN_BRACKET) {
         line++; // skip '['
-        line[strlen(line) - 1] = '\0'; // remove ']'
-    } else if (*line == '{') {
-        line++; // skip '{'
-        line[strlen(line) - 1] = '\0'; // remove '}'
+        char *end = line + strlen(line) - 1;
+        while (end > line && *end != CLOSE_BRACKET) {
+            end--;
+        }
+        if (*end == CLOSE_BRACKET) {
+            *end = '\0';
+        }
     }
 
+    line = trim(line);
+
     *numTokens = 0;
     bool inBracket = false;
     char *currentToken = line;
 
     for (char *c = line; *c != '\0'; ++c) {
-        if (*c == '[' || *c == '{') {
+        if (*c == '[') {
             inBracket = true;
-        } else if (*c == ']' || *c == '}') {
+        } else if (*c == ']') {
             inBracket = false;
         }
 
         if (*c == ',' && !inBracket) {
             *c = '\0';
             tokens[(*numTokens)++] = currentToken;
-            currentToken = c + 1;
-            while (*currentToken == ' ') {
-                currentToken++;
-            }
+            currentToken = c + 1; // skip the comma
+            SKIP_WHITESPACE(currentToken);
         }
     }
 
     if (*currentToken != '\0') {
         tokens[*numTokens] = currentToken;
-
-        if (tokens[*numTokens][strlen(tokens[*numTokens]) - 1] == '\n') {
-            tokens[*numTokens][strlen(tokens[*numTokens]) - 1] = '\0';
-        }
-
         (*numTokens)++;
     }
 
diff --git a/src/tokeniser.h b/src/tokeniser.h
new file mode 100644
index 0000000..05b30fa
--- /dev/null
+++ b/src/tokeniser.h
@@ -0,0 +1,26 @@
+/** @file tokeniser.h
+ *  @brief Functions to tokenise lines of assembly and operand strings.
+ *
+ *  @author Saleh Bubshait
+ */
+
+/** @brief Tokenises a line of assembly code. The first two tokens are separated
+ *  by a space, and the rest are separated by commas.
+ *  e.g., "add x1, x2, x3" -> ["add", "x1", "x2", "x3"]. Skips any surrounding
+ *  whitespace, e.g., "  add   x1,    x2,#4    " -> ["add", "x1", "x2", "#4"].
+ *  @param line The line to tokenise.
+ *  @param numTokens A pointer to an integer to store the number of tokens.
+ *  @return An array of strings containing the tokens.
+ */
+char **tokenise(char *line, int *numTokens);
+
+/** @brief Tokenises the operands of an instruction. The operands are separated
+ *  by commas. Skips any surrounding whitespace, e.g., "x1, x2, #4" -> ["x1", "x2", "#4"].
+ *  If the line starts with an opening bracket, it and the last closing bracket are removed.
+ *  Note. It also removes anything after the brackets, for example:
+ *  "[x1, x2, #4]!" -> ["x1", "x2", "#4"].
+ *  @param line The line to tokenise.
+ *  @param numTokens A pointer to an integer to store the number of tokens.
+ *  @return An array of strings containing the tokens.
+ */
+char **tokeniseOperands(char *line, int *numTokens);