Merge assembler into master. Fix all conflicts

2024-06-15 03:38:24 +01:00 · 2024-06-15 03:38:24 +01:00 · e66af80187
commit e66af80187
parent b725033422 bdeafcbcc6
19 changed files with 1404 additions and 3 deletions
--- a/src/Makefile
+++ b/src/Makefile
@ -7,9 +7,9 @@ CFLAGS  ?= -std=c17 -g\

 .PHONY: all clean

-all: assemble emulate
+all: emulate assemble

-assemble: assemble.o
+assemble: assemble.o util/fileio.o util/binary_util.o assembler/encode.o assembler/parse.o assembler/tokenise.o assembler/string_util.o assembler/symboltable.o
 emulate: emulate.o util/fileio.o emulator/execute.o emulator/decode.o emulator/print.o emulator/machine_util.o util/binary_util.o

 clean:
--- a/src/a64instruction/a64instruction_Branch.h
+++ b/src/a64instruction/a64instruction_Branch.h
@ -9,6 +9,7 @@ typedef enum {

 typedef struct {
    word unconditionalOffset;
+    char* label;
 } a64inst_Branch_UnconditionalData;

 typedef struct {
@ -28,6 +29,7 @@ typedef enum {
 typedef struct {
    a64inst_ConditionType cond;
    word offset;
+    char* label;
 } a64inst_Branch_ConditionalData;

 typedef struct {
--- a/src/a64instruction/a64instruction_Directive.h
+++ b/src/a64instruction/a64instruction_Directive.h
@ -1,5 +1,5 @@
 #include "./a64instruction_global.h"

 typedef struct {
-    dword value;
+    word value;
 } a64inst_DirectiveData;
--- a/src/a64instruction/a64instruction_SingleTransfer.h
+++ b/src/a64instruction/a64instruction_SingleTransfer.h
@ -33,6 +33,7 @@ typedef struct {

 typedef struct {
    uint32_t offset;
+    char* label;
 } a64inst_LoadLiteralData;

 typedef struct {
--- a/src/assemble.c
+++ b/src/assemble.c
@ -1,5 +1,59 @@
+/** @file assemble.c
+ *  @brief The main file for the ARMv8 assembler. Reads an assembly file and outputs the binary file.
+ *
+ *  @author Saleh Bubshait
+ */
+
 #include <stdlib.h>
+#include <stdio.h>
+#include "a64instruction/a64instruction.h"
+#include "assembler/parse.h"
+#include "util/fileio.h"
+#include "assembler/encode.h"
+#include "assembler/symboltable.h"
+
+static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount);

 int main(int argc, char **argv) {
+  // Both an input assembly file and an output object file are required.
+  if (argc < 3) {
+    fprintf(stderr, "Error: A source file and an object output file are required. Syntax: ./assemble <file_in> <file_out>\n");
+    return EXIT_FAILURE;
+  }
+
+  // Load the source file into memory
+  int lineCount = countLines(argv[1]);
+  char **source = readAssemblyFile(argv[1], lineCount);
+  // NOTE(review): assumes readAssemblyFile signals failure with NULL - confirm in util/fileio.
+  if (source == NULL) {
+    fprintf(stderr, "Error: Could not read source file '%s'\n", argv[1]);
+    return EXIT_FAILURE;
+  }
+
+  // Parse the source file
+  a64inst_instruction *instructions = parse(source, lineCount);
+
+  // First Pass: Create the symbol table
+  symbol_table *table = firstPass(instructions, lineCount);
+
+  // Second Pass: Encode the instructions into binary
+  word *binary = encode(instructions, lineCount, table);
+  if (binary == NULL) {
+    fprintf(stderr, "Error: Could not encode '%s'\n", argv[1]);
+    free(instructions);
+    return EXIT_FAILURE;
+  }
+
+  // Label lines do not produce a word in the binary (encode skips them), so
+  // only the non-label lines are written; writing lineCount words would append
+  // uninitialised memory after the last real instruction.
+  // NOTE(review): assumes the third argument of writeBinaryFile is a word count.
+  int wordCount = 0;
+  for (int i = 0; i < lineCount; i++) {
+    if (instructions[i].type != a64inst_LABEL) {
+      wordCount++;
+    }
+  }
+
+  // Write the binary to the output file
+  writeBinaryFile(binary, argv[2], wordCount);
+
+  // Both arrays are heap-allocated by encode() and parse() respectively.
+  free(binary);
+  free(instructions);
+
   return EXIT_SUCCESS;
 }
+
+/** First assembler pass: builds the symbol table.
+ *  Every label line is recorded together with the index of the instruction
+ *  that follows it, where the index counts only non-label lines (each label
+ *  seen so far is subtracted from the line number).
+ *  Returns the populated symbol table.
+ */
+static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount) {
+  symbol_table *symbols = st_init();
+  int labelsSeen = 0;
+
+  for (int line = 0; line < lineCount; line++) {
+    const a64inst_instruction *current = &instructions[line];
+    if (current->type != a64inst_LABEL) {
+      continue;
+    }
+
+    // Labels occupy a source line but no slot in the output.
+    st_insert(symbols, current->data.LabelData.label, line - labelsSeen);
+    labelsSeen++;
+  }
+
+  return symbols;
+}
--- a/src/assembler/encode.c
+++ b/src/assembler/encode.c
@ -0,0 +1,200 @@
+/** @file encode.c
+ *  @brief A function to encode the internal representation of ARMv8
+ *  instructions, a64inst_instruction, into binary.
+ *
+ *  @author Ethan Dias Alberto
+ *  @author George Niedringhaus
+ *  @author Saleh Bubshait
+ */
+
+#include "symboltable.h"
+#include <stdlib.h>
+#include "../util/binary_util.h"
+#include "encode.h"
+
+#define HALT_BINARY 2315255808
+
+/** Looks up a label and returns its distance from the current instruction,
+ *  passed through signExtend with the given field width.
+ *  Both the label address and currentIndex are instruction indices.
+ */
+static int getLabelOffset(symbol_table* table, char* label, int currentIndex, int n_bits) {
+    const address labelIndex = st_get(table, label);
+    const unsigned int distance = (unsigned int) (labelIndex - currentIndex);
+    return signExtend(distance, n_bits);
+}
+
+/** Encodes a branch instruction (unconditional, register or conditional).
+ *  Label operands are resolved against the symbol table produced by the first
+ *  pass; `index` is the position of this instruction in the output array.
+ *
+ *  NOTE(review): setBits is used throughout this file with half-open
+ *  [lsb, msb) ranges (e.g. (31, 32) for the single sf bit) - confirm in
+ *  util/binary_util.
+ */
+static word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) {
+    word wrd = 0;
+
+    switch (instr->data.BranchData.BranchType) {
+    case a64inst_UNCONDITIONAL:
+        setBits(&wrd, 26, 29, 0x5);
+        // simm26 occupies bits 0-25, i.e. the range [0, 26). Writing only
+        // [0, 25) dropped bit 25, which is the sign bit of backward branches.
+        setBits(&wrd, 0, 26, getLabelOffset(st, instr->data.BranchData.processOpData.unconditionalData.label, index, 26));
+        break;
+
+    case a64inst_REGISTER:
+        setBits(&wrd, 16, 32, 0xD61F);
+        setBits(&wrd, 5, 10, instr->data.BranchData.processOpData.registerData.src); // xn
+        break;
+
+    case a64inst_CONDITIONAL:
+        setBits(&wrd, 26, 32, 0x15);
+        setBits(&wrd, 5, 24, getLabelOffset(st, instr->data.BranchData.processOpData.conditionalData.label, index, 19)); // simm19
+        setBits(&wrd, 0, 4, instr->data.BranchData.processOpData.conditionalData.cond); // cond
+        break;
+
+    default:
+        // Unknown branch type: leave the word zeroed.
+        break;
+    }
+
+    return wrd;
+}
+
+/** Encodes a data-processing (immediate) instruction: either an arithmetic
+ *  operation with a 12-bit immediate or a wide move with a 16-bit immediate.
+ */
+static word encodeDPImmediate(a64inst_instruction inst) {
+    const a64inst_DPImmediateData *imm = &inst.data.DPImmediateData;
+    word encoded = 0;
+
+    // Fields shared by both variants.
+    setBits(&encoded, 31, 32, imm->regType);   // sf
+    setBits(&encoded, 29, 31, imm->processOp); // opc
+    setBits(&encoded, 28, 29, 0x1);            // constant value
+    setBits(&encoded, 0, 5, imm->dest);        // rd
+
+    if (imm->DPIOpType != a64inst_DPI_ARITHM) {
+        // Wide move
+        setBits(&encoded, 23, 26, 0x5); // opi
+        uint8_t halfWord = imm->processOpData.wideMovData.shiftScalar / 16;
+        setBits(&encoded, 21, 23, halfWord);                                 // hw
+        setBits(&encoded, 5, 21, imm->processOpData.wideMovData.immediate);  // imm16
+        return encoded;
+    }
+
+    // Arithmetic
+    setBits(&encoded, 23, 26, 0x2);                                          // opi
+    setBits(&encoded, 5, 10, imm->processOpData.arithmData.src);             // rn
+    setBits(&encoded, 22, 23, imm->processOpData.arithmData.shiftImmediate); // sh
+    setBits(&encoded, 10, 22, imm->processOpData.arithmData.immediate);      // imm12
+    return encoded;
+}
+
+/** Encodes a data-processing (register) instruction: multiply, or
+ *  arithmetic/logic with an optionally shifted second operand.
+ *
+ *  The order of the setBits calls matters: the multiply branch writes a fixed
+ *  pattern over bits 21-30, which covers the opc and M fields written first.
+ *  NOTE(review): assumes setBits takes a half-open [lsb, msb) range and
+ *  overwrites existing bits - confirm in util/binary_util.
+ */
+static word encodeDPRegister(a64inst_instruction inst) {
+    word wrd = 0;
+
+    a64inst_DPRegisterData data = inst.data.DPRegisterData;
+    setBits(&wrd, 31, 32, data.regType); // sf
+    setBits(&wrd, 29, 31, data.processOp); // opc
+    setBits(&wrd, 28, 29, data.DPROpType); // M
+    setBits(&wrd, 25 ,28, 0x5); // constant bits 25-27
+    setBits(&wrd, 16, 21, data.src2); // rm (second source register)
+    setBits(&wrd, 5, 10, data.src1); // rn (first source register)
+    setBits(&wrd, 0, 5, data.dest); // rd (destination register)
+
+    if (data.DPROpType == a64inst_DPR_MULTIPLY) {
+        // Fixed multiply pattern over bits 21-30; replaces opc/M set above.
+        setBits(&wrd, 21, 31, 0xD8);
+        setBits(&wrd, 15, 16, data.processOpData.multiplydata.negProd); // x
+        setBits(&wrd, 10, 15, data.processOpData.multiplydata.summand); // ra
+
+    } else {
+        // Arithmetic Logic Instruction
+        setBits(&wrd, 22, 24, data.processOpData.arithmLogicData.shiftType); // shift
+        setBits(&wrd, 10, 16, data.processOpData.arithmLogicData.shiftAmount); // operand (imm6)
+
+         if (data.processOpData.arithmLogicData.type == a64inst_DPR_ARITHM) {
+            // Arithmetic
+            setBits(&wrd, 24, 25, 0x1); // bit 24
+         } else {
+            // Logic: bit 21 (N) selects the negated second operand.
+            setBits(&wrd, 21, 22, data.processOpData.arithmLogicData.negShiftedSrc2);
+         }
+
+    }
+
+    return wrd;
+    
+}
+
+/** Encodes a single data transfer (ldr/str with a base register).
+ *  The addressing mode selects how the offset field is filled in: register
+ *  offset, unsigned immediate offset, or pre/post-indexed.
+ *
+ *  The fixed pattern is written first and individual fields are then written
+ *  on top of it, so the order of the setBits calls matters.
+ *  NOTE(review): assumes setBits takes a half-open [lsb, msb) range and
+ *  overwrites existing bits - confirm in util/binary_util.
+ */
+static word encodeSingleDataTransfer(a64inst_instruction inst) {
+    word wrd = 0;
+
+    a64inst_SingleTransferData data = inst.data.SingleTransferData;
+    a64inst_SingleDataTransferData data2 = data.processOpData.singleDataTransferData;
+
+    setBits(&wrd, 22, 32, 0x2E0); // fixed bits of the instruction class
+    setBits(&wrd, 30, 31, data.regType); // sf
+    setBits(&wrd, 24, 25, data2.addressingMode == a64inst_UNSIGNED_OFFSET); // U
+    setBits(&wrd, 22, 23, data2.transferType); // L
+    setBits(&wrd, 5, 10, data2.base); // xn (base register)
+    setBits(&wrd, 0, 5, data.target); // rt (target register)
+    
+    switch (data2.addressingMode) {
+        // register offset
+        case a64inst_REGISTER_OFFSET:
+            setBits(&wrd, 21, 22, 1);
+            setBits(&wrd, 10, 16, 0x1A);
+            setBits(&wrd, 16, 21, data2.a64inst_addressingModeData.offsetReg); // xm
+            break;
+        // unsigned offset
+        case a64inst_UNSIGNED_OFFSET:
+            setBits(&wrd, 10, 22, data2.a64inst_addressingModeData.unsignedOffset); // imm12
+            break;
+        // pre/post indexed
+        default:
+            setBits(&wrd, 21, 22, 0);
+            setBits(&wrd, 11, 12, data2.addressingMode == a64inst_PRE_INDEXED); // I
+            setBits(&wrd, 10, 11, 1);
+            setBits(&wrd, 12, 21, data2.a64inst_addressingModeData.indexedOffset); // simm9
+            break;
+    }
+
+    return wrd;
+}
+
+/** Encodes a load-literal instruction. The literal is always addressed through
+ *  its label, resolved relative to arrIndex via the symbol table.
+ */
+static word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) {
+    const a64inst_SingleTransferData *transfer = &cI.data.SingleTransferData;
+    word encoded = 0;
+
+    // Fixed pattern first; sf is then written on top of it.
+    setBits(&encoded, 24, 32, 0x18);
+    setBits(&encoded, 30, 31, transfer->regType); // sf
+
+    const int literalOffset =
+        getLabelOffset(st, transfer->processOpData.loadLiteralData.label, arrIndex, 19);
+    setBits(&encoded, 5, 24, literalOffset);   // simm19
+    setBits(&encoded, 0, 5, transfer->target); // rt
+
+    return encoded;
+}
+
+/** Second assembler pass: encodes every instruction into a binary word.
+ *  Label entries produce no output, so the number of words written may be
+ *  smaller than instCount; words are packed from index 0 in source order.
+ *
+ *  Returns a heap-allocated array with room for instCount words, owned by the
+ *  caller, or NULL if the allocation fails.
+ */
+word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) {
+    word *arr = malloc(sizeof *arr * instCount);
+    if (arr == NULL) {
+        return NULL;
+    }
+
+    // Next free slot in arr; also the index used to resolve label offsets.
+    int index = 0;
+    for (int i = 0; i < instCount; i++) {
+        a64inst_instruction inst = insts[i];
+        switch (inst.type) {
+            case a64inst_DPIMMEDIATE:
+                arr[index] = encodeDPImmediate(inst);
+                index++;
+                break;
+            case a64inst_DPREGISTER:
+                arr[index] = encodeDPRegister(inst);
+                index++;
+                break;
+            case a64inst_SINGLETRANSFER:
+                arr[index] = encodeSingleDataTransfer(inst);
+                index++;
+                break;
+            case a64inst_LOADLITERAL:
+                arr[index] = encodeLoadLiteral(inst, index, st);
+                index++;
+                break;
+            case a64inst_DIRECTIVE:
+                arr[index] = inst.data.DirectiveData.value;
+                index++;
+                break;
+            case a64inst_HALT:
+                arr[index] = HALT_BINARY;
+                index++;
+                break;
+            case a64inst_BRANCH:
+                arr[index] = encodeBranch(&inst, index, st);
+                index++;
+                break;
+            case a64inst_LABEL:
+                // Labels are handled in the first pass and used for addressing.
+                break;
+            default:
+                break;
+        }
+    }
+    return arr;
+}
--- a/src/assembler/encode.h
+++ b/src/assembler/encode.h
@ -0,0 +1,21 @@
+/** @file encode.h
+ *  @brief A function to encode the internal representation of ARMv8
+ *  instructions, a64inst_instruction, into binary.
+ *
+ *  @author Saleh Bubshait
+ */
+
+#include "../global.h"
+#include "../a64instruction/a64instruction.h"
+#include "symboltable.h"
+
+/** @brief Encodes the internal representation of ARMv8 instructions into binary.
+ *  The symbol table is used to resolve labels in branch instructions. Assumes
+ *  that the instructions are in the same order as they appear in the source file.
+ *
+ *  @param insts An array of a64inst_instruction to encode.
+ *  @param instCount The number of instructions in the array.
+ *  @param st The symbol table to use for label resolution.
+ *  @return An array of words representing the binary encoding of the instructions.
+ */
+word *encode(a64inst_instruction insts[], int instCount, symbol_table* st);
--- a/src/assembler/parse.c
+++ b/src/assembler/parse.c
@ -0,0 +1,433 @@
+/** @file parse.c
+ *  @brief Functions to parse ARMv8 assembly lines into an array of a special
+ *  internal representation of instructions, a64inst_instruction.
+ *
+ *  @author Ethan Dias Alberto
+ *  @author George Niedringhaus
+ *  @author Saleh Bubshait
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include "parse.h"
+#include "../a64instruction/a64instruction.h"
+#include "../global.h"
+#include "tokenise.h"
+#include "string_util.h"
+
+/************************************
+ * STRUCTS
+ ************************************/
+
+/** Result of parsing a shift operand (filled in by parseShift). */
+typedef struct {
+    int type;       // index into SHIFT_TYPE_OPCODES as returned by lastIndexOfString
+    int immediate;  // shift amount as returned by getImmediate
+} ShiftData;
+
+/************************************
+ * PROTOTYPES
+ ************************************/
+
+static void parse_instruction(char asmLine[], a64inst_instruction *instr);
+static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands);
+static void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]);
+static void parseAddressingMode(a64inst_instruction *instr,  char *operandList[], int numOperands);
+static void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount);
+static void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount);
+static void parseDirective(a64inst_instruction *inst, char *tokens[]);
+static ShiftData *parseShift(char *shift);
+static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount);
+
+/************************************
+ * CONSTANTS
+ ************************************/
+
+// Opcode lookup tables. For several of them the position of an opcode in the
+// table is used directly as an encoding field (via lastIndexOfString), so the
+// order of the entries is significant.
+static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"};
+static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"};
+// Index becomes processOp. "movz" appears twice so that the last-index lookup
+// yields 2 for movz (and 0 for movn, 3 for movk); index 1 is never produced.
+static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"};
+// Index becomes processOp for arithmetic instructions.
+static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"};
+static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"};
+// Index becomes the shift type.
+static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"};
+// Grouped in pairs: parseDPRegister switches on index / 2.
+static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"};
+
+
+/************************************
+ * FUNCTIONS
+ ************************************/
+
+/** Parses up to lineCount assembly lines into an array of instructions.
+ *  Parsing stops early at the first NULL entry of asmLines.
+ *
+ *  The returned array has lineCount entries and is owned by the caller. It is
+ *  zero-initialised so that fields the individual parsers do not set (and any
+ *  entries after an early stop) hold a deterministic value instead of garbage.
+ *  Exits the program if the array cannot be allocated.
+ */
+a64inst_instruction *parse(char **asmLines, int lineCount) {
+    // Allocate at least one element so a zero-line file still yields a valid pointer.
+    size_t capacity = lineCount > 0 ? (size_t) lineCount : 1;
+    a64inst_instruction *instructions = calloc(capacity, sizeof *instructions);
+    if (instructions == NULL) {
+        fprintf(stderr, "Error: Out of memory while parsing\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // Bound the loop by lineCount as well as the NULL terminator so a missing
+    // terminator cannot write past the end of the array.
+    for (int i = 0; i < lineCount && asmLines[i] != NULL; i++) {
+        parse_instruction(asmLines[i], &instructions[i]);
+    }
+
+    return instructions;
+}
+
+/** Parses a single ARMv8 assembly line into an a64inst_instruction.
+ *
+ *  The line is duplicated before tokenising, so asmLine itself is passed
+ *  unmodified to tokenise. The duplicate is never freed in this function.
+ *  NOTE(review): presumably the tokens point into that duplicate (the label
+ *  pointer stored below would then rely on it staying alive) - confirm in
+ *  tokenise.
+ *  NOTE(review): tokens[0] is read without checking tokensCount; confirm that
+ *  tokenise never yields zero tokens for the lines passed in.
+ *
+ *  Exits the program if instr is NULL.
+ */
+static void parse_instruction(char asmLine[], a64inst_instruction *instr) {
+    if (instr == NULL){
+        exit(EXIT_FAILURE);
+    }
+
+    char *asmLineCopy = duplicateString(asmLine);
+    int tokensCount = 0;
+    char **tokens = tokenise(asmLineCopy, &tokensCount);
+    char *opcode = tokens[0];
+
+    // Check if the instruction is the halt instruction, "and x0, x0, x0".
+    // Only the register numbers are compared, not the register width.
+    if (tokensCount == 4 && strcmp(opcode, "and") == 0 
+        && getRegister(tokens[1]) == 0 
+        && getRegister(tokens[2]) == 0 
+        && getRegister(tokens[3]) == 0) {
+
+        instr->type = a64inst_HALT;
+        return;
+    }
+
+
+    if(strcmp(opcode, ".int") == 0){
+        // Directive
+        instr->type = a64inst_DIRECTIVE;
+        parseDirective(instr, tokens);
+
+
+    } else if(opcode[strlen(opcode)-1]== ':') {
+        // Label
+        instr->type = a64inst_LABEL;
+        opcode[strlen(opcode) - 1] = '\0'; // Remove the colon
+        instr->data.LabelData.label = opcode;
+        
+    } else {
+        // Instruction
+
+        // Classify the opcode into the correct instruction type.
+        // This may also rewrite tokens/tokensCount when the opcode is an alias.
+        classifyOpcode(opcode, instr, tokens, &tokensCount);
+
+        switch(instr->type){
+            case a64inst_BRANCH:
+                parseBranch(instr, opcode, tokens);
+                break;
+
+            case a64inst_SINGLETRANSFER:
+                // Register/target first, then the bracketed address operand.
+                parseSingleTransfer(instr, opcode, tokens, tokensCount);
+                parseAddressingMode(instr, tokens, tokensCount);
+                break;
+
+            case a64inst_LOADLITERAL:
+                parseSingleTransfer(instr, opcode, tokens, tokensCount);
+                break;
+
+            case a64inst_DPREGISTER:
+                //generate DP operands;
+                parseDPRegister(instr, tokens, tokensCount);
+                break;
+
+            case a64inst_DPIMMEDIATE:
+                parseDPImmediate(instr, tokens, tokensCount);
+                break;
+
+            default:   
+                // Reported on stdout; parsing continues with the next line.
+                printf("Error: Invalid Instruction, '%s'\n", opcode);
+                break;
+
+        }
+        
+    }
+}
+
+/** Parses the operand of an ".int" directive into the directive value.
+ *  The operand is read as hexadecimal when it starts with "0x", otherwise as
+ *  decimal.
+ */
+static void parseDirective(a64inst_instruction *instr, char *tokens[]) {
+    char *literal = tokens[1];
+    int base = 10;
+
+    // Skip the "0x" prefix and switch to base 16 for hexadecimal literals.
+    if (strncmp(literal, "0x", 2) == 0) {
+        literal += 2;
+        base = 16;
+    }
+
+    instr->data.DirectiveData.value = strtol(literal, NULL, base);
+}
+
+
+/** Fills in the target register and register width of a single transfer or
+ *  load-literal instruction. For load literals the third token is stored as
+ *  an immediate offset when it starts with '#', otherwise as a label.
+ *  Instructions of any other type are left untouched.
+ */
+static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) {
+    const bool isLiteral = instr->type == a64inst_LOADLITERAL;
+    if (!isLiteral && instr->type != a64inst_SINGLETRANSFER) {
+        return;
+    }
+
+    a64inst_SingleTransferData *transfer = &instr->data.SingleTransferData;
+    transfer->regType = getRegisterType(tokens[1]);
+    transfer->target = getRegister(tokens[1]);
+
+    if (!isLiteral) {
+        return;
+    }
+
+    if (tokens[2][0] == '#') {
+        // Literal given as an immediate offset
+        transfer->processOpData.loadLiteralData.offset = getImmediate(tokens[2]);
+    } else {
+        // Literal given as a label, resolved later by the encoder
+        transfer->processOpData.loadLiteralData.label = tokens[2];
+    }
+}
+
+/** Parses the operand of a branch instruction whose BranchType has already
+ *  been set by classifyOpcode.
+ *  - unconditional: stores the target label
+ *  - register:      stores the source register number
+ *  - conditional:   decodes the condition suffix after "b." and stores the
+ *                   target label; an unrecognised suffix leaves cond unchanged
+ */
+void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) {
+    switch(instr->data.BranchData.BranchType){
+        case a64inst_UNCONDITIONAL:
+            // The label is resolved to an offset by the encoder.
+            instr->data.BranchData.processOpData.unconditionalData.label = operandList[1];
+            break;
+
+        case a64inst_REGISTER:
+            instr->data.BranchData.processOpData.registerData.src = getRegister(operandList[1]);
+            break;
+
+        case a64inst_CONDITIONAL:
+            {
+                const struct {
+                    const char *suffix;
+                    a64inst_ConditionType cond;
+                } conditions[] = {
+                    {"eq", EQ}, {"ne", NE}, {"ge", GE}, {"lt", LT},
+                    {"gt", GT}, {"le", LE}, {"al", AL},
+                };
+                const char *suffix = opcode + 2; // skip the leading "b."
+
+                for (size_t i = 0; i < sizeof conditions / sizeof conditions[0]; i++) {
+                    if (strcmp(suffix, conditions[i].suffix) == 0) {
+                        instr->data.BranchData.processOpData.conditionalData.cond = conditions[i].cond;
+                        break;
+                    }
+                }
+
+                // Store the label in the conditional member: that is the one
+                // the encoder reads, and its layout differs from the
+                // unconditional member.
+                instr->data.BranchData.processOpData.conditionalData.label = operandList[1];
+                break;
+            }
+
+        default:
+            break;
+    }
+}
+
+/** Parses a data-processing (immediate) instruction: a wide move
+ *  (movn/movz/movk) or an arithmetic instruction with an immediate operand.
+ *  An optional trailing shift token is honoured; without one the shift fields
+ *  are set to zero rather than left uninitialised.
+ */
+void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) {
+    a64inst_DPImmediateData *data = &inst->data.DPImmediateData;
+    data->dest = getRegister(tokens[1]);
+    data->regType = getRegisterType(tokens[1]);
+
+    if (containsString(tokens[0], WIDE_MOV_OPCODES, 4)) {
+        data->DPIOpType = a64inst_DPI_WIDEMOV;
+        // The table index doubles as the opc field.
+        data->processOp = lastIndexOfString(tokens[0], WIDE_MOV_OPCODES, 4);
+        data->processOpData.wideMovData.immediate = getImmediate(tokens[2]);
+        data->processOpData.wideMovData.shiftScalar = 0;
+
+        if (tokensCount >= 4) {
+            ShiftData *shData = parseShift(tokens[3]);
+            if (shData != NULL) {
+                data->processOpData.wideMovData.shiftScalar = shData->immediate;
+                free(shData); // parseShift hands over ownership of the result
+            }
+        }
+
+    } else {
+        data->DPIOpType = a64inst_DPI_ARITHM;
+        data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4);
+        data->processOpData.arithmData.src = getRegister(tokens[2]);
+        data->processOpData.arithmData.immediate = getImmediate(tokens[3]);
+        data->processOpData.arithmData.shiftImmediate = false;
+
+        if (tokensCount >= 5) {
+            ShiftData *shData = parseShift(tokens[4]);
+            if (shData != NULL) {
+                // Any non-zero shift amount sets the sh flag.
+                data->processOpData.arithmData.shiftImmediate = shData->immediate > 0;
+                free(shData);
+            }
+        }
+    }
+}
+
+/** Parses the optional shift operand (fifth token) of an arithmetic/logic
+ *  register instruction. Both shift fields default to zero when no shift is
+ *  present.
+ */
+static void parseOptionalRegisterShift(a64inst_DPRegisterData *data, char *tokens[], int tokensCount) {
+    data->processOpData.arithmLogicData.shiftType = 0;
+    data->processOpData.arithmLogicData.shiftAmount = 0;
+
+    if (tokensCount != 5) {
+        return;
+    }
+
+    int numTokens = 0;
+    char **shiftOperands = tokenise(tokens[4], &numTokens);
+    if (shiftOperands == NULL || numTokens < 2) {
+        return;
+    }
+
+    data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
+    data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]);
+}
+
+/** Parses a data-processing (register) instruction: multiply, arithmetic or
+ *  logic. Expects tokens of the form: opcode, rd, rn, rm [, ra | shift].
+ */
+void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) {
+    a64inst_DPRegisterData *data = &inst->data.DPRegisterData;
+    data->dest = getRegister(tokens[1]);
+    data->regType = getRegisterType(tokens[1]);
+    data->src1 = getRegister(tokens[2]);
+    data->src2 = getRegister(tokens[3]);
+
+    if (containsString(tokens[0], MULTIPLY_OPCODES, 4)) {
+        // Multiply
+        data->DPROpType = a64inst_DPR_MULTIPLY;
+        if (tokensCount >= 5) {
+            // madd / msub: explicit summand register
+            data->processOpData.multiplydata.summand = getRegister(tokens[4]);
+            data->processOpData.multiplydata.negProd = strcmp(tokens[0], "msub") == 0;
+        } else {
+            // mul / mneg: the summand is the zero register
+            data->processOpData.multiplydata.summand = ZERO_REGISTER;
+            data->processOpData.multiplydata.negProd = strcmp(tokens[0], "mneg") == 0;
+        }
+        return;
+    }
+
+    // Arithmetic/Logic
+    data->DPROpType = a64inst_DPR_ARITHMLOGIC;
+
+    if (containsString(tokens[0], ARITHMETIC_OPCODES, 4)) {
+        // Arithmetic: the table index doubles as the opc field.
+        data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4);
+        // The encoder compares this field against a64inst_DPR_ARITHM.
+        data->processOpData.arithmLogicData.type = a64inst_DPR_ARITHM;
+        parseOptionalRegisterShift(data, tokens, tokensCount);
+        return;
+    }
+
+    // Logic. The encoder treats every value other than a64inst_DPR_ARITHM as
+    // logic, so store one explicitly instead of leaving the field unset.
+    data->processOpData.arithmLogicData.type = !a64inst_DPR_ARITHM;
+
+    int opcodeCategory = lastIndexOfString(tokens[0], LOGIC_OPCODES, 8);
+    bool setsFlags = tokens[0][strlen(tokens[0]) - 1] == 's';
+
+    switch (opcodeCategory / 2) {
+        case 0:
+            // and / ands
+            data->processOp = setsFlags ? 3 : 0;
+            data->processOpData.arithmLogicData.negShiftedSrc2 = 0;
+            break;
+        case 1:
+            // bic / bics (AND with negated operand)
+            data->processOp = setsFlags ? 3 : 0;
+            data->processOpData.arithmLogicData.negShiftedSrc2 = 1;
+            break;
+        case 2:
+            // eor / eon
+            data->processOp = 2;
+            data->processOpData.arithmLogicData.negShiftedSrc2 = (opcodeCategory == 4) ? 0 : 1;
+            break;
+        case 3:
+            // orr / orn
+            data->processOp = 1;
+            data->processOpData.arithmLogicData.negShiftedSrc2 = (opcodeCategory == 6) ? 0 : 1;
+            break;
+        default:
+            break;
+    }
+
+    parseOptionalRegisterShift(data, tokens, tokensCount);
+}
+
+/**  Determines the instruction type for the given opcode and records it in
+ *   instr. Aliases are first rewritten into their target instruction, which
+ *   may change tokens and tokensCount.
+ */
+static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) {
+
+    // Rewrite alias instructions before looking at the operands.
+    translateAlias(opcode, tokens, tokensCount);
+
+    // Branches
+    if (containsString(opcode, BRANCH_OPCODES, 9)) {
+        instr->type = a64inst_BRANCH;
+
+        if (strcmp(opcode, "b") == 0) {
+            instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL;
+        } else if (strcmp(opcode, "br") == 0) {
+            instr->data.BranchData.BranchType = a64inst_REGISTER;
+        } else {
+            instr->data.BranchData.BranchType = a64inst_CONDITIONAL;
+        }
+        return;
+    }
+
+    // Loads and stores: a bracketed operand means a base-register transfer,
+    // anything else is a load literal.
+    if (containsString(opcode, SINGLE_TRANSFER_OPCODES, 2)) {
+        if (tokens[2][0] != '[') {
+            instr->type = a64inst_LOADLITERAL;
+            return;
+        }
+
+        instr->type = a64inst_SINGLETRANSFER;
+        instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
+        instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0;
+        return;
+    }
+
+    // Data processing: register form when the third operand is a register,
+    // immediate form otherwise.
+    const bool thirdOperandIsRegister = *tokensCount >= 4 && isRegister(tokens[3]);
+    instr->type = thirdOperandIsRegister ? a64inst_DPREGISTER : a64inst_DPIMMEDIATE;
+
+}
+
+/** Parses a shift string of the form "<type> #<amount>" into a ShiftData
+ *  struct.
+ *
+ *  The result is heap-allocated and owned by the caller. A missing shift type
+ *  yields type -1 and a missing amount yields immediate 0. Input longer than
+ *  the internal buffer is truncated instead of overflowing it.
+ *  Exits the program if the result cannot be allocated.
+ */
+static ShiftData *parseShift(char *shift) {
+    char buffer[20];
+    // Bounded copy: strtok modifies its input, so work on a local copy.
+    snprintf(buffer, sizeof buffer, "%s", shift);
+
+    char *shiftType = strtok(buffer, " ");
+    char *shiftAmount = strtok(NULL, " ");
+
+    ShiftData *data = malloc(sizeof *data);
+    if (data == NULL) {
+        fprintf(stderr, "Error: Out of memory while parsing shift\n");
+        exit(EXIT_FAILURE);
+    }
+
+    data->type = (shiftType != NULL)
+        ? lastIndexOfString(shiftType, SHIFT_TYPE_OPCODES, 4)
+        : -1;
+
+    // getImmediate skips leading whitespace itself.
+    data->immediate = (shiftAmount != NULL) ? getImmediate(shiftAmount) : 0;
+    return data;
+}
+
+/** Parses the addressing mode of a single transfer instruction. (Not load literal)
+ *
+ *  Recognised forms, checked in this order:
+ *  - a fourth token present         -> post-indexed
+ *  - bracket operand ends with '!'  -> pre-indexed
+ *  - no offset or immediate offset  -> unsigned offset
+ *  - register offset                -> register offset
+ *
+ *  Expects the register width (regType) to have been set already, since the
+ *  unsigned offset is scaled by the transfer size.
+ */
+static void parseAddressingMode(a64inst_instruction *instr,  char *tokens[], int tokenCount) {
+    assert(*tokens[2] == '[');
+
+    a64inst_SingleDataTransferData *transfer =
+        &instr->data.SingleTransferData.processOpData.singleDataTransferData;
+
+    // Check for the write-back marker before the operand string is handed to
+    // tokeniseOperands; this avoids keeping (and leaking) a duplicate of it.
+    size_t length = strlen(tokens[2]);
+    bool preIndexed = length > 0 && tokens[2][length - 1] == '!';
+
+    int operandCount = 0;
+    char **operands = tokeniseOperands(tokens[2], &operandCount);
+
+    transfer->base = getRegister(operands[0]);
+
+    if (tokenCount >= 4) {
+        transfer->addressingMode = a64inst_POST_INDEXED;
+        transfer->a64inst_addressingModeData.indexedOffset = getImmediate(tokens[3]);
+
+    } else if (preIndexed) {
+        transfer->addressingMode = a64inst_PRE_INDEXED;
+        transfer->a64inst_addressingModeData.indexedOffset = getImmediate(operands[1]);
+
+    } else if (operandCount == 1 || (!isRegister(operands[1]))) {
+        transfer->addressingMode = a64inst_UNSIGNED_OFFSET;
+        // "[xn]" without an offset means offset zero.
+        int offset = (operandCount > 1) ? getImmediate(operands[1]) : 0;
+        // The encoded imm12 is the byte offset divided by the transfer size:
+        // 8 bytes for 64-bit registers, 4 bytes for 32-bit registers.
+        int scale = instr->data.SingleTransferData.regType ? 8 : 4;
+        transfer->a64inst_addressingModeData.unsignedOffset = offset / scale;
+
+    } else if (isRegister(operands[0]) && isRegister(operands[1])) {
+        transfer->addressingMode = a64inst_REGISTER_OFFSET;
+        transfer->a64inst_addressingModeData.offsetReg = getRegister(operands[1]);
+    }
+}
--- a/src/assembler/parse.h
+++ b/src/assembler/parse.h
@ -0,0 +1,17 @@
+/** @file parse.h
+ *  @brief A function to parse ARMv8 assembly lines into an array of a special
+ *  internal representation of instructions, a64inst_instruction.
+ *
+ *  @author Ethan Dias Alberto 
+ *  @author Saleh Bubshait
+ */
+
+#include "../a64instruction/a64instruction.h"
+
+/** @brief Parses a list of ARMv8 assembly lines into an array of a64inst_instruction.
+ * 
+ * @param asmLines An array of strings, each string is an ARMv8 assembly line.
+ * @param lineCount The number of lines in the asmLines array.
+ * @return An array of a64inst_instruction representing the parsed instructions.
+ */
+a64inst_instruction *parse(char **asmLines, int lineCount);
--- a/src/assembler/string_util.c
+++ b/src/assembler/string_util.c
@ -0,0 +1,173 @@
+/** @file string_util.c
+ *  @brief This file contains the implementation of some string processing
+ *  utility functions used in the assembler.
+ *
+ *  @author Saleh Bubshait
+ */
+
+#include <string.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include "string_util.h"
+#include "../global.h"
+
+/************************************
+ * CONSTANTS
+ ************************************/
+
+// Register names that isRegister accepts in addition to the x/w prefix check.
+static const char *SPECIAL_REGISTERS[] = {"sp", "xzr", "wzr"};
+// Names that getRegister maps to ZERO_REGISTER.
+static const char *ZERO_REGISTER_ALIAS[] = {"xzr", "wzr"};
+// Parallel arrays: ALIAS_TARGET_OPCODES[i] is the instruction that
+// ALIAS_OPCODES[i] is translated into. translateAlias also switches on the
+// index, so entries must not be reordered.
+static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"};
+static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"};
+
+/************************************
+ * FUNCTIONS
+ ************************************/
+
+/** Removes leading and trailing whitespace from a string in place.
+ *  Returns a pointer to the first non-whitespace character; for an
+ *  all-whitespace string this points at the terminating NUL.
+ */
+char *trim(char *str) {
+    // The <ctype.h> functions require a value representable as unsigned char,
+    // so cast before calling isspace to stay defined for bytes >= 0x80.
+    while (isspace((unsigned char) *str)) {
+        str++;
+    }
+
+    // If the string is all whitespace
+    if (*str == '\0') {
+        return str;
+    }
+
+    // Walk back from the end over trailing whitespace and cut the string there.
+    char *end = str + strlen(str) - 1;
+    while (end > str && isspace((unsigned char) *end)) {
+        end--;
+    }
+    end[1] = '\0';
+
+    return str;
+}
+
+/** Returns true when str equals one of the first arrSize entries of arr. */
+bool containsString(char *str, const char *arr[], int arrSize) {
+    int position = 0;
+    while (position < arrSize) {
+        if (strcmp(str, arr[position]) == 0) {
+            return true;
+        }
+        position++;
+    }
+    return false;
+}
+
+/** Returns the highest index in arr (below arrSize) whose entry equals str,
+ *  or -1 when there is no such entry.
+ */
+int lastIndexOfString(char *str, const char *arr[], int arrSize) {
+    int candidate = arrSize;
+    // Scan from the back so the last duplicate wins.
+    while (candidate-- > 0) {
+        if (strcmp(str, arr[candidate]) == 0) {
+            return candidate;
+        }
+    }
+    return -1;
+}
+
+/** Returns a freshly allocated copy of str; the caller frees it. */
+char *duplicateString(char *str) {
+    const size_t bytesNeeded = strlen(str) + 1; // include the terminator
+    char *copy = malloc(bytesNeeded);
+    return strcpy(copy, str);
+}
+
+/** Returns true when str looks like a register operand: one of the special
+ *  register names, or any token whose first non-whitespace character is
+ *  'x' or 'w' (case-insensitive). Returns false for NULL.
+ */
+bool isRegister(char *str) {
+    // Check for NULL before touching the string; skipping whitespace
+    // dereferences it.
+    if (str == NULL)
+        return false;
+
+    while (isspace((unsigned char) *str)) {
+        str++;
+    }
+
+    if (containsString(str, SPECIAL_REGISTERS, 3))
+        return true;
+
+    return tolower((unsigned char) str[0]) == 'x' || tolower((unsigned char) str[0]) == 'w';
+}
+
+/** Returns the register number of a register operand: ZERO_REGISTER for the
+ *  zero-register names, otherwise the decimal number that follows the first
+ *  character.
+ */
+int getRegister(char *str) {
+    SKIP_WHITESPACE(str);
+
+    if (!containsString(str, ZERO_REGISTER_ALIAS, 2)) {
+        // Skip the one-letter width prefix and parse the digits after it.
+        char *digits = str + 1;
+        return strtol(digits, NULL, 10);
+    }
+
+    return ZERO_REGISTER;
+}
+
+/** Parses an immediate operand of the form "#<decimal>" or "#0x<hex>"
+ *  (the hex prefix may be upper or lower case). Leading whitespace is
+ *  skipped. Returns 0 when the operand does not start with '#' or is shorter
+ *  than two characters.
+ */
+int getImmediate(char *str) {
+    while (isspace((unsigned char) *str)) {
+        str++;
+    }
+
+    if (strlen(str) < 2 || str[0] != '#') {
+        return 0;
+    }
+
+    str++; // skip #
+
+    // Compare exactly the two prefix characters. Comparing three would also
+    // compare the terminator of "0X", so "0XFF" would never be seen as hex.
+    if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 2) == 0) {
+        return strtol(str + 2, NULL, 16);
+    }
+
+    return strtol(str, NULL, 10);
+}
+
+/** Returns 1 when the first non-whitespace character of str is 'x'
+ *  (case-insensitive) and 0 otherwise.
+ */
+int getRegisterType(char *str) {
+    while (isspace(*str)) {
+        str++;
+    }
+
+    const int prefix = tolower(str[0]);
+    return prefix == 'x' ? 1 : 0;
+}
+
+
+/** @brief Translates an alias instruction into its target instruction.
+ * Note: This function modifies the input tokens array and the tokensCount.
+ * Assumes there is enough space in the tokens array for one extra token.
+ * Does nothing when the opcode is not an alias. The zero-register token that
+ * is inserted is heap-allocated and stays referenced from the tokens array.
+ * Exits the program if that token cannot be allocated.
+ * 
+ * @param opcode The opcode of the instruction.
+ * @param tokens The tokens of the instruction.
+ * @param tokensCount The number of tokens in the instruction.
+ */
+void translateAlias(char *opcode, char *tokens[], int *tokensCount) {
+
+    // Derive the table size from the table itself; a hard-coded size larger
+    // than the table makes the lookup read past its end.
+    const int aliasCount = (int) (sizeof ALIAS_OPCODES / sizeof ALIAS_OPCODES[0]);
+    int aliasIndex = lastIndexOfString(opcode, ALIAS_OPCODES, aliasCount);
+    if (aliasIndex == -1)
+        return;
+
+    // The instruction is one of the aliases, convert into the target.
+    char *targetOpcode = ALIAS_TARGET_OPCODES[aliasIndex];
+
+    // To correctly encode the zero register, which is either w31 or x31:
+    // reuse the width prefix of the first operand and append "31".
+    char *zeroReg = malloc(4);
+    if (zeroReg == NULL)
+        exit(EXIT_FAILURE);
+    zeroReg[0] = *tokens[1];
+    memcpy(zeroReg + 1, "31", 3); // copies the terminator as well
+
+    // Only carry over the optional fourth token when it actually exists.
+    char *trailing = (*tokensCount > 3) ? tokens[3] : NULL;
+
+    switch(aliasIndex) {
+        case 0: // cmp -> subs rzr, rn, <op2>
+        case 1: // cmn -> adds rzr, rn, <op2>
+        case 4: // tst -> ands rzr, rn, <op2>
+        // Convert from [instr] reg, <op2> to [instr] rzr, reg, <op2>
+        tokens[0] = targetOpcode;
+        tokens[4] = trailing;
+        tokens[3] = tokens[2];
+        tokens[2] = tokens[1];
+        tokens[1] = zeroReg;
+        (*tokensCount)++;
+        break;
+
+        case 2: // neg  -> sub  rd, rzr, <op2>
+        case 3: // negs -> subs rd, rzr, <op2>
+        case 5: // mvn  -> orn  rd, rzr, <op2>
+        case 6: // mov  -> orr  rd, rzr, rm
+        // Convert from [instr] rd, <op2> to [instr] rd, rzr, <op2>
+        tokens[0] = targetOpcode;
+        tokens[4] = trailing;
+        tokens[3] = tokens[2];
+        tokens[2] = zeroReg;
+        (*tokensCount)++;
+        break;
+
+        default:
+        // Note, the multiply instructions are handled separately.
+        // See DPReg parsing.
+        free(zeroReg);
+        break;
+    }
+}
--- a/src/assembler/string_util.h
+++ b/src/assembler/string_util.h
@ -0,0 +1,64 @@
+/** @file string_util.h
+ *  @brief This file contains the declarations of some string processing
+ *  utility functions used in the assembler.
+ *
+ *  @author Saleh Bubshait
+ */
+
+#ifndef __STRING_UTIL__
+#define __STRING_UTIL__
+
+#include <ctype.h>
+#include <stdbool.h>
+
+/** @brief Skips whitespace characters in a string.
+ *  Advances ptr until it points at a non-whitespace character or at the
+ *  terminating '\0'. The character is converted to unsigned char before it is
+ *  passed to isspace, as <ctype.h> requires.
+ *  @param ptr A modifiable pointer into the string to skip whitespace in.
+ */
+#define SKIP_WHITESPACE(ptr) do { while (isspace((unsigned char) *(ptr))) { (ptr)++; } } while (0)
+
+/** @brief Removes leading and trailing whitespace from a string.
+ *  Note. This function modifies the input string.
+ *  @param str The string to trim.
+ *  @return A pointer to the first non-whitespace character in the string.
+ */
+char *trim(char *str);
+
+/** @brief Checks if a string is in an array of strings.
+ *
+ *  @param str The string to check.
+ *  @param arr The array of strings to check against.
+ *  @param arrSize The size of the array.
+ *  @return True if the string is in the array, false otherwise.
+ */
+bool containsString(char *str, const char *arr[], int arrSize);
+
+/** @brief Finds the last index of a string in an array of strings.
+ *  Note: If multiple occurrences of the string exist, the index of the last
+ *  occurrence is returned!
+ *
+ *  @param str The string to find.
+ *  @param arr The array of strings to search.
+ *  @param arrSize The size of the array.
+ *  @return The index of the last occurrence of the string in the array, or -1 if not found.
+ */
+int lastIndexOfString(char *str, const char *arr[], int arrSize);
+
+/** @brief Duplicates a string.
+ *  Note: The caller is responsible for freeing the returned string.
+ *
+ *  @param str The string to duplicate.
+ *  @return A pointer to the duplicated string.
+ */
+char *duplicateString(char *str);
+
+/** @brief Checks if a string represents an ARMv8 register.
+ *  A string is considered a register if it is:
+ *  - A general purpose register (x0-x30 or w0-w30)
+ *  - A special register (sp, xzr, wzr)
+ *
+ *  @param str The string to check.
+ *  @return True if the string is a register, false otherwise.
+ */
+bool isRegister(char *str);
+
+/** NOTE(review): presumably returns the register number encoded in a register
+ *  token; confirm against the implementation in string_util.c.
+ *
+ *  @param str The register token.
+ */
+int getRegister(char *str);
+
+/** NOTE(review): presumably parses an immediate operand token into its integer
+ *  value; confirm against the implementation in string_util.c.
+ *
+ *  @param str The immediate token.
+ */
+int getImmediate(char *str);
+
+/** @brief Classifies a register token by its prefix letter.
+ *  Leading whitespace is skipped before the prefix is examined.
+ *
+ *  @param str The register token.
+ *  @return 1 if the first non-whitespace character is 'x' or 'X', 0 otherwise.
+ */
+int getRegisterType(char *str);
+
+/** @brief Translates an alias instruction into its target instruction.
+ *  Note: This function modifies the input tokens array and the tokensCount.
+ *  Assumes there is enough space in the tokens array to add the new tokens.
+ *
+ *  @param opcode The opcode of the instruction.
+ *  @param tokens The tokens of the instruction.
+ *  @param tokensCount The number of tokens in the instruction.
+ */
+void translateAlias(char *opcode, char *tokens[], int *tokensCount);
+
+#endif
--- a/src/assembler/symboltable.c
+++ b/src/assembler/symboltable.c
@ -0,0 +1,82 @@
+/** @file symboltable.c
+ *  @brief An Abstract Data Type (ADT) for a symbol table, an array of
+ *  label-address pairs. Labels are strings and addresses are unsigned integers
+ *  (uint32_t). The symbol table is implemented as a dynamic array.
+ *
+ *  @author Saleh Bubshait
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "symboltable.h"
+
+/*  Creates an empty symbol table with room for INITIAL_CAPACITY entries.
+ *  Terminates the program if either allocation fails.
+ */
+symbol_table *st_init(void) {
+    symbol_table *fresh = malloc(sizeof *fresh);
+    if (!fresh) {
+        fprintf(stderr, "Failed to allocate memory for symbol table\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // Backing storage for the entries.
+    fresh->table = malloc(INITIAL_CAPACITY * sizeof *fresh->table);
+    if (!fresh->table) {
+        fprintf(stderr, "Failed to allocate memory for table\n");
+        exit(EXIT_FAILURE);
+    }
+
+    fresh->capacity = INITIAL_CAPACITY;
+    fresh->size = 0;
+
+    return fresh;
+}
+
+/*  Multiplies the capacity of the symbol table by GROWTH_FACTOR, but only
+ *  when every slot is already in use. Terminates the program if the
+ *  reallocation fails.
+ */
+static void grow(symbol_table *st) {
+    if (st->size != st->capacity) {
+        // There are still unused slots, nothing to do.
+        return;
+    }
+
+    st->capacity *= GROWTH_FACTOR;
+    st->table = realloc(st->table, st->capacity * sizeof(symbol_table_map));
+    if (st->table == NULL) {
+        fprintf(stderr, "Failed to reallocate memory for table\n");
+        exit(EXIT_FAILURE);
+    }
+}
+
+/*  Appends a label-address pair. The label pointer is stored as given,
+ *  the string itself is not copied.
+ */
+void st_insert(symbol_table *st, char *label, address addr) {
+    // Make room first; grow() only reallocates when the table is full.
+    grow(st);
+
+    // Fill the first unused slot, then account for it.
+    const int slot = st->size;
+    st->table[slot].label = label;
+    st->table[slot].address = addr;
+    st->size = slot + 1;
+}
+
+/*  Linear search for label; stops at the first entry whose label compares
+ *  equal with strcmp.
+ */
+bool st_contains(symbol_table *st, char *label) {
+    bool found = false;
+
+    for (int idx = 0; idx < st->size && !found; idx++) {
+        found = (strcmp(st->table[idx].label, label) == 0);
+    }
+
+    return found;
+}
+
+/*  Returns the address stored with the first entry whose label matches.
+ *  Reports the missing label and terminates the program when there is none.
+ */
+address st_get(symbol_table *st, char *label) {
+    int idx = 0;
+
+    while (idx < st->size) {
+        const symbol_table_map *entry = &st->table[idx];
+        if (!strcmp(entry->label, label)) {
+            return entry->address;
+        }
+        idx++;
+    }
+
+    fprintf(stderr, "Label %s not found in symbol table\n", label);
+    exit(EXIT_FAILURE);
+}
+
+/*  Releases the entry array and the table itself. The label strings are not
+ *  freed here. Passing NULL is a no-op, mirroring free().
+ */
+void st_free(symbol_table *st) {
+    if (st == NULL) {
+        return;
+    }
+
+    free(st->table);
+    free(st);
+}
--- a/src/assembler/symboltable.h
+++ b/src/assembler/symboltable.h
@ -0,0 +1,75 @@
+/** @file symboltable.h
+ *  @brief An Abstract Data Type (ADT) for a symbol table, an array of
+ *  label-address pairs. Labels are strings and addresses are unsigned integers
+ *  (uint32_t). The symbol table is implemented as a dynamic array.
+ *
+ *  @author Saleh Bubshait
+ */
+
+#ifndef __SYMBOLTABLE__
+#define __SYMBOLTABLE__
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/* Number of entries a freshly initialised table can hold. */
+#define INITIAL_CAPACITY 5
+/* Factor the capacity is multiplied by whenever a full table has to grow. */
+#define GROWTH_FACTOR 2
+
+/* Addresses stored in the table are 32-bit unsigned integers. */
+typedef uint32_t address;
+
+/** An entry in the symbol table, a label-address pair.
+ *  The label is stored as a pointer; the table does not copy the string.
+ */
+typedef struct {
+    char *label;
+    address address;
+} symbol_table_map;
+
+/** The symbol table ADT: a growable array of entries.
+ */
+typedef struct {
+    symbol_table_map* table; // the entries; the first `size` of them are in use
+    int size; // number of entries currently stored
+    int capacity; // number of entries `table` has room for. capacity >= size
+} symbol_table;
+
+/** @brief Initializes a new, empty symbol table with room for
+ *  INITIAL_CAPACITY entries. Terminates the program if allocation fails.
+ *
+ *  @return A pointer to the new symbol table.
+ */
+symbol_table *st_init(void);
+
+/** @brief Inserts a new label-address pair to the symbol table.
+ *  Grows the table if it is full. If the label already exists in the table,
+ *  another entry with the same label is inserted (for performance).
+ *  The label pointer is stored as given, so the string must remain valid for
+ *  as long as the table is used.
+ *
+ *  @param st A pointer to the target symbol table.
+ *  @param label The label to insert.
+ *  @param addr The address to insert.
+ */
+void st_insert(symbol_table *st, char *label, address addr);
+
+/** @brief Checks if a label exists in the symbol table.
+ *  Labels are compared with strcmp.
+ *
+ *  @param st A pointer to the target symbol table.
+ *  @param label The label to check.
+ *  @return True if the label exists in the table, false otherwise.
+ */
+bool st_contains(symbol_table *st, char *label);
+
+/** @brief Gets the address of a label in the symbol table.
+ *  st_contains should be called before calling this function: if the label is
+ *  not present, an error is printed to stderr and the program exits.
+ *  When several entries share the label, the earliest inserted one is used.
+ *
+ *  @param st A pointer to the target symbol table.
+ *  @param label The label to get the address of.
+ *  @return The address of the label in the table.
+ */
+address st_get(symbol_table *st, char *label);
+
+/** @brief Frees the memory allocated for the symbol table.
+ *  The entry array and the table structure are released; the label strings
+ *  themselves are not.
+ *
+ *  @param st A pointer to the target symbol table.
+ */
+void st_free(symbol_table *st);
+
+#endif
--- a/src/assembler/tokenise.c
+++ b/src/assembler/tokenise.c
@ -0,0 +1,106 @@
+/** @file tokenise.c
+ *  @brief Functions to tokenise lines of assembly and operand strings.
+ *
+ *  @author Saleh Bubshait
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include "tokenise.h"
+#include "string_util.h"
+
+// Number of slots in the array returned by tokenise(): the opcode plus the operands.
+#define MAX_TOKEN_COUNT 6
+// Number of slots in the array returned by tokeniseOperands().
+#define MAX_OPERAND_COUNT 5
+// NOTE(review): not referenced anywhere in this file.
+#define OPERAND_DELIMITER ", "
+// Brackets that may enclose an operand list, e.g. "[x1, #4]".
+#define OPEN_BRACKET '['
+#define CLOSE_BRACKET ']'
+
+/*  Splits a line of assembly into an opcode token followed by its operand
+ *  tokens. The line is modified in place and the tokens point into it.
+ *  The returned array has MAX_TOKEN_COUNT slots, is allocated with malloc and
+ *  is owned by the caller. The line must contain at least an opcode.
+ *  Terminates the program on allocation failure or when there are more tokens
+ *  than the array can hold.
+ */
+char **tokenise(char *line, int *numTokens) {
+    char **tokens = malloc(MAX_TOKEN_COUNT * sizeof *tokens);
+    if (!tokens) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(EXIT_FAILURE);
+    }
+
+    line = trim(line);
+
+    *numTokens = 0;
+    assert(*line != '\0');
+
+    // The opcode runs up to the first whitespace character. Scanning by hand
+    // avoids strtok's hidden static state and also accepts tabs.
+    char *opcodeEnd = line;
+    while (*opcodeEnd != '\0' && !isspace((unsigned char) *opcodeEnd)) {
+        opcodeEnd++;
+    }
+
+    tokens[(*numTokens)++] = line;
+
+    if (*opcodeEnd == '\0') {
+        // No operands. Return the first (opcode) token.
+        return tokens;
+    }
+
+    *opcodeEnd = '\0';
+    char *operandStart = opcodeEnd + 1;
+    SKIP_WHITESPACE(operandStart);
+
+    // Use tokeniseOperands to tokenise the operands
+    int operandTokensCount = 0;
+    char **operandTokens = tokeniseOperands(operandStart, &operandTokensCount);
+
+    // Never write past the end of the tokens array.
+    if (operandTokensCount > MAX_TOKEN_COUNT - *numTokens) {
+        fprintf(stderr, "Too many operands\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int i = 0; i < operandTokensCount; i++) {
+        tokens[(*numTokens)++] = operandTokens[i];
+    }
+
+    // Only the temporary array is released; the strings live inside line.
+    free(operandTokens);
+    return tokens;
+}
+
+/*  Stores token in the next free slot of tokens and increments *numTokens.
+ *  Terminates the program instead of writing past MAX_OPERAND_COUNT slots.
+ */
+static void appendOperand(char **tokens, int *numTokens, char *token) {
+    if (*numTokens >= MAX_OPERAND_COUNT) {
+        fprintf(stderr, "Too many operands\n");
+        exit(EXIT_FAILURE);
+    }
+    tokens[(*numTokens)++] = token;
+}
+
+/*  Splits a comma separated operand string into tokens. The string is
+ *  modified in place and the tokens point into it. If the string starts with
+ *  '[', that bracket is dropped and the string is cut at its last ']'.
+ *  Commas inside a remaining bracket pair do not split.
+ *  The returned array has MAX_OPERAND_COUNT slots, is allocated with malloc
+ *  and is owned by the caller.
+ */
+char **tokeniseOperands(char *line, int *numTokens) {
+    char **tokens = malloc(MAX_OPERAND_COUNT * sizeof *tokens);
+    if (!tokens) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(EXIT_FAILURE);
+    }
+
+    SKIP_WHITESPACE(line);
+
+    // Remove leading and trailing brackets if they exist
+    if (*line == OPEN_BRACKET) {
+        line++; // skip '['
+        // Cut at the last closing bracket, dropping anything after it.
+        char *closing = strrchr(line, CLOSE_BRACKET);
+        if (closing != NULL) {
+            *closing = '\0';
+        }
+    }
+
+    line = trim(line);
+
+    *numTokens = 0;
+    bool inBracket = false;
+    char *currentToken = line;
+
+    for (char *c = line; *c != '\0'; ++c) {
+        if (*c == OPEN_BRACKET) {
+            inBracket = true;
+        } else if (*c == CLOSE_BRACKET) {
+            inBracket = false;
+        } else if (*c == ',' && !inBracket) {
+            // Terminate the current token at the comma and start the next one.
+            *c = '\0';
+            appendOperand(tokens, numTokens, currentToken);
+            currentToken = c + 1; // skip the comma
+            SKIP_WHITESPACE(currentToken);
+        }
+    }
+
+    // Whatever follows the last comma is the final token, unless it is empty.
+    if (*currentToken != '\0') {
+        appendOperand(tokens, numTokens, currentToken);
+    }
+
+    return tokens;
+}
--- a/src/assembler/tokenise.h
+++ b/src/assembler/tokenise.h
@ -0,0 +1,26 @@
+/** @file tokenise.h
+ *  @brief Functions to tokenise lines of assembly and operand strings.
+ *
+ *  @author Saleh Bubshait
+ */
+
+#ifndef __TOKENISE__
+#define __TOKENISE__
+
+/** @brief Tokenises a line of assembly code. The first two tokens are separated
+ *  by a space, and the rest are separated by commas.
+ *  e.g., "add x1, x2, x3" -> ["add", "x1", "x2", "x3"]. Handles and skips any
+ *  whitespaces, e.g., "  add   x1,    x2,#4    " -> ["add", "x1", "x2", "#4"].
+ *  Note. The line is modified in place and the returned tokens point into it.
+ *  The returned array is allocated with malloc and must be freed by the caller.
+ *  @param line The line to tokenise.
+ *  @param numTokens A pointer to an integer to store the number of tokens.
+ *  @return An array of strings containing the tokens.
+ */
+char **tokenise(char *line, int *numTokens);
+
+/** @brief Tokenises the operands of an instruction. The operands are separated
+ *  by commas. Handles and skips any whitespaces, e.g., "x1, x2, #4" -> ["x1", "x2", "#4"].
+ *  If the line starts with an opening bracket, that bracket and the matching
+ *  closing bracket are removed.
+ *  Note. It also removes anything after the brackets, for example:
+ *  "[x1, x2, #4]!" -> ["x1", "x2", "#4"].
+ *  Note. The line is modified in place and the returned tokens point into it.
+ *  The returned array is allocated with malloc and must be freed by the caller.
+ *  @param line The line to tokenise.
+ *  @param numTokens A pointer to an integer to store the number of tokens.
+ *  @return An array of strings containing the tokens.
+ */
+char **tokeniseOperands(char *line, int *numTokens);
+
+#endif
--- a/src/util/binary_util.c
+++ b/src/util/binary_util.c
@ -7,6 +7,9 @@

 #include <assert.h>
 #include "binary_util.h"
+#include <stdint.h>
+#include <stdbool.h>
+#include "binary_util.h"

 word getBits(word wrd, uint8_t lsb, uint8_t msb) {

@ -17,6 +20,23 @@ word getBits(word wrd, uint8_t lsb, uint8_t msb) {
    return wrd >> lsb;
 }

+/*  Overwrites bits [lsb, msb) of *wrd with the low-order bits of value,
+ *  leaving every other bit of *wrd unchanged. Bits of value that do not fit
+ *  in the range are discarded.
+ */
+void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) {
+    // Ensure LSB and MSB are within range of word size, and in the correct order
+    assert(lsb < msb && msb <= 32);
+
+    // Create a mask with 1s in the range [lsb, msb) and 0s elsewhere.
+    // Built from an unsigned all-ones value: shifting the int constant 1 left
+    // by 31 would overflow a signed int. Both shift counts stay below 32.
+    const uint8_t width = msb - lsb;
+    const word mask = ((word) UINT32_MAX >> (32 - width)) << lsb;
+
+    // Clear the bits in the range [lsb, msb) in the word
+    *wrd &= ~mask;
+
+    // Set the bits in the range [lsb, msb) to the value
+    *wrd |= (value << lsb) & mask;
+}
+
 // Returns the larger of the two double words; b when they are equal.
 dword max(dword a, dword b) {
    if (a > b) {
       return a;
    }
    return b;
 }
--- a/src/util/binary_util.h
+++ b/src/util/binary_util.h
@ -20,6 +20,17 @@
 */
 word getBits(word wrd, uint8_t lsb, uint8_t msb);

+/** @brief Sets a range of bits of a word (32-bit unsigned integer) to a value.
+ *  The range is inclusive of the lsb and exclusive of the msb. The value should
+ *  fit within the range; any bits of it that fall outside the range are
+ *  masked off. Bits of the word outside the range are left unchanged.
+ *  The range is checked with an assertion: lsb < msb and msb <= 32.
+ *  
+ *  @param wrd A pointer to the word to set bits in.
+ *  @param lsb The least significant bit of the range to set, inclusive.
+ *  @param msb The most significant bit of the range to set, exclusive.
+ *  @param value The value to set the bits to.
+ */
+void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value);
+
 /** @brief Returns the maximum of two given two double words (uint64_t).
 *
 *  @param a The first double word.
--- a/src/util/fileio.c
+++ b/src/util/fileio.c
@ -11,6 +11,8 @@
 #include "fileio.h"
 #include "../global.h"

+// Size in bytes of the buffer readAssemblyFile reads each line into.
+#define MAX_ASM_LINE_LENGTH 300
+
 byte *fileio_loadBin(const char *filePath, size_t memorySize) {
    FILE *file = fopen(filePath, "rb");
    if (file == NULL) {
@ -47,5 +49,87 @@ byte *fileio_loadBin(const char *filePath, size_t memorySize) {
    if (i < byteCount) {
        memset(fileData + i, 0, (byteCount - i) * sizeof(byte));   
    }
+    
    return fileData;
 }
+
+/*  Writes numInstrs words from instrs to outputFile as raw binary, exactly as
+ *  they are laid out in memory. The file is created or truncated.
+ *  A non-positive numInstrs produces an empty file.
+ *  Terminates the program if the file cannot be opened, fully written or
+ *  closed.
+ */
+void writeBinaryFile(word instrs[], char outputFile[], int numInstrs) {
+    FILE *fp = fopen(outputFile, "wb");
+    if (fp == NULL) {
+        fprintf(stderr, "Error: Could not open file %s\n", outputFile);
+        exit(EXIT_FAILURE);
+    }
+
+    // A negative count must not turn into a huge unsigned element count.
+    size_t expected = numInstrs > 0 ? (size_t) numInstrs : 0;
+    size_t written = fwrite(instrs, sizeof(word), expected, fp);
+    if (written != expected) {
+        fprintf(stderr, "Error: Could not write to file %s\n", outputFile);
+        fclose(fp);
+        exit(EXIT_FAILURE);
+    }
+
+    // fclose flushes buffered data, so a failure here means lost output.
+    if (fclose(fp) != 0) {
+        fprintf(stderr, "Error: Could not write to file %s\n", outputFile);
+        exit(EXIT_FAILURE);
+    }
+}
+
+/*  Counts the non-empty lines of a file: every newline that does not directly
+ *  follow another newline (or the start of the file) ends one line, and a
+ *  final line without a trailing newline is counted as well.
+ *  Terminates the program if the file cannot be opened or read.
+ */
+int countLines(char *filename) {
+    FILE *file = fopen(filename, "r");
+    if (file == NULL) {
+        fprintf(stderr, "Error: Could not read file %s\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    int count = 0;
+    // fgetc returns an int so that EOF is distinguishable from every byte.
+    int c;
+    int prevC = '\n';
+
+    while ((c = fgetc(file)) != EOF) {
+        if (c == '\n' && prevC != '\n') {
+            count++;
+        }
+        prevC = c;
+    }
+
+    if (ferror(file)) {
+        fprintf(stderr, "Error: Could not read file %s\n", filename);
+        fclose(file);
+        exit(EXIT_FAILURE);
+    }
+
+    // The file ended in the middle of a line: that line counts too.
+    if (prevC != '\n') {
+        count++;
+    }
+
+    fclose(file);
+    return count;
+}
+
+/*  Reads the non-empty lines of an assembly file into freshly allocated
+ *  strings. Each stored line keeps its trailing newline when it has one.
+ *  The returned array has lineCount + 1 slots and the slot after the last
+ *  stored line is set to NULL. The array and the strings are owned by the
+ *  caller.
+ *  Terminates the program if the file cannot be read, memory runs out, a line
+ *  does not fit in the line buffer, or the file holds more non-empty lines
+ *  than lineCount.
+ */
+char **readAssemblyFile(char filename[], int lineCount) {
+    FILE *fp = fopen(filename, "r");
+    if (fp == NULL) {
+        fprintf(stderr, "Error: Could not read file %s\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    // One slot per expected line plus one for the terminating NULL entry.
+    size_t capacity = lineCount > 0 ? (size_t) lineCount : 0;
+    char **lines = malloc((capacity + 1) * sizeof *lines);
+    if (lines == NULL) {
+        fprintf(stderr, "Error: Could not allocate memory to store the assembly lines");
+        exit(EXIT_FAILURE);
+    }
+
+    char buffer[MAX_ASM_LINE_LENGTH];
+    int currentLine = 0;
+
+    while (fgets(buffer, sizeof buffer, fp) != NULL) {
+        size_t length = strlen(buffer);
+        int terminated = length > 0 && buffer[length - 1] == '\n';
+
+        if (!terminated && !feof(fp)) {
+            // It was actually longer than the maximum.
+            // A missing newline is only acceptable on the last line of the file.
+            fprintf(stderr, "Error: Line %d in the file %s is too long\n", currentLine, filename);
+            exit(EXIT_FAILURE);
+        }
+
+        if (length == 0 || *buffer == '\n') {
+            // Skip empty lines.
+            continue;
+        }
+
+        if ((size_t) currentLine >= capacity) {
+            // Never write past the slots the caller asked for.
+            fprintf(stderr, "Error: File %s has more lines than expected\n", filename);
+            exit(EXIT_FAILURE);
+        }
+
+        lines[currentLine] = malloc(length + 1);
+        if (lines[currentLine] == NULL) {
+            fprintf(stderr, "Error: Could not allocate memory to store the assembly line");
+            exit(EXIT_FAILURE);
+        }
+
+        memcpy(lines[currentLine], buffer, length + 1);
+        currentLine++;
+    }
+
+    if (ferror(fp)) {
+        fprintf(stderr, "Error: Could not read file %s", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    fclose(fp);
+
+    lines[currentLine] = NULL;
+    return lines;
+}
--- a/src/util/fileio.h
+++ b/src/util/fileio.h
@ -7,6 +7,8 @@

 #ifndef __FILEIO__
 #define __FILEIO__
+
+#include <stdio.h>
 #include <stdlib.h>
 #include "../global.h"

@ -23,4 +25,34 @@
 */
 byte *fileio_loadBin(const char *filePath, size_t memorySize);

+/** @brief Reads an assembly file line by line, storing each non-empty line in
+ *  its own heap-allocated char array. Empty lines are skipped.
+ *  lineCount is intended to be the value returned by countLines for the same
+ *  file. Lines are read through a fixed-size buffer; a line that does not fit
+ *  makes the program exit with an error, as does a file that cannot be read
+ *  or a failed allocation.
+ *  The returned array and the lines in it are owned by the caller.
+ *
+ *  @param filename The path to the assembly file to read.
+ *  @param lineCount The number of lines in the file.
+ *  @return An array of char arrays, each containing a line from the file.
+ *
+ *  @see countLines
+ */
+char **readAssemblyFile(char filename[], int lineCount);
+
+/** @brief Writes an array of instructions, represented as unsigned int, to a 
+ *  binary file. The number of instructions to write is specified by numInstrs.
+ *  The words are written with fwrite exactly as they are laid out in memory.
+ *  The file is opened in "wb" mode, so existing content is replaced. The
+ *  program exits with an error if the file cannot be opened.
+ *  
+ *  @param instrs The array of instructions to write to the file.
+ *  @param outputFile The path to the binary file to write to.
+ *  @param numInstrs The number of instructions in the array.
+ */
+void writeBinaryFile(word instrs[], char outputFile[], int numInstrs);
+
+/** @brief Counts the number of lines in a file. Empty lines are not counted.
+ *  The program exits with an error if the file cannot be opened.
+ *
+ *  @param filename The path to the file to count the lines of.
+ *  @return The number of lines in the file.
+ *
+ *  @see readAssemblyFile
+ */
+int countLines(char *filename);
+
 #endif