Merge branch 'assembler-s' into 'assembler'

Assembler s See merge request lab2324_summer/armv8_43!8
2024-06-12 14:53:32 +00:00 · 2024-06-12 14:53:32 +00:00 · 1fd7e441b6
commit 1fd7e441b6
parent 654d6fdbb9 53ab6a2bf6
21 changed files with 208 additions and 600 deletions
--- a/src/Makefile
+++ b/src/Makefile
@ -9,7 +9,7 @@ CFLAGS  ?= -std=c17 -g\

 all: assemble emulate

-assemble: assemble.o
+assemble: assemble.o parser.o fileio.o
 emulate: emulate.o

 clean:
--- a/src/a64instruction/a64instruction.h
+++ b/src/a64instruction/a64instruction.h
--- a/src/a64instruction/a64instruction_Branch.h
+++ b/src/a64instruction/a64instruction_Branch.h
@ -1,6 +1,6 @@
 #include <stdbool.h>
 #include "a64instruction_global.h"
-#include "global.h"
+#include "../global.h"

 typedef enum {
    a64inst_UNCONDITIONAL = 0,
--- a/src/a64instruction/a64instruction_DP.h
+++ b/src/a64instruction/a64instruction_DP.h
--- a/src/a64instruction/a64instruction_DPImmediate.h
+++ b/src/a64instruction/a64instruction_DPImmediate.h
--- a/src/a64instruction/a64instruction_DPRegister.h
+++ b/src/a64instruction/a64instruction_DPRegister.h
--- a/src/a64instruction/a64instruction_Directive.h
+++ b/src/a64instruction/a64instruction_Directive.h
@ -1,4 +1,4 @@
-#include "global.h"
+#include "../global.h"

 typedef struct {
    word value;
--- a/src/a64instruction/a64instruction_Label.h
+++ b/src/a64instruction/a64instruction_Label.h
--- a/src/a64instruction/a64instruction_SingleTransfer.h
+++ b/src/a64instruction/a64instruction_SingleTransfer.h
@ -1,6 +1,6 @@
 #include <stdbool.h>
 #include "a64instruction_global.h"
-#include "global.h"
+#include "../global.h"

 typedef enum {
    a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER = 1,
--- a/src/a64instruction/a64instruction_global.h
+++ b/src/a64instruction/a64instruction_global.h
--- a/src/assemble.c
+++ b/src/assemble.c
@ -1,8 +1,35 @@
 #include <stdlib.h>
 #include <stdio.h>
-#include "parser.c"
-#include "fileio.c"
+#include "a64instruction/a64instruction.h"
+#include "parser.h"
+#include "fileio.h"
+#include "parser.h"
+#include "twopassassembly.c"

 int main(int argc, char **argv) {
+  // Both the source path (argv[1]) and the output path (argv[2]) are required
+  if (argc < 3) {
+    fprintf(stderr, "Error: A source file and an object output file are required. Syntax: ./assemble <file_in> <file_out>\n");
+    return EXIT_FAILURE;
+  }
+
+  // Load the source file into memory
+  int lineCount = countLines(argv[1]);
+  char **source = readAssemblyFile(argv[1], lineCount);
+
+  // Parse the source file
+  a64inst_instruction *instructions = parse(source, lineCount);
+  
+  // First Pass: Create the symbol table
+  st *table = firstPass(instructions, lineCount);
+
+  // Second Pass: Assemble the instructions
+  word *binary = secondPass(instructions, lineCount, table); // NOTE(review): lineCount is the source line count, used here as the instruction count
+
+  // Write the binary to the output file
+  writeBinaryFile(binary, argv[2], lineCount); // NOTE(review): writes one word per source line - confirm this matches what secondPass produces
+
+  /* TODO: free source, instructions, table and binary before returning */
+
   return EXIT_SUCCESS;
 }
--- a/src/decode.h
+++ b/src/decode.h
@ -1,5 +1,5 @@
 #include "global.h"
-#include "a64instruction.h"
+#include "a64instruction/a64instruction.h"

 #define HALT_WORD 0x8a000000

--- a/src/emulate.c
+++ b/src/emulate.c
@ -1,6 +1,6 @@
 #include <stdlib.h>
 #include <stdio.h>
-#include "a64instruction.h"
+#include "a64instruction/a64instruction.h"
 #include "emulator.h"
 #include "fileio.h"
 #include "global.h"
--- a/src/execute.c
+++ b/src/execute.c
@ -1,448 +0,0 @@
-#include <stdlib.h>
-#include <assert.h>
-#include "execute.h"
-#include "print.h"
-
-// Defines the maximum value that can be held in a register
-#define MAX_REG_VAL ((1 << DWORD_BITS) - 1)
-
-// The number of bits to shift the immediate value in an arithmetic immediate data processing
-// instruction if the shift flag is enabled.
-#define DPI_ARITHM_SHIFT 12 
-
-// The number of bits to shift the immediate value in a wide move immediate data processing
-// instruction if the shift flag is enabled.
-#define DPI_WIDEMOV_SHIFT 16
-
-// Prototypes
-void execute_SDT(Machine *state, a64inst_instruction *inst);
-void execute_Branch(Machine *state, a64inst_instruction *inst);
-void executeMultiply(Machine *state, a64inst_instruction *inst);
-
-// Return maximum of two dwords
-static dword max(dword a, dword b) {
-    return a > b ? a : b;
-}
-
-// Truncate a given value to the size of a word or dword depending on the register type
-static dword truncateValue(dword value, a64inst_regType regType) {
-    if (regType == a64inst_X) {
-        return value;
-    } else {
-        return (word)value;
-        //return value & (dword)(((dword)1 << WORD_BITS) - 1);
-    }
-}
-
-// Sign extend a given value to a 64-bit signed integer given the number of bits
-static int64_t signExtend(dword value, unsigned int n) {
-    if (n == 0 || n >= 64) {
-        // If n_bits is 0 or greater than or equal to 64, return the value as is
-        return (int64_t)value;
-    }
-    
-    uint64_t sign_bit_mask = (uint64_t)1 << (n - 1);
-    
-    // Mask to isolate the n-bit value
-    uint64_t n_bit_mask = (sign_bit_mask << 1) - 1;
-
-    // Check if the sign bit is set
-    if (value & sign_bit_mask) {
-        // Sign bit is set, extend the sign
-        return (int64_t)(value | ~n_bit_mask);
-    } else {
-        // Sign bit is not set, return the value as is
-        return (int64_t)(value & n_bit_mask);
-    }
-}
-
-// Read from processor register, ensuring that a valid register specifier is given
-// and accounting for the case where the zero register is accessed. Truncate
-// the 32 most significant bits stored in the R register when reading W register.
-static dword readRegister(Machine *state, a64inst_regSpecifier reg, a64inst_regType regType) {
-    assert(reg <= REGISTER_COUNT);
-    if (reg == ZERO_REGISTER) {
-        return 0;
-    } else {
-        return truncateValue(state->registers[reg], regType);
-    }
-}
-
-// TODO:
-
-// Write to a processor register, ensuring that a valid register specifier is given
-// and truncating the value being written when it can't fit in the specified register
-static void writeRegister(Machine *state, a64inst_regSpecifier reg, a64inst_regType regType, dword value) {
-    assert(reg <= REGISTER_COUNT);
-    if (reg != ZERO_REGISTER) {
-        state->registers[reg] = truncateValue(value, regType);
-    }
-}
-
-// Returns the position of the MSB of the given register type
-inline static dword getMSBPos(a64inst_regType regType) {
-    return (regType ? DWORD_BITS : WORD_BITS) - 1;
-}
-
-// Returns the MSB of the given value assuming it's of the size stored in the given register type
-inline static uint8_t getMSB(dword value, a64inst_regType regType) {
-    return value >> getMSBPos(regType); 
-}
-
-// Updates N and Z condition codes given the machine and a result value
-static void updateCondNZ(Machine *state, dword result, a64inst_regType regType) {
-    state->conditionCodes.Negative = getMSB(result, regType);
-    state->conditionCodes.Zero = result == 0;
-}
-
-// Execute a data processing immediate instruction
-static void executeDPImmediate(Machine *state, a64inst_instruction *inst) {
-    assert(inst->type == a64inst_DPIMMEDIATE);
-    
-    a64inst_regType regType = inst->data.DPImmediateData.regType;
-    a64inst_regSpecifier dest = inst->data.DPImmediateData.dest;
-    switch(inst->data.DPImmediateData.DPIOpType) {
-        
-        // Execute an arithmetic immediate data processing instruction
-        case a64inst_DPI_ARITHM:;
-
-            // If shift flag is enabled, logical left shift by the number of bits specified by the architecture
-            dword arithmImm = inst->data.DPImmediateData.processOpData.arithmData.immediate;
-            dword srcVal = state->registers[inst->data.DPImmediateData.processOpData.arithmData.src];
-            if (inst->data.DPImmediateData.processOpData.arithmData.shiftImmediate) {
-                arithmImm = truncateValue(arithmImm << DPI_ARITHM_SHIFT, regType); 
-            }
-
-            switch(inst->data.DPImmediateData.processOp) {
-                
-                dword result;
-                case(a64inst_ADDS):
-                    result = srcVal + arithmImm;
-                    writeRegister(state, dest, regType, result);
-                    
-                    updateCondNZ(state, result, regType);
-                    state->conditionCodes.Overflow = max(srcVal, arithmImm) > result;
-                    state->conditionCodes.Carry = state->conditionCodes.Overflow;
-                    break;
-                
-                case(a64inst_ADD):
-                    writeRegister(state, dest, regType, srcVal + arithmImm);
-                    break;
-                
-                case(a64inst_SUBS):
-                    result = srcVal - arithmImm;
-                    writeRegister(state, dest, regType, result);
-                    
-                    updateCondNZ(state, result, regType);
-                    state->conditionCodes.Overflow = srcVal < result;
-                    state->conditionCodes.Carry = state->conditionCodes.Overflow;
-                    break;
-                
-                case(a64inst_SUB):
-                    writeRegister(state, dest, regType, srcVal - arithmImm);
-                    break;
-
-                // Unknown opcode detected!
-                default:
-                    fprintf(stderr, "Unknown opcode detected in a DPI arithmetic instruction!\n");
-                    break;
-            }
-            break;
-
-        // Execute a wide move immediate data processing instruction
-        case a64inst_DPI_WIDEMOV:;
-            uint8_t shiftScalar = inst->data.DPImmediateData.processOpData.wideMovData.shiftScalar; 
-            dword wideMovImm = inst->data.DPImmediateData.processOpData.wideMovData.immediate;
-            
-            // NOTE: Not checking that shiftScalar has valid value for 32bit registers. Possibly add explicit error.
-            //printf("%x\n", wideMovImm << (shiftScalar * DPI_WIDEMOV_SHIFT) & );
-            wideMovImm = truncateValue(wideMovImm << (shiftScalar * DPI_WIDEMOV_SHIFT), regType);
-            switch(inst->data.DPImmediateData.processOp) {
-
-                case(a64inst_MOVN):
-                    writeRegister(state, dest, regType, ~wideMovImm);
-                    break;
-
-                case(a64inst_MOVZ):
-                    writeRegister(state, dest, regType, wideMovImm);
-                    break;
-
-                case(a64inst_MOVK):;
-                    dword result = readRegister(state, dest, regType);
-                    result = (result & ~(((1lu << DPI_WIDEMOV_SHIFT) - 1) << shiftScalar * DPI_WIDEMOV_SHIFT)) | wideMovImm;
-                    writeRegister(state, dest, regType, result); 
-                    break;
-                
-                default:
-                    fprintf(stderr, "Unknown opcode detected in a DPI wide move instruction!\n");
-                    break;
-            }
-            break;
-
-        // Unknown instruction detected!    
-        default:
-            fprintf(stderr, "Attempting to execute instruction with unknown DPI operand type!\n");
-            break;
-    }
-}
-
-// Execute a data processing register instruction
-static void executeDPRegister(Machine *state, a64inst_instruction *inst) {
-    assert(inst->type == a64inst_DPREGISTER);
-
-    a64inst_regType regType = inst->data.DPRegisterData.regType;
-    a64inst_regSpecifier dest = inst->data.DPRegisterData.dest;
-    dword src1Val = readRegister(state, inst->data.DPRegisterData.src1, regType);
-    dword src2Val = readRegister(state, inst->data.DPRegisterData.src2, regType);
-
-    switch(inst->data.DPRegisterData.DPROpType) {
-
-        // Execute an arithmetic or logic register data processing instruction
-        case a64inst_DPR_ARITHMLOGIC:;
-
-            // Apply shift to value held in second register
-            a64inst_DPRegister_ArithmLogicData *arithmLogicData = &inst->data.DPRegisterData.processOpData.arithmLogicData;
-            uint8_t shiftAmount = arithmLogicData->shiftAmount;
-            switch(arithmLogicData->shiftType) {
-
-                case a64inst_LSL:
-                    src2Val = truncateValue(src2Val << shiftAmount, regType);
-                    break;
-  
-                case a64inst_LSR:
-                    src2Val = truncateValue(src2Val >> shiftAmount, regType);
-                    break;
-  
-                case a64inst_ASR:
-                    if (regType == a64inst_X) {
-                        src2Val = truncateValue((int64_t)src2Val >> shiftAmount, regType);
-                    } else {
-                        src2Val = truncateValue((int32_t)src2Val >> shiftAmount, regType);
-                    }
-                    break;
-  
-                case a64inst_ROR:
-                    if (arithmLogicData->type != a64inst_DPR_LOGIC) {
-                        fprintf(stderr, "Attempting to perform ROR shift on non-logic register data processing instruction!\n");
-                    }
-                    src2Val = truncateValue(src2Val >> shiftAmount | src2Val << (getMSBPos(regType) - shiftAmount), regType);
-                    break;
-
-                default:
-                    fprintf(stderr, "Attempting to execute arithmetic/logic register data processing instruction with invalid shift type!\n");
-                    break;
-            }
-
-            // Negate second operand if negShiftedSrc2 flag is enabled
-            if (arithmLogicData->negShiftedSrc2) {
-                src2Val = truncateValue(~src2Val, regType);
-            }
-            
-            dword result;
-            switch(arithmLogicData->type) {
-
-                case a64inst_DPR_ARITHM:
-                    switch(inst->data.DPRegisterData.processOp) {
-                    
-                        case(a64inst_ADDS):
-                            result = src1Val + src2Val;
-                            writeRegister(state, dest, regType, result);
-                            
-                            updateCondNZ(state, result, regType);
-                            state->conditionCodes.Overflow = max(src1Val, src2Val) > result;
-                            state->conditionCodes.Carry = state->conditionCodes.Overflow;
-                            break;
-                        
-                        case(a64inst_ADD):
-                            writeRegister(state, dest, regType, src1Val + src2Val);
-                            break;
-                        
-                        case(a64inst_SUBS):
-                            result = src1Val - src2Val;
-                            writeRegister(state, dest, regType, result);
-                            
-                            updateCondNZ(state, result, regType);
-                            state->conditionCodes.Overflow = getMSB(src1Val, regType) != getMSB(src2Val, regType) && getMSB(src1Val, regType) != getMSB(result, regType);
-                            state->conditionCodes.Carry = src1Val >= src2Val;
-                            break;
-                        
-                        case(a64inst_SUB):
-                            writeRegister(state, dest, regType, src1Val - src2Val);
-                            break;
-
-                        // Unknown opcode detected!
-                        default:
-                            fprintf(stderr, "Unknown opcode detected in a DPI arithmetic instruction!\n");
-                            break;
-                    }
-                    break;
-
-                case a64inst_DPR_LOGIC:
-                    switch(inst->data.DPRegisterData.processOp) {
-                        
-                        case a64inst_AND:
-                            writeRegister(state, dest, regType, src1Val & src2Val);
-                            break;
-
-                        case a64inst_OR:
-                            writeRegister(state, dest, regType, src1Val | src2Val);
-                            break;
-
-                        case a64inst_XOR:
-                            writeRegister(state, dest, regType, src1Val ^ src2Val);
-                            break;
-                        
-                        case a64inst_AND_FLAGGED:;
-                            result = src1Val & src2Val;
-                            writeRegister(state, dest, regType, result);
-                            state->conditionCodes.Overflow = 0;
-                            state->conditionCodes.Carry = 0;
-                            updateCondNZ(state, result, regType);
-                            break;
-                    }
-                    break;
-
-                default:
-                    fprintf(stderr, "Attempting to execute an instruction with an unknown DPR arithmetic or logic subtype!\n");
-                    break;                
-            }
-            break;
-
-        // Execute a multiply register data processing instruction
-        case a64inst_DPR_MULTIPLY:
-            break;
-
-        // Unknown instruction detected!    
-        default:
-            fprintf(stderr, "Attempting to execute instruction with unknown DPR operand type!\n");
-            break;
-    }
-}
-
-void execute(Machine *state, a64inst_instruction *inst) {
-
-    switch (inst->type) {
-
-        // Halt the program
-        case a64inst_HALT:
-            break;
-
-        // Execute a data processing immediate instruction
-        case a64inst_DPIMMEDIATE:
-            executeDPImmediate(state, inst);
-            break;
-
-        // Execute a branch instruction
-        case a64inst_BRANCH:
-            execute_Branch(state, inst);
-            break;
-
-        // Execute a data processing register instruction
-        case a64inst_DPREGISTER:
-            if (inst->data.DPRegisterData.DPROpType == a64inst_DPR_MULTIPLY)
-                executeMultiply(state, inst);
-            else
-                executeDPRegister(state, inst);
-            break;
-
-        case a64inst_SINGLETRANSFER: 
-            execute_SDT(state, inst);
-            break;
-
-        // Unknown instruction
-        default:
-            break;
-    }
-
-}
-
-void execute_SDT(Machine *state, a64inst_instruction *inst) {
-    word address;
-    bool isLoad;
-    if (inst->data.SingleTransferData.SingleTransferOpType == a64inst_SINGLE_TRANSFER_LOAD_LITERAL) {
-        // Load Literal
-        isLoad = true;
-        address = state->pc + inst->data.SingleTransferData.processOpData.loadLiteralData.offset * 4;
-    } else {
-        address = state->registers[inst->data.SingleTransferData.processOpData.singleDataTransferData.base];
-        isLoad = inst->data.SingleTransferData.processOpData.singleDataTransferData.transferType == a64inst_LOAD;
-        switch (inst->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode) {
-            case a64inst_UNSIGNED_OFFSET:
-                address += inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset * (inst->data.SingleTransferData.regType == a64inst_W ? 4 : 8);
-                break;
-            case a64inst_REGISTER_OFFSET:
-                address += state->registers[inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg];
-                break;
-            case a64inst_PRE_INDEXED:
-                address += inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset;
-                state->registers[inst->data.SingleTransferData.processOpData.singleDataTransferData.base] = address;
-                break;
-            case a64inst_POST_INDEXED:
-                state->registers[inst->data.SingleTransferData.processOpData.singleDataTransferData.base] = address + inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset;
-                break;
-        }
-    }
-
-    if (isLoad) {
-        if (inst->data.SingleTransferData.regType == a64inst_W) {
-            // 32 bit access
-            state->registers[inst->data.SingleTransferData.target] = readWord(state->memory, address);
-        } else {
-            state->registers[inst->data.SingleTransferData.target] = readDoubleWord(state->memory, address);
-        }
-    } else {
-        *(word *)(state->memory + address) = state->registers[inst->data.SingleTransferData.target];
-
-         // Update base register if post indexed
-        if (inst->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode == a64inst_POST_INDEXED) {
-            writeRegister(state, inst->data.SingleTransferData.processOpData.singleDataTransferData.base, inst->data.SingleTransferData.regType == a64inst_W,  address + inst->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset);
-        }
-    }
-
-}
-
-static bool isConditionMet(Machine* state, a64inst_ConditionType cond) {
-    switch(cond) {
-        case EQ:
-            return state->conditionCodes.Zero;
-        case NE:
-            return !state->conditionCodes.Zero;
-        case GE:
-            return state->conditionCodes.Negative == state->conditionCodes.Overflow;
-        case LT:
-            return state->conditionCodes.Negative != state->conditionCodes.Overflow;
-        case GT:
-            return !state->conditionCodes.Zero && (state->conditionCodes.Negative == state->conditionCodes.Overflow);
-        case LE:
-            return state->conditionCodes.Zero || (state->conditionCodes.Negative != state->conditionCodes.Overflow);
-        case AL:
-            return true;
-        default:
-            fprintf(stderr, "Unknown condition specified!\n");
-            exit(1);
-    }
-}
-
-void execute_Branch(Machine *state, a64inst_instruction *inst) {
-    switch (inst->data.BranchData.BranchType) {
-        case a64inst_UNCONDITIONAL:
-            state->pc += signExtend(inst->data.BranchData.processOpData.unconditionalData.unconditionalOffset * 4, 26);
-            break;
-            
-        case a64inst_REGISTER:
-            state->pc = state->registers[inst->data.BranchData.processOpData.registerData.src];
-            break;
-
-        case a64inst_CONDITIONAL:
-            if (isConditionMet(state, inst->data.BranchData.processOpData.conditionalData.cond)) {
-                state->pc += signExtend(inst->data.BranchData.processOpData.conditionalData.offset * 4, 19);
-            }
-            break;
-    }
-}
-
-void executeMultiply(Machine *state, a64inst_instruction *inst) {
-    dword product = state->registers[inst->data.DPRegisterData.src1] * state->registers[inst->data.DPRegisterData.src2];
-    dword sum = readRegister(state, inst->data.DPRegisterData.processOpData.multiplydata.summand, inst->data.DPRegisterData.regType) + (inst->data.DPRegisterData.processOpData.multiplydata.negProd ? -product : product);
-    writeRegister(state, inst->data.DPRegisterData.dest, inst->data.DPRegisterData.regType, sum);
-}
--- a/src/execute.h
+++ b/src/execute.h
@ -1,6 +1,6 @@
 #ifndef __EXECUTE__
 #define __EXECUTE__
-#include "a64instruction.h"
+#include "a64instruction/a64instruction.h"
 #include "emulator.h"

 void execute(Machine *state, a64inst_instruction *inst);
--- a/src/fileio.c
+++ b/src/fileio.c
@ -1,7 +1,8 @@
-#include <stdio.h>
 #include <string.h>
+#include "global.h"
+#include "fileio.h"

-#define MAX_ASM_LINE_LENGTH 30
+#define MAX_ASM_LINE_LENGTH 300

 int isValidFileFormat(char filename[], char expectedExtension[]){
    char *pointLoc = strrchr(filename, '.');
@ -14,67 +15,76 @@ int isValidFileFormat(char filename[], char expectedExtension[]){
    return(0);
 }

-int writeBinaryFile(word instrs[], char outputFile[], int numInstrs){
-
-    if (!isValidFileFormat(outputFile, "bin")){
-        return(-1);
+// Writes numInstrs words from instrs to outputFile as raw binary.
+// Prints an error and exits if the file cannot be opened or if fewer than
+// numInstrs words could be written. A non-positive numInstrs writes nothing.
+void writeBinaryFile(word instrs[], char outputFile[], int numInstrs) {
+    FILE *fp = fopen(outputFile, "wb");
+    if (fp == NULL) {
+        fprintf(stderr, "Error: Could not open file %s\n", outputFile);
+        exit(EXIT_FAILURE);
    }
-
-    FILE *fp; 
    
-    fp = fopen(outputFile, "wb");
-
-    if(fp == NULL){
-        return(-1);
-    }
-
-    fwrite(instrs, 4, sizeof(word) * numInstrs, fp);
+    // fwrite returns the number of whole items written; anything short of
+    // the requested count means the output file is truncated.
+    size_t count = numInstrs > 0 ? (size_t)numInstrs : 0;
+    if (fwrite(instrs, sizeof(word), count, fp) != count) {
+        fprintf(stderr, "Error: Could not write to file %s\n", outputFile);
+        fclose(fp);
+        exit(EXIT_FAILURE);
+    }
    fclose(fp);
-
-    return(0);
 }

-char **readAssemblyFile(char inputFile[]) {
-    if (!isValidFileFormat(inputFile, "s")){
-        return(NULL);
+// Counts the newline characters in the named file, i.e. the number of
+// newline-terminated lines it contains.
+// Prints an error and exits if the file cannot be opened.
+int countLines(char *filename) {
+    FILE *file = fopen(filename, "r");
+    if (file == NULL) {
+        fprintf(stderr, "Error: Could not read file %s\n", filename);
+        exit(EXIT_FAILURE);
    }

-    FILE *fp = fopen(inputFile, "r");
+    int count = 0;
+    // fgetc returns an int so that EOF stays distinct from every byte value
+    int c;

-    if (fp == NULL){
-        return(NULL);
-    }
-
-    int lineCount = 0;
-    char ch;
-    while ((ch = fgetc(fp)) != EOF)
-    {
-        if (ch == '\n' || ch == '\0')
-        {
-            lineCount++;
+    while ((c = fgetc(file)) != EOF) {
+        if (c == '\n') {
+            count++;
        }
    }
-    
-    char **heap = malloc(sizeof(char *) * lineCount);

-    rewind(fp);
-
-    for( int i=0; i<lineCount; i++) {
-
-        char tmp[512];
-
-        // read line into tmp
-        fgets(tmp, MAX_ASM_LINE_LENGTH-1, fp);
-
-        int size = strlen(tmp);
-
-        char *line = malloc(size+1);  // allocate mem for text line
-
-        strcpy(line, tmp);
-
-        heap[i] = line;  // store line pointer
+    // Release the handle; the file is reopened by whoever reads the lines
+    fclose(file);
+    return count;
+}

+// Reads the file into a NULL-terminated array of heap-allocated strings,
+// one per line; each string keeps its trailing '\n'.
+// lineCount is the number of newline-terminated lines the file is expected
+// to contain. Prints an error and exits on any open, read or allocation
+// failure, on a line with no trailing newline, or if the file holds more
+// lines than lineCount. The caller owns the array and every string in it.
+char **readAssemblyFile(char filename[], int lineCount) {
+    FILE *fp = fopen(filename, "r");
+    if (fp == NULL) {
+        fprintf(stderr, "Error: Could not read file %s\n", filename);
+        exit(EXIT_FAILURE);
    }

-    return(heap);
+    // lineCount entries plus one slot for the terminating NULL
+    size_t capacity = lineCount > 0 ? (size_t)lineCount : 0;
+    char **lines = malloc(sizeof(char *) * (capacity + 1));
+    if (lines == NULL) {
+        fprintf(stderr, "Error: Could not allocate memory to store the assembly lines");
+        exit(EXIT_FAILURE);
+    }
+
+    char buffer[MAX_ASM_LINE_LENGTH];
+    size_t currentLine = 0;
+
+    while (fgets(buffer, MAX_ASM_LINE_LENGTH, fp) != NULL) {
+        size_t length = strlen(buffer);
+        if (length == 0 || buffer[length - 1] != '\n') {
+            // No newline was read: the line did not fit in the buffer, or it
+            // is a final line without a terminating newline. Either way it
+            // is treated as malformed.
+            fprintf(stderr, "Error: Line %d in the file %s is too long\n", (int)currentLine, filename);
+            exit(EXIT_FAILURE);
+        }
+
+        if (currentLine >= capacity) {
+            // Never write past the slots allocated from lineCount
+            fprintf(stderr, "Error: File %s has more lines than expected\n", filename);
+            exit(EXIT_FAILURE);
+        }
+
+        lines[currentLine] = malloc(length + 1);
+        if (lines[currentLine] == NULL) {
+            fprintf(stderr, "Error: Could not allocate memory to store the assembly line");
+            exit(EXIT_FAILURE);
+        }
+
+        strcpy(lines[currentLine], buffer);
+        currentLine++;
+    }
+
+    if (ferror(fp)) {
+        fprintf(stderr, "Error: Could not read file %s", filename);
+        exit(EXIT_FAILURE);
+    }
+    fclose(fp);
+
+    // Terminate the array so callers can iterate until NULL
+    lines[currentLine] = NULL;
+    return lines;
 }
--- a/src/fileio.h
+++ b/src/fileio.h
@ -1,9 +1,13 @@
 #ifndef __FILEIO__
 #define __FILEIO__
+#include <stdio.h>
 #include <stdlib.h>
 #include "global.h"

 #define EXIT_FAILURE 1

-extern byte *fileio_loadBin(const char *filePath, size_t memorySize);
+char **readAssemblyFile(char filename[], int lineCount);
+void writeBinaryFile(word instrs[], char outputFile[], int numInstrs);
+int countLines(char *filename);
+
 #endif
--- a/src/parser.c
+++ b/src/parser.c
@ -1,9 +1,10 @@
+#include <assert.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <stdbool.h>
 #include "parser.h"
-
-#include "a64instruction.h"
+#include "a64instruction/a64instruction.h"

 //takes input string, read from asm file and returns
 //input as an a64 instruction
@ -172,8 +173,9 @@ void classifyOpcode(char* opcode, a64inst_instruction *instr, char *operandList[
 }

 void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOperands){
-    char *operandsDupe = NULL;
-    operandsDupe = strcpy(operandsDupe, str);
+    assert(str != NULL);
+    char operandsDupe[strlen(str)+1];
+    strcpy(operandsDupe, str);
    char *operand = strtok(operandsDupe, OPERAND_DELIMITER);
    operands[0] = operand;

@ -187,22 +189,21 @@ void tokeniseOperands(char* str, int *operandCount, char *operands[], int *numOp

 //takes inputted assembly line and returns a 
 //pointer to an abstract representation of the instruction
-a64inst_instruction *parser(char asmLine[]){
+void parser_instruction(char asmLine[], a64inst_instruction *instr) {
    int numOperands = 0;
-    a64inst_instruction *instr = malloc(sizeof(a64inst_instruction)); 
    if (instr == NULL){
        exit(EXIT_FAILURE);
    }

    if(strcmp(asmLine, HALT_ASM_CMD) == 0){
        instr->type = a64inst_HALT;
-        return(instr);
+        return;
    }

    //"opcode operand1, {operand2}, ..."
    //duplicated as strtok modifies the input string
-    char *stringptr = NULL;
-    stringptr = strcpy(stringptr, asmLine);
+    char stringptr[strlen(asmLine) + 1];
+    strcpy(stringptr, asmLine);

    char *opcode = strtok(stringptr, " ");
    char *operands = strtok(NULL, "");
@ -251,7 +252,18 @@ a64inst_instruction *parser(char asmLine[]){
        
    }

-    return(instr);
-
 }

+// Parses up to lineCount assembly source lines into a newly allocated array
+// of a64inst_instruction values; entry i is filled from asmLines[i].
+// Parsing stops early at a NULL entry. Prints an error and exits if the
+// array cannot be allocated. The caller owns the returned array.
+a64inst_instruction *parse(char **asmLines, int lineCount) {
+    size_t count = lineCount > 0 ? (size_t)lineCount : 0;
+
+    // calloc zero-fills, so entries that are never parsed are not left
+    // uninitialised; at least one element is requested so that an empty
+    // input does not look like an allocation failure.
+    a64inst_instruction *instructions = calloc(count > 0 ? count : 1, sizeof(a64inst_instruction));
+    if (instructions == NULL) {
+        fprintf(stderr, "Error: Could not allocate memory to store the parsed instructions\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // Bounded by lineCount: the array is not guaranteed to be NULL-terminated
+    for (size_t i = 0; i < count && asmLines[i] != NULL; i++) {
+        parser_instruction(asmLines[i], &instructions[i]);
+    }
+
+    return instructions;
+}
--- a/src/parser.h
+++ b/src/parser.h
@ -1,2 +1,6 @@
+#include "a64instruction/a64instruction.h"
+
 #define OPERAND_DELIMITER ", "
 #define HALT_ASM_CMD "and x0, x0, x0"
+
+a64inst_instruction *parse(char **asmLines, int lineCount);
--- a/src/symboltable.c
+++ b/src/symboltable.c
@ -1,10 +1,9 @@
 #include <stdio.h>

 typedef struct st st;
+typedef struct node node; // forward declaration

-
-
-typedef struct {
+typedef struct node {
    const void* key;
    void* value;
    node* prev;
@ -29,11 +28,6 @@ void st_add(st table, void* key, void* value) {
    }  
 }

-// returns the pointer to key of the specified node, or null, if it does not exist
-void* st_search(st table, void* key) {
-    return nodeSearch(table.head, key);
-}
-
 void* nodeSearch(node* n, void* key) {
    if (n != NULL) {
        if ((*n).key == key) {
@ -46,4 +40,9 @@ void* nodeSearch(node* n, void* key) {
    else {
        return NULL;
    }
-}
+}
+
+// Searches the table for key by calling nodeSearch from table.head and returns its result (NULL when the list is empty); nodeSearch compares keys by pointer value
+void* st_search(st table, void* key) {
+    return nodeSearch(table.head, key);
+}
--- a/src/twopassassembly.c
+++ b/src/twopassassembly.c
@ -1,33 +1,34 @@
 #include "global.h"
-#include "a64instruction.h"
-#include "symboltable.h"
-//generates assembled code based on two pass assembly method
+#include "a64instruction/a64instruction.h"
+#include "symboltable.c"
+#include <stdlib.h>
+#include <limits.h>

+// Generates assembled code based on the two-pass assembly method

-word assembleBranch(a64inst_instruction *instr){
+word assembleBranch(a64inst_instruction *instr) {
    word binInstr = 0;
-    binInstr += (5^28); //101 start of branch instr
-    switch (instr->data.BranchData.BranchType)
-    {
+    binInstr += (5 << 28); // 101 start of branch instr
+    switch (instr->data.BranchData.BranchType) {
    case a64inst_UNCONDITIONAL:
-        //000101
-        //25-0: sign extended simm26
-        binInstr += instr->data.processOpData.unconditionalOffset;
+        // 000101
+        // 25-0: sign extended simm26
+        binInstr += instr->data.BranchData.processOpData.unconditionalData.unconditionalOffset;
        break;
    case a64inst_REGISTER:
-        //10000
-        //11111
-        //000000
-        //9-5: address from register
-        //0000
-        binInstr += ((instr->processOpData.src)^5);
+        // 10000
+        // 11111
+        // 000000
+        // 9-5: address from register
+        // 0000
+        binInstr += ((instr->data.BranchData.processOpData.registerData.src) << 5);
        break;
    case a64inst_CONDITIONAL:
-        // 01010100 
+        // 01010100
        // 25-5: sign extended offset
        // 4-0: 0{condition}
-        binInstr += ((instr->processOpData.offset)^5);
-        binInstr += instr->processOpData.cond;
+        binInstr += ((instr->data.BranchData.processOpData.conditionalData.offset) << 5);
+        binInstr += instr->data.BranchData.processOpData.conditionalData.cond;
        break;
    default:
        break;
@@ -35,56 +36,56 @@ word assembleBranch(a64inst_instruction *instr){
    return binInstr;
 }

-st* firstPass(a64inst_instruction instrs[], int numInstrs){
-    //TODO:
+st* firstPass(a64inst_instruction instrs[], int numInstrs) {
+    // TODO:
    // -iterate over instructions, adding to symbol table
    // create symbol table and map labels to addresses/lines
-    struct st table;
-    for(int i=0; i<numInstrs; i++){
-        
+    st *table = (st*)malloc(sizeof(st));
+    for (int i = 0; i < numInstrs; i++) {
        // discuss defining a LABEL type
-        if(instrs[i].type==a64inst_LABEL){
-            st_add(table, &(instrs[i].data.LabelData.label), &i);
+        if (instrs[i].type == a64inst_LABEL) {
+            st_add(*table, &(instrs[i].data.LabelData.label), &i);
        }
    }
-    return &table;
+    return table;
 }
-word assembleDPI(a64inst_instruction cI) {
+
+word dpi(a64inst_instruction cI) {
    word out = 0;
    a64inst_DPImmediateData data = cI.data.DPImmediateData;
-    //sf
-    out += data.regType*(2^31);
-    out += data.processOp*(2^29);
-    out += 2^28;
+    // sf
+    out += data.regType * (1 << 31);
+    out += data.processOp * (1 << 29);
+    out += 1 << 28;
    // if arithmetic
    if (data.DPIOpType == a64inst_DPI_ARITHM) {
-        out += 2^24;
+        out += 1 << 24;
        // shift
-        if (data.processOpData.arithmData.shiftImmediate){
-            out += 2^22;
+        if (data.processOpData.arithmData.shiftImmediate) {
+            out += 1 << 22;
        }
-        out += data.processOpData.arithmData.immediate*(2^10);
-        out += data.processOpData.arithmData.src*(2^5);
+        out += data.processOpData.arithmData.immediate * (1 << 10);
+        out += data.processOpData.arithmData.src * (1 << 5);
    }
    // if wide move
    else {
-        out += 5*(2^23);
+        out += 5 * (1 << 23);
        // hw
-        out += data.processOpData.wideMovData.shiftScalar*(2^21);
-        out += data.processOpData.wideMovData.immediate*(2^5);
+        out += data.processOpData.wideMovData.shiftScalar * (1 << 21);
+        out += data.processOpData.wideMovData.immediate * (1 << 5);
    }
    // destination register
    out += data.dest;
    return out;
 }

-word assembleDPR(a64inst_instruction cI) {
+word dpr(a64inst_instruction cI) {
    word out = 0;
    a64inst_DPRegisterData data = cI.data.DPRegisterData;
    // sf
    int sf = data.regType;
    // bits 27-25
-    out += 5*(2^25);
+    out += 5 * (1 << 25);
    int m = data.DPROpType;
    int opc = 0;
    int opr = 0;
@@ -94,7 +95,7 @@ word assembleDPR(a64inst_instruction cI) {
    int rd = 0;
    // multiply
    if (m == 1) {
-        //opc = 0;
+        // opc = 0;
        opr = 8;
        if (data.processOpData.multiplydata.negProd) {
            operand += 32;
@@ -104,9 +105,9 @@ word assembleDPR(a64inst_instruction cI) {
    // arithmetic and logical
    else {
        // shift
-        opr += 2*data.processOpData.arithmLogicData.shiftType;
+        opr += 2 * data.processOpData.arithmLogicData.shiftType;
        // arithmetic
-        if (data.processOpData.arithmLogicData.type == 1){
+        if (data.processOpData.arithmLogicData.type == 1) {
            opr += 8;
        }
        // logical
@@ -120,33 +121,32 @@ word assembleDPR(a64inst_instruction cI) {
    rm += data.src1;
    rn += data.src2;
    rd += data.dest;
-    out += sf*(2^31);
-    out += opc * (2^29);
-    out += m* (2^28);
-    out += opr * (2^21);
-    out += rm * (2^16);
+    out += sf * (1 << 31);
+    out += opc * (1 << 29);
+    out += m * (1 << 28);
+    out += opr * (1 << 21);
+    out += rm * (1 << 16);
    out += operand * 1024;
    out += rn * 32;
    out += rd;
    return out;
 }

-word assembleSTS(a64inst_instruction cI) {
+word sts(a64inst_instruction cI) {
    a64inst_SingleTransferData data = cI.data.SingleTransferData;
    word out = 0;
    a64inst_SingleDataTransferData data2 = data.processOpData.singleDataTransferData;
    // this deals with every bit in the 31-23 range apart from sf and U
-    out += (512+128+64+32)*(2^23);
+    out += (512 + 128 + 64 + 32U) * (1 << 23);
    int sf = data.regType;
    int u = 0;
-    int l = data2.transferType;
    int offset = 0;
    int xn = data2.base;
    int rt = data.target;
    switch (data2.addressingMode) {
        // register offset
        case 2:
-            offset += 2074 + 64*data2.a64inst_addressingModeData.offsetReg;
+            offset += 2074 + 64 * data2.a64inst_addressingModeData.offsetReg;
            break;
        // unsigned offset
        case 3:
@@ -155,37 +155,37 @@ word assembleSTS(a64inst_instruction cI) {
            break;
        // pre/post indexed
        default:
-            offset = 1 + data2.addressingMode*2 + data2.a64inst_addressingModeData.indexedOffset*4;
+            offset = 1 + data2.addressingMode * 2 + data2.a64inst_addressingModeData.indexedOffset * 4;
            break;
    }
-    out += sf*(2^30);
-    out += u*(2^22);
-    out += offset*1024;
+    out += sf * (1 << 30);
+    out += u * (1 << 22);
+    out += offset * 1024;
    out += xn * 32;
    out += rt;
    return out;
 }

-word assembleLDL(a64inst_instruction cI) {
-    word out = 3*(2^27);
+word ldl(a64inst_instruction cI) {
+    word out = 3 * (1 << 27);
    a64inst_SingleTransferData data = cI.data.SingleTransferData;
    int sf = data.regType;
    int simm19 = data.processOpData.loadLiteralData.offset;
    int rt = data.target;
-    out += sf * (2^30);
-    out += simm19*32;
+    out += sf * (1 << 30);
+    out += simm19 * 32;
    out += rt;
    return out;
 }

-void secondPass(a64inst_instruction instrs[], int numInstrs, st* table, word arr[]){
-    //TODO:
+word *secondPass(a64inst_instruction instrs[], int numInstrs, st* table) {
+    // TODO:
    // iterate over instructions again, this time replacing labels
    // with values from symbol table
    // after a line has had all the values replaced, assemble it and append
+    word *arr = (word*)malloc(sizeof(word) * numInstrs);
    int index = 0;
-    int lbl = 0;
-    for (int i=0; i<numInstrs; i++) {
+    for (int i = 0; i < numInstrs; i++) {
        a64inst_instruction cI = instrs[i];
        switch (cI.type) {
            case a64inst_DPIMMEDIATE:
@@ -209,18 +209,18 @@ void secondPass(a64inst_instruction instrs[], int numInstrs, st* table, word arr
                index++;
                break;
            case a64inst_HALT:
-                arr[index] = 69*(2^25);
+                arr[index] = 69U * (1 << 25);
                index++;
                break;
            case a64inst_LABEL:
-                lbl++;
+                // Labels are handled in the first pass and used for addressing.
                break;
            case a64inst_BRANCH:
-                arr[index] = assembleBranch(&cI, table, lbl);
+                arr[index] = assembleBranch(&cI);
                index++;
            default:
                break;
        }
    }
-    return;
-}
+    return arr;
+}