diff --git a/src/Makefile b/src/Makefile index 3a1441a..cdfa089 100755 --- a/src/Makefile +++ b/src/Makefile @@ -7,9 +7,9 @@ CFLAGS ?= -std=c17 -g\ .PHONY: all clean -all: assemble emulate +all: emulate assemble -assemble: assemble.o +assemble: assemble.o util/fileio.o util/binary_util.o assembler/encode.o assembler/parse.o assembler/tokenise.o assembler/string_util.o assembler/symboltable.o emulate: emulate.o util/fileio.o emulator/execute.o emulator/decode.o emulator/print.o emulator/machine_util.o util/binary_util.o clean: diff --git a/src/a64instruction/a64instruction_Branch.h b/src/a64instruction/a64instruction_Branch.h index d280973..8f479bf 100644 --- a/src/a64instruction/a64instruction_Branch.h +++ b/src/a64instruction/a64instruction_Branch.h @@ -9,6 +9,7 @@ typedef enum { typedef struct { word unconditionalOffset; + char* label; } a64inst_Branch_UnconditionalData; typedef struct { @@ -28,6 +29,7 @@ typedef enum { typedef struct { a64inst_ConditionType cond; word offset; + char* label; } a64inst_Branch_ConditionalData; typedef struct { diff --git a/src/a64instruction/a64instruction_Directive.h b/src/a64instruction/a64instruction_Directive.h index fa2faaa..f04cdae 100644 --- a/src/a64instruction/a64instruction_Directive.h +++ b/src/a64instruction/a64instruction_Directive.h @@ -1,5 +1,5 @@ #include "./a64instruction_global.h" typedef struct { - dword value; + word value; } a64inst_DirectiveData; diff --git a/src/a64instruction/a64instruction_SingleTransfer.h b/src/a64instruction/a64instruction_SingleTransfer.h index 3e3da2b..8e412a6 100644 --- a/src/a64instruction/a64instruction_SingleTransfer.h +++ b/src/a64instruction/a64instruction_SingleTransfer.h @@ -33,6 +33,7 @@ typedef struct { typedef struct { uint32_t offset; + char* label; } a64inst_LoadLiteralData; typedef struct { diff --git a/src/assemble.c b/src/assemble.c old mode 100755 new mode 100644 index e2ad1c8..062f0de --- a/src/assemble.c +++ b/src/assemble.c @@ -1,5 +1,59 @@ +/** @file assemble.c + * 
@brief The main file for the ARMv8 assembler. Reads an assembly file and outputs the binary file. + * + * @author Saleh Bubshait + */ + #include +#include +#include "a64instruction/a64instruction.h" +#include "assembler/parse.h" +#include "util/fileio.h" +#include "assembler/encode.h" +#include "assembler/symboltable.h" + +static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount); int main(int argc, char **argv) { + // Check the arguments + if (argc < 3) { + fprintf(stderr, "Error: A source file and an object output file are required. Syntax: ./assemble "); + return EXIT_FAILURE; + } + + // Load the source file into memory + int lineCount = countLines(argv[1]); + char **source = readAssemblyFile(argv[1], lineCount); + + // Parse the source file + a64inst_instruction *instructions = parse(source, lineCount); + + // First Pass: Create the symbol table + symbol_table *table = firstPass(instructions, lineCount); + + // Second Pass: Encode the instructions into binary + word *binary = encode(instructions, lineCount, table); + + // Write the binary to the output file + writeBinaryFile(binary, argv[2], lineCount); + return EXIT_SUCCESS; } + +/** The first pass of the assembler. Creates the symbol table. Adds all labels + * and the address of the instruction following the label to the symbol table. + * Returns the final symbol table. 
+ */ +static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount) { + symbol_table *table = st_init(); + int labelCount = 0; + + for (int i = 0; i < lineCount; i++) { + a64inst_instruction inst = instructions[i]; + if (inst.type == a64inst_LABEL) { + st_insert(table, inst.data.LabelData.label, (i - (labelCount++))); + } + } + + return table; +} diff --git a/src/assembler/encode.c b/src/assembler/encode.c new file mode 100644 index 0000000..ef7c498 --- /dev/null +++ b/src/assembler/encode.c @@ -0,0 +1,200 @@ +/** @file encode.c + * @brief A function to encode the internal representation of ARMv8 + * instructions, a64inst_instruction, into binary. + * + * @author Ethan Dias Alberto + * @author George Niedringhaus + * @author Saleh Bubshait + */ + +#include "symboltable.h" +#include +#include "../util/binary_util.h" +#include "encode.h" + +#define HALT_BINARY 2315255808 + +static int getLabelOffset(symbol_table* table, char* label, int currentIndex, int n_bits) { + address target = st_get(table, label); + return signExtend((unsigned int) (target - currentIndex), n_bits); +} + +// Generates assembled code based on the two-pass assembly method +static word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) { + word wrd = 0; + + switch (instr->data.BranchData.BranchType) { + case a64inst_UNCONDITIONAL: + setBits(&wrd, 26, 29, 0x5); + setBits(&wrd, 0, 25, getLabelOffset(st, instr->data.BranchData.processOpData.unconditionalData.label, index, 26)); + break; + + case a64inst_REGISTER: + setBits(&wrd, 16, 32, 0xD61F); + setBits(&wrd, 5, 10, instr->data.BranchData.processOpData.registerData.src); + break; + + case a64inst_CONDITIONAL: + setBits(&wrd, 26, 32, 0x15); + setBits(&wrd, 5, 24, getLabelOffset(st, instr->data.BranchData.processOpData.conditionalData.label, index, 19)); + setBits(&wrd, 0, 4, instr->data.BranchData.processOpData.conditionalData.cond); + break; + } + + return wrd; +} + +static word 
encodeDPImmediate(a64inst_instruction inst) { + word wrd = 0; + + a64inst_DPImmediateData data = inst.data.DPImmediateData; + + setBits(&wrd, 31, 32, data.regType); // sf + setBits(&wrd, 29, 31, data.processOp); // opc + setBits(&wrd, 28, 29, 0x1); // constant value + setBits(&wrd, 0, 5, data.dest); // rd + + if (data.DPIOpType == a64inst_DPI_ARITHM) { + setBits(&wrd, 23, 26, 0x2); //opi + setBits(&wrd, 5, 10, data.processOpData.arithmData.src); // rn + setBits(&wrd, 22, 23, data.processOpData.arithmData.shiftImmediate); // sh + setBits(&wrd, 10, 22, data.processOpData.arithmData.immediate); // imm12 + } + // if wide move + else { + setBits(&wrd, 23, 26, 0x5); //opi + uint8_t hw = data.processOpData.wideMovData.shiftScalar / 16; + setBits(&wrd, 21, 23, hw); // hw + setBits(&wrd, 5, 21, data.processOpData.wideMovData.immediate); // imm16 + } + + return wrd; +} + +static word encodeDPRegister(a64inst_instruction inst) { + word wrd = 0; + + a64inst_DPRegisterData data = inst.data.DPRegisterData; + setBits(&wrd, 31, 32, data.regType); // sf + setBits(&wrd, 29, 31, data.processOp); // opc + setBits(&wrd, 28, 29, data.DPROpType); // M + setBits(&wrd, 25 ,28, 0x5); + setBits(&wrd, 16, 21, data.src2); // src2 + setBits(&wrd, 5, 10, data.src1); // src1 + setBits(&wrd, 0, 5, data.dest); // src2 + + if (data.DPROpType == a64inst_DPR_MULTIPLY) { + setBits(&wrd, 21, 31, 0xD8); + setBits(&wrd, 15, 16, data.processOpData.multiplydata.negProd); + setBits(&wrd, 10, 15, data.processOpData.multiplydata.summand); + + } else { + // Arithmetic Logic Instruction + setBits(&wrd, 22, 24, data.processOpData.arithmLogicData.shiftType); + setBits(&wrd, 10, 16, data.processOpData.arithmLogicData.shiftAmount); + + if (data.processOpData.arithmLogicData.type == a64inst_DPR_ARITHM) { + // Arithmetic + setBits(&wrd, 24, 25, 0x1); // bit 24 + } else { + setBits(&wrd, 21, 22, data.processOpData.arithmLogicData.negShiftedSrc2); + } + + } + + return wrd; + +} + +static word 
encodeSingleDataTransfer(a64inst_instruction inst) { + word wrd = 0; + + a64inst_SingleTransferData data = inst.data.SingleTransferData; + a64inst_SingleDataTransferData data2 = data.processOpData.singleDataTransferData; + + setBits(&wrd, 22, 32, 0x2E0); + setBits(&wrd, 30, 31, data.regType); + setBits(&wrd, 24, 25, data2.addressingMode == a64inst_UNSIGNED_OFFSET); + setBits(&wrd, 22, 23, data2.transferType); + setBits(&wrd, 5, 10, data2.base); + setBits(&wrd, 0, 5, data.target); + + switch (data2.addressingMode) { + // register offset + case a64inst_REGISTER_OFFSET: + setBits(&wrd, 21, 22, 1); + setBits(&wrd, 10, 16, 0x1A); + setBits(&wrd, 16, 21, data2.a64inst_addressingModeData.offsetReg); + break; + // unsigned offset + case a64inst_UNSIGNED_OFFSET: + setBits(&wrd, 10, 22, data2.a64inst_addressingModeData.unsignedOffset); + break; + // pre/post indexed + default: + setBits(&wrd, 21, 22, 0); + setBits(&wrd, 11, 12, data2.addressingMode == a64inst_PRE_INDEXED); + setBits(&wrd, 10, 11, 1); + setBits(&wrd, 12, 21, data2.a64inst_addressingModeData.indexedOffset); + break; + } + + return wrd; +} + +static word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) { + word wrd = 0; + + a64inst_SingleTransferData data = cI.data.SingleTransferData; + setBits(&wrd, 24, 32, 0x18); + setBits(&wrd, 30, 31, data.regType); + char *label = data.processOpData.loadLiteralData.label; + int offset = getLabelOffset(st, label, arrIndex, 19); + setBits(&wrd, 5, 24, offset); + setBits(&wrd, 0, 5, data.target); + + return wrd; +} + +word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) { + word *arr = (word*)malloc(sizeof(word) * instCount); + int index = 0; + for (int i = 0; i < instCount; i++) { + a64inst_instruction inst = insts[i]; + switch (inst.type) { + case a64inst_DPIMMEDIATE: + arr[index] = encodeDPImmediate(inst); + index++; + break; + case a64inst_DPREGISTER: + arr[index] = encodeDPRegister(inst); + index++; + break; + case 
a64inst_SINGLETRANSFER: + arr[index] = encodeSingleDataTransfer(inst); + index++; + break; + case a64inst_LOADLITERAL: + arr[index] = encodeLoadLiteral(inst, index, st); + index++; + break; + case a64inst_DIRECTIVE: + arr[index] = inst.data.DirectiveData.value; + index++; + break; + case a64inst_HALT: + arr[index] = HALT_BINARY; + index++; + break; + case a64inst_LABEL: + // Labels are handled in the first pass and used for addressing. + break; + case a64inst_BRANCH: + arr[index] = encodeBranch(&inst, index, st); + index++; + default: + break; + } + } + return arr; +} diff --git a/src/assembler/encode.h b/src/assembler/encode.h new file mode 100644 index 0000000..1ac8a82 --- /dev/null +++ b/src/assembler/encode.h @@ -0,0 +1,21 @@ +/** @file encode.h + * @brief A function to encode the internal representation of ARMv8 + * instructions, a64inst_instruction, into binary. + * + * @author Saleh Bubshait + */ + +#include "../global.h" +#include "../a64instruction/a64instruction.h" +#include "symboltable.h" + +/** @brief Encodes the internal representation of ARMv8 instructions into binary. + * The symbol table is used to resolve labels in branch instructions. Assumes + * that the instructions are in the same order as they appear in the source file. + * + * @param insts An array of a64inst_instruction to encode. + * @param instCount The number of instructions in the array. + * @param st The symbol table to use for label resolution. + * @return An array of words representing the binary encoding of the instructions. + */ +word *encode(a64inst_instruction insts[], int instCount, symbol_table* st); diff --git a/src/assembler/parse.c b/src/assembler/parse.c new file mode 100644 index 0000000..7020ed4 --- /dev/null +++ b/src/assembler/parse.c @@ -0,0 +1,433 @@ +/** @file parse.c + * @brief Functions to parse ARMv8 assembly lines into an array of a special + * internal representation of instructions, a64inst_instruction. 
+ * + * @author Ethan Dias Alberto + * @author George Niedringhaus + * @author Saleh Bubshait + */ + +#include +#include +#include +#include +#include +#include +#include "parse.h" +#include "../a64instruction/a64instruction.h" +#include "../global.h" +#include "tokenise.h" +#include "string_util.h" + +/************************************ + * STRUCTS + ************************************/ + +typedef struct { + int type; + int immediate; +} ShiftData; + +/************************************ + * PROTOTYPES + ************************************/ + +static void parse_instruction(char asmLine[], a64inst_instruction *instr); +static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands); +static void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); +static void parseAddressingMode(a64inst_instruction *instr, char *operandList[], int numOperands); +static void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); +static void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount); +static void parseDirective(a64inst_instruction *inst, char *tokens[]); +static ShiftData *parseShift(char *shift); +static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount); + +/************************************ + * CONSTANTS + ************************************/ + +static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"}; +static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"}; +static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"}; +static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; +static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; +static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"}; +static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"}; 
+ + +/************************************ + * FUNCTIONS + ************************************/ + +a64inst_instruction *parse(char **asmLines, int lineCount) { + a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); + + int i = 0; + while (asmLines[i] != NULL) { + parse_instruction(asmLines[i], &instructions[i]); + i++; + } + + return instructions; +} + +/** Parses a single ARMv8 assembly line into an a64inst_instruction. + */ +static void parse_instruction(char asmLine[], a64inst_instruction *instr) { + if (instr == NULL){ + exit(EXIT_FAILURE); + } + + char *asmLineCopy = duplicateString(asmLine); + int tokensCount = 0; + char **tokens = tokenise(asmLineCopy, &tokensCount); + char *opcode = tokens[0]; + + // Check if the instruction is the halt instruction, "and x0, x0, x0". + if (tokensCount == 4 && strcmp(opcode, "and") == 0 + && getRegister(tokens[1]) == 0 + && getRegister(tokens[2]) == 0 + && getRegister(tokens[3]) == 0) { + + instr->type = a64inst_HALT; + return; + } + + + if(strcmp(opcode, ".int") == 0){ + // Directive + instr->type = a64inst_DIRECTIVE; + parseDirective(instr, tokens); + + + } else if(opcode[strlen(opcode)-1]== ':') { + // Label + instr->type = a64inst_LABEL; + opcode[strlen(opcode) - 1] = '\0'; // Remove the colon + instr->data.LabelData.label = opcode; + + } else { + // Instruction + + // Classify the opcode into the correct instruction type. 
+ classifyOpcode(opcode, instr, tokens, &tokensCount); + + switch(instr->type){ + case a64inst_BRANCH: + parseBranch(instr, opcode, tokens); + break; + + case a64inst_SINGLETRANSFER: + parseSingleTransfer(instr, opcode, tokens, tokensCount); + parseAddressingMode(instr, tokens, tokensCount); + break; + + case a64inst_LOADLITERAL: + parseSingleTransfer(instr, opcode, tokens, tokensCount); + break; + + case a64inst_DPREGISTER: + //generate DP operands; + parseDPRegister(instr, tokens, tokensCount); + break; + + case a64inst_DPIMMEDIATE: + parseDPImmediate(instr, tokens, tokensCount); + break; + + default: + printf("Error: Invalid Instruction, '%s'\n", opcode); + break; + + } + + } +} + +static void parseDirective(a64inst_instruction *instr, char *tokens[]) { + char *intValue = tokens[1]; + char *endptr; + if(strncmp(intValue, "0x", 2) == 0) { + intValue += 2; + instr->data.DirectiveData.value = strtol(intValue, &endptr, 16); + } else { + instr->data.DirectiveData.value = strtol(tokens[1], &endptr, 10); + } +} + + +static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) { + + switch(instr->type){ + case a64inst_SINGLETRANSFER: + instr->data.SingleTransferData.regType = getRegisterType(tokens[1]); + instr->data.SingleTransferData.target = getRegister(tokens[1]); + break; + + case a64inst_LOADLITERAL: + instr->data.SingleTransferData.regType = getRegisterType(tokens[1]); + instr->data.SingleTransferData.target = getRegister(tokens[1]); + + if(*tokens[2] =='#'){ + //offset is immediate + instr->data.SingleTransferData.processOpData.loadLiteralData.offset = getImmediate(tokens[2]);; + } else { + //offset is label + instr->data.SingleTransferData.processOpData.loadLiteralData.label = tokens[2]; + } + + break; + + default: + break; + + } +} + +void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) { + switch(instr->data.BranchData.BranchType){ + case a64inst_UNCONDITIONAL: + //define and sign extend 
immediate offset + //use symbol table + printf("unconditional"); + instr->data.BranchData.processOpData.unconditionalData.label = operandList[1]; + break; + case a64inst_REGISTER: + instr->data.BranchData.processOpData.registerData.src = getRegister(operandList[1]); + break; + case a64inst_CONDITIONAL: + { + char condition[strlen(opcode)+1]; + strcpy(condition, opcode+2); + if(strcmp(condition, "eq")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = EQ; + } else if (strcmp(condition, "ne")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = NE; + } else if (strcmp(condition, "ge")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = GE; + } else if (strcmp(condition, "lt")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = LT; + } else if (strcmp(condition, "gt")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = GT; + } else if (strcmp(condition, "le")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = LE; + } else if (strcmp(condition, "al")==0){ + instr->data.BranchData.processOpData.conditionalData.cond = AL; + } + instr->data.BranchData.processOpData.unconditionalData.label = operandList[1]; + + break; + + } + } +} + +void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) { + a64inst_DPImmediateData *data = &inst->data.DPImmediateData; + data->dest = getRegister(tokens[1]); + data->regType = getRegisterType(tokens[1]); + + if (containsString(tokens[0], WIDE_MOV_OPCODES, 4)) { + data->DPIOpType = a64inst_DPI_WIDEMOV; + data->processOp = lastIndexOfString(tokens[0], WIDE_MOV_OPCODES, 4); + data->processOpData.wideMovData.immediate = getImmediate(tokens[2]); + if (tokensCount >= 4) { + ShiftData shData = *parseShift(tokens[3]); + data->processOpData.wideMovData.shiftScalar = shData.immediate; + } + + } else { + data->DPIOpType = a64inst_DPI_ARITHM; + data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4); + 
data->processOpData.arithmData.src = getRegister(tokens[2]); + data->processOpData.arithmData.immediate = getImmediate(tokens[3]); + + if (tokensCount >= 5) { + ShiftData shData = *parseShift(tokens[4]); + if (shData.immediate > 0) { + data->processOpData.arithmData.shiftImmediate = true; + } + } + + } +} + +void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) { + a64inst_DPRegisterData *data = &inst->data.DPRegisterData; + data->dest = getRegister(tokens[1]); + data->regType = getRegisterType(tokens[1]); + data->src1 = getRegister(tokens[2]); + data->src2 = getRegister(tokens[3]); + + if (containsString(tokens[0], MULTIPLY_OPCODES, 4)) { + // Multiply + data->DPROpType = a64inst_DPR_MULTIPLY; + if (tokensCount >= 5) { + data->processOpData.multiplydata.summand = getRegister(tokens[4]); + data->processOpData.multiplydata.negProd = strcmp(tokens[0], "msub") == 0; + } + else { + data->processOpData.multiplydata.summand = ZERO_REGISTER; + data->processOpData.multiplydata.negProd = strcmp(tokens[0], "mneg") == 0; + } + + } else { + // Arithmetic/Logic + data->DPROpType = a64inst_DPR_ARITHMLOGIC; + + if (containsString(tokens[0], ARITHMETIC_OPCODES, 4)) { + // Arithmetic + data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4); + data->processOpData.arithmLogicData.type = 1; + if(tokensCount == 5) { + //has a shift + int numTokens = 0; + char **shiftOperands = tokenise(tokens[4], &numTokens); + data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); + data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]); + } + + } else { + // Logic + int opcodeCategory = lastIndexOfString(tokens[0], LOGIC_OPCODES, 8); + switch(opcodeCategory/2){ + case 0: + //and + if((tokens[0][strlen(tokens[0])-1]) == 's'){ + data->processOp = 3; + } else { + data->processOp = 0; + } + data->processOpData.arithmLogicData.negShiftedSrc2 = 0; + break; + case 1: + //negated AND + 
if((tokens[0][strlen(tokens[0])-1]) == 's'){ + data->processOp = 3; + } else { + data->processOp = 0; + } + data->processOpData.arithmLogicData.negShiftedSrc2 = 1; + break; + case 2: + //XOR + data->processOp = 2; + if(opcodeCategory==4){ + data->processOpData.arithmLogicData.negShiftedSrc2 = 0; + } else { + data->processOpData.arithmLogicData.negShiftedSrc2 = 1; + } + break; + case 3: + //OR + data->processOp = 1; + if(opcodeCategory==6){ + data->processOpData.arithmLogicData.negShiftedSrc2 = 0; + } else { + data->processOpData.arithmLogicData.negShiftedSrc2 = 1; + } + break; + } + if(tokensCount == 5) { + //has a shift + int numTokens = 0; + char **shiftOperands = tokenise(tokens[4], &numTokens); + data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); + data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]); + } + } + } +} + +/** Classifies the given opcode into the correct instruction type. + * Modifies instr to reflect the classification. + */ +static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) { + + // First, if the opcode is an alias, convert it to the target instruction. 
+ translateAlias(opcode, tokens, tokensCount); + + if (containsString(opcode, BRANCH_OPCODES, 9)) { + instr->type = a64inst_BRANCH; + + if (strcmp(opcode, "br") == 0) { + instr->data.BranchData.BranchType = a64inst_REGISTER; + } else if (strcmp(opcode, "b") == 0) { + instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; + } else { + instr->data.BranchData.BranchType = a64inst_CONDITIONAL; + } + + } else if (containsString(opcode, SINGLE_TRANSFER_OPCODES, 2)) { + instr->type = a64inst_SINGLETRANSFER; + if (*tokens[2] == '[') { + instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; + instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0; + + } else { + instr->type = a64inst_LOADLITERAL; + } + + // DP Instruction. + // DP Register if the third operand is a register. + } else if (*tokensCount >= 4 && isRegister(tokens[3])) { + instr->type = a64inst_DPREGISTER; + } else { + instr->type = a64inst_DPIMMEDIATE; + } + +} + +/** Parses a shift string into a ShiftData struct. + */ +static ShiftData *parseShift(char *shift) { + char buffer[20]; + strcpy(buffer, shift); + + char *shiftType = strtok(buffer, " "); + char *shiftAmount = strtok(NULL, " "); + + ShiftData *data = malloc(sizeof(ShiftData)); + + data->type = lastIndexOfString(shiftType, SHIFT_TYPE_OPCODES, 4); + + SKIP_WHITESPACE(shiftAmount); + data->immediate = getImmediate(shiftAmount); + return data; +} + +/** Parses the addressing mode of a single transfer instruction. 
(Not load literal) + */ +static void parseAddressingMode(a64inst_instruction *instr, char *tokens[], int tokenCount) { + assert(*tokens[2] == '['); + + int operandCount = 0; + char *unsplitString = duplicateString(tokens[2]); + char **operands = tokeniseOperands(tokens[2], &operandCount); + + int baseRegister = getRegister(operands[0]); + + instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister; + + if (tokenCount >= 4) { + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(tokens[3]); + + } else if(unsplitString[strlen(unsplitString)-1] == '!') { + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(operands[1]); + + } else if (operandCount == 1 || (!isRegister(operands[1]))) { + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; + if(operandCount > 1){ + int offset = getImmediate(operands[1]); + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; + } + } else { + if((isRegister(operands[0]) == 1) + && (isRegister(operands[1]) == 1)){ + instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; + instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getRegister(operands[1]); + } + } +} diff --git a/src/assembler/parse.h b/src/assembler/parse.h new file mode 100644 index 0000000..8635ffc --- /dev/null +++ b/src/assembler/parse.h @@ -0,0 +1,17 @@ +/** @file parse.h + * @brief A function to parse ARMv8 assembly lines into an array of a special + * 
internal representation of instructions, a64inst_instruction. + * + * @author Ethan Dias Alberto + * @author Saleh Bubshait + */ + +#include "../a64instruction/a64instruction.h" + +/** @brief Parses a list of ARMv8 assembly lines into an array of a64inst_instruction. + * + * @param asmLines An array of strings, each string is an ARMv8 assembly line. + * @param lineCount The number of lines in the asmLines array. + * @return An array of a64inst_instruction representing the parsed instructions. + */ +a64inst_instruction *parse(char **asmLines, int lineCount); diff --git a/src/assembler/string_util.c b/src/assembler/string_util.c new file mode 100644 index 0000000..e519ef4 --- /dev/null +++ b/src/assembler/string_util.c @@ -0,0 +1,173 @@ +/** @file string_util.c + * @brief This file contains the implementation of some string processing + * utility functions used in the assembler. + * + * @author Saleh Bubshait + */ + +#include +#include +#include +#include +#include "string_util.h" +#include "../global.h" + +/************************************ + * CONSTANTS + ************************************/ + +static const char *SPECIAL_REGISTERS[] = {"sp", "xzr", "wzr"}; +static const char *ZERO_REGISTER_ALIAS[] = {"xzr", "wzr"}; +static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"}; +static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"}; + +/************************************ + * FUNCTIONS + ************************************/ + +char *trim(char *str) { + // Skip leading whitespace + while (isspace(*str)) { + str++; + } + + // If the string is all whitespace + if (*str == '\0') { + return str; + } + + // Skip trailing whitespace + char *end = str + strlen(str) - 1; + while (end > str && isspace(*end)) { + end--; + } + end[1] = '\0'; + + return str; +} + +bool containsString(char *str, const char *arr[], int arrSize) { + for (int i = 0; i < arrSize; i++) { + if (strcmp(str, arr[i]) == 0) { + return 
true; + } + } + return false; +} + +int lastIndexOfString(char *str, const char *arr[], int arrSize) { + for (int i = arrSize - 1; i >= 0; i--) { + if (strcmp(str, arr[i]) == 0) { + return i; + } + } + return -1; +} + +char *duplicateString(char *str) { + char *newStr = malloc(strlen(str) + 1); + strcpy(newStr, str); + return newStr; +} + +bool isRegister(char *str) { + SKIP_WHITESPACE(str); + if (str == NULL) + return false; + + if (containsString(str, SPECIAL_REGISTERS, 3)) + return true; + + return tolower(str[0]) == 'x' || tolower(str[0]) == 'w'; +} + +int getRegister(char *str) { + SKIP_WHITESPACE(str); + if (containsString(str, ZERO_REGISTER_ALIAS, 2)) { + return ZERO_REGISTER; + } + + return strtol(str + 1, NULL, 10); +} + +int getImmediate(char *str) { + SKIP_WHITESPACE(str); + if (strlen(str) < 2) { + return 0; + } + + if (str[0] != '#') + return 0; + + str++; // skip # + + if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 3) == 0) { + // Hex + return strtol(str + 2, NULL, 16); + } else { + // Decimal + return strtol(str, NULL, 10); + } + + return 0; +} + +int getRegisterType(char *str) { + SKIP_WHITESPACE(str); + + return tolower(str[0]) == 'x'; +} + + +/** @brief Translates an alias instruction into its target instruction. + * Note: This function modifies the input tokens array and the tokensCount. + * Assumes there is enough space in the tokens array to add the new tokens. + * + * @param opcode The opcode of the instruction. + * @param tokens The tokens of the instruction. + * @param tokensCount The number of tokens in the instruction. + */ +void translateAlias(char *opcode, char *tokens[], int *tokensCount) { + + int aliasIndex = lastIndexOfString(opcode, ALIAS_OPCODES, 9); + if (aliasIndex == -1) + return; + + // The instruction is one of the aliases, convert into the target. + char *targetOpcode = ALIAS_TARGET_OPCODES[aliasIndex]; + + // To correctly encode the zero register, which is either w31 or x31. 
+ char *zeroReg = malloc(5 * sizeof(char)); + *zeroReg = *tokens[1]; + strcat(zeroReg, "31"); + + switch(aliasIndex) { + case 0: // cmp -> subs rzr, rn, + case 1: // cmn -> adds rzr, rn, + case 4: // tst -> ands rzr, rn, + // Convert from [instr] reg, to [instr] rzr, reg, + tokens[0] = targetOpcode; + tokens[4] = tokens[3]; + tokens[3] = tokens[2]; + tokens[2] = tokens[1]; + tokens[1] = zeroReg; + (*tokensCount)++; + break; + + case 2: // neg -> subs rd, rzr, + case 3: // negs -> subs rd, rzr, + case 5: // mvn -> orn rd, rzr, + case 6: // mov -> orr rd, rzr, rm + tokens[0] = targetOpcode; + tokens[4] = tokens[3]; + tokens[3] = tokens[2]; + tokens[2] = zeroReg; + (*tokensCount)++; + break; + + default: + // Note, the multiply instructions are handled separately. + // See DPReg parsing. + break; + } +} diff --git a/src/assembler/string_util.h b/src/assembler/string_util.h new file mode 100644 index 0000000..c9bca35 --- /dev/null +++ b/src/assembler/string_util.h @@ -0,0 +1,64 @@ +/** @file string_util.h + * @brief This file contains the implementation of some string processing + * utility functions used in the assembler. + * + * @author Saleh Bubshait + */ + +/** @brief Skips whitespace characters in a string. + * @param ptr A pointer to the string to skip whitespace in. + */ +#define SKIP_WHITESPACE(ptr) do { while (isspace(*ptr)) { ptr++; } } while (0) + +/** @brief Removes leading and trailing whitespace from a string. + * Note. This function modifies the input string. + * @param str The string to trim. + * @return A pointer to the first non-whitespace character in the string. + */ +char *trim(char *str); + +/** @brief Checks if a string is in an array of strings. + * + * @param str The string to check. + * @param arr The array of strings to check against. + * @param arrSize The size of the array. + * @return True if the string is in the array, false otherwise. 
+ */ +bool containsString(char *str, const char *arr[], int arrSize); + +/** @brief Finds the last index of a string in an array of strings. + * Note: If multiple occurances of the string exist, the index of the last + * occurance is returned! + * + * @param str The string to find. + * @param arr The array of strings to search. + * @param arrSize The size of the array. + * @return The index of the last occurrence of the string in the array, or -1 if not found. + */ +int lastIndexOfString(char *str, const char *arr[], int arrSize); + +/** @brief Duplicates a string. + * Note: The caller is responsible for freeing the returned string. + * + * @param str The string to duplicate. + * @return A pointer to the duplicated string. + */ +char *duplicateString(char *str); + +/** @brief Checks if a string represents an ARMv8 register. + * A string is considered a register if it is: + * - A general purpose register (x0-x30 or w0-w30) + * - A special register (sp, xzr, wzr) + * + * @param str The string to check. + * @return True if the string is a register, false otherwise. + */ +bool isRegister(char *str); + +int getRegister(char *str); + +int getImmediate(char *str); + +int getRegisterType(char *str); + +void translateAlias(char *opcode, char *tokens[], int *tokensCount); diff --git a/src/assembler/symboltable.c b/src/assembler/symboltable.c new file mode 100644 index 0000000..9ccd3d6 --- /dev/null +++ b/src/assembler/symboltable.c @@ -0,0 +1,82 @@ +/** @file symboltable.c + * @brief An Abstract Data Type (ADT) for a symbol table, an array of + * label-address pairs. Labels are strings and addresses are unsigned integers. + * (uint32_t). The symbol table is implemented as a dynamic array. 
+ *
+ * @author Saleh Bubshait
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "symboltable.h"
+
+/** Allocates an empty symbol table with room for INITIAL_CAPACITY entries.
+ * Terminates the program if memory cannot be allocated.
+ */
+symbol_table *st_init(void) {
+    symbol_table *st = malloc(sizeof *st);
+    if (st == NULL) {
+        fprintf(stderr, "Failed to allocate memory for symbol table\n");
+        exit(EXIT_FAILURE);
+    }
+
+    st->table = malloc(INITIAL_CAPACITY * sizeof *st->table);
+    if (st->table == NULL) {
+        fprintf(stderr, "Failed to allocate memory for table\n");
+        exit(EXIT_FAILURE);
+    }
+
+    st->size = 0;
+    st->capacity = INITIAL_CAPACITY;
+
+    return st;
+}
+
+/* Grows the symbol table by a factor of GROWTH_FACTOR *only if the table is full*.
+ * Terminates the program if the table cannot be reallocated.
+ */
+static void grow(symbol_table *st) {
+    if (st->size != st->capacity) {
+        return;
+    }
+
+    int newCapacity = st->capacity * GROWTH_FACTOR;
+
+    // Keep the old pointer until realloc has succeeded, so that the table is
+    // never replaced by NULL.
+    symbol_table_map *newTable = realloc(st->table, (size_t) newCapacity * sizeof *newTable);
+    if (newTable == NULL) {
+        fprintf(stderr, "Failed to reallocate memory for table\n");
+        exit(EXIT_FAILURE);
+    }
+
+    st->table = newTable;
+    st->capacity = newCapacity;
+}
+
+/* Returns the first entry whose label equals the given label, or NULL if
+ * there is no such entry.
+ */
+static symbol_table_map *find(symbol_table *st, char *label) {
+    for (int i = 0; i < st->size; i++) {
+        if (strcmp(st->table[i].label, label) == 0) {
+            return &st->table[i];
+        }
+    }
+
+    return NULL;
+}
+
+/** Appends a label-address pair to the table, growing it first if it is full.
+ * The label pointer is stored as is (the string is not copied), so it must
+ * stay valid for as long as the table is used.
+ */
+void st_insert(symbol_table *st, char *label, address addr) {
+    // If full, grow the table
+    grow(st);
+
+    // Insert the new entry to the end of the table
+    symbol_table_map *entry = &st->table[st->size];
+    entry->label = label;
+    entry->address = addr;
+
+    st->size++;
+}
+
+/** Returns true if an entry with the given label exists. */
+bool st_contains(symbol_table *st, char *label) {
+    return find(st, label) != NULL;
+}
+
+/** Returns the address of the first entry with the given label.
+ * Terminates the program if the label is not in the table.
+ */
+address st_get(symbol_table *st, char *label) {
+    symbol_table_map *entry = find(st, label);
+    if (entry != NULL) {
+        return entry->address;
+    }
+
+    fprintf(stderr, "Label %s not found in symbol table\n", label);
+    exit(EXIT_FAILURE);
+}
+
+/** Frees the entries array and the table itself. The label strings are not
+ * freed, as they are not owned by the table. Passing NULL is a no-op.
+ */
+void st_free(symbol_table *st) {
+    if (st == NULL) {
+        return;
+    }
+
+    free(st->table);
+    free(st);
+}
diff --git a/src/assembler/symboltable.h b/src/assembler/symboltable.h
new file mode 100644
index 0000000..12c99b8
--- /dev/null
+++ b/src/assembler/symboltable.h
@@ -0,0 +1,75 @@
+/** @file symboltable.h
+ * @brief An Abstract Data Type (ADT) for a symbol
table, an array of
+ * label-address pairs. Labels are strings and addresses are unsigned integers
+ * (uint32_t). The symbol table is implemented as a dynamic array.
+ *
+ * @author Saleh Bubshait
+ */
+
+#ifndef __SYMBOLTABLE__
+#define __SYMBOLTABLE__
+
+// NOTE(review): the header names of the three #include lines below are missing
+// in this copy. The declarations in this file need <stdint.h> (uint32_t) and
+// <stdbool.h> (bool); confirm the exact list against the repository.
+#include
+#include
+#include
+
+// Number of entries a new table is allocated with (see st_init).
+#define INITIAL_CAPACITY 5
+// Factor the capacity is multiplied by when a full table has to grow.
+#define GROWTH_FACTOR 2
+
+typedef uint32_t address;
+
+/** An entry in the symbol table, a label-address pair.
+ * The label is stored as a pointer; the table does not copy the string.
+ */
+typedef struct {
+    char *label;
+    address address;
+} symbol_table_map;
+
+/** The symbol table ADT.
+ */
+typedef struct {
+    symbol_table_map* table; // entries
+    int size; // number of entries
+    int capacity; // size of the table. capacity >= size
+} symbol_table;
+
+/** @brief Initializes a new symbol table.
+ * The program is terminated with EXIT_FAILURE if memory cannot be allocated.
+ *
+ * @return A pointer to the new, empty symbol table. It should be released
+ * with st_free.
+ */
+symbol_table *st_init(void);
+
+/** @brief Inserts a new label-address pair to the symbol table.
+ * Grows the table if it is full. If the label already exists in the table,
+ * another entry with the same label is inserted (for performance).
+ * The label pointer itself is stored, so the string must outlive the table.
+ *
+ * @param st A pointer to the target symbol table.
+ * @param label The label to insert.
+ * @param addr The address to insert.
+ */
+void st_insert(symbol_table *st, char *label, address addr);
+
+/** @brief Checks if a label exists in the symbol table.
+ *
+ * @param st A pointer to the target symbol table.
+ * @param label The label to check.
+ * @return True if the label exists in the table, false otherwise.
+ */
+bool st_contains(symbol_table *st, char *label);
+
+/** @brief Gets the address of a label in the symbol table.
+ * st_contains should be called before calling this function! If the label is
+ * not in the table, an error is printed and the program is terminated with
+ * EXIT_FAILURE. If the label was inserted more than once, the address of the
+ * entry that was inserted first is returned.
+ *
+ * @param st A pointer to the target symbol table.
+ * @param label The label to get the address of.
+ * @return The address of the label in the table.
+ */
+address st_get(symbol_table *st, char *label);
+
+/** @brief Frees the memory allocated for the symbol table.
+ * The entries array and the table itself are freed; the label strings are
+ * not freed.
+ *
+ * @param st A pointer to the target symbol table.
+ */
+void st_free(symbol_table *st);
+
+#endif
diff --git a/src/assembler/tokenise.c b/src/assembler/tokenise.c
new file mode 100644
index 0000000..09a298c
--- /dev/null
+++ b/src/assembler/tokenise.c
@@ -0,0 +1,106 @@
+/** @file tokenise.c
+ * @brief Functions to tokenise lines of assembly and operand strings.
+ *
+ * @author Saleh Bubshait
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "tokenise.h"
+#include "string_util.h"
+
+#define MAX_TOKEN_COUNT 6
+#define MAX_OPERAND_COUNT 5
+#define OPERAND_DELIMITER ", "
+#define OPEN_BRACKET '['
+#define CLOSE_BRACKET ']'
+
+// tokenise() stores the opcode followed by every operand token.
+_Static_assert(MAX_TOKEN_COUNT >= MAX_OPERAND_COUNT + 1,
+               "the token array must hold the opcode and all operands");
+
+/* Allocates an array of `count` token pointers. Terminates the program if the
+ * allocation fails.
+ */
+static char **allocateTokens(size_t count) {
+    char **tokens = malloc(count * sizeof *tokens);
+    if (!tokens) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return tokens;
+}
+
+/* Appends an operand token to the array. Terminates the program instead of
+ * writing past the end of the array when a malformed line has more than
+ * MAX_OPERAND_COUNT operands.
+ */
+static void appendOperand(char **tokens, int *numTokens, char *token) {
+    if (*numTokens >= MAX_OPERAND_COUNT) {
+        fprintf(stderr, "Too many operands (at most %d are supported)\n", MAX_OPERAND_COUNT);
+        exit(EXIT_FAILURE);
+    }
+
+    tokens[(*numTokens)++] = token;
+}
+
+/** Splits a line into the opcode (everything up to the first space) followed
+ * by its operands, see tokeniseOperands. The line is modified in place and
+ * the tokens point into it; only the returned array is newly allocated and
+ * has to be freed by the caller. The line must contain at least one token.
+ */
+char **tokenise(char *line, int *numTokens) {
+    char **tokens = allocateTokens(MAX_TOKEN_COUNT);
+
+    line = trim(line);
+
+    *numTokens = 0;
+    char *token = strtok(line, " ");
+    assert(token != NULL);
+
+    tokens[(*numTokens)++] = token;
+
+    // An empty delimiter set makes strtok return the whole rest of the line.
+    char *operandStart = strtok(NULL, "");
+    if (operandStart == NULL) {
+        // No operands. Return the first (opcode) token.
+        return tokens;
+    }
+
+    SKIP_WHITESPACE(operandStart);
+
+    // Use tokeniseOperands to tokenise the operands
+    int operandTokensCount = 0;
+    char **operandTokens = tokeniseOperands(operandStart, &operandTokensCount);
+
+    for (int i = 0; i < operandTokensCount; i++) {
+        tokens[(*numTokens)++] = operandTokens[i];
+    }
+
+    // Only the temporary array is released; the tokens live in `line`.
+    free(operandTokens);
+    return tokens;
+}
+
+/** Splits an operand string at every comma that is not enclosed in square
+ * brackets. If the string starts with '[', that bracket is dropped and the
+ * string is cut at the last ']' (if there is one). The string is modified in
+ * place and the tokens point into it; the returned array has to be freed by
+ * the caller. Terminates the program if there are more than
+ * MAX_OPERAND_COUNT operands.
+ */
+char **tokeniseOperands(char *line, int *numTokens) {
+    char **tokens = allocateTokens(MAX_OPERAND_COUNT);
+
+    SKIP_WHITESPACE(line);
+
+    // Remove leading and trailing brackets if they exist
+    if (*line == OPEN_BRACKET) {
+        line++; // skip '['
+        char *end = line + strlen(line) - 1;
+        while (end > line && *end != CLOSE_BRACKET) {
+            end--;
+        }
+        if (*end == CLOSE_BRACKET) {
+            *end = '\0';
+        }
+    }
+
+    line = trim(line);
+
+    *numTokens = 0;
+    bool inBracket = false;
+    char *currentToken = line;
+
+    for (char *c = line; *c != '\0'; ++c) {
+        if (*c == '[') {
+            inBracket = true;
+        } else if (*c == ']') {
+            inBracket = false;
+        }
+
+        if (*c == ',' && !inBracket) {
+            *c = '\0';
+            appendOperand(tokens, numTokens, currentToken);
+            currentToken = c + 1; // skip the comma
+            SKIP_WHITESPACE(currentToken);
+        }
+    }
+
+    // The last operand is not followed by a comma.
+    if (*currentToken != '\0') {
+        appendOperand(tokens, numTokens, currentToken);
+    }
+
+    return tokens;
+}
diff --git a/src/assembler/tokenise.h b/src/assembler/tokenise.h
new file mode 100644
index 0000000..21ba317
--- /dev/null
+++ b/src/assembler/tokenise.h
@@ -0,0 +1,26 @@
+/** @file tokenise.h
+ * @brief Functions to tokenise lines of assembly and operand strings.
+ *
+ * @author Saleh Bubshait
+ */
+
+/** @brief Tokenises a line of assembly code. The first two tokens are separated
+ * by a space, and the rest are separated by commas.
+ * e.g., "add x1, x2, x3" -> ["add", "x1", "x2", "x3"]. Handles and skips any
+ * whitespaces, e.g., " add x1, x2,#4 " -> ["add", "x1", "x2", "#4"].
+ * @param line The line to tokenise. It is modified in place and the tokens
+ * point into it.
+ * @param numTokens A pointer to an integer to store the number of tokens.
+ * @return An array of strings containing the tokens. The array has to be
+ * freed by the caller; the tokens themselves must not be freed.
+ */
+char **tokenise(char *line, int *numTokens);
+
+/** @brief Tokenises the operands of an instruction. The operands are separated
+ * by commas. Handles and skips any whitespaces, e.g., "x1, x2, #4" -> ["x1", "x2", "#4"].
+ * If the line starts with a bracket, both it and the closing bracket are removed.
+ * Note. It also removes anything after the brackets, for example:
+ * "[x1, x2, #4]!" -> ["x1", "x2", "#4"].
+ * @param line The line to tokenise. It is modified in place and the tokens
+ * point into it.
+ * @param numTokens A pointer to an integer to store the number of tokens.
+ * @return An array of strings containing the tokens. The array has to be
+ * freed by the caller; the tokens themselves must not be freed.
+ */
+char **tokeniseOperands(char *line, int *numTokens);
diff --git a/src/util/binary_util.c b/src/util/binary_util.c
index 68d8e38..f0cd588 100644
--- a/src/util/binary_util.c
+++ b/src/util/binary_util.c
@@ -7,6 +7,9 @@
 
 #include
 #include "binary_util.h"
+#include
+#include
+#include "binary_util.h"
 
 word getBits(word wrd, uint8_t lsb, uint8_t msb) {
 
@@ -17,6 +20,23 @@ word getBits(word wrd, uint8_t lsb, uint8_t msb) {
     return wrd >> lsb;
 }
 
+void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value) {
+    // Ensure LSB and MSB are within range of word size, and in the correct order
+    assert(lsb < msb && msb <= 32);
+
+    // Create a mask with 1s in the range [lsb, msb) and 0s elsewhere.
+    // The 1 is shifted as a word: shifting a signed int 1 into bit 31 is
+    // undefined behaviour.
+    word mask = 0;
+    for (uint8_t i = lsb; i < msb; i++) {
+        mask |= (word) 1 << i;
+    }
+
+    // Clear the bits in the range [lsb, msb) in the word
+    *wrd &= ~mask;
+
+    // Set the bits in the range [lsb, msb) to the value. Bits of the value
+    // that do not fit in the range are discarded by the mask.
+    *wrd |= (value << lsb) & mask;
+}
+
 dword max(dword a, dword b) {
     return a > b ? a : b;
 }
diff --git a/src/util/binary_util.h b/src/util/binary_util.h
index 2b9bc15..db95744 100644
--- a/src/util/binary_util.h
+++ b/src/util/binary_util.h
@@ -20,6 +20,17 @@
  */
 word getBits(word wrd, uint8_t lsb, uint8_t msb);
 
+/** @brief Sets a range of bits of a word (32-bit unsigned integer) to a value.
+ * The range is inclusive of the lsb and exclusive of the msb. The value should
+ * fit within the range; bits of the value outside the range are ignored.
+ *
+ * @param wrd A pointer to the word to set bits in.
+ * @param lsb The least significant bit of the range to set, inclusive.
+ * @param msb The most significant bit of the range to set, exclusive.
+ * Must be greater than lsb and at most 32.
+ * @param value The value to set the bits to.
+ */
+void setBits(word* wrd, uint8_t lsb, uint8_t msb, word value);
+
 /** @brief Returns the maximum of two given two double words (uint64_t).
  *
  * @param a The first double word.
diff --git a/src/util/fileio.c b/src/util/fileio.c
index c427c09..e446176 100644
--- a/src/util/fileio.c
+++ b/src/util/fileio.c
@@ -11,6 +11,8 @@
 #include "fileio.h"
 #include "../global.h"
 
+#define MAX_ASM_LINE_LENGTH 300
+
 byte *fileio_loadBin(const char *filePath, size_t memorySize) {
     FILE *file = fopen(filePath, "rb");
     if (file == NULL) {
@@ -47,5 +49,87 @@ byte *fileio_loadBin(const char *filePath, size_t memorySize) {
     if (i < byteCount) {
         memset(fileData + i, 0, (byteCount - i) * sizeof(byte));
     }
+
     return fileData;
 }
+
+/* Writes numInstrs words to outputFile. Terminates the program if the file
+ * cannot be opened or if not all of the words could be written.
+ */
+void writeBinaryFile(word instrs[], char outputFile[], int numInstrs) {
+    FILE *fp = fopen(outputFile, "wb");
+    if (fp == NULL) {
+        fprintf(stderr, "Error: Could not open file %s\n", outputFile);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t expected = numInstrs > 0 ? (size_t) numInstrs : 0;
+    size_t written = fwrite(instrs, sizeof(word), expected, fp);
+
+    // fclose flushes the buffered data, so its result is part of the outcome
+    // of the write. It is evaluated first so the file is always closed.
+    if (fclose(fp) != 0 || written != expected) {
+        fprintf(stderr, "Error: Could not write to file %s\n", outputFile);
+        exit(EXIT_FAILURE);
+    }
+}
+
+/* Counts the non-empty lines of a file, i.e. the lines that readAssemblyFile
+ * stores. Terminates the program if the file cannot be opened.
+ */
+int countLines(char *filename) {
+    FILE *file = fopen(filename, "r");
+    if (file == NULL) {
+        fprintf(stderr, "Error: Could not read file %s\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    int count = 0;
+    // fgetc returns an int so that EOF can be told apart from every valid
+    // character; storing the result in a char breaks the comparison.
+    int c;
+    int prevC = '\n';
+
+    while ((c = fgetc(file)) != EOF) {
+        if (c == '\n' && prevC != '\n') {
+            count++;
+        }
+        prevC = c;
+    }
+
+    // A non-empty last line that does not end in a newline is a line too.
+    if (prevC != '\n') {
+        count++;
+    }
+
+    fclose(file);
+    return count;
+}
+
+/* Reads the non-empty lines of a file into newly allocated strings.
+ * At most lineCount lines are stored; the array has one extra slot so that
+ * the entry after the last stored line is NULL. Terminates the program on
+ * any error.
+ */
+char **readAssemblyFile(char filename[], int lineCount) {
+    FILE *fp = fopen(filename, "r");
+    if (fp == NULL) {
+        fprintf(stderr, "Error: Could not read file %s\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t capacity = lineCount > 0 ? (size_t) lineCount : 0;
+    char **lines = malloc((capacity + 1) * sizeof *lines);
+    if (lines == NULL) {
+        fprintf(stderr, "Error: Could not allocate memory to store the assembly lines");
+        exit(EXIT_FAILURE);
+    }
+
+    char buffer[MAX_ASM_LINE_LENGTH];
+    size_t currentLine = 0; // index of the next free slot in lines
+    int fileLine = 0;       // 1-based line number in the file, for messages
+
+    while (fgets(buffer, MAX_ASM_LINE_LENGTH, fp) != NULL) {
+        fileLine++;
+        size_t length = strlen(buffer);
+
+        // Without a newline the line did not fit into the buffer, unless it
+        // is simply the last line of a file that has no trailing newline.
+        if ((length == 0 || buffer[length - 1] != '\n') && !feof(fp)) {
+            // NOTE: I believe this must mean that this is a malformed line, so throw an error.
+            fprintf(stderr, "Error: Line %d in the file %s is too long\n", fileLine, filename);
+            exit(EXIT_FAILURE);
+        }
+
+        if (length == 0 || *buffer == '\n') {
+            // Skip empty lines.
+            continue;
+        }
+
+        if (currentLine >= capacity) {
+            fprintf(stderr, "Error: The file %s has more than %d non-empty lines\n", filename, lineCount);
+            exit(EXIT_FAILURE);
+        }
+
+        lines[currentLine] = malloc(length + 1);
+        if (lines[currentLine] == NULL) {
+            fprintf(stderr, "Error: Could not allocate memory to store the assembly line");
+            exit(EXIT_FAILURE);
+        }
+
+        strcpy(lines[currentLine], buffer);
+        currentLine++;
+    }
+
+    if (ferror(fp)) {
+        fprintf(stderr, "Error: Could not read file %s", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    lines[currentLine] = NULL;
+
+    fclose(fp);
+    return lines;
+}
diff --git a/src/util/fileio.h b/src/util/fileio.h
index fc36a23..acaf515 100644
--- a/src/util/fileio.h
+++ b/src/util/fileio.h
@@ -7,6 +7,8 @@
 
 #ifndef __FILEIO__
 #define __FILEIO__
+
+#include
 #include
 #include "../global.h"
 
@@ -23,4 +25,34 @@
  */
 byte *fileio_loadBin(const char *filePath, size_t memorySize);
 
+/** @brief Reads an assembly file line by line, storing each non-empty line in
+ * its own string. Empty lines are skipped. The program is terminated if the
+ * file cannot be read, if a line is too long, or if the file has more
+ * non-empty lines than lineCount. The entry after the last line is NULL.
+ *
+ * @param filename The path to the assembly file to read.
+ * @param lineCount The number of non-empty lines in the file.
+ * @return An array of char arrays, each containing a line from the file as
+ * read by fgets (a trailing newline is kept). The array and every line in it
+ * are heap-allocated and have to be freed by the caller.
+ *
+ * @see countLines
+ */
+char **readAssemblyFile(char filename[], int lineCount);
+
+/** @brief Writes an array of instructions, represented as unsigned int, to a
+ * binary file. The number of instructions to write is specified by numInstrs.
+ * The file is created or overwritten. The program is terminated with
+ * EXIT_FAILURE if the file cannot be opened.
+ *
+ * @param instrs The array of instructions to write to the file.
+ * @param outputFile The path to the binary file to write to.
+ * @param numInstrs The number of instructions in the array.
+ */
+void writeBinaryFile(word instrs[], char outputFile[], int numInstrs);
+
+/** @brief Counts the number of lines in a file. Empty lines are not counted.
+ * A line is empty if it consists of the newline character only, so a line
+ * that contains only spaces is counted. The program is terminated with
+ * EXIT_FAILURE if the file cannot be opened.
+ *
+ * @param filename The path to the file to count the lines of.
+ * @return The number of lines in the file.
+ *
+ * @see readAssemblyFile
+ */
+int countLines(char *filename);
+
 #endif