diff --git a/src/a64instruction.h b/src/a64instruction.h
new file mode 100644
index 0000000..463e75a
--- /dev/null
+++ b/src/a64instruction.h
@@ -0,0 +1,32 @@
+#ifndef A64INSTRUCTION_H
+#define A64INSTRUCTION_H
+#include "a64instruction_DPImmediate.h"
+#include "a64instruction_DPRegister.h"
+#include "a64instruction_Branch.h"
+#include "a64instruction_SingleTransfer.h"
+
+// Defines the types of instructions in the subset of the AArch64 Instruction Set implemented.
+// Each type is defined by the format of the instruction's operand(s).
+typedef enum {
+    a64inst_DPIMMEDIATE,
+    a64inst_DPREGISTER,
+    a64inst_SINGLETRANSFER,
+    a64inst_LOADLITERAL,
+    a64inst_BRANCH,
+    a64inst_HALT
+} a64inst_type;
+
+// Structure that holds the type and operand data of an instruction.
+// NOTE(review): `type` presumably selects the active member of `data`; there is no
+// member for a64inst_HALT, and a64inst_LOADLITERAL presumably uses SingleTransferData.
+typedef struct {
+    a64inst_type type;
+    union {
+        a64inst_DPImmediateData DPImmediateData;
+        a64inst_DPRegisterData DPRegisterData;
+        a64inst_BranchData BranchData;
+        a64inst_SingleTransferData SingleTransferData;
+    } data;
+} a64inst_instruction;
+
+#endif
diff --git a/src/a64instruction_Branch.h b/src/a64instruction_Branch.h
new file mode 100644
index 0000000..1681768
--- /dev/null
+++ b/src/a64instruction_Branch.h
@@ -0,0 +1,51 @@
+#ifndef A64INSTRUCTION_BRANCH_H
+#define A64INSTRUCTION_BRANCH_H
+#include <stdint.h>
+#include "a64instruction_global.h"
+#include "global.h"
+
+// Denotes the kind of branch instruction
+typedef enum {
+    a64inst_UNCONDITIONAL = 0,
+    a64inst_REGISTER = 1,
+    a64inst_CONDITIONAL = 2
+} a64inst_BranchType;
+
+// Holds data specific to unconditional branch instructions
+typedef struct {
+    word unconditionalOffset;
+} a64inst_Branch_UnconditionalData;
+
+// Holds data specific to register branch instructions
+typedef struct {
+    a64inst_regSpecifier src;
+} a64inst_Branch_RegisterData;
+
+// Denotes the condition tested by a conditional branch
+typedef enum {
+    EQ = 0,  // Equal
+    NE = 1,  // Not equal
+    GE = 10, // Signed greater than or equal
+    LT = 11, // Signed less than
+    GT = 12, // Signed greater than
+    LE = 13, // Signed less than or equal
+    AL = 14  // Always
+} a64inst_ConditionType; // TODO: consider renaming to a64inst_Branch_ConditionType
+
+// Holds data specific to conditional branch instructions
+typedef struct {
+    a64inst_ConditionType cond;
+    word offset;
+} a64inst_Branch_ConditionalData;
+
+// Holds data for branch instructions
+typedef struct {
+    a64inst_BranchType BranchType;
+    union {
+        a64inst_Branch_UnconditionalData unconditionalData;
+        a64inst_Branch_RegisterData registerData;
+        a64inst_Branch_ConditionalData conditionalData;
+    } processOpData;
+} a64inst_BranchData;
+
+#endif
diff --git a/src/a64instruction_DP.h b/src/a64instruction_DP.h
new file mode 100644
index 0000000..113f589
--- /dev/null
+++ b/src/a64instruction_DP.h
@@ -0,0 +1,12 @@
+#ifndef A64INSTRUCTION_DP_H
+#define A64INSTRUCTION_DP_H
+
+// Denotes the type of arithmetic operations supported by the architecture
+typedef enum {
+    a64inst_ADD = 0,
+    a64inst_ADDS = 1,
+    a64inst_SUB = 2,
+    a64inst_SUBS = 3
+} a64inst_arithmOp;
+
+#endif
diff --git a/src/a64instruction_DPImmediate.h b/src/a64instruction_DPImmediate.h
new file mode 100644
index 0000000..8b5e68c
--- /dev/null
+++ b/src/a64instruction_DPImmediate.h
@@ -0,0 +1,46 @@
+#ifndef A64INSTRUCTION_DPIMMEDIATE_H
+#define A64INSTRUCTION_DPIMMEDIATE_H
+#include <stdbool.h>
+#include "a64instruction_global.h"
+#include "a64instruction_DP.h"
+
+// Denotes the type of data processing operation
+typedef enum {
+    a64inst_DPI_ARITHM,
+    a64inst_DPI_WIDEMOV
+} a64inst_DPIOpType;
+
+// Denotes the type of wide move operations supported by the architecture
+typedef enum {
+    a64inst_MOVN = 0,
+    a64inst_UNDEFINED = 1,
+    a64inst_MOVZ = 2,
+    a64inst_MOVK = 3
+} a64inst_wideMovOp;
+
+// Holds data specific to arithmetic immediate data processing instructions
+typedef struct {
+    bool shiftImmediate;
+    uint16_t immediate;
+    a64inst_regSpecifier src;
+} a64inst_DPImmediate_ArithmData;
+
+// Holds data specific to wide move immediate data processing instructions
+typedef struct {
+    uint8_t shiftScalar;
+    uint16_t immediate;
+} a64inst_DPImmediate_WideMovData;
+
+// Holds data for immediate data processing instructions
+typedef struct {
+    a64inst_regType regType;
+    a64inst_DPIOpType DPIOpType;
+    unsigned int processOp; // presumably an a64inst_arithmOp or a64inst_wideMovOp value, per DPIOpType -- TODO confirm
+    union {
+        a64inst_DPImmediate_ArithmData arithmData;
+        a64inst_DPImmediate_WideMovData wideMovData;
+    } processOpData;
+    a64inst_regSpecifier dest;
+} a64inst_DPImmediateData;
+
+#endif
diff --git a/src/a64instruction_DPRegister.h b/src/a64instruction_DPRegister.h
new file mode 100644
index 0000000..d0252ee
--- /dev/null
+++ b/src/a64instruction_DPRegister.h
@@ -0,0 +1,62 @@
+#ifndef A64INSTRUCTION_DPREGISTER_H
+#define A64INSTRUCTION_DPREGISTER_H
+#include <stdbool.h>
+#include "a64instruction_global.h"
+#include "a64instruction_DP.h"
+
+// Denotes the type of data processing operation
+typedef enum {
+    a64inst_DPR_ARITHMLOGIC,
+    a64inst_DPR_MULTIPLY
+} a64inst_DPROpType;
+
+// Denotes the logical operations supported by the architecture
+typedef enum {
+    a64inst_AND = 0,
+    a64inst_OR = 1,
+    a64inst_XOR = 2,
+    a64inst_AND_FLAGGED = 3
+} a64inst_logicOp;
+
+// Denotes the different kinds of shifts supported by the architecture
+typedef enum {
+    a64inst_LSL = 0,
+    a64inst_LSR = 1,
+    a64inst_ASR = 2,
+    a64inst_ROR = 3
+} a64inst_ShiftType;
+
+// Holds data specific to arithmetic/logic register data processing instructions
+typedef struct {
+    enum {
+        a64inst_DPR_ARITHM = 0,
+        a64inst_DPR_LOGIC = 1
+    } type;
+    a64inst_ShiftType shiftType;
+    bool negShiftedSrc2; // Guaranteed to be 0 for arithmetic instructions
+} a64inst_DPRegister_ArithmLogicData;
+
+// Holds data specific to multiply register data processing instructions
+typedef struct {
+    bool negProd;
+    a64inst_regSpecifier summand;
+} a64inst_DPRegister_MultiplyData;
+
+// Holds data for register data processing instructions
+typedef struct {
+    a64inst_regType regType;
+    a64inst_DPROpType DPROpType;
+    union {
+        a64inst_logicOp logicOp;
+        a64inst_arithmOp arithmOp;
+    } processOpId;
+    a64inst_regSpecifier src2;
+    union {
+        a64inst_DPRegister_ArithmLogicData arithmLogicData;
+        a64inst_DPRegister_MultiplyData multiplydata;
+    } processOpData;
+    a64inst_regSpecifier src1;
+    a64inst_regSpecifier dest;
+} a64inst_DPRegisterData;
+
+#endif
diff --git a/src/a64instruction_SingleTransfer.h b/src/a64instruction_SingleTransfer.h
new file mode 100644
index 0000000..f661116
--- /dev/null
+++ b/src/a64instruction_SingleTransfer.h
@@ -0,0 +1,47 @@
+#include <stdint.h>
+#include "a64instruction_global.h"
+#include "global.h"
+
+// Denotes the kind of single transfer instruction
+typedef enum {
+    a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER = 1,
+    a64inst_SINGLE_TRANSFER_LOAD_LITERAL = 0
+} a64inst_SingleTransferType;
+
+// Denotes the direction of a single data transfer
+typedef enum { a64inst_STORE, a64inst_LOAD } a64inst_TransferType;
+
+// Denotes the addressing modes available to single data transfers
+typedef enum {
+    a64inst_REGISTER_OFFSET = 2,
+    a64inst_PRE_INDEXED = 1,
+    a64inst_POST_INDEXED = 0,
+    a64inst_UNSIGNED_OFFSET = 3
+} a64inst_AddressingMode;
+
+// Holds data specific to single data transfer instructions
+typedef struct {
+    a64inst_TransferType transferType;
+    a64inst_AddressingMode addressingMode;
+
+    union {
+        a64inst_regSpecifier offsetReg;
+        uint16_t indexedOffset; // NOTE(review): A64 pre/post-index offsets (simm9) are signed -- confirm sign handling
+        uint16_t unsignedOffset;
+    } a64inst_addressingModeData;
+    a64inst_regSpecifier base;
+} a64inst_SingleDataTransferData;
+
+// Holds data specific to load literal instructions
+typedef struct { uint32_t offset; } a64inst_LoadLiteralData;
+
+// Holds data for single transfer instructions
+typedef struct {
+    a64inst_SingleTransferType SingleTransferOpType;
+    a64inst_regType regType;
+    a64inst_regSpecifier target;
+    union {
+        a64inst_SingleDataTransferData singleDataTransferData;
+        a64inst_LoadLiteralData loadLiteralData;
+    } processOpData;
+} a64inst_SingleTransferData;
diff --git a/src/a64instruction_global.h b/src/a64instruction_global.h
new file mode 100644
index 0000000..489fe06
--- /dev/null
+++ b/src/a64instruction_global.h
@@ -0,0 +1,14 @@
+#ifndef A64INSTRUCTION_GLOBAL_H
+#define A64INSTRUCTION_GLOBAL_H
+#include <stdint.h>
+
+// Specifies the register being referred to
+typedef uint8_t a64inst_regSpecifier;
+
+// Denotes the type of register being referred to
+typedef enum {
+    a64inst_W = 0,
+    a64inst_R = 1
+} a64inst_regType;
+
+#endif
diff --git a/src/fileio.c b/src/fileio.c
new file mode 100644
index 0000000..45b25e8
--- /dev/null
+++ b/src/fileio.c
@@ -0,0 +1,81 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "global.h" // word
+
+//fallback line buffer size, used only when no included header defines it
+//NOTE(review): 256 is a placeholder -- confirm the project-wide value
+#ifndef MAX_ASM_LINE_LENGTH
+#define MAX_ASM_LINE_LENGTH 256
+#endif
+
+//validates inputted charlist as valid filename against expected extension
+//expectedExtension may be given with or without its leading '.'
+//returns 1 if the text after the last '.' in filename equals it, 0 otherwise
+int isValidFileFormat(char filename[], char expectedExtension[]){
+    char *pointLoc = strrchr(filename, '.');
+
+    if(pointLoc == NULL){
+        return(0);
+    }
+    if(expectedExtension[0] == '.'){
+        expectedExtension++;
+    }
+    //pointLoc addresses the '.', so the extension starts one character later
+    return(strcmp(pointLoc + 1, expectedExtension) == 0);
+}
+
+//writes a list of words (list of binary instructions) to a named output file
+//returns 0 on success; exits with EXIT_FAILURE if outputFile is not a .bin
+//name or the file cannot be opened, written or closed
+int writeBinaryFile(word instrs[], char outputFile[]){
+    if (!isValidFileFormat(outputFile, "bin")){
+        exit(EXIT_FAILURE);
+    }
+
+    FILE *fp = fopen(outputFile, "wb");
+    if(fp == NULL){
+        exit(EXIT_FAILURE);
+    }
+
+    //FIXME: instrs is a pointer here, so sizeof(instrs) is the size of a pointer,
+    //not the number of instructions; the element count has to be supplied by the
+    //caller (signature change) before the whole list can be written correctly
+    size_t count = sizeof(instrs);
+    if(fwrite(instrs, sizeof(word), count, fp) != count){
+        fclose(fp);
+        exit(EXIT_FAILURE);
+    }
+    if(fclose(fp) != 0){
+        exit(EXIT_FAILURE);
+    }
+
+    return(0);
+}
+
+//reads assembly file of "inputFile" name line by line
+//(handing each line to the parser is not implemented yet)
+//returns 0 on success; exits with EXIT_FAILURE if inputFile is not a .s
+//name or the file cannot be opened
+//TODO: allocate whole file in memory, line-by-line
+int readAssemblyFile(char inputFile[]) {
+    if (!isValidFileFormat(inputFile, "s")){
+        exit(EXIT_FAILURE);
+    }
+
+    FILE *fp = fopen(inputFile, "r");
+    if(fp == NULL){
+        exit(EXIT_FAILURE);
+    }
+
+    //fgets stores at most size-1 characters plus the terminator, so the
+    //full buffer size is passed
+    char savedLine[MAX_ASM_LINE_LENGTH];
+    while (fgets(savedLine, sizeof(savedLine), fp) != NULL) {
+        // removes newline char before saving them
+        savedLine[strcspn(savedLine, "\n")] = '\0';
+    }
+
+    fclose(fp);
+    return(0);
+}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..9f5f526
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,85 @@
+#include <stdlib.h>
+#include <string.h>
+#include "parser.h"
+
+#include "a64instruction.h"
+
+//takes input string, read from asm file and returns
+//input as an a64 instruction
+
+//TODO:
+// - use string matching to get opcode, and operands (DONE)
+// - check operand count (DONE)
+// - match opcode to a64 struct types
+// - count operands and match type/values
+// - generate final a64inst and return
+
+//splits str into operands at the characters of OPERAND_DELIMITER
+//str is left untouched: a heap copy is tokenised instead
+//operandCount receives the number of operands stored (0 when str is NULL)
+//operands receives the operands followed by a NULL entry, so it needs room
+//for MAX_OPERAND_COUNT + 1 pointers; operands beyond that limit are dropped
+//NOTE: the stored operands point into the heap copy, which is not freed here
+void tokeniseOperands(char* str, int *operandCount, char *operands[]){
+    *operandCount = 0;
+    operands[0] = NULL;
+    if (str == NULL){
+        return;
+    }
+
+    char *operandsDupe = strdup(str);
+    if (operandsDupe == NULL){
+        exit(EXIT_FAILURE);
+    }
+
+    char *operand = strtok(operandsDupe, OPERAND_DELIMITER);
+    while (operand != NULL && *operandCount < MAX_OPERAND_COUNT){
+        operands[*operandCount] = operand;
+        (*operandCount)++;
+        operand = strtok(NULL, OPERAND_DELIMITER);
+    }
+    operands[*operandCount] = NULL;
+
+    if (*operandCount == 0){
+        //nothing points into the copy, so it can be released
+        free(operandsDupe);
+    }
+}
+
+//takes inputted assembly line and returns a
+//pointer to an abstract representation of the instruction
+//the instruction is heap allocated (zero-initialised) and owned by the caller
+//exits with EXIT_FAILURE when memory cannot be allocated
+a64inst_instruction *parser(char asmLine[]){
+    a64inst_instruction *instr = calloc(1, sizeof(*instr));
+    if (instr == NULL){
+        exit(EXIT_FAILURE);
+    }
+
+    //"opcode operand1, {operand2}, ..."
+    //duplicated as strtok modifies the input string
+    char *stringptr = strdup(asmLine);
+    if (stringptr == NULL){
+        exit(EXIT_FAILURE);
+    }
+
+    char *opcode = strtok(stringptr, " ");
+    char *operands = strtok(NULL, "");
+
+    if(opcode == NULL){
+        //blank line: there is nothing to classify
+    } else if(opcode[0]=='.'){
+        //type is directive
+    } else if(opcode[strlen(opcode)-1]==':') {
+        //type is label
+    } else {
+        //type is instruction
+        int operandCount = 0;
+        char *operandList[MAX_OPERAND_COUNT + 1];
+        tokeniseOperands(operands, &operandCount, operandList);
+
+    }
+
+    free(stringptr);
+    return(instr);
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..5542aca
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,11 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+// Characters that separate operands. strtok treats each character of this
+// string as a delimiter, so ',' and ' ' both end an operand.
+#define OPERAND_DELIMITER ", "
+
+// Maximum number of operands tokeniseOperands stores for one instruction
+#define MAX_OPERAND_COUNT 4
+
+#endif