Restructure overall assembler. Add string_util and Docs
This commit is contained in:
parent
3501ac93aa
commit
6de1915dbe
@ -9,7 +9,7 @@ CFLAGS ?= -std=c17 -g\
|
|||||||
|
|
||||||
all: assemble
|
all: assemble
|
||||||
|
|
||||||
assemble: assemble.o parser.o fileio.o
|
assemble: assemble.o parser.o fileio.o tokeniser.o string_util.o
|
||||||
emulate: emulate.o
|
emulate: emulate.o
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
|||||||
@ -1,3 +1,9 @@
|
|||||||
|
/** @file assemble.c
|
||||||
|
* @brief The main file for the ARMv8 assembler. Reads an assembly file and outputs the binary file.
|
||||||
|
*
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "a64instruction/a64instruction.h"
|
#include "a64instruction/a64instruction.h"
|
||||||
@ -31,11 +37,13 @@ int main(int argc, char **argv) {
|
|||||||
// Write the binary to the output file
|
// Write the binary to the output file
|
||||||
writeBinaryFile(binary, argv[2], lineCount);
|
writeBinaryFile(binary, argv[2], lineCount);
|
||||||
|
|
||||||
/* TODO: FREE MEMORY!! */
|
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** The first pass of the assembler. Creates the symbol table. Adds all labels
|
||||||
|
* and the address of the instruction following the label to the symbol table.
|
||||||
|
* Returns the final symbol table.
|
||||||
|
*/
|
||||||
static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount) {
|
static symbol_table *firstPass(a64inst_instruction *instructions, int lineCount) {
|
||||||
symbol_table *table = st_init();
|
symbol_table *table = st_init();
|
||||||
int labelCount = 0;
|
int labelCount = 0;
|
||||||
|
|||||||
21
src/encode.c
21
src/encode.c
@ -1,3 +1,12 @@
|
|||||||
|
/** @file encode.c
|
||||||
|
* @brief A function to encode the internal representation of ARMv8
|
||||||
|
* instructions, a64inst_instruction, into binary.
|
||||||
|
*
|
||||||
|
* @author Ethan Dias Alberto
|
||||||
|
* @author George Niedringhaus
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "global.h"
|
#include "global.h"
|
||||||
#include "a64instruction/a64instruction.h"
|
#include "a64instruction/a64instruction.h"
|
||||||
@ -53,7 +62,7 @@ static int getLabelOffset(symbol_table* table, char* label, int currentIndex, in
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generates assembled code based on the two-pass assembly method
|
// Generates assembled code based on the two-pass assembly method
|
||||||
word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) {
|
static word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) {
|
||||||
word wrd = 0;
|
word wrd = 0;
|
||||||
|
|
||||||
switch (instr->data.BranchData.BranchType) {
|
switch (instr->data.BranchData.BranchType) {
|
||||||
@ -77,7 +86,7 @@ word encodeBranch(a64inst_instruction *instr, int index, symbol_table *st) {
|
|||||||
return wrd;
|
return wrd;
|
||||||
}
|
}
|
||||||
|
|
||||||
word encodeDPImmediate(a64inst_instruction inst) {
|
static word encodeDPImmediate(a64inst_instruction inst) {
|
||||||
word wrd = 0;
|
word wrd = 0;
|
||||||
|
|
||||||
a64inst_DPImmediateData data = inst.data.DPImmediateData;
|
a64inst_DPImmediateData data = inst.data.DPImmediateData;
|
||||||
@ -104,7 +113,7 @@ word encodeDPImmediate(a64inst_instruction inst) {
|
|||||||
return wrd;
|
return wrd;
|
||||||
}
|
}
|
||||||
|
|
||||||
word encodeDPRegister(a64inst_instruction inst) {
|
static word encodeDPRegister(a64inst_instruction inst) {
|
||||||
word wrd = 0;
|
word wrd = 0;
|
||||||
|
|
||||||
a64inst_DPRegisterData data = inst.data.DPRegisterData;
|
a64inst_DPRegisterData data = inst.data.DPRegisterData;
|
||||||
@ -139,7 +148,7 @@ word encodeDPRegister(a64inst_instruction inst) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
word encodeSingleDataTransfer(a64inst_instruction inst) {
|
static word encodeSingleDataTransfer(a64inst_instruction inst) {
|
||||||
word wrd = 0;
|
word wrd = 0;
|
||||||
|
|
||||||
a64inst_SingleTransferData data = inst.data.SingleTransferData;
|
a64inst_SingleTransferData data = inst.data.SingleTransferData;
|
||||||
@ -175,7 +184,7 @@ word encodeSingleDataTransfer(a64inst_instruction inst) {
|
|||||||
return wrd;
|
return wrd;
|
||||||
}
|
}
|
||||||
|
|
||||||
word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) {
|
static word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) {
|
||||||
word wrd = 0;
|
word wrd = 0;
|
||||||
|
|
||||||
a64inst_SingleTransferData data = cI.data.SingleTransferData;
|
a64inst_SingleTransferData data = cI.data.SingleTransferData;
|
||||||
@ -189,7 +198,7 @@ word encodeLoadLiteral(a64inst_instruction cI, int arrIndex, symbol_table *st) {
|
|||||||
return wrd;
|
return wrd;
|
||||||
}
|
}
|
||||||
|
|
||||||
word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) {
|
static word *encode(a64inst_instruction insts[], int instCount, symbol_table* st) {
|
||||||
word *arr = (word*)malloc(sizeof(word) * instCount);
|
word *arr = (word*)malloc(sizeof(word) * instCount);
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (int i = 0; i < instCount; i++) {
|
for (int i = 0; i < instCount; i++) {
|
||||||
|
|||||||
445
src/parser.c
445
src/parser.c
@ -1,24 +1,50 @@
|
|||||||
|
/** @file parser.c
|
||||||
|
* @brief Functions to parse ARMv8 assembly lines into an array of a special
|
||||||
|
* internal representation of instructions, a64inst_instruction.
|
||||||
|
* @author Ethan Dias Alberto
|
||||||
|
* @author George Niedringhaus
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
#include "a64instruction/a64instruction.h"
|
#include "a64instruction/a64instruction.h"
|
||||||
#include "global.h"
|
#include "global.h"
|
||||||
#include "tokeniser.c"
|
#include "tokeniser.h"
|
||||||
|
#include "string_util.h"
|
||||||
|
|
||||||
/** Prototypes */
|
/************************************
|
||||||
void parse_instruction(char asmLine[], a64inst_instruction *instr);
|
* STRUCTS
|
||||||
static char *duplicateString(char *str);
|
************************************/
|
||||||
void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands);
|
|
||||||
void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]);
|
typedef struct {
|
||||||
void calculateAddressFormat(a64inst_instruction *instr, char *operandList[], int numOperands);
|
int type;
|
||||||
void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount);
|
int immediate;
|
||||||
void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount);
|
} ShiftData;
|
||||||
void parseDirective(a64inst_instruction *inst, char *tokens[]);
|
|
||||||
|
/************************************
|
||||||
|
* PROTOTYPES
|
||||||
|
************************************/
|
||||||
|
|
||||||
|
static void parse_instruction(char asmLine[], a64inst_instruction *instr);
|
||||||
|
static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands);
|
||||||
|
static void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]);
|
||||||
|
static void parseAddressingMode(a64inst_instruction *instr, char *operandList[], int numOperands);
|
||||||
|
static void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount);
|
||||||
|
static void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount);
|
||||||
|
static void parseDirective(a64inst_instruction *inst, char *tokens[]);
|
||||||
|
static ShiftData *parseShift(char *shift);
|
||||||
|
static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount);
|
||||||
|
|
||||||
|
/************************************
|
||||||
|
* CONSTANTS
|
||||||
|
************************************/
|
||||||
|
|
||||||
/** Constants */
|
|
||||||
static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"};
|
static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"};
|
||||||
static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"};
|
static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"};
|
||||||
static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"};
|
static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"};
|
||||||
@ -26,9 +52,11 @@ static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"};
|
|||||||
static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"};
|
static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"};
|
||||||
static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"};
|
static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"};
|
||||||
static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"};
|
static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"};
|
||||||
static const char *ZERO_REGISTER_ALIAS[] = {"xzr", "wzr"};
|
|
||||||
static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"};
|
|
||||||
static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"};
|
/************************************
|
||||||
|
* FUNCTIONS
|
||||||
|
************************************/
|
||||||
|
|
||||||
a64inst_instruction *parse(char **asmLines, int lineCount) {
|
a64inst_instruction *parse(char **asmLines, int lineCount) {
|
||||||
a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount);
|
a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount);
|
||||||
@ -42,176 +70,29 @@ a64inst_instruction *parse(char **asmLines, int lineCount) {
|
|||||||
return instructions;
|
return instructions;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *duplicateString(char *str) {
|
/** Parses a single ARMv8 assembly line into an a64inst_instruction.
|
||||||
char *newStr = malloc(strlen(str) + 1);
|
*/
|
||||||
strcpy(newStr, str);
|
static void parse_instruction(char asmLine[], a64inst_instruction *instr) {
|
||||||
return newStr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Reports whether str compares equal (strcmp) to any of the first arrSize
 * entries of arr.
 *
 * @param str     NUL-terminated string to look for
 * @param arr     array of NUL-terminated candidate strings
 * @param arrSize number of entries of arr to examine
 * @return true on the first match, false when no entry matches
 */
static bool isStringIn(char *str, const char *arr[], int arrSize) {
    const char **candidate = arr;
    int remaining = arrSize;

    while (remaining-- > 0) {
        if (strcmp(str, *candidate++) == 0) {
            return true;
        }
    }
    return false;
}
|
|
||||||
|
|
||||||
/** Finds str among the first arrSize entries of arr.
 *
 * The array is scanned from the back, so when str occurs more than once the
 * index of its last occurrence is returned.
 *
 * @return the matching index, or -1 when str is not present
 */
static int indexStringIn(char *str, const char *arr[], int arrSize) {
    int idx = arrSize;

    while (idx-- > 0) {
        if (!strcmp(str, arr[idx])) {
            return idx;
        }
    }
    return -1;
}
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int type;
|
|
||||||
int immediate;
|
|
||||||
} ShiftData;
|
|
||||||
|
|
||||||
static ShiftData *parseShift(char *shift) {
|
|
||||||
char buffer[100];
|
|
||||||
strcpy(buffer, shift);
|
|
||||||
char *shiftType = strtok(buffer, " ");
|
|
||||||
char *shiftAmount = strtok(NULL, " ");
|
|
||||||
ShiftData *data = malloc(sizeof(ShiftData));
|
|
||||||
data->type = indexStringIn(shiftType, SHIFT_TYPE_OPCODES, 4);
|
|
||||||
while (*shiftAmount == ' ' || *shiftAmount == '#') {
|
|
||||||
shiftAmount++;
|
|
||||||
}
|
|
||||||
data->immediate = atoi(shiftAmount);
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Tells whether an operand's first character marks it as a register.
 *
 * @param regStartChar first character of the operand
 * @return 1 for 'x' or 'w', 0 for anything else
 */
int isOperandRegister(char regStartChar) {
    switch (regStartChar) {
    case 'x':
    case 'w':
        return 1;
    default:
        return 0;
    }
}
|
|
||||||
|
|
||||||
/** Decides whether a data-processing instruction uses register operands only.
 *
 * Looks at the first character of operandList[1], [2] and [3] in that order
 * and stops at the first one that is neither 'x' nor 'w'.
 *
 * NOTE(review): the entries are read without any length information, so the
 * caller presumably guarantees they exist up to the first non-register
 * operand - confirm against the tokeniser.
 *
 * @param operandList tokens of the instruction, opcode at index 0
 * @return 1 when all three operands start with 'x' or 'w', otherwise 0
 */
int classifyDPInst(char *operandList[]){
    for (int pos = 1; pos <= 3; pos++) {
        const char lead = operandList[pos][0];
        if (lead != 'x' && lead != 'w') {
            return 0;
        }
    }
    return 1;
}
|
|
||||||
|
|
||||||
/** Rewrites an alias instruction in place into the instruction it stands for.
 *
 * When opcode is one of ALIAS_OPCODES, tokens[0] is replaced by the matching
 * entry of ALIAS_TARGET_OPCODES and a zero-register operand ("x31" or "w31",
 * the width letter being copied from the first operand) is inserted into the
 * operand list, which grows *tokensCount by one. Any other opcode leaves the
 * tokens untouched.
 *
 * @param opcode      opcode as written in the assembly line
 * @param instr       unused
 * @param tokens      tokens of the line, opcode at index 0; must have room for
 *                    at least five entries
 * @param tokensCount number of valid entries in tokens; incremented when an
 *                    alias is expanded
 */
void classifyAlias(char *opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) {
    (void)instr;

    // Derive the table length from the table itself so the search can never
    // run past its end.
    const int aliasCount = (int)(sizeof ALIAS_OPCODES / sizeof ALIAS_OPCODES[0]);
    const int aliasIndex = indexStringIn(opcode, ALIAS_OPCODES, aliasCount);
    if (aliasIndex == -1) {
        return;
    }

    // Position at which the zero register has to be inserted.
    int insertAt;
    switch (aliasIndex) {
        case 0: // cmp  -> subs rzr, rn, <op2>
        case 1: // cmn  -> adds rzr, rn, <op2>
        case 4: // tst  -> ands rzr, rn, <op2>
            insertAt = 1;
            break;

        case 2: // neg  -> sub  rd, rzr, <op2>
        case 3: // negs -> subs rd, rzr, <op2>
        case 5: // mvn  -> orn  rd, rzr, <op2>
        case 6: // mov  -> orr  rd, rzr, rm
            insertAt = 2;
            break;

        default:
            return;
    }

    if (*tokensCount < 2) {
        // No operand to take the register width from.
        fprintf(stderr, "Error: alias '%s' is missing its operands\n", opcode);
        return;
    }

    // The zero register is register 31 of the same width (x or w) as the
    // first operand.
    const char *firstOperand = tokens[1];
    while (isspace((unsigned char)*firstOperand)) {
        firstOperand++;
    }

    char *zeroReg = malloc(4); // width letter + "31" + NUL
    if (zeroReg == NULL) {
        fprintf(stderr, "Error: out of memory while expanding alias '%s'\n", opcode);
        exit(EXIT_FAILURE);
    }
    snprintf(zeroReg, 4, "%c31", *firstOperand);

    // Shift the existing operands up by one slot. Only slots that actually
    // hold a token are copied, and nothing above index 4 is written.
    const int lastSlot = *tokensCount < 4 ? *tokensCount : 4;
    for (int i = lastSlot; i > insertAt; i--) {
        tokens[i] = tokens[i - 1];
    }

    tokens[0] = ALIAS_TARGET_OPCODES[aliasIndex];
    // NOTE(review): zeroReg is now owned by the tokens array; nothing visible
    // here frees it.
    tokens[insertAt] = zeroReg;
    (*tokensCount)++;
}
|
|
||||||
|
|
||||||
/** Classifies an instruction by its opcode and operands.
 *
 * Aliases are expanded first (which may rewrite tokens and *tokensCount), then
 * instr->type is set, together with the branch type or the single-transfer
 * fields that can be decided from the opcode alone.
 *
 * @param opcode      opcode as written in the assembly line
 * @param instr       instruction record to fill in
 * @param tokens      tokens of the line, opcode at index 0
 * @param tokensCount number of valid entries in tokens
 */
void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount){

    classifyAlias(opcode, instr, tokens, tokensCount);

    if (isStringIn(opcode, BRANCH_OPCODES, 9)) {
        instr->type = a64inst_BRANCH;

        if (strcmp(opcode, "br") == 0) {
            instr->data.BranchData.BranchType = a64inst_REGISTER;
        } else if (strcmp(opcode, "b") == 0) {
            instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL;
        } else {
            instr->data.BranchData.BranchType = a64inst_CONDITIONAL;
        }

    } else if (isStringIn(opcode, SINGLE_TRANSFER_OPCODES, 2)) {
        instr->type = a64inst_SINGLETRANSFER;
        // NOTE(review): tokens[2] is read without consulting *tokensCount, so
        // an ldr/str line with a missing address operand is not handled here.
        if (*tokens[2] == '[') {
            instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
            instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0;
        } else {
            instr->type = a64inst_LOADLITERAL;
        }

    // classifyDPInst inspects tokens[1] to tokens[3], so it is only consulted
    // when the line really has three operands.
    } else if (*tokensCount >= 4 && classifyDPInst(tokens)) {
        instr->type = a64inst_DPREGISTER;
    } else {
        instr->type = a64inst_DPIMMEDIATE;
    }

}
|
|
||||||
|
|
||||||
//takes inputted char array and returns the integer of the operand, skipping the first character
|
|
||||||
//e.g. for a passed "R32", it skips the 'R' and returns 32
|
|
||||||
int getOperandNumber(char *operand){
|
|
||||||
if (isStringIn(operand, ZERO_REGISTER_ALIAS, 2)) {
|
|
||||||
return ZERO_REGISTER;
|
|
||||||
}
|
|
||||||
|
|
||||||
char operandCpy[strlen(operand)];
|
|
||||||
strcpy(operandCpy, operand+1);
|
|
||||||
char **endptr = NULL;
|
|
||||||
int number;
|
|
||||||
if(strncmp(operandCpy, "0x", 2)==0){
|
|
||||||
//hex value
|
|
||||||
strcpy(operandCpy, operand+3);
|
|
||||||
number = strtol(operandCpy, endptr, 16);
|
|
||||||
} else if(operandCpy[0] == 'x'){
|
|
||||||
number = strtol(operandCpy+1, endptr, 16);
|
|
||||||
} else {
|
|
||||||
number = strtol(operandCpy, endptr, 10);
|
|
||||||
}
|
|
||||||
return number;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void parse_instruction(char asmLine[], a64inst_instruction *instr) {
|
|
||||||
if (instr == NULL){
|
if (instr == NULL){
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(strcmp(asmLine, HALT_ASM_CMD) == 0){
|
|
||||||
instr->type = a64inst_HALT;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
char *asmLineCopy = duplicateString(asmLine);
|
char *asmLineCopy = duplicateString(asmLine);
|
||||||
int tokensCount = 0;
|
int tokensCount = 0;
|
||||||
char **tokens = tokenise(asmLineCopy, &tokensCount);
|
char **tokens = tokenise(asmLineCopy, &tokensCount);
|
||||||
char *opcode = tokens[0];
|
char *opcode = tokens[0];
|
||||||
|
|
||||||
|
// Check if the instruction is the halt instruction, "and x0, x0, x0".
|
||||||
|
if (tokensCount == 4 && strcmp(opcode, "and") == 0
|
||||||
|
&& getRegister(tokens[1]) == 0
|
||||||
|
&& getRegister(tokens[2]) == 0
|
||||||
|
&& getRegister(tokens[3]) == 0) {
|
||||||
|
|
||||||
|
instr->type = a64inst_HALT;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if(strcmp(opcode, ".int") == 0){
|
if(strcmp(opcode, ".int") == 0){
|
||||||
// Directive
|
// Directive
|
||||||
instr->type = a64inst_DIRECTIVE;
|
instr->type = a64inst_DIRECTIVE;
|
||||||
@ -226,6 +107,8 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) {
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Instruction
|
// Instruction
|
||||||
|
|
||||||
|
// Classify the opcode into the correct instruction type.
|
||||||
classifyOpcode(opcode, instr, tokens, &tokensCount);
|
classifyOpcode(opcode, instr, tokens, &tokensCount);
|
||||||
|
|
||||||
switch(instr->type){
|
switch(instr->type){
|
||||||
@ -235,74 +118,32 @@ void parse_instruction(char asmLine[], a64inst_instruction *instr) {
|
|||||||
|
|
||||||
case a64inst_SINGLETRANSFER:
|
case a64inst_SINGLETRANSFER:
|
||||||
parseSingleTransfer(instr, opcode, tokens, tokensCount);
|
parseSingleTransfer(instr, opcode, tokens, tokensCount);
|
||||||
calculateAddressFormat(instr, tokens, tokensCount);
|
parseAddressingMode(instr, tokens, tokensCount);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case a64inst_LOADLITERAL:
|
case a64inst_LOADLITERAL:
|
||||||
parseSingleTransfer(instr, opcode, tokens, tokensCount);
|
parseSingleTransfer(instr, opcode, tokens, tokensCount);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case a64inst_DPREGISTER:
|
case a64inst_DPREGISTER:
|
||||||
//generate DP operands;
|
//generate DP operands;
|
||||||
parseDPRegister(instr, tokens, tokensCount);
|
parseDPRegister(instr, tokens, tokensCount);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case a64inst_DPIMMEDIATE:
|
case a64inst_DPIMMEDIATE:
|
||||||
parseDPImmediate(instr, tokens, tokensCount);
|
parseDPImmediate(instr, tokens, tokensCount);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
printf("Error: Invalid Instruction\n");
|
printf("Error: Invalid Instruction, '%s'\n", opcode);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: FREE MEMORY! */
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void parseDirective(a64inst_instruction *instr, char *tokens[]) {
|
||||||
|
|
||||||
|
|
||||||
/** Works out the addressing mode of a single data transfer (not load literal)
 * and stores it, with the base register and offset, in instr.
 *
 * The mode is chosen in this order:
 *  - a fourth token is present       -> post-indexed, offset from tokens[3]
 *  - the address ends in '!'         -> pre-indexed, offset from operand 1
 *  - one operand, or a non-register
 *    second operand                  -> unsigned offset (stored as offset / 8)
 *  - two register operands           -> register offset
 *
 * @param instr      instruction record to fill in
 * @param tokens     tokens of the line; tokens[2] is the bracketed address
 * @param tokenCount number of valid entries in tokens
 */
void calculateAddressFormat(a64inst_instruction *instr, char *tokens[], int tokenCount) {
    assert(*tokens[2] == '[');

    // The trailing '!' has to be looked at before the address is handed to
    // tokeniseOperands, in case that call modifies the string. The assert
    // above guarantees the string is not empty.
    const size_t addressLength = strlen(tokens[2]);
    const bool endsWithBang = tokens[2][addressLength - 1] == '!';

    int operandCount = 0;
    char **operands = tokeniseOperands(tokens[2], &operandCount);

    int baseRegister = getOperandNumber(operands[0]);

    instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister;

    if (tokenCount >= 4) {
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED;
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(tokens[3]);

    } else if (endsWithBang) {
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED;
        instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getOperandNumber(operands[1]);

    } else if (operandCount == 1 || (!isOperandRegister(*operands[1]))) {
        instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
        if (operandCount > 1) {
            int offset = getOperandNumber(operands[1]);
            // TODO: the divisor is fixed at 8; the immediate still needs to be
            // scaled according to the register type.
            instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8;
        }
    } else {
        if ((isOperandRegister(*operands[0]) == 1)
        && (isOperandRegister(*operands[1]) == 1)) {
            // Both operands are registers: register offset.
            instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET;
            instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getOperandNumber(operands[1]);
        }
    }
}
|
|
||||||
|
|
||||||
/** Reads the register type from an operand's first character.
 *
 * @param operand NUL-terminated register operand
 * @return 1 when the operand starts with 'x', 0 otherwise
 */
static int parseRegisterType(char *operand) {
    if (*operand == 'x') {
        return 1;
    }
    return 0;
}
|
|
||||||
|
|
||||||
void parseDirective(a64inst_instruction *instr, char *tokens[]) {
|
|
||||||
char *intValue = tokens[1];
|
char *intValue = tokens[1];
|
||||||
char *endptr;
|
char *endptr;
|
||||||
if(strncmp(intValue, "0x", 2) == 0) {
|
if(strncmp(intValue, "0x", 2) == 0) {
|
||||||
@ -314,27 +155,28 @@ void parseDirective(a64inst_instruction *instr, char *tokens[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) {
|
static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) {
|
||||||
|
|
||||||
switch(instr->type){
|
switch(instr->type){
|
||||||
case a64inst_SINGLETRANSFER:
|
case a64inst_SINGLETRANSFER:
|
||||||
instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]);
|
instr->data.SingleTransferData.regType = getRegisterType(tokens[1]);
|
||||||
instr->data.SingleTransferData.target = getOperandNumber(tokens[1]);
|
instr->data.SingleTransferData.target = getRegister(tokens[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case a64inst_LOADLITERAL:
|
case a64inst_LOADLITERAL:
|
||||||
instr->data.SingleTransferData.regType = parseRegisterType(tokens[1]);
|
instr->data.SingleTransferData.regType = getRegisterType(tokens[1]);
|
||||||
instr->data.SingleTransferData.target = getOperandNumber(tokens[1]);
|
instr->data.SingleTransferData.target = getRegister(tokens[1]);
|
||||||
|
|
||||||
if(*tokens[2] =='#'){
|
if(*tokens[2] =='#'){
|
||||||
//offset is immediate
|
//offset is immediate
|
||||||
int offset = getOperandNumber(tokens[1]);
|
instr->data.SingleTransferData.processOpData.loadLiteralData.offset = getImmediate(tokens[2]);;
|
||||||
instr->data.SingleTransferData.processOpData.loadLiteralData.offset = offset;
|
|
||||||
} else {
|
} else {
|
||||||
|
//offset is label
|
||||||
instr->data.SingleTransferData.processOpData.loadLiteralData.label = tokens[2];
|
instr->data.SingleTransferData.processOpData.loadLiteralData.label = tokens[2];
|
||||||
//offset is literal, use symbol table and calculate difference
|
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -350,7 +192,7 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[])
|
|||||||
instr->data.BranchData.processOpData.unconditionalData.label = operandList[1];
|
instr->data.BranchData.processOpData.unconditionalData.label = operandList[1];
|
||||||
break;
|
break;
|
||||||
case a64inst_REGISTER:
|
case a64inst_REGISTER:
|
||||||
instr->data.BranchData.processOpData.registerData.src = getOperandNumber(operandList[1]);
|
instr->data.BranchData.processOpData.registerData.src = getRegister(operandList[1]);
|
||||||
break;
|
break;
|
||||||
case a64inst_CONDITIONAL:
|
case a64inst_CONDITIONAL:
|
||||||
{
|
{
|
||||||
@ -381,13 +223,13 @@ void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[])
|
|||||||
|
|
||||||
void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) {
|
void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) {
|
||||||
a64inst_DPImmediateData *data = &inst->data.DPImmediateData;
|
a64inst_DPImmediateData *data = &inst->data.DPImmediateData;
|
||||||
data->dest = getOperandNumber(tokens[1]);
|
data->dest = getRegister(tokens[1]);
|
||||||
data->regType = parseRegisterType(tokens[1]);
|
data->regType = getRegisterType(tokens[1]);
|
||||||
|
|
||||||
if (isStringIn(tokens[0], WIDE_MOV_OPCODES, 4)) {
|
if (containsString(tokens[0], WIDE_MOV_OPCODES, 4)) {
|
||||||
data->DPIOpType = a64inst_DPI_WIDEMOV;
|
data->DPIOpType = a64inst_DPI_WIDEMOV;
|
||||||
data->processOp = indexStringIn(tokens[0], WIDE_MOV_OPCODES, 4);
|
data->processOp = lastIndexOfString(tokens[0], WIDE_MOV_OPCODES, 4);
|
||||||
data->processOpData.wideMovData.immediate = getOperandNumber(tokens[2]);
|
data->processOpData.wideMovData.immediate = getImmediate(tokens[2]);
|
||||||
if (tokensCount >= 4) {
|
if (tokensCount >= 4) {
|
||||||
ShiftData shData = *parseShift(tokens[3]);
|
ShiftData shData = *parseShift(tokens[3]);
|
||||||
data->processOpData.wideMovData.shiftScalar = shData.immediate;
|
data->processOpData.wideMovData.shiftScalar = shData.immediate;
|
||||||
@ -395,9 +237,9 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
data->DPIOpType = a64inst_DPI_ARITHM;
|
data->DPIOpType = a64inst_DPI_ARITHM;
|
||||||
data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4);
|
data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4);
|
||||||
data->processOpData.arithmData.src = getOperandNumber(tokens[2]);
|
data->processOpData.arithmData.src = getRegister(tokens[2]);
|
||||||
data->processOpData.arithmData.immediate = getOperandNumber(tokens[3]);
|
data->processOpData.arithmData.immediate = getImmediate(tokens[3]);
|
||||||
|
|
||||||
if (tokensCount >= 5) {
|
if (tokensCount >= 5) {
|
||||||
ShiftData shData = *parseShift(tokens[4]);
|
ShiftData shData = *parseShift(tokens[4]);
|
||||||
@ -411,16 +253,16 @@ void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount
|
|||||||
|
|
||||||
void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) {
|
void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) {
|
||||||
a64inst_DPRegisterData *data = &inst->data.DPRegisterData;
|
a64inst_DPRegisterData *data = &inst->data.DPRegisterData;
|
||||||
data->dest = getOperandNumber(tokens[1]);
|
data->dest = getRegister(tokens[1]);
|
||||||
data->regType = parseRegisterType(tokens[1]);
|
data->regType = getRegisterType(tokens[1]);
|
||||||
data->src1 = getOperandNumber(tokens[2]);
|
data->src1 = getRegister(tokens[2]);
|
||||||
data->src2 = getOperandNumber(tokens[3]);
|
data->src2 = getRegister(tokens[3]);
|
||||||
|
|
||||||
if (isStringIn(tokens[0], MULTIPLY_OPCODES, 4)) {
|
if (containsString(tokens[0], MULTIPLY_OPCODES, 4)) {
|
||||||
// Multiply
|
// Multiply
|
||||||
data->DPROpType = a64inst_DPR_MULTIPLY;
|
data->DPROpType = a64inst_DPR_MULTIPLY;
|
||||||
if (tokensCount >= 5) {
|
if (tokensCount >= 5) {
|
||||||
data->processOpData.multiplydata.summand = getOperandNumber(tokens[4]);
|
data->processOpData.multiplydata.summand = getRegister(tokens[4]);
|
||||||
data->processOpData.multiplydata.negProd = strcmp(tokens[0], "msub") == 0;
|
data->processOpData.multiplydata.negProd = strcmp(tokens[0], "msub") == 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -432,21 +274,21 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount)
|
|||||||
// Arithmetic/Logic
|
// Arithmetic/Logic
|
||||||
data->DPROpType = a64inst_DPR_ARITHMLOGIC;
|
data->DPROpType = a64inst_DPR_ARITHMLOGIC;
|
||||||
|
|
||||||
if (isStringIn(tokens[0], ARITHMETIC_OPCODES, 4)) {
|
if (containsString(tokens[0], ARITHMETIC_OPCODES, 4)) {
|
||||||
// Arithmetic
|
// Arithmetic
|
||||||
data->processOp = indexStringIn(tokens[0], ARITHMETIC_OPCODES, 4);
|
data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4);
|
||||||
data->processOpData.arithmLogicData.type = 1;
|
data->processOpData.arithmLogicData.type = 1;
|
||||||
if(tokensCount == 5) {
|
if(tokensCount == 5) {
|
||||||
//has a shift
|
//has a shift
|
||||||
int numTokens = 0;
|
int numTokens = 0;
|
||||||
char **shiftOperands = tokenise(tokens[4], &numTokens);
|
char **shiftOperands = tokenise(tokens[4], &numTokens);
|
||||||
data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
|
data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
|
||||||
data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]);
|
data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Logic
|
// Logic
|
||||||
int opcodeCategory = indexStringIn(tokens[0], LOGIC_OPCODES, 8);
|
int opcodeCategory = lastIndexOfString(tokens[0], LOGIC_OPCODES, 8);
|
||||||
switch(opcodeCategory/2){
|
switch(opcodeCategory/2){
|
||||||
case 0:
|
case 0:
|
||||||
//and
|
//and
|
||||||
@ -489,9 +331,102 @@ void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount)
|
|||||||
//has a shift
|
//has a shift
|
||||||
int numTokens = 0;
|
int numTokens = 0;
|
||||||
char **shiftOperands = tokenise(tokens[4], &numTokens);
|
char **shiftOperands = tokenise(tokens[4], &numTokens);
|
||||||
data->processOpData.arithmLogicData.shiftType = indexStringIn(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
|
data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
|
||||||
data->processOpData.arithmLogicData.shiftAmount = getOperandNumber(shiftOperands[1]);
|
data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Classifies the given opcode into the correct instruction type.
|
||||||
|
* Modifies instr to reflect the classification.
|
||||||
|
*/
|
||||||
|
static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) {
|
||||||
|
|
||||||
|
// First, if the opcode is an alias, convert it to the target instruction.
|
||||||
|
translateAlias(opcode, tokens, tokensCount);
|
||||||
|
|
||||||
|
if (containsString(opcode, BRANCH_OPCODES, 9)) {
|
||||||
|
instr->type = a64inst_BRANCH;
|
||||||
|
|
||||||
|
if (strcmp(opcode, "br") == 0) {
|
||||||
|
instr->data.BranchData.BranchType = a64inst_REGISTER;
|
||||||
|
} else if (strcmp(opcode, "b") == 0) {
|
||||||
|
instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL;
|
||||||
|
} else {
|
||||||
|
instr->data.BranchData.BranchType = a64inst_CONDITIONAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if (containsString(opcode, SINGLE_TRANSFER_OPCODES, 2)) {
|
||||||
|
instr->type = a64inst_SINGLETRANSFER;
|
||||||
|
if (*tokens[2] == '[') {
|
||||||
|
instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
instr->type = a64inst_LOADLITERAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// DP Instruction.
|
||||||
|
// DP Register if the third operand is a register.
|
||||||
|
} else if (*tokensCount >= 4 && isRegister(tokens[3])) {
|
||||||
|
instr->type = a64inst_DPREGISTER;
|
||||||
|
} else {
|
||||||
|
instr->type = a64inst_DPIMMEDIATE;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Parses a shift string into a ShiftData struct.
|
||||||
|
*/
|
||||||
|
static ShiftData *parseShift(char *shift) {
|
||||||
|
char buffer[20];
|
||||||
|
strcpy(buffer, shift);
|
||||||
|
|
||||||
|
char *shiftType = strtok(buffer, " ");
|
||||||
|
char *shiftAmount = strtok(NULL, " ");
|
||||||
|
|
||||||
|
ShiftData *data = malloc(sizeof(ShiftData));
|
||||||
|
|
||||||
|
data->type = lastIndexOfString(shiftType, SHIFT_TYPE_OPCODES, 4);
|
||||||
|
|
||||||
|
SKIP_WHITESPACE(shiftAmount);
|
||||||
|
data->immediate = getImmediate(shiftAmount);
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Parses the addressing mode of a single transfer instruction. (Not load literal)
|
||||||
|
*/
|
||||||
|
static void parseAddressingMode(a64inst_instruction *instr, char *tokens[], int tokenCount) {
|
||||||
|
assert(*tokens[2] == '[');
|
||||||
|
|
||||||
|
int operandCount = 0;
|
||||||
|
char *unsplitString = duplicateString(tokens[2]);
|
||||||
|
char **operands = tokeniseOperands(tokens[2], &operandCount);
|
||||||
|
|
||||||
|
int baseRegister = getRegister(operands[0]);
|
||||||
|
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister;
|
||||||
|
|
||||||
|
if (tokenCount >= 4) {
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED;
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(tokens[3]);
|
||||||
|
|
||||||
|
} else if(unsplitString[strlen(unsplitString)-1] == '!') {
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED;
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(operands[1]);
|
||||||
|
|
||||||
|
} else if (operandCount == 1 || (!isRegister(operands[1]))) {
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
|
||||||
|
if(operandCount > 1){
|
||||||
|
int offset = getImmediate(operands[1]);
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if((isRegister(operands[0]) == 1)
|
||||||
|
&& (isRegister(operands[1]) == 1)){
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET;
|
||||||
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getRegister(operands[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
17
src/parser.h
17
src/parser.h
@ -1,6 +1,17 @@
|
|||||||
|
/** @file parser.h
|
||||||
|
* @brief A function to parse ARMv8 assembly lines into an array of a special
|
||||||
|
* internal representation of instructions, a64inst_instruction.
|
||||||
|
*
|
||||||
|
* @author Ethan Dias Alberto
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
#include "a64instruction/a64instruction.h"
|
#include "a64instruction/a64instruction.h"
|
||||||
|
|
||||||
#define OPERAND_DELIMITER ", "
|
/** @brief Parses a list of ARMv8 assembly lines into an array of a64inst_instruction.
|
||||||
#define HALT_ASM_CMD "and x0, x0, x0\n"
|
*
|
||||||
|
* @param asmLines An array of strings, each string is an ARMv8 assembly line.
|
||||||
|
* @param lineCount The number of lines in the asmLines array.
|
||||||
|
* @return An array of a64inst_instruction representing the parsed instructions.
|
||||||
|
*/
|
||||||
a64inst_instruction *parse(char **asmLines, int lineCount);
|
a64inst_instruction *parse(char **asmLines, int lineCount);
|
||||||
|
|||||||
173
src/string_util.c
Normal file
173
src/string_util.c
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
/** @file string_util.c
|
||||||
|
* @brief This file contains the implementation of some string processing
|
||||||
|
* utility functions used in the assembler.
|
||||||
|
*
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "string_util.h"
|
||||||
|
#include "global.h"
|
||||||
|
|
||||||
|
/************************************
|
||||||
|
* CONSTANTS
|
||||||
|
************************************/
|
||||||
|
|
||||||
|
// Register names that isRegister() accepts by exact match.
static const char *SPECIAL_REGISTERS[] = {"sp", "xzr", "wzr"};
|
||||||
|
// Register names that getRegister() maps to ZERO_REGISTER.
static const char *ZERO_REGISTER_ALIAS[] = {"xzr", "wzr"};
|
||||||
|
// Alias mnemonics recognised by translateAlias(). The position of each entry
// is significant: it selects the matching entry in ALIAS_TARGET_OPCODES and
// the case taken in translateAlias()'s switch.
static const char *ALIAS_OPCODES[] = {"cmp", "cmn", "neg", "negs", "tst", "mvn", "mov"};
|
||||||
|
// Target mnemonic for the alias at the same index in ALIAS_OPCODES.
static char *ALIAS_TARGET_OPCODES[] = {"subs", "adds", "sub", "subs", "ands", "orn", "orr"};
|
||||||
|
|
||||||
|
/************************************
|
||||||
|
* FUNCTIONS
|
||||||
|
************************************/
|
||||||
|
|
||||||
|
/** Removes leading and trailing whitespace from a string, in place.
 *
 * Leading whitespace is skipped by advancing the returned pointer; trailing
 * whitespace is cut off by writing a terminator into the input buffer.
 *
 * @param str The string to trim. Must be writable and must not be NULL.
 * @return A pointer into str at its first non-whitespace character, or at
 *         its terminator if the string is empty or all whitespace.
 */
char *trim(char *str) {
    // The <ctype.h> functions require a value representable as unsigned char,
    // so cast before calling them; a negative plain char is undefined.
    while (isspace((unsigned char)*str)) {
        str++;
    }

    // Nothing left: the string was empty or all whitespace.
    if (*str == '\0') {
        return str;
    }

    // Walk back from the last character to the last non-whitespace one.
    char *end = str + strlen(str) - 1;
    while (end > str && isspace((unsigned char)*end)) {
        end--;
    }
    end[1] = '\0';

    return str;
}
|
||||||
|
|
||||||
|
/** Reports whether str is equal (by strcmp) to any of the first arrSize
 * entries of arr.
 */
bool containsString(char *str, const char *arr[], int arrSize) {
    int idx = 0;

    // Advance until a match is found or the entries run out.
    while (idx < arrSize && strcmp(str, arr[idx]) != 0) {
        idx++;
    }

    // Stopping early means a match was found.
    return idx < arrSize;
}
|
||||||
|
|
||||||
|
/** Searches the first arrSize entries of arr from the back and returns the
 * index of the first entry equal (by strcmp) to str, i.e. its last
 * occurrence. Returns -1 when no entry matches.
 */
int lastIndexOfString(char *str, const char *arr[], int arrSize) {
    int pos = arrSize;

    // Count down so the highest matching index wins.
    while (pos-- > 0) {
        if (strcmp(str, arr[pos]) == 0) {
            return pos;
        }
    }

    return -1;
}
|
||||||
|
|
||||||
|
/** Creates a heap-allocated copy of a string.
 *
 * @param str The string to copy.
 * @return A newly allocated copy that the caller must free, or NULL if str is
 *         NULL or memory allocation fails.
 */
char *duplicateString(char *str) {
    if (str == NULL) {
        return NULL;
    }

    char *newStr = malloc(strlen(str) + 1);
    if (newStr == NULL) {
        // Report the failure to the caller instead of writing through NULL.
        return NULL;
    }

    strcpy(newStr, str);
    return newStr;
}
|
||||||
|
|
||||||
|
bool isRegister(char *str) {
|
||||||
|
SKIP_WHITESPACE(str);
|
||||||
|
if (str == NULL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (containsString(str, SPECIAL_REGISTERS, 3))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return tolower(str[0]) == 'x' || tolower(str[0]) == 'w';
|
||||||
|
}
|
||||||
|
|
||||||
|
int getRegister(char *str) {
|
||||||
|
SKIP_WHITESPACE(str);
|
||||||
|
if (containsString(str, ZERO_REGISTER_ALIAS, 2)) {
|
||||||
|
return ZERO_REGISTER;
|
||||||
|
}
|
||||||
|
|
||||||
|
return strtol(str + 1, NULL, 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Parses an immediate operand of the form "#<value>".
 *
 * Leading whitespace is skipped. The value is read as hexadecimal when it
 * starts with "0x" or "0X", and as decimal otherwise.
 *
 * @param str The operand text; may be NULL.
 * @return The parsed value, or 0 if str is NULL, does not start with '#', or
 *         has nothing after the '#'.
 */
int getImmediate(char *str) {
    if (str == NULL) {
        return 0;
    }

    // <ctype.h> functions need a value representable as unsigned char.
    while (isspace((unsigned char)*str)) {
        str++;
    }

    // Need a '#' followed by at least one more character.
    if (str[0] != '#' || str[1] == '\0') {
        return 0;
    }

    str++; // skip #

    // Compare exactly the two prefix characters. A length of 3 would include
    // the terminator of "0X", so an upper-case prefix could never match.
    if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) {
        // Hex
        return (int)strtol(str + 2, NULL, 16);
    }

    // Decimal
    return (int)strtol(str, NULL, 10);
}
|
||||||
|
|
||||||
|
/** Reports whether a register name starts with 'x' or 'X'.
 *
 * Leading whitespace is skipped before the first character is examined.
 * NOTE(review): presumably 1 denotes a 64-bit register and 0 a 32-bit one;
 * confirm against the callers.
 *
 * @param str The register name. Must not be NULL.
 * @return 1 if the first non-whitespace character is 'x' or 'X', 0 otherwise.
 */
int getRegisterType(char *str) {
    // Cast before calling <ctype.h> functions: passing a negative plain char
    // is undefined behaviour.
    while (isspace((unsigned char)*str)) {
        str++;
    }

    return tolower((unsigned char)str[0]) == 'x';
}
|
||||||
|
|
||||||
|
|
||||||
|
/** @brief Translates an alias instruction into its target instruction.
|
||||||
|
* Note: This function modifies the input tokens array and the tokensCount.
|
||||||
|
* Assumes there is enough space in the tokens array to add the new tokens.
|
||||||
|
*
|
||||||
|
* @param opcode The opcode of the instruction.
|
||||||
|
* @param tokens The tokens of the instruction.
|
||||||
|
* @param tokensCount The number of tokens in the instruction.
|
||||||
|
*/
|
||||||
|
void translateAlias(char *opcode, char *tokens[], int *tokensCount) {
|
||||||
|
|
||||||
|
int aliasIndex = lastIndexOfString(opcode, ALIAS_OPCODES, 9);
|
||||||
|
if (aliasIndex == -1)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// The instruction is one of the aliases, convert into the target.
|
||||||
|
char *targetOpcode = ALIAS_TARGET_OPCODES[aliasIndex];
|
||||||
|
|
||||||
|
// To correctly encode the zero register, which is either w31 or x31.
|
||||||
|
char *zeroReg = malloc(5 * sizeof(char));
|
||||||
|
*zeroReg = *tokens[1];
|
||||||
|
strcat(zeroReg, "31");
|
||||||
|
|
||||||
|
switch(aliasIndex) {
|
||||||
|
case 0: // cmp -> subs rzr, rn, <op2>
|
||||||
|
case 1: // cmn -> adds rzr, rn, <op2>
|
||||||
|
case 4: // tst -> ands rzr, rn, <op2>
|
||||||
|
// Convert from [instr] reg, <op2> to [instr] rzr, reg, <op2>
|
||||||
|
tokens[0] = targetOpcode;
|
||||||
|
tokens[4] = tokens[3];
|
||||||
|
tokens[3] = tokens[2];
|
||||||
|
tokens[2] = tokens[1];
|
||||||
|
tokens[1] = zeroReg;
|
||||||
|
(*tokensCount)++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2: // neg -> subs rd, rzr, <op2>
|
||||||
|
case 3: // negs -> subs rd, rzr, <op2>
|
||||||
|
case 5: // mvn -> orn rd, rzr, <op2>
|
||||||
|
case 6: // mov -> orr rd, rzr, rm
|
||||||
|
tokens[0] = targetOpcode;
|
||||||
|
tokens[4] = tokens[3];
|
||||||
|
tokens[3] = tokens[2];
|
||||||
|
tokens[2] = zeroReg;
|
||||||
|
(*tokensCount)++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
// Note, the multiply instructions are handled separately.
|
||||||
|
// See DPReg parsing.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
64
src/string_util.h
Normal file
64
src/string_util.h
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
/** @file string_util.h
|
||||||
|
* @brief This file contains the declarations of some string processing
|
||||||
|
* utility functions used in the assembler.
|
||||||
|
*
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** @brief Advances a character pointer past any leading whitespace.
 * Note: ptr is modified in place and evaluated more than once, so pass a
 * plain modifiable pointer variable with no side effects. It must not be NULL.
 * Requires <ctype.h> to be included by the file that uses the macro.
 * @param ptr A pointer to the string to skip whitespace in.
 */
#define SKIP_WHITESPACE(ptr) do { while (isspace((unsigned char)*(ptr))) { (ptr)++; } } while (0)
|
||||||
|
|
||||||
|
/** @brief Removes leading and trailing whitespace from a string.
|
||||||
|
* Note. This function modifies the input string.
|
||||||
|
* @param str The string to trim.
|
||||||
|
* @return A pointer to the first non-whitespace character in the string.
|
||||||
|
*/
|
||||||
|
char *trim(char *str);
|
||||||
|
|
||||||
|
/** @brief Checks if a string is in an array of strings.
|
||||||
|
*
|
||||||
|
* @param str The string to check.
|
||||||
|
* @param arr The array of strings to check against.
|
||||||
|
* @param arrSize The size of the array.
|
||||||
|
* @return True if the string is in the array, false otherwise.
|
||||||
|
*/
|
||||||
|
bool containsString(char *str, const char *arr[], int arrSize);
|
||||||
|
|
||||||
|
/** @brief Finds the last index of a string in an array of strings.
|
||||||
|
* Note: If multiple occurrences of the string exist, the index of the last
|
||||||
|
* occurrence is returned!
|
||||||
|
*
|
||||||
|
* @param str The string to find.
|
||||||
|
* @param arr The array of strings to search.
|
||||||
|
* @param arrSize The size of the array.
|
||||||
|
* @return The index of the last occurrence of the string in the array, or -1 if not found.
|
||||||
|
*/
|
||||||
|
int lastIndexOfString(char *str, const char *arr[], int arrSize);
|
||||||
|
|
||||||
|
/** @brief Duplicates a string.
|
||||||
|
* Note: The caller is responsible for freeing the returned string.
|
||||||
|
*
|
||||||
|
* @param str The string to duplicate.
|
||||||
|
* @return A pointer to the duplicated string.
|
||||||
|
*/
|
||||||
|
char *duplicateString(char *str);
|
||||||
|
|
||||||
|
/** @brief Checks if a string represents an ARMv8 register.
|
||||||
|
* A string is considered a register if it is:
|
||||||
|
* - A general purpose register (x0-x30 or w0-w30)
|
||||||
|
* - A special register (sp, xzr, wzr)
|
||||||
|
*
|
||||||
|
* @param str The string to check.
|
||||||
|
* @return True if the string is a register, false otherwise.
|
||||||
|
*/
|
||||||
|
bool isRegister(char *str);
|
||||||
|
|
||||||
|
/** @brief Extracts the register number from a register name.
 * Leading whitespace is skipped. "xzr" and "wzr" yield ZERO_REGISTER; for any
 * other name the first character is skipped and the rest is parsed as a
 * decimal number (0 if no digits follow).
 *
 * @param str The register name, e.g. "x5".
 * @return The register number.
 */
int getRegister(char *str);
|
||||||
|
|
||||||
|
/** @brief Parses an immediate operand of the form "#<value>".
 * Leading whitespace is skipped. A value starting with "0x" is read as
 * hexadecimal; otherwise it is read as decimal.
 *
 * @param str The operand text, e.g. "#12" or "#0x1f".
 * @return The parsed value, or 0 if the text does not start with '#' or has
 *         nothing after it.
 */
int getImmediate(char *str);
|
||||||
|
|
||||||
|
/** @brief Reports whether a register name starts with 'x' or 'X'.
 * Leading whitespace is skipped before the first character is examined.
 * NOTE(review): presumably 1 denotes a 64-bit register; confirm with callers.
 *
 * @param str The register name.
 * @return 1 if the first non-whitespace character is 'x' or 'X', 0 otherwise.
 */
int getRegisterType(char *str);
|
||||||
|
|
||||||
|
/** @brief Rewrites an alias instruction in place into its target instruction.
 * Handles cmp, cmn, neg, negs, tst, mvn and mov: tokens[0] is replaced by the
 * target opcode and a zero-register operand is inserted, so *tokensCount
 * grows by one. Does nothing if opcode is not one of these aliases.
 * The tokens array must have room for one additional entry.
 *
 * @param opcode The opcode of the instruction.
 * @param tokens The tokens of the instruction; modified in place.
 * @param tokensCount The number of tokens; incremented when an alias is translated.
 */
void translateAlias(char *opcode, char *tokens[], int *tokensCount);
|
||||||
@ -1,3 +1,11 @@
|
|||||||
|
/** @file symboltable.c
|
||||||
|
* @brief An Abstract Data Type (ADT) for a symbol table, an array of
|
||||||
|
* label-address pairs. Labels are strings and addresses are unsigned integers.
|
||||||
|
* (uint32_t). The symbol table is implemented as a dynamic array.
|
||||||
|
*
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|||||||
@ -1,3 +1,11 @@
|
|||||||
|
/** @file symboltable.h
|
||||||
|
* @brief An Abstract Data Type (ADT) for a symbol table, an array of
|
||||||
|
* label-address pairs. Labels are strings and addresses are unsigned integers.
|
||||||
|
* (uint32_t). The symbol table is implemented as a dynamic array.
|
||||||
|
*
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@ -7,21 +15,56 @@
|
|||||||
|
|
||||||
typedef uint32_t address;
|
typedef uint32_t address;
|
||||||
|
|
||||||
|
/** An entry in the symbol table, a label-address pair.
|
||||||
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char *label;
|
char *label;
|
||||||
address address;
|
address address;
|
||||||
} symbol_table_map;
|
} symbol_table_map;
|
||||||
|
|
||||||
|
/** The symbol table ADT.
|
||||||
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
symbol_table_map* table;
|
symbol_table_map* table; // entries
|
||||||
int size;
|
int size; // number of entries
|
||||||
int capacity;
|
int capacity; // size of the table. capacity >= size
|
||||||
} symbol_table;
|
} symbol_table;
|
||||||
|
|
||||||
|
/** @brief Initializes a new symbol table.
|
||||||
|
*
|
||||||
|
* @return A pointer to the new symbol table.
|
||||||
|
*/
|
||||||
symbol_table *st_init(void);
|
symbol_table *st_init(void);
|
||||||
|
|
||||||
|
/** @brief Inserts a new label-address pair to the symbol table.
|
||||||
|
* Grows the table if it is full. If the label already exists in the table,
|
||||||
|
* another entry with the same label is inserted (for performance).
|
||||||
|
*
|
||||||
|
* @param st A pointer to the target symbol table.
|
||||||
|
* @param label The label to insert.
|
||||||
|
* @param addr The address to insert.
|
||||||
|
*/
|
||||||
void st_insert(symbol_table *st, char *label, address addr);
|
void st_insert(symbol_table *st, char *label, address addr);
|
||||||
|
|
||||||
|
/** @brief Checks if a label exists in the symbol table.
|
||||||
|
*
|
||||||
|
* @param st A pointer to the target symbol table.
|
||||||
|
* @param label The label to check.
|
||||||
|
* @return True if the label exists in the table, false otherwise.
|
||||||
|
*/
|
||||||
bool st_contains(symbol_table *st, char *label);
|
bool st_contains(symbol_table *st, char *label);
|
||||||
|
|
||||||
|
/** @brief Gets the address of a label in the symbol table.
|
||||||
|
* st_contains should be called before calling this function!
|
||||||
|
*
|
||||||
|
* @param st A pointer to the target symbol table.
|
||||||
|
* @param label The label to get the address of.
|
||||||
|
* @return The address of the label in the table.
|
||||||
|
*/
|
||||||
address st_get(symbol_table *st, char *label);
|
address st_get(symbol_table *st, char *label);
|
||||||
|
|
||||||
|
/** @brief Frees the memory allocated for the symbol table.
|
||||||
|
*
|
||||||
|
* @param st A pointer to the target symbol table.
|
||||||
|
*/
|
||||||
|
void st_free(symbol_table *st);
|
||||||
|
|||||||
@ -1,33 +1,23 @@
|
|||||||
// Tokeniser.c
|
/** @file tokeniser.c
|
||||||
|
* @brief Functions to tokenise lines of assembly and operand strings.
|
||||||
|
*
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
#include "tokeniser.h"
|
||||||
|
#include "string_util.h"
|
||||||
|
|
||||||
#define MAX_TOKEN_COUNT 5
|
#define MAX_TOKEN_COUNT 6
|
||||||
#define MAX_OPERAND_COUNT 4
|
#define MAX_OPERAND_COUNT 5
|
||||||
#define OPERAND_DELIMITER ", "
|
#define OPERAND_DELIMITER ", "
|
||||||
|
#define OPEN_BRACKET '['
|
||||||
char *trim(char *str) {
|
#define CLOSE_BRACKET ']'
|
||||||
while (isspace(*str)) {
|
|
||||||
str++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*str == '\0') {
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
char *end = str + strlen(str) - 1;
|
|
||||||
while (end > str && isspace(*end)) {
|
|
||||||
end--;
|
|
||||||
}
|
|
||||||
|
|
||||||
end[1] = '\0';
|
|
||||||
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
char **tokenise(char *line, int *numTokens) {
|
char **tokenise(char *line, int *numTokens) {
|
||||||
char **tokens = malloc(MAX_TOKEN_COUNT * sizeof(char *));\
|
char **tokens = malloc(MAX_TOKEN_COUNT * sizeof(char *));\
|
||||||
@ -46,36 +36,22 @@ char **tokenise(char *line, int *numTokens) {
|
|||||||
|
|
||||||
char *operandStart = strtok(NULL, "");
|
char *operandStart = strtok(NULL, "");
|
||||||
if (operandStart == NULL) {
|
if (operandStart == NULL) {
|
||||||
// No operands. Return the instruction token.
|
// No operands. Return the first (opcode) token.
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool inBracket = false;
|
SKIP_WHITESPACE(operandStart);
|
||||||
char *currentToken = operandStart;
|
|
||||||
|
|
||||||
for (char *c = operandStart; *c != '\0'; ++c) {
|
// Use tokeniseOperands to tokenise the operands
|
||||||
if (*c == '[' || *c == '{') {
|
int operandTokensCount = 0;
|
||||||
inBracket = true;
|
char **operandTokens = tokeniseOperands(operandStart, &operandTokensCount);
|
||||||
} else if (*c == ']' || *c == '}') {
|
|
||||||
inBracket = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
for (int i = 0; i < operandTokensCount; i++) {
|
||||||
if (*c == ',' && !inBracket) {
|
tokens[(*numTokens)++] = operandTokens[i];
|
||||||
*c = '\0';
|
|
||||||
tokens[(*numTokens)++] = currentToken;
|
|
||||||
currentToken = c + 1;
|
|
||||||
while (*currentToken == ' ') {
|
|
||||||
currentToken++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*currentToken != '\0') {
|
|
||||||
tokens[*numTokens] = currentToken;
|
|
||||||
(*numTokens)++;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
free(operandTokens);
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,42 +62,43 @@ char **tokeniseOperands(char *line, int *numTokens) {
|
|||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*line == '[') {
|
SKIP_WHITESPACE(line);
|
||||||
|
|
||||||
|
// Remove leading and trailing brackets if they exist
|
||||||
|
if (*line == OPEN_BRACKET) {
|
||||||
line++; // skip '['
|
line++; // skip '['
|
||||||
line[strlen(line) - 1] = '\0'; // remove ']'
|
char *end = line + strlen(line) - 1;
|
||||||
} else if (*line == '{') {
|
while (end > line && *end != CLOSE_BRACKET) {
|
||||||
line++; // skip '{'
|
end--;
|
||||||
line[strlen(line) - 1] = '\0'; // remove '}'
|
}
|
||||||
|
if (*end == CLOSE_BRACKET) {
|
||||||
|
*end = '\0';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
line = trim(line);
|
||||||
|
|
||||||
*numTokens = 0;
|
*numTokens = 0;
|
||||||
bool inBracket = false;
|
bool inBracket = false;
|
||||||
char *currentToken = line;
|
char *currentToken = line;
|
||||||
|
|
||||||
for (char *c = line; *c != '\0'; ++c) {
|
for (char *c = line; *c != '\0'; ++c) {
|
||||||
if (*c == '[' || *c == '{') {
|
if (*c == '[') {
|
||||||
inBracket = true;
|
inBracket = true;
|
||||||
} else if (*c == ']' || *c == '}') {
|
} else if (*c == ']') {
|
||||||
inBracket = false;
|
inBracket = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*c == ',' && !inBracket) {
|
if (*c == ',' && !inBracket) {
|
||||||
*c = '\0';
|
*c = '\0';
|
||||||
tokens[(*numTokens)++] = currentToken;
|
tokens[(*numTokens)++] = currentToken;
|
||||||
currentToken = c + 1;
|
currentToken = c + 1; // skip the comma
|
||||||
while (*currentToken == ' ') {
|
SKIP_WHITESPACE(currentToken);
|
||||||
currentToken++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*currentToken != '\0') {
|
if (*currentToken != '\0') {
|
||||||
tokens[*numTokens] = currentToken;
|
tokens[*numTokens] = currentToken;
|
||||||
|
|
||||||
if (tokens[*numTokens][strlen(tokens[*numTokens]) - 1] == '\n') {
|
|
||||||
tokens[*numTokens][strlen(tokens[*numTokens]) - 1] = '\0';
|
|
||||||
}
|
|
||||||
|
|
||||||
(*numTokens)++;
|
(*numTokens)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
26
src/tokeniser.h
Normal file
26
src/tokeniser.h
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
/** @file tokeniser.h
|
||||||
|
* @brief Functions to tokenise lines of assembly and operand strings.
|
||||||
|
*
|
||||||
|
* @author Saleh Bubshait
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** @brief Tokenises a line of assembly code. The first two tokens are separated
|
||||||
|
* by a space, and the rest are separated by commas.
|
||||||
|
* e.g., "add x1, x2, x3" -> ["add", "x1", "x2", "x3"]. Handles and skips any
|
||||||
|
* whitespaces, e.g., " add x1, x2,#4 " -> ["add", "x1", "x2", "#4"].
|
||||||
|
* @param line The line to tokenise.
|
||||||
|
* @param numTokens A pointer to an integer to store the number of tokens.
|
||||||
|
* @return An array of strings containing the tokens.
|
||||||
|
*/
|
||||||
|
char **tokenise(char *line, int *numTokens);
|
||||||
|
|
||||||
|
/** @brief Tokenises the operands of an instruction. The operands are separated
|
||||||
|
* by commas. Handles and skips any whitespaces, e.g., "x1, x2, #4" -> ["x1", "x2", "#4"].
|
||||||
|
* If the line starts with an opening bracket, it is removed along with the closing bracket.
|
||||||
|
* Note. It also removes anything after the brackets, for example:
|
||||||
|
* "[x1, x2, #4]!" -> ["x1", "x2", "#4"].
|
||||||
|
* @param line The line to tokenise.
|
||||||
|
* @param numTokens A pointer to an integer to store the number of tokens.
|
||||||
|
* @return An array of strings containing the tokens.
|
||||||
|
*/
|
||||||
|
char **tokeniseOperands(char *line, int *numTokens);
|
||||||
Loading…
Reference in New Issue
Block a user