/** @file parser.c
 * @brief Functions to parse ARMv8 assembly lines into an array of a special
 *        internal representation of instructions, a64inst_instruction.
 * @author Ethan Dias Alberto
 * @author George Niedringhaus
 * @author Saleh Bubshait
 */
|
|
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <stdbool.h>
|
|
#include "parser.h"
|
|
#include "a64instruction/a64instruction.h"
|
|
#include "global.h"
|
|
#include "tokeniser.h"
|
|
#include "string_util.h"
|
|
|
|
/************************************
 * STRUCTS
 ************************************/
|
|
|
|
/** Decoded form of a shift operand such as "lsl #12", as produced by parseShift. */
typedef struct ShiftData {
    /* Index of the shift mnemonic within SHIFT_TYPE_OPCODES. */
    int type;
    /* Shift amount taken from the immediate that follows the mnemonic. */
    int immediate;
} ShiftData;
|
|
|
|
/************************************
 * PROTOTYPES
 ************************************/
|
|
|
|
static void parse_instruction(char asmLine[], a64inst_instruction *instr);
|
|
static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands);
|
|
static void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]);
|
|
static void parseAddressingMode(a64inst_instruction *instr, char *operandList[], int numOperands);
|
|
static void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount);
|
|
static void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount);
|
|
static void parseDirective(a64inst_instruction *inst, char *tokens[]);
|
|
static ShiftData *parseShift(char *shift);
|
|
static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount);
|
|
|
|
/************************************
 * CONSTANTS
 ************************************/
|
|
|
|
/*
 * Mnemonic tables. The parsers in this file store the position of a mnemonic
 * in its table (via lastIndexOfString) into the instruction, so the ORDER of
 * the entries is significant and must not be changed.
 */

/* Branch mnemonics: unconditional, register, then the conditional forms. */
static const char *BRANCH_OPCODES[] = {
    "b", "br",
    "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"
};

/* Load/store mnemonics. */
static const char *SINGLE_TRANSFER_OPCODES[] = {
    "ldr", "str"
};

/*
 * Wide-move mnemonics. "movz" is listed at both index 1 and index 2; the
 * lookup used for this table is lastIndexOfString, so -- assuming that helper
 * returns the highest matching index -- "movz" resolves to 2.
 * NOTE(review): the duplicate looks deliberate (index 1 unused); confirm
 * before "fixing" it.
 */
static const char *WIDE_MOV_OPCODES[] = {
    "movn", "movz", "movz", "movk"
};

/* Arithmetic mnemonics (shared by the immediate and register forms). */
static const char *ARITHMETIC_OPCODES[] = {
    "add", "adds", "sub", "subs"
};

/* Multiply mnemonics. */
static const char *MULTIPLY_OPCODES[] = {
    "mul", "madd", "msub", "mneg"
};

/* Shift mnemonics; the index is stored as the shift type. */
static const char *SHIFT_TYPE_OPCODES[] = {
    "lsl", "lsr", "asr", "ror"
};

/* Bitwise-logic mnemonics, laid out in pairs (see parseDPRegister). */
static const char *LOGIC_OPCODES[] = {
    "and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"
};
|
|
|
|
|
|
/************************************
 * FUNCTIONS
 ************************************/
|
|
|
|
/** Parses a list of assembly lines into an array of instructions.
 *
 * Parsing stops at the first NULL entry of asmLines or after lineCount lines,
 * whichever comes first, so the output array can never be overrun.
 *
 * @param asmLines  Array of assembly lines; a NULL entry marks the end.
 * @param lineCount Number of instruction slots to allocate (and the maximum
 *                  number of lines that will be parsed).
 * @return A heap-allocated, zero-initialised array with room for lineCount
 *         instructions. The function does not free it; the caller owns it.
 *         The process exits with EXIT_FAILURE if the allocation fails.
 */
a64inst_instruction *parse(char **asmLines, int lineCount) {
    size_t capacity = lineCount > 0 ? (size_t)lineCount : 0;

    /* calloc rather than malloc: the per-family parsers only write the fields
     * relevant to the operands they see, so every other field must start out
     * with a defined value. Always request at least one element so that a
     * NULL result unambiguously means "out of memory". */
    a64inst_instruction *instructions = calloc(capacity > 0 ? capacity : 1, sizeof *instructions);
    if (instructions == NULL) {
        fprintf(stderr, "Error: out of memory while allocating %zu instructions\n", capacity);
        exit(EXIT_FAILURE);
    }

    if (asmLines == NULL) {
        return instructions;
    }

    for (size_t i = 0; i < capacity && asmLines[i] != NULL; i++) {
        parse_instruction(asmLines[i], &instructions[i]);
    }

    return instructions;
}
|
|
|
|
/** Parses a single ARMv8 assembly line into an a64inst_instruction.
|
|
*/
|
|
static void parse_instruction(char asmLine[], a64inst_instruction *instr) {
|
|
if (instr == NULL){
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
char *asmLineCopy = duplicateString(asmLine);
|
|
int tokensCount = 0;
|
|
char **tokens = tokenise(asmLineCopy, &tokensCount);
|
|
char *opcode = tokens[0];
|
|
|
|
// Check if the instruction is the halt instruction, "and x0, x0, x0".
|
|
if (tokensCount == 4 && strcmp(opcode, "and") == 0
|
|
&& getRegister(tokens[1]) == 0
|
|
&& getRegister(tokens[2]) == 0
|
|
&& getRegister(tokens[3]) == 0) {
|
|
|
|
instr->type = a64inst_HALT;
|
|
return;
|
|
}
|
|
|
|
|
|
if(strcmp(opcode, ".int") == 0){
|
|
// Directive
|
|
instr->type = a64inst_DIRECTIVE;
|
|
parseDirective(instr, tokens);
|
|
|
|
|
|
} else if(opcode[strlen(opcode)-1]== ':') {
|
|
// Label
|
|
instr->type = a64inst_LABEL;
|
|
opcode[strlen(opcode) - 1] = '\0'; // Remove the colon
|
|
instr->data.LabelData.label = opcode;
|
|
|
|
} else {
|
|
// Instruction
|
|
|
|
// Classify the opcode into the correct instruction type.
|
|
classifyOpcode(opcode, instr, tokens, &tokensCount);
|
|
|
|
switch(instr->type){
|
|
case a64inst_BRANCH:
|
|
parseBranch(instr, opcode, tokens);
|
|
break;
|
|
|
|
case a64inst_SINGLETRANSFER:
|
|
parseSingleTransfer(instr, opcode, tokens, tokensCount);
|
|
parseAddressingMode(instr, tokens, tokensCount);
|
|
break;
|
|
|
|
case a64inst_LOADLITERAL:
|
|
parseSingleTransfer(instr, opcode, tokens, tokensCount);
|
|
break;
|
|
|
|
case a64inst_DPREGISTER:
|
|
//generate DP operands;
|
|
parseDPRegister(instr, tokens, tokensCount);
|
|
break;
|
|
|
|
case a64inst_DPIMMEDIATE:
|
|
parseDPImmediate(instr, tokens, tokensCount);
|
|
break;
|
|
|
|
default:
|
|
printf("Error: Invalid Instruction, '%s'\n", opcode);
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
/** Parses the operand of a ".int" directive.
 *
 * tokens[1] is read as the literal: a leading "0x" selects base 16, anything
 * else is parsed as base 10. The result is stored in
 * instr->data.DirectiveData.value. If the literal contains no digits, or has
 * non-whitespace text after the number, an error is printed; the value that
 * strtol produced (0 when nothing could be parsed) is still stored.
 *
 * @param instr  Instruction whose DirectiveData is filled in.
 * @param tokens Token array; tokens[1] must be the directive operand.
 */
static void parseDirective(a64inst_instruction *instr, char *tokens[]) {
    char *literal = tokens[1];
    int base = 10;

    if (strncmp(literal, "0x", 2) == 0) {
        literal += 2;
        base = 16;
    }

    char *end = NULL;
    long value = strtol(literal, &end, base);

    /* strtol reports "nothing parsed" by leaving end at the start; trailing
     * whitespace is tolerated, any other trailing text is not. */
    bool parsedSomething = (end != literal);
    while (isspace((unsigned char)*end)) {
        end++;
    }
    if (!parsedSomething || *end != '\0') {
        printf("Error: Invalid integer literal, '%s'\n", tokens[1]);
    }

    instr->data.DirectiveData.value = value;
}
|
|
|
|
|
|
/** Fills in the target register of a load/store, plus the literal operand of
 * a load-literal.
 *
 * Does nothing unless instr->type is a64inst_SINGLETRANSFER or
 * a64inst_LOADLITERAL. For both, the register type and number are taken from
 * tokens[1]. For a load-literal, tokens[2] is stored either as an immediate
 * offset (when it starts with '#') or as a label pointer.
 *
 * @param instr       Instruction to fill in; its type must already be set.
 * @param opcode      Unused.
 * @param tokens      Token array: opcode, target register, address/literal.
 * @param tokensCount Unused.
 */
static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) {
    (void)opcode;
    (void)tokensCount;

    const bool isLoadLiteral = (instr->type == a64inst_LOADLITERAL);
    if (!isLoadLiteral && instr->type != a64inst_SINGLETRANSFER) {
        return;
    }

    // Common to both forms: the transfer register.
    instr->data.SingleTransferData.regType = getRegisterType(tokens[1]);
    instr->data.SingleTransferData.target = getRegister(tokens[1]);

    if (!isLoadLiteral) {
        return;
    }

    char *literal = tokens[2];
    if (literal[0] == '#') {
        // offset is immediate
        instr->data.SingleTransferData.processOpData.loadLiteralData.offset = getImmediate(literal);
    } else {
        // offset is label
        instr->data.SingleTransferData.processOpData.loadLiteralData.label = literal;
    }
}
|
|
|
|
/** Fills in the operand data of a branch instruction.
 *
 * Dispatches on instr->data.BranchData.BranchType, which must already be set:
 *  - unconditional: stores operandList[1] as the target label;
 *  - register:      stores the register number parsed from operandList[1];
 *  - conditional:   decodes the condition suffix of the opcode (the text
 *                   after "b.") and stores operandList[1] as the label.
 *
 * @param instr       Instruction to fill in.
 * @param opcode      Branch mnemonic, e.g. "b", "br" or "b.eq".
 * @param operandList Token array; operandList[1] is the branch operand.
 */
static void parseBranch(a64inst_instruction *instr, char *opcode, char *operandList[]) {
    switch (instr->data.BranchData.BranchType) {
        case a64inst_UNCONDITIONAL:
            instr->data.BranchData.processOpData.unconditionalData.label = operandList[1];
            break;

        case a64inst_REGISTER:
            instr->data.BranchData.processOpData.registerData.src = getRegister(operandList[1]);
            break;

        case a64inst_CONDITIONAL: {
            /* Conditional mnemonics have the form "b.<cc>"; skip the "b.". */
            const char *condition = opcode + 2;

            if (strcmp(condition, "eq") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = EQ;
            } else if (strcmp(condition, "ne") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = NE;
            } else if (strcmp(condition, "ge") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = GE;
            } else if (strcmp(condition, "lt") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = LT;
            } else if (strcmp(condition, "gt") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = GT;
            } else if (strcmp(condition, "le") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = LE;
            } else if (strcmp(condition, "al") == 0) {
                instr->data.BranchData.processOpData.conditionalData.cond = AL;
            } else {
                printf("Error: Invalid Instruction, '%s'\n", opcode);
            }

            /* NOTE(review): the label is written through unconditionalData,
             * not conditionalData. If processOpData is a union this relies on
             * the two members' layouts being compatible (and on the write not
             * aliasing cond) -- confirm against the a64instruction headers. */
            instr->data.BranchData.processOpData.unconditionalData.label = operandList[1];
            break;
        }

        default:
            break;
    }
}
|
|
|
|
/** Parses a data-processing (immediate) instruction.
 *
 * Two families are handled:
 *  - wide moves (opcode in WIDE_MOV_OPCODES): "op rd, #imm [, shift]";
 *  - arithmetic (everything else):            "op rd, rn, #imm [, shift]".
 *
 * The optional shift is decoded with parseShift. When it is absent the shift
 * fields are written with "no shift" (0 / false) instead of being left with
 * whatever the instruction buffer previously contained.
 *
 * @param inst        Instruction whose DPImmediateData is filled in.
 * @param tokens      Token array; tokens[0] is the opcode.
 * @param tokensCount Number of valid entries in tokens.
 */
static void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) {
    a64inst_DPImmediateData *data = &inst->data.DPImmediateData;

    if (tokensCount < 3) {
        printf("Error: Invalid Instruction, '%s'\n", tokens[0]);
        return;
    }

    data->dest = getRegister(tokens[1]);
    data->regType = getRegisterType(tokens[1]);

    if (containsString(tokens[0], WIDE_MOV_OPCODES, 4)) {
        data->DPIOpType = a64inst_DPI_WIDEMOV;
        data->processOp = lastIndexOfString(tokens[0], WIDE_MOV_OPCODES, 4);
        data->processOpData.wideMovData.immediate = getImmediate(tokens[2]);
        data->processOpData.wideMovData.shiftScalar = 0;

        if (tokensCount >= 4) {
            /* parseShift returns heap memory; copy what is needed, then free. */
            ShiftData *shift = parseShift(tokens[3]);
            if (shift != NULL) {
                data->processOpData.wideMovData.shiftScalar = shift->immediate;
                free(shift);
            }
        }
        return;
    }

    /* Arithmetic form. Everything that was not recognised earlier ends up
     * here, so reject mnemonics that are not in the arithmetic table
     * (assumes lastIndexOfString yields a negative index on a miss). */
    int arithmeticIndex = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4);
    if (arithmeticIndex < 0 || tokensCount < 4) {
        printf("Error: Invalid Instruction, '%s'\n", tokens[0]);
        return;
    }

    data->DPIOpType = a64inst_DPI_ARITHM;
    data->processOp = arithmeticIndex;
    data->processOpData.arithmData.src = getRegister(tokens[2]);
    data->processOpData.arithmData.immediate = getImmediate(tokens[3]);
    data->processOpData.arithmData.shiftImmediate = false;

    if (tokensCount >= 5) {
        ShiftData *shift = parseShift(tokens[4]);
        if (shift != NULL) {
            /* Any non-zero shift amount sets the flag. */
            if (shift->immediate > 0) {
                data->processOpData.arithmData.shiftImmediate = true;
            }
            free(shift);
        }
    }
}
|
|
|
|
void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) {
|
|
a64inst_DPRegisterData *data = &inst->data.DPRegisterData;
|
|
data->dest = getRegister(tokens[1]);
|
|
data->regType = getRegisterType(tokens[1]);
|
|
data->src1 = getRegister(tokens[2]);
|
|
data->src2 = getRegister(tokens[3]);
|
|
|
|
if (containsString(tokens[0], MULTIPLY_OPCODES, 4)) {
|
|
// Multiply
|
|
data->DPROpType = a64inst_DPR_MULTIPLY;
|
|
if (tokensCount >= 5) {
|
|
data->processOpData.multiplydata.summand = getRegister(tokens[4]);
|
|
data->processOpData.multiplydata.negProd = strcmp(tokens[0], "msub") == 0;
|
|
}
|
|
else {
|
|
data->processOpData.multiplydata.summand = ZERO_REGISTER;
|
|
data->processOpData.multiplydata.negProd = strcmp(tokens[0], "mneg") == 0;
|
|
}
|
|
|
|
} else {
|
|
// Arithmetic/Logic
|
|
data->DPROpType = a64inst_DPR_ARITHMLOGIC;
|
|
|
|
if (containsString(tokens[0], ARITHMETIC_OPCODES, 4)) {
|
|
// Arithmetic
|
|
data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4);
|
|
data->processOpData.arithmLogicData.type = 1;
|
|
if(tokensCount == 5) {
|
|
//has a shift
|
|
int numTokens = 0;
|
|
char **shiftOperands = tokenise(tokens[4], &numTokens);
|
|
data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
|
|
data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]);
|
|
}
|
|
|
|
} else {
|
|
// Logic
|
|
int opcodeCategory = lastIndexOfString(tokens[0], LOGIC_OPCODES, 8);
|
|
switch(opcodeCategory/2){
|
|
case 0:
|
|
//and
|
|
if((tokens[0][strlen(tokens[0])-1]) == 's'){
|
|
data->processOp = 3;
|
|
} else {
|
|
data->processOp = 0;
|
|
}
|
|
data->processOpData.arithmLogicData.negShiftedSrc2 = 0;
|
|
break;
|
|
case 1:
|
|
//negated AND
|
|
if((tokens[0][strlen(tokens[0])-1]) == 's'){
|
|
data->processOp = 3;
|
|
} else {
|
|
data->processOp = 0;
|
|
}
|
|
data->processOpData.arithmLogicData.negShiftedSrc2 = 1;
|
|
break;
|
|
case 2:
|
|
//XOR
|
|
data->processOp = 2;
|
|
if(opcodeCategory==4){
|
|
data->processOpData.arithmLogicData.negShiftedSrc2 = 0;
|
|
} else {
|
|
data->processOpData.arithmLogicData.negShiftedSrc2 = 1;
|
|
}
|
|
break;
|
|
case 3:
|
|
//OR
|
|
data->processOp = 1;
|
|
if(opcodeCategory==6){
|
|
data->processOpData.arithmLogicData.negShiftedSrc2 = 0;
|
|
} else {
|
|
data->processOpData.arithmLogicData.negShiftedSrc2 = 1;
|
|
}
|
|
break;
|
|
}
|
|
if(tokensCount == 5) {
|
|
//has a shift
|
|
int numTokens = 0;
|
|
char **shiftOperands = tokenise(tokens[4], &numTokens);
|
|
data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4);
|
|
data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/** Classifies the given opcode into the correct instruction type.
|
|
* Modifies instr to reflect the classification.
|
|
*/
|
|
static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) {
|
|
|
|
// First, if the opcode is an alias, convert it to the target instruction.
|
|
translateAlias(opcode, tokens, tokensCount);
|
|
|
|
if (containsString(opcode, BRANCH_OPCODES, 9)) {
|
|
instr->type = a64inst_BRANCH;
|
|
|
|
if (strcmp(opcode, "br") == 0) {
|
|
instr->data.BranchData.BranchType = a64inst_REGISTER;
|
|
} else if (strcmp(opcode, "b") == 0) {
|
|
instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL;
|
|
} else {
|
|
instr->data.BranchData.BranchType = a64inst_CONDITIONAL;
|
|
}
|
|
|
|
} else if (containsString(opcode, SINGLE_TRANSFER_OPCODES, 2)) {
|
|
instr->type = a64inst_SINGLETRANSFER;
|
|
if (*tokens[2] == '[') {
|
|
instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER;
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0;
|
|
|
|
} else {
|
|
instr->type = a64inst_LOADLITERAL;
|
|
}
|
|
|
|
// DP Instruction.
|
|
// DP Register if the third operand is a register.
|
|
} else if (*tokensCount >= 4 && isRegister(tokens[3])) {
|
|
instr->type = a64inst_DPREGISTER;
|
|
} else {
|
|
instr->type = a64inst_DPIMMEDIATE;
|
|
}
|
|
|
|
}
|
|
|
|
/** Parses a shift string into a ShiftData struct.
|
|
*/
|
|
static ShiftData *parseShift(char *shift) {
|
|
char buffer[20];
|
|
strcpy(buffer, shift);
|
|
|
|
char *shiftType = strtok(buffer, " ");
|
|
char *shiftAmount = strtok(NULL, " ");
|
|
|
|
ShiftData *data = malloc(sizeof(ShiftData));
|
|
|
|
data->type = lastIndexOfString(shiftType, SHIFT_TYPE_OPCODES, 4);
|
|
|
|
SKIP_WHITESPACE(shiftAmount);
|
|
data->immediate = getImmediate(shiftAmount);
|
|
return data;
|
|
}
|
|
|
|
/** Parses the addressing mode of a single transfer instruction. (Not load literal)
|
|
*/
|
|
static void parseAddressingMode(a64inst_instruction *instr, char *tokens[], int tokenCount) {
|
|
assert(*tokens[2] == '[');
|
|
|
|
int operandCount = 0;
|
|
char *unsplitString = duplicateString(tokens[2]);
|
|
char **operands = tokeniseOperands(tokens[2], &operandCount);
|
|
|
|
int baseRegister = getRegister(operands[0]);
|
|
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister;
|
|
|
|
if (tokenCount >= 4) {
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED;
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(tokens[3]);
|
|
|
|
} else if(unsplitString[strlen(unsplitString)-1] == '!') {
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED;
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(operands[1]);
|
|
|
|
} else if (operandCount == 1 || (!isRegister(operands[1]))) {
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET;
|
|
if(operandCount > 1){
|
|
int offset = getImmediate(operands[1]);
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8;
|
|
}
|
|
} else {
|
|
if((isRegister(operands[0]) == 1)
|
|
&& (isRegister(operands[1]) == 1)){
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET;
|
|
instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getRegister(operands[1]);
|
|
}
|
|
}
|
|
}
|