/** @file parser.c * @brief Functions to parse ARMv8 assembly lines into an array of a special * internal representation of instructions, a64inst_instruction. * @author Ethan Dias Alberto * @author George Niedringhaus * @author Saleh Bubshait */ #include #include #include #include #include #include #include "parser.h" #include "a64instruction/a64instruction.h" #include "global.h" #include "tokeniser.h" #include "string_util.h" /************************************ * STRUCTS ************************************/ typedef struct { int type; int immediate; } ShiftData; /************************************ * PROTOTYPES ************************************/ static void parse_instruction(char asmLine[], a64inst_instruction *instr); static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *operandList[], int numOperands); static void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]); static void parseAddressingMode(a64inst_instruction *instr, char *operandList[], int numOperands); static void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount); static void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount); static void parseDirective(a64inst_instruction *inst, char *tokens[]); static ShiftData *parseShift(char *shift); static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount); /************************************ * CONSTANTS ************************************/ static const char *BRANCH_OPCODES[] = {"b", "br", "b.eq", "b.ne", "b.ge", "b.lt", "b.gt", "b.le", "b.al"}; static const char *SINGLE_TRANSFER_OPCODES[] = {"ldr", "str"}; static const char *WIDE_MOV_OPCODES[] = {"movn", "movz", "movz", "movk"}; static const char *ARITHMETIC_OPCODES[] = {"add", "adds", "sub", "subs"}; static const char *MULTIPLY_OPCODES[] = {"mul", "madd", "msub", "mneg"}; static const char *SHIFT_TYPE_OPCODES[] = {"lsl", "lsr", "asr", "ror"}; static const char *LOGIC_OPCODES[] = {"and", "ands", "bic", "bics", "eor", "eon", "orr", "orn"}; /************************************ * FUNCTIONS ************************************/ a64inst_instruction *parse(char **asmLines, int lineCount) { a64inst_instruction *instructions = malloc(sizeof(a64inst_instruction) * lineCount); int i = 0; while (asmLines[i] != NULL) { parse_instruction(asmLines[i], &instructions[i]); i++; } return instructions; } /** Parses a single ARMv8 assembly line into an a64inst_instruction. */ static void parse_instruction(char asmLine[], a64inst_instruction *instr) { if (instr == NULL){ exit(EXIT_FAILURE); } char *asmLineCopy = duplicateString(asmLine); int tokensCount = 0; char **tokens = tokenise(asmLineCopy, &tokensCount); char *opcode = tokens[0]; // Check if the instruction is the halt instruction, "and x0, x0, x0". if (tokensCount == 4 && strcmp(opcode, "and") == 0 && getRegister(tokens[1]) == 0 && getRegister(tokens[2]) == 0 && getRegister(tokens[3]) == 0) { instr->type = a64inst_HALT; return; } if(strcmp(opcode, ".int") == 0){ // Directive instr->type = a64inst_DIRECTIVE; parseDirective(instr, tokens); } else if(opcode[strlen(opcode)-1]== ':') { // Label instr->type = a64inst_LABEL; opcode[strlen(opcode) - 1] = '\0'; // Remove the colon instr->data.LabelData.label = opcode; } else { // Instruction // Classify the opcode into the correct instruction type. classifyOpcode(opcode, instr, tokens, &tokensCount); switch(instr->type){ case a64inst_BRANCH: parseBranch(instr, opcode, tokens); break; case a64inst_SINGLETRANSFER: parseSingleTransfer(instr, opcode, tokens, tokensCount); parseAddressingMode(instr, tokens, tokensCount); break; case a64inst_LOADLITERAL: parseSingleTransfer(instr, opcode, tokens, tokensCount); break; case a64inst_DPREGISTER: //generate DP operands; parseDPRegister(instr, tokens, tokensCount); break; case a64inst_DPIMMEDIATE: parseDPImmediate(instr, tokens, tokensCount); break; default: printf("Error: Invalid Instruction, '%s'\n", opcode); break; } } } static void parseDirective(a64inst_instruction *instr, char *tokens[]) { char *intValue = tokens[1]; char *endptr; if(strncmp(intValue, "0x", 2) == 0) { intValue += 2; instr->data.DirectiveData.value = strtol(intValue, &endptr, 16); } else { instr->data.DirectiveData.value = strtol(tokens[1], &endptr, 10); } } static void parseSingleTransfer(a64inst_instruction *instr, char *opcode, char *tokens[], int tokensCount) { switch(instr->type){ case a64inst_SINGLETRANSFER: instr->data.SingleTransferData.regType = getRegisterType(tokens[1]); instr->data.SingleTransferData.target = getRegister(tokens[1]); break; case a64inst_LOADLITERAL: instr->data.SingleTransferData.regType = getRegisterType(tokens[1]); instr->data.SingleTransferData.target = getRegister(tokens[1]); if(*tokens[2] =='#'){ //offset is immediate instr->data.SingleTransferData.processOpData.loadLiteralData.offset = getImmediate(tokens[2]);; } else { //offset is label instr->data.SingleTransferData.processOpData.loadLiteralData.label = tokens[2]; } break; default: break; } } void parseBranch(a64inst_instruction *instr, char* opcode, char *operandList[]) { switch(instr->data.BranchData.BranchType){ case a64inst_UNCONDITIONAL: //define and sign extend immediate offset //use symbol table printf("unconditional"); instr->data.BranchData.processOpData.unconditionalData.label = operandList[1]; break; case a64inst_REGISTER: instr->data.BranchData.processOpData.registerData.src = getRegister(operandList[1]); break; case a64inst_CONDITIONAL: { char condition[strlen(opcode)+1]; strcpy(condition, opcode+2); if(strcmp(condition, "eq")==0){ instr->data.BranchData.processOpData.conditionalData.cond = EQ; } else if (strcmp(condition, "ne")==0){ instr->data.BranchData.processOpData.conditionalData.cond = NE; } else if (strcmp(condition, "ge")==0){ instr->data.BranchData.processOpData.conditionalData.cond = GE; } else if (strcmp(condition, "lt")==0){ instr->data.BranchData.processOpData.conditionalData.cond = LT; } else if (strcmp(condition, "gt")==0){ instr->data.BranchData.processOpData.conditionalData.cond = GT; } else if (strcmp(condition, "le")==0){ instr->data.BranchData.processOpData.conditionalData.cond = LE; } else if (strcmp(condition, "al")==0){ instr->data.BranchData.processOpData.conditionalData.cond = AL; } instr->data.BranchData.processOpData.unconditionalData.label = operandList[1]; break; } } } void parseDPImmediate(a64inst_instruction *inst, char *tokens[], int tokensCount) { a64inst_DPImmediateData *data = &inst->data.DPImmediateData; data->dest = getRegister(tokens[1]); data->regType = getRegisterType(tokens[1]); if (containsString(tokens[0], WIDE_MOV_OPCODES, 4)) { data->DPIOpType = a64inst_DPI_WIDEMOV; data->processOp = lastIndexOfString(tokens[0], WIDE_MOV_OPCODES, 4); data->processOpData.wideMovData.immediate = getImmediate(tokens[2]); if (tokensCount >= 4) { ShiftData shData = *parseShift(tokens[3]); data->processOpData.wideMovData.shiftScalar = shData.immediate; } } else { data->DPIOpType = a64inst_DPI_ARITHM; data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4); data->processOpData.arithmData.src = getRegister(tokens[2]); data->processOpData.arithmData.immediate = getImmediate(tokens[3]); if (tokensCount >= 5) { ShiftData shData = *parseShift(tokens[4]); if (shData.immediate > 0) { data->processOpData.arithmData.shiftImmediate = true; } } } } void parseDPRegister(a64inst_instruction *inst, char *tokens[], int tokensCount) { a64inst_DPRegisterData *data = &inst->data.DPRegisterData; data->dest = getRegister(tokens[1]); data->regType = getRegisterType(tokens[1]); data->src1 = getRegister(tokens[2]); data->src2 = getRegister(tokens[3]); if (containsString(tokens[0], MULTIPLY_OPCODES, 4)) { // Multiply data->DPROpType = a64inst_DPR_MULTIPLY; if (tokensCount >= 5) { data->processOpData.multiplydata.summand = getRegister(tokens[4]); data->processOpData.multiplydata.negProd = strcmp(tokens[0], "msub") == 0; } else { data->processOpData.multiplydata.summand = ZERO_REGISTER; data->processOpData.multiplydata.negProd = strcmp(tokens[0], "mneg") == 0; } } else { // Arithmetic/Logic data->DPROpType = a64inst_DPR_ARITHMLOGIC; if (containsString(tokens[0], ARITHMETIC_OPCODES, 4)) { // Arithmetic data->processOp = lastIndexOfString(tokens[0], ARITHMETIC_OPCODES, 4); data->processOpData.arithmLogicData.type = 1; if(tokensCount == 5) { //has a shift int numTokens = 0; char **shiftOperands = tokenise(tokens[4], &numTokens); data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]); } } else { // Logic int opcodeCategory = lastIndexOfString(tokens[0], LOGIC_OPCODES, 8); switch(opcodeCategory/2){ case 0: //and if((tokens[0][strlen(tokens[0])-1]) == 's'){ data->processOp = 3; } else { data->processOp = 0; } data->processOpData.arithmLogicData.negShiftedSrc2 = 0; break; case 1: //negated AND if((tokens[0][strlen(tokens[0])-1]) == 's'){ data->processOp = 3; } else { data->processOp = 0; } data->processOpData.arithmLogicData.negShiftedSrc2 = 1; break; case 2: //XOR data->processOp = 2; if(opcodeCategory==4){ data->processOpData.arithmLogicData.negShiftedSrc2 = 0; } else { data->processOpData.arithmLogicData.negShiftedSrc2 = 1; } break; case 3: //OR data->processOp = 1; if(opcodeCategory==6){ data->processOpData.arithmLogicData.negShiftedSrc2 = 0; } else { data->processOpData.arithmLogicData.negShiftedSrc2 = 1; } break; } if(tokensCount == 5) { //has a shift int numTokens = 0; char **shiftOperands = tokenise(tokens[4], &numTokens); data->processOpData.arithmLogicData.shiftType = lastIndexOfString(shiftOperands[0], SHIFT_TYPE_OPCODES, 4); data->processOpData.arithmLogicData.shiftAmount = getImmediate(shiftOperands[1]); } } } } /** Classifies the given opcode into the correct instruction type. * Modifies instr to reflect the classification. */ static void classifyOpcode(char* opcode, a64inst_instruction *instr, char *tokens[], int *tokensCount) { // First, if the opcode is an alias, convert it to the target instruction. translateAlias(opcode, tokens, tokensCount); if (containsString(opcode, BRANCH_OPCODES, 9)) { instr->type = a64inst_BRANCH; if (strcmp(opcode, "br") == 0) { instr->data.BranchData.BranchType = a64inst_REGISTER; } else if (strcmp(opcode, "b") == 0) { instr->data.BranchData.BranchType = a64inst_UNCONDITIONAL; } else { instr->data.BranchData.BranchType = a64inst_CONDITIONAL; } } else if (containsString(opcode, SINGLE_TRANSFER_OPCODES, 2)) { instr->type = a64inst_SINGLETRANSFER; if (*tokens[2] == '[') { instr->data.SingleTransferData.SingleTransferOpType = a64inst_SINGLE_TRANSFER_SINGLE_DATA_TRANSFER; instr->data.SingleTransferData.processOpData.singleDataTransferData.transferType = strcmp(opcode, "ldr") == 0; } else { instr->type = a64inst_LOADLITERAL; } // DP Instruction. // DP Register if the third operand is a register. } else if (*tokensCount >= 4 && isRegister(tokens[3])) { instr->type = a64inst_DPREGISTER; } else { instr->type = a64inst_DPIMMEDIATE; } } /** Parses a shift string into a ShiftData struct. */ static ShiftData *parseShift(char *shift) { char buffer[20]; strcpy(buffer, shift); char *shiftType = strtok(buffer, " "); char *shiftAmount = strtok(NULL, " "); ShiftData *data = malloc(sizeof(ShiftData)); data->type = lastIndexOfString(shiftType, SHIFT_TYPE_OPCODES, 4); SKIP_WHITESPACE(shiftAmount); data->immediate = getImmediate(shiftAmount); return data; } /** Parses the addressing mode of a single transfer instruction. (Not load literal) */ static void parseAddressingMode(a64inst_instruction *instr, char *tokens[], int tokenCount) { assert(*tokens[2] == '['); int operandCount = 0; char *unsplitString = duplicateString(tokens[2]); char **operands = tokeniseOperands(tokens[2], &operandCount); int baseRegister = getRegister(operands[0]); instr->data.SingleTransferData.processOpData.singleDataTransferData.base = baseRegister; if (tokenCount >= 4) { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_POST_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(tokens[3]); } else if(unsplitString[strlen(unsplitString)-1] == '!') { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_PRE_INDEXED; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.indexedOffset = getImmediate(operands[1]); } else if (operandCount == 1 || (!isRegister(operands[1]))) { instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_UNSIGNED_OFFSET; if(operandCount > 1){ int offset = getImmediate(operands[1]); instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.unsignedOffset = offset/8; } } else { if((isRegister(operands[0]) == 1) && (isRegister(operands[1]) == 1)){ instr->data.SingleTransferData.processOpData.singleDataTransferData.addressingMode = a64inst_REGISTER_OFFSET; instr->data.SingleTransferData.processOpData.singleDataTransferData.a64inst_addressingModeData.offsetReg = getRegister(operands[1]); } } }