/*
 * main.c - two-pass SIC assembler: shared tables and lookup helpers.
 *
 * Provenance: created by commit eb4fbe2ebdd44dbf9c604553f2ef053d0360dd41
 * ("required first commit", Dominic Matarese, Mon, 12 Jul 2021 16:21:43 +0000).
 */

/*
 * NOTE(review): the header names were stripped from the extracted patch.
 * These four are the ones the code in this file needs (bool, printf/fopen,
 * atoi/malloc, strlen/strtok/strcmp) - confirm against the repository.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#include <math.h>

#define TABLESIZE 200

/* Token classes stored in opcode.type; 0 means "not in the table". */
#define DIRECTIVE 1
#define OPCODE 2
#define OPERAND 3
#define REGISTER 4
#define SYMBOL 5

/* Value getAddress() returns when the symbol is not in the symbol table. */
#define ADDRESS_NOT_FOUND 99999

/* One mnemonic known to the assembler. */
typedef struct {
    char* instr;   /* mnemonic text, e.g. "LDA" */
    int opcode;    /* machine opcode (meaningful for type == OPCODE) */
    int type;      /* DIRECTIVE, OPCODE or REGISTER */
} opcode;

/* One label recorded during pass 1. */
typedef struct {
    char* name;
    int address;
    int opcode;
} symbol;

/* Open-addressing (linear probing) hash tables; entries are borrowed pointers. */
opcode* hashtable[TABLESIZE];   /* mnemonics */
symbol* symbolTable[TABLESIZE]; /* symbols */

unsigned int hash(const char *text);
void insert(opcode *entry);
bool isValid(const char *token);
int getType(const char *token);
void insertSymbol(symbol *Symbol);
opcode* getOpcode(const char *token);
int getAddress(const char *token);
int hex2dec(int);
int power(int, int);

/*
 * hash - map a string to a home slot in [0, TABLESIZE).
 *
 * For every character the running value is incremented by the character
 * code, multiplied by it, and reduced modulo TABLESIZE.  A NULL or empty
 * string hashes to 0.
 */
unsigned int hash(const char *text) {
    unsigned int digest = 0;

    if (text == NULL) {
        return 0;
    }
    for (size_t i = 0; text[i] != '\0'; i++) {
        digest += (unsigned int)text[i];
        digest *= (unsigned int)text[i];
        digest %= TABLESIZE; /* keep the value inside the table */
    }
    return digest;
}

/*
 * findOpcode - shared probe loop behind isValid(), getType() and getOpcode().
 *
 * Starts at the token's home slot and walks forward (wrapping around) until
 * the mnemonic is found or an empty slot proves it was never inserted.
 * Returns NULL when the token is NULL or unknown.
 */
static opcode *findOpcode(const char *token) {
    if (token == NULL) {
        return NULL;
    }

    unsigned int home = hash(token);
    for (unsigned int step = 0; step < TABLESIZE; step++) {
        opcode *entry = hashtable[(home + step) % TABLESIZE];
        if (entry == NULL) {
            return NULL;
        }
        if (strcmp(entry->instr, token) == 0) {
            return entry;
        }
    }
    return NULL; /* table completely full and no match */
}

/*
 * insert - add a mnemonic to the opcode table.
 *
 * The table stores the pointer itself, so `entry` must outlive every lookup.
 * NULL entries are ignored; if the table is full the entry is dropped.
 */
void insert(opcode *entry) {
    if (entry == NULL || entry->instr == NULL) {
        return;
    }

    unsigned int home = hash(entry->instr);
    for (unsigned int step = 0; step < TABLESIZE; step++) {
        unsigned int slot = (home + step) % TABLESIZE;
        if (hashtable[slot] == NULL) {
            hashtable[slot] = entry;
            return;
        }
    }
}

/* isValid - true when the token is a known directive, opcode or register. */
bool isValid(const char *token) {
    return findOpcode(token) != NULL;
}

/*
 * getType - class of a token (DIRECTIVE, OPCODE, REGISTER), or 0 when the
 * token is not in the opcode table.
 */
int getType(const char *token) {
    opcode *entry = findOpcode(token);
    return entry != NULL ? entry->type : 0;
}

/*
 * insertSymbol - add a symbol to the symbol table.
 *
 * The table stores the pointer itself, so both the struct and its name must
 * outlive every lookup.  NULL symbols (or symbols without a name) are
 * ignored; if the table is full the symbol is dropped.
 */
void insertSymbol(symbol *Symbol) {
    /* Check before touching Symbol->name: the old code dereferenced first. */
    if (Symbol == NULL || Symbol->name == NULL) {
        return;
    }

    unsigned int home = hash(Symbol->name);
    for (unsigned int step = 0; step < TABLESIZE; step++) {
        unsigned int slot = (home + step) % TABLESIZE;
        if (symbolTable[slot] == NULL) {
            symbolTable[slot] = Symbol;
            return;
        }
    }
}

/* getOpcode - table entry for a mnemonic, or NULL when it is unknown. */
opcode* getOpcode(const char *token) {
    return findOpcode(token);
}

/*
 * getAddress - address recorded for a symbol.
 *
 * Returns ADDRESS_NOT_FOUND (99999) when the symbol is NULL or not defined.
 * The same sentinel is used when the table is full and holds no match; the
 * previous code returned 99 there, which is indistinguishable from a real
 * address.
 */
int getAddress(const char *token) {
    if (token == NULL) {
        return ADDRESS_NOT_FOUND;
    }

    unsigned int home = hash(token);
    for (unsigned int step = 0; step < TABLESIZE; step++) {
        symbol *entry = symbolTable[(home + step) % TABLESIZE];
        if (entry == NULL) {
            return ADDRESS_NOT_FOUND;
        }
        if (strcmp(entry->name, token) == 0) {
            return entry->address;
        }
    }
    return ADDRESS_NOT_FOUND;
}

//this function is needed because the integer after START is represented in hex and needs to be converted to decimal for the addressCounter int.
+//the addressCounter int then gets displayed as hex at the end with %X +int hex2dec(int num){ + int dec = 0; + int remainder, ct = 0; + while (num > 0){ + remainder = num % 10; + dec += (remainder * power(16, ct)); + num /= 10; + ct++; + } + return dec; +} + +//I get an undefined reference to pow despite using math.h so I am defining pow here as power +int power(int x, int y){ + int z = x; + for(int i=1; i < y; i++){ + z *= x; + } + return z; +} + + + +int main(int argc, char* argv[]) { //argc = # of arguments. argv[1] is the assembly file to be opened + + //clear the memory for the hash table as soon as the program starts + for (int i = 0; i < TABLESIZE; i++) { + hashtable[i] = NULL; + symbolTable[i] = NULL; + } + + //check for incorrect command usage + if (argc != 2) { + printf("USAGE: %s \n", argv[0]); + return 1; + } + + FILE* inputFile; + FILE* outputFile; + inputFile = fopen(argv[1], "r"); //"r" parameter is for read-only + + + outputFile = fopen("pass1.txt", "w"); + //check if file is valid + if (!inputFile) { + printf("ASSEMBLY ERROR:\n\n%s could not be opened for reading.", argv[1]); + return 1; + + } + + if(true){ //this exists so I can easily collapse this section in the editor + //================================================================================== + //TABLE OF OPCODES (appropriately hardcoded) + //block selection mode and find and replace are my best friends + //I tried to have a separate function for this, but it did not work out. 
It must be in main() + + //Directives + opcode START = {.instr="START", .type=1}; + opcode END = {.instr="END", .type=1}; + opcode BYTE = {.instr="BYTE", .type=1}; + opcode WORD = {.instr="WORD", .type=1}; + opcode RESB = {.instr="RESB", .type=1}; + opcode RESW = {.instr="RESW", .type=1}; + opcode RESR = {.instr="RESR", .type=1}; + opcode EXPORTS = {.instr="EXPORTS", .type=1}; + //insert into hash table + insert(&START); + insert(&END); + insert(&BYTE); + insert(&WORD); + insert(&RESB); + insert(&RESW); + insert(&RESR); + insert(&EXPORTS); + + + //Opcodes + opcode ADD = {.instr="ADD", .opcode=0x18, .type=2}; + opcode ADDF = {.instr="ADDF", .opcode=0x58, .type=2}; + opcode ADDR = {.instr="ADDR", .opcode=0x90, .type=2}; + opcode AND = {.instr="AND", .opcode=0x40, .type=2}; + opcode CLEAR = {.instr="CLEAR", .opcode=0xB4, .type=2}; + opcode COMP = {.instr="COMP", .opcode=0x28, .type=2}; + opcode COMPF = {.instr="COMPF", .opcode=0x88, .type=2}; + opcode COMPR = {.instr="COMPR", .opcode=0xA0, .type=2}; + opcode DIV = {.instr="DIV", .opcode=0x24, .type=2}; + opcode DIVF = {.instr="DIVF", .opcode=0x64, .type=2}; + opcode DIVR = {.instr="DIVR", .opcode=0x9C, .type=2}; + opcode FIX = {.instr="FIX", .opcode=0xC4, .type=2}; + opcode FLOAT = {.instr="FLOAT", .opcode=0xC0, .type=2}; + opcode HIO = {.instr="HIO", .opcode=0xF4, .type=2}; + opcode J = {.instr="J", .opcode=0x3C, .type=2}; + opcode JEQ = {.instr="JEQ", .opcode=0x30, .type=2}; + opcode JGT = {.instr="JGT", .opcode=0x34, .type=2}; + opcode JLT = {.instr="JLT", .opcode=0x38, .type=2}; + opcode JSUB = {.instr="JSUB", .opcode=0x48, .type=2}; + opcode LDA = {.instr="LDA", .opcode=0x00, .type=2}; + opcode LDB = {.instr="LDB", .opcode=0x68, .type=2}; + opcode LDCH = {.instr="LDCH", .opcode=0x50, .type=2}; + opcode LDF = {.instr="LDF", .opcode=0x70, .type=2}; + opcode LDL = {.instr="LDL", .opcode=0x08, .type=2}; + opcode LDS = {.instr="LDS", .opcode=0x6C, .type=2}; + opcode LDT = {.instr="LDT", .opcode=0x74, .type=2}; + 
opcode LDX = {.instr="LDX", .opcode=0x04, .type=2}; + opcode LPS = {.instr="LPS", .opcode=0xD0, .type=2}; + opcode MUL = {.instr="MUL", .opcode=0x20, .type=2}; + opcode MULF = {.instr="MULF", .opcode=0x60, .type=2}; + opcode MULR = {.instr="MULR", .opcode=0x98, .type=2}; + opcode NORM = {.instr="NORM", .opcode=0xC8, .type=2}; + opcode OR = {.instr="OR", .opcode=0x44, .type=2}; + opcode RD = {.instr="RD", .opcode=0xD8, .type=2}; + opcode RMO = {.instr="RMO", .opcode=0xAC, .type=2}; + opcode RSUB = {.instr="RSUB", .opcode=0x4C, .type=2}; + opcode SHIFTL= {.instr="SHIFTL", .opcode=0xA4, .type=2}; + opcode SHIFTR= {.instr="SHIFTR", .opcode=0xA8, .type=2}; + opcode SIO = {.instr="SIO", .opcode=0xF0, .type=2}; + opcode SSK = {.instr="SSK", .opcode=0xEC, .type=2}; + opcode STA = {.instr="STA", .opcode=0x0C, .type=2}; + opcode STB = {.instr="STB", .opcode=0x78, .type=2}; + opcode STCH = {.instr="STCH", .opcode=0x54, .type=2}; + opcode STF = {.instr="STF", .opcode=0x80, .type=2}; + opcode STI = {.instr="STI", .opcode=0xD4, .type=2}; + opcode STL = {.instr="STL", .opcode=0x14, .type=2}; + opcode STS = {.instr="STS", .opcode=0x7C, .type=2}; + opcode STSW = {.instr="STSW", .opcode=0xE8, .type=2}; + opcode STT = {.instr="STT", .opcode=0x84, .type=2}; + opcode STX = {.instr="STX", .opcode=0x10, .type=2}; + opcode SUB = {.instr="SUB", .opcode=0x1C, .type=2}; + opcode SUBF = {.instr="SUBF", .opcode=0x5C, .type=2}; + opcode SUBR = {.instr="SUBR", .opcode=0x94, .type=2}; + opcode SVC = {.instr="SVC", .opcode=0xB0, .type=2}; + opcode TD = {.instr="TD", .opcode=0xE0, .type=2}; + opcode TIO = {.instr="TIO", .opcode=0xF8, .type=2}; + opcode TIX = {.instr="TIX", .opcode=0x2C, .type=2}; + opcode TIXR = {.instr="TIXR", .opcode=0xB8, .type=2}; + opcode WD = {.instr="WD", .opcode=0xDC, .type=2}; + //insert into hash table + insert(&ADD); + insert(&ADDF); + insert(&ADDR); + insert(&AND); + insert(&CLEAR); + insert(&COMP); + insert(&COMPF); + insert(&COMPR); + insert(&DIV); + insert(&DIVF); + 
insert(&DIVR); + insert(&FIX); + insert(&FLOAT); + insert(&HIO); + insert(&J); + insert(&JEQ); + insert(&JGT); + insert(&JLT); + insert(&JSUB); + insert(&LDA); + insert(&LDB); + insert(&LDCH); + insert(&LDF); + insert(&LDL); + insert(&LDS); + insert(&LDT); + insert(&LDX); + insert(&LPS); + insert(&MUL); + insert(&MULF); + insert(&MULR); + insert(&NORM); + insert(&OR); + insert(&RD); + insert(&RMO); + insert(&RSUB); + insert(&SHIFTL); + insert(&SHIFTR); + insert(&SIO); + insert(&SSK); + insert(&STA); + insert(&STB); + insert(&STCH); + insert(&STF); + insert(&STI); + insert(&STL); + insert(&STS); + insert(&STSW); + insert(&STT); + insert(&STX); + insert(&SUB); + insert(&SUBF); + insert(&SUBR); + insert(&SVC); + insert(&TD); + insert(&TIO); + insert(&TIX); + insert(&TIXR); + insert(&WD); + + //Registers + opcode A = {.instr="A", .opcode=0, .type=4}; + opcode X = {.instr="X", .opcode=0, .type=4}; + opcode L = {.instr="L", .opcode=0, .type=4}; + opcode PC = {.instr="PC", .opcode=0, .type=4}; + opcode SW = {.instr="SW", .opcode=0, .type=4}; + //insert into hash table + insert(&A); + insert(&X); + insert(&L); + insert(&PC); + insert(&SW); + }//================================================================================== + + //symbol array for temporarily storing symbols for insertion into the hash table + //symbol* symtable[TABLESIZE]; + + int lineCounter = 1; + int bytesUsed[TABLESIZE]; //keeping track of the space used per instruction/line. 
Useful for pass 2 + + //ADDRESS COUNTER + //Starts at 0, will be stored in decimal, then converted to hex + int addressCounter = 0; + int addressCounterArray[TABLESIZE]; //useful for pass 2 + //keep track of whether a start token has been encountered already + bool start = false; + //Token array for storing tokens + //also can serve to help verify that opcodes are followed by operands instead of other opcodes or directives + int n=0; + char* tokenA[4]; + + //start reading the file + char line[1024]; + while (fgets(line, 1024, inputFile)) { + //printf("READ: %s", line); + int length; + length = strlen(line); + if (length > 0) { //not a blank line + if (line[0] == 35) { //# + //printf("--was a comment\n"); + } + else if(((line[0] >= 65) && (line[0] <= 90)) || (line[0] == 9)) { //this is A-Z or a tab + //printf("--symbol definition: %s", line); + char* token = strtok(line, " \t\n"); //tokenize the line. Tokens are separated by spaces or tabs or newline characters + while (token) { + tokenA[n] = token; + //printf("\t %d---->%s\n", n, tokenA[n]); + token = strtok(NULL, " \t\n"); + + n++; + + + /* //check if the token is valid + if (getType(token) == 0) { + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid token: %s", token); + printf("Aborting..."); + printf("ASSEMBLY ABORTED DUE TO INVALID TOKEN"); + return 1; + } + if (strncmp(token, "END", TABLESIZE) == 0) { //EXITING ON END DIRECTIVE WILL CAUSE 2 TESTS TO FAIL + printf("END token found..."); + //return 0; + }*/ + + } + + //BEGIN ANALYSING THE LINE AND APPLYING THE LOGIC + if (getType(tokenA[0]) == DIRECTIVE) { + if (getType(tokenA[1]) == DIRECTIVE) { + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Duplicate directive detected! 
Did you define a symbol with a name that matches an assembler directive?\n ", line, lineCounter); + return 1; + } + if (strncmp(tokenA[0], "START", TABLESIZE) == 0){ + if(start){ + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d duplicate START token encountered!\n", line, lineCounter); + return 1; + } + start = true; + int startnum = atoi(tokenA[1]); //if casting fails here then I let the assembly fail + if (addressCounter == 0) {addressCounter = hex2dec(startnum);} + else{ + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d You can't have the START directive in the middle of the file!\n", line, lineCounter); + } + } + if (strncmp(tokenA[0], "BYTE", TABLESIZE) == 0){ + //do whatever needs to be done + if (tokenA[1][0] == 'X'){ + int bytenum = (strlen(tokenA[1]) - 3); //length of string minus the X and quotes + addressCounter += (bytenum / 2); + bytesUsed[lineCounter] = bytenum / 2; + } + else if (tokenA[1][0] == 'C'){ + addressCounter += (strlen(tokenA[1]) - 3); //length of string minus the C and quotes + bytesUsed[lineCounter] = (strlen(tokenA[1]) - 3); + } + else { + printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Must specify X or C after byte directive", line, lineCounter); + addressCounter += strlen(tokenA[1]); bytesUsed[lineCounter] = strlen(tokenA[1]);} + } + if (strncmp(tokenA[0], "WORD", TABLESIZE) == 0){ + //do whatever needs to be done + addressCounter += 3; + bytesUsed[lineCounter] = 3; + } + if (strncmp(tokenA[0], "RESB", TABLESIZE) == 0){ + //do whatever needs to be done + addressCounter += atoi(tokenA[1]); + bytesUsed[lineCounter] = atoi(tokenA[1]); + } + if (strncmp(tokenA[0], "RESW", TABLESIZE) == 0){ + //do whatever needs to be done + addressCounter += (3 * atoi(tokenA[1])); + bytesUsed[lineCounter] = (3 * atoi(tokenA[1])); + } + else {addressCounter += 3; bytesUsed[lineCounter] = 3;} + } + else if (getType(tokenA[0]) == OPCODE) { + if (getType(tokenA[1]) == OPCODE) { + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! 
\n", line, lineCounter); + return 1; + } + if (getType(tokenA[1]) == DIRECTIVE) { + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter); + return 1; + } + else {addressCounter += 3; bytesUsed[lineCounter] = 3;} + } + //else if (strncmp(tokenA[0], "RSUB", TABLESIZE) == 0) addressCounter += 3; //RSUB wasn't being detected as opcode for some reason even though getType returns 2, so I hardcoded this... <- should be fixed, but the hardcode remains just in case + else if (tokenA[0] == NULL) return 2; + + + //SYMBOL + else { //this means the first token is a symbol + //printf("Symbol encountered!\n"); + if (strlen(tokenA[0]) > 6){ + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Symbols cannot be longer than six characters! \n", line, lineCounter); + return 1; + } + if ((tokenA[0][0] <= 65) || (tokenA[0][0] > 91)){ //im hoping that tokenA[0][0] means the first character of the first token + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Symbols must start with an alpha character! 
\n ", line, lineCounter); + return 1; + } + + //record the symbol name and the current value of addressCounter (as hex) into new file + //if token is START, update address counter before recording + + if (strncmp(tokenA[1], "START", TABLESIZE) == 0){ + if(start){ + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d duplicate START token encountered!\n", line, lineCounter); + return 1; + } + start = true; + int startnum = atoi(tokenA[2]); //if casting fails here then I let the assembly fail + if (addressCounter == 0) {addressCounter = hex2dec(startnum);} + else{ + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d You can't have the START directive in the middle of the file!\n", line, lineCounter); + return 1; + } + } + + //printf("Recording token %s and address %X and writing to output file...\n\n", tokenA[0], addressCounter); + fprintf(outputFile, "%s %X\n", tokenA[0], addressCounter); + + + //insertSymbol(tokenA[0], addressCounter); + symbol inserttt = {.name=tokenA[0], .address=addressCounter}; + + + insertSymbol(&inserttt); + + + + //copy directive and opcode logic here, add 1 to all array positions + if (getType(tokenA[1]) == DIRECTIVE) { + if (getType(tokenA[2]) == DIRECTIVE) { + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Duplicate directive detected! 
Did you define a symbol with a name that matches an assembler directive?\n ", line, lineCounter); + return 1; + } + + if (strncmp(tokenA[1], "BYTE", TABLESIZE) == 0){ + //do whatever needs to be done + if (tokenA[2][0] == 'X'){ + //printf("\nByte operand recognized."); + int bytenum = (strlen(tokenA[2]) - 3); //length of string minus the X and quotes + //printf("\nIncreasing counter by %d\n", (bytenum / 2)); + addressCounter += (bytenum / 2); + bytesUsed[lineCounter] = (bytenum / 2); + } + else if (tokenA[2][0] == 'C'){ + + //printf("\nCharacter operand recognized.\nIncreasing counter by %lu\n", strlen(tokenA[2] - 3)); + addressCounter += (strlen(tokenA[2]) - 3); //length of string minus the C and quotes + //printf("addressCounter is %X at this point\n", addressCounter); + bytesUsed[lineCounter] = (strlen(tokenA[2]) - 3); + } + else {printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Must specify X or C after byte directive", line, lineCounter); addressCounter += strlen(tokenA[2]); bytesUsed[lineCounter] = strlen(tokenA[2]);} + } + if (strncmp(tokenA[1], "WORD", TABLESIZE) == 0){ + //do whatever needs to be done + addressCounter += 3; + bytesUsed[lineCounter] = 3; + } + if (strncmp(tokenA[1], "RESB", TABLESIZE) == 0){ + //do whatever needs to be done + addressCounter += atoi(tokenA[2]); + bytesUsed[lineCounter] = atoi(tokenA[2]); + } + if (strncmp(tokenA[1], "RESW", TABLESIZE) == 0){ + //do whatever needs to be done + addressCounter += (3 * atoi(tokenA[2])); + bytesUsed[lineCounter] = (3 * atoi(tokenA[2])); + } + else { + if(strncmp(tokenA[1], "START", TABLESIZE) != 0) {addressCounter += 0; bytesUsed[lineCounter] = 0;} + //addressCounter += 3; + }//increase by 3 only if token is not START because that was taken care of earlier + } + else if (getType(tokenA[1]) == OPCODE) { + if (getType(tokenA[2]) == OPCODE) { + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! 
\n", line, lineCounter); + return 1; + } + if (getType(tokenA[2]) == DIRECTIVE) { + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter); + return 1; + } + else {addressCounter += 3; bytesUsed[lineCounter] = 3;} + } + else if (tokenA[1] == NULL){ + //printf("tokenA[1] is null"); + //do nothing + } + else{ + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid token or duplicate symbol! ", line, lineCounter); + //printf("Developers note: tokenA[0] = %sToken[1] = %sToken[2] = %s",tokenA[0], tokenA[1],tokenA[2]); + return 1; + } + + //printf("addressCounter is %X at this point #2\n", addressCounter); + + } + + n = 0; //reset token array counter + addressCounterArray[lineCounter] = addressCounter; + lineCounter++; + + //causes seg fault + tokenA[0] = "empty"; + tokenA[1] = "empty"; + tokenA[2] = "empty"; + tokenA[3] = "empty"; + + + } + else{ + printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid line!", line, lineCounter); + return 1; + } + + + } + + + + + } + fclose(outputFile); + fclose(inputFile); + + /* ========================================================================== + * =================================PASS 2=================================== + * ========================================================================== + */ + + + FILE* objectFile; + char pass2file[0x100]; + snprintf(pass2file, sizeof(pass2file), "%s.obj", argv[1]); + objectFile = fopen(pass2file, "w"); + + + FILE* pass2; + pass2 = fopen(argv[1], "r"); + + char* tokenB[4] = {"empty2", "empty2", "empty2", "empty2"}; + n = 0; + int lastLine = lineCounter; //record the number of lines in the file + lineCounter = 1; + //start reading the file + //char line2[1024]; + while (fgets(line, 1024, pass2)) { + + int length; + length = strlen(line); + if (length > 0) { //not a blank line + if (line[0] == 35) { //# + //do nothing + } + else if(true){ //(((line[0] >= 65) && (line[0] <= 90)) || (line[0] == 9)) { //this is A-Z or a tab + + char* token = 
strtok(line, " \t\n"); //tokenize the line. Tokens are separated by spaces or tabs or newline characters + while (token) { + //char* tokenB; + tokenB[n] = token; + //printf("\t %d---->%s\n", n, tokenB[n]); + token = strtok(NULL, " \t\n"); + + n++; + + } + + //printf("\n %s %s %s\n", tokenB[0], tokenB[1], tokenB[2]); + + if(lineCounter == 1){ //print the header record + fprintf(objectFile, "H"); //file must begin with header record + int tokenlen = strlen(tokenB[0]); + if (tokenlen > 6){ + printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Symbol name too long!", line, lineCounter); + return 2; + } + fprintf(objectFile, "%s", tokenB[0]); + for(int l = 0; l < (6 - tokenlen); l++){ + fprintf(objectFile, " "); + } + fprintf(objectFile, "00"); //printing 2 zeros before starting address + fprintf(objectFile, "%X%X\n", addressCounterArray[1], (addressCounterArray[(lastLine-1)] - addressCounterArray[1])); + //printf("\nlast line is %d\n", lastLine); + } + else if(lineCounter == lastLine-1){ + //print the E, the leading zeros, and the first memory address + fprintf(objectFile, "E00%X", addressCounterArray[1]); + + } + else if ((getType(tokenB[0]) == OPCODE) || ((getType(tokenB[0]) == DIRECTIVE) && (strncmp(tokenB[0], "RESB", TABLESIZE) != 0) && (strncmp(tokenB[0], "RESW", TABLESIZE) != 0))){ + //if RESB or RESW is encountered, do nothing + fprintf(objectFile, "T00"); //print start of Text record (& 2 leading 0s) + fprintf(objectFile, "%X", addressCounterArray[lineCounter]); + fprintf(objectFile, "%02X", bytesUsed[lineCounter]); + if(strncmp(tokenB[0], "RSUB", TABLESIZE) == 0){ + //no address recorded for RSUB + fprintf(objectFile, "%X0000\n", getOpcode(tokenB[0])->opcode); + } + else if (getType(tokenB[0]) == DIRECTIVE){ + + if (strncmp(tokenB[0], "BYTE", TABLESIZE) == 0){ + //do whatever needs to be done + if (tokenB[1][0] == 'X'){ + bool notEnd = true; + for(int p = 2; notEnd; p++){ + if(tokenB[1][p] == '\''){notEnd = false;} + if(tokenB[1][p] == '\''){break;} + //I know this is an 
amateur move but I'm doing it anyway because I don't have time to mess around with regular expression formatting in c + if((tokenB[1][p] != '1') || (tokenB[1][p] != '2') || (tokenB[1][p] != '3') || (tokenB[1][p] != '4') || (tokenB[1][p] != '5') || (tokenB[1][p] != '6') || (tokenB[1][p] != '7') || (tokenB[1][p] != '8') || (tokenB[1][p] != '9') || (tokenB[1][p] != 'A') || (tokenB[1][p] != 'B') || (tokenB[1][p] != 'C') || (tokenB[1][p] != 'D') || (tokenB[1][p] != 'E') || (tokenB[1][p] != 'F')){ + printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Invalid byte constant!", line, lineCounter); + } + fprintf(objectFile, "%c", tokenB[1][p]); + } + } + else if (tokenB[1][0] == 'C'){ + bool notEnd = true; + for(int p = 2; notEnd; p++){ + if(tokenB[1][p] == '\''){notEnd = false;} + fprintf(objectFile, "%d", tokenB[1][p]); + } + } + else {printf("possible error");} + } + if (strncmp(tokenB[0], "WORD", TABLESIZE) == 0){ + //print 6-strlen zeros, then print the hex version of the int + for(int a = 0; a < (6 - strlen(tokenB[0])); a++){ + fprintf(objectFile, "0"); + } + fprintf(objectFile, "%X", atoi(tokenB[0])); + } + + fprintf(objectFile, "\n"); //print a newline after all is done + } + else{ + fprintf(objectFile, "%X%X\n", getOpcode(tokenB[0])->opcode, getAddress(tokenB[1])); + + } + } + + else{ //this means the first token is a symbol + //copy the above opcode and directive logic here, add 1 to all tokenB positions + + if ((getType(tokenB[1]) == OPCODE) || ((getType(tokenB[1]) == DIRECTIVE) && (strncmp(tokenB[1], "RESB", TABLESIZE) != 0) && (strncmp(tokenB[1], "RESW", TABLESIZE) != 0))){ + //if RESB or RESW is encountered, do nothing + fprintf(objectFile, "T00"); //print start of Text record (& 2 leading 0s) + fprintf(objectFile, "%X", addressCounterArray[lineCounter]); + fprintf(objectFile, "%02X", bytesUsed[lineCounter]); + if(strncmp(tokenB[1], "RSUB", TABLESIZE) == 0){ + //no address recorded for RSUB + fprintf(objectFile, "%X0000\n", getOpcode(tokenB[1])->opcode); + } + else if 
(getType(tokenB[1]) == DIRECTIVE){ + + if (strncmp(tokenB[1], "BYTE", TABLESIZE) == 0){ + //do whatever needs to be done + if (tokenB[2][0] == 'X'){ + bool notEnd = true; + for(int p = 2; notEnd; p++){ + if(tokenB[2][p] == '\''){notEnd = false;} + if(tokenB[2][p] == '\''){break;} + fprintf(objectFile, "%c", tokenB[2][p]); + } + } + else if (tokenB[2][0] == 'C'){ + bool notEnd = true; + for(int p = 2; notEnd; p++){ + if(tokenB[2][p] == '\''){notEnd = false;} + fprintf(objectFile, "%d", tokenB[1][p]); + } + } + else {printf("possible error");} + } + if (strncmp(tokenB[1], "WORD", TABLESIZE) == 0){ + //print 6-strlen zeros, then print the hex version of the int + for(int a = 0; a < (6 - strlen(tokenB[1])); a++){ + fprintf(objectFile, "0"); + } + fprintf(objectFile, "%X", atoi(tokenB[1])); + } + + fprintf(objectFile, "\n"); //print a newline after all is done + } + else{ + fprintf(objectFile, "%X%X\n", getOpcode(tokenB[1])->opcode, getAddress(tokenB[2])); + //printf("\n%d\n", getAddress(tokenB[2])); + } + } + + } + + lineCounter++; + //n = 0; //reset token array counter + } + + } + + //lineCounter++; + n = 0; //reset token array counter + } + + + + + + + fclose(objectFile); + fclose(pass2); + + //for(int test = 0; test < 200; test++){printf("\n%d", symbolTable[test]->address);} + + return 0; +} -- cgit v1.2.1