#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
//#include <math.h>

// Number of slots in each of the two hash tables declared below. The lookup
// helpers also pass it to strncmp() as the comparison bound, and main() uses
// it to size its per-line bookkeeping arrays.
#define TABLESIZE 200

// Token categories stored in the `type` field of an opcode entry and returned
// by getType(); getType() returns 0 for a token that is not in the table.
// NOTE(review): OPERAND and SYMBOL are not referenced in the visible part of
// this file -- confirm whether they are still needed.
#define DIRECTIVE 1
#define OPCODE 2
#define OPERAND 3
#define REGISTER 4
#define SYMBOL 5

// One entry of the instruction table: a mnemonic, its numeric opcode and the
// category of token it represents.
typedef struct {
    char *instr; // mnemonic text, e.g. "ADD"; this is the key that gets hashed
    int opcode;  // numeric opcode value; 0 for the directive and register entries
    int type;    // token category: DIRECTIVE, OPCODE, OPERAND, REGISTER or SYMBOL
} opcode;

// One entry of the symbol table: a label and the address recorded for it.
typedef struct {
    char *name;  // label text; this is the key that gets hashed
    int address; // value of the address counter when the label was recorded
    int opcode;  // NOTE(review): not set or read in the visible part of this file -- confirm use
} symbol;

// Instruction table: filled by insert() and probed by isValid(), getType()
// and getOpcode(). A NULL pointer marks an empty slot.
opcode* hashtable[TABLESIZE]; //array of pointers for the hash table of opcodes

// Symbol table: filled by insertSymbol() and probed by getAddress().
// A NULL pointer marks an empty slot.
symbol* symbolTable[TABLESIZE]; //array of pointers for the hash table of symbols


// Forward declarations of the helpers defined below.
// Slot index for a string; the result is always below TABLESIZE.
unsigned int hash(char*);
// Instruction table (hashtable): insertion, membership test and category lookup.
void insert(opcode *opcode);
bool isValid(char*);
int getType(char*);
// Symbol table (symbolTable): insertion.
void insertSymbol(symbol *Symbol);
// Instruction-table lookup that returns the whole entry, or NULL if absent.
opcode* getOpcode(char*);
// Symbol-table lookup that returns the address stored for a label.
int getAddress(char* token);
// Numeric helpers used when the operand of START is converted.
int hex2dec(int);
int power(int, int);

// Maps a NUL-terminated string to a slot index in the range [0, TABLESIZE).
// For every character the running value is increased by the character code,
// multiplied by that same code and then reduced modulo TABLESIZE.
// An empty string hashes to 0.
unsigned int hash(char *opcode) {
    unsigned int slot = 0;

    for (char *ch = opcode; *ch != '\0'; ch++) {
        // reducing on every step keeps the value below TABLESIZE throughout
        slot = ((slot + *ch) * *ch) % TABLESIZE;
    }

    return slot;
}

// Adds an entry to the instruction table (hashtable) using linear probing.
// The pointer itself is stored; no copy of the entry is made.
// A NULL argument is ignored, and the entry is silently dropped when every
// slot of the table is already occupied.
void insert(opcode *opcode) {
    if (opcode == NULL) {
        return;
    }

    int home = hash(opcode->instr); // preferred slot for this mnemonic
    int step = 0;
    while (step < TABLESIZE) {
        int slot = (home + step) % TABLESIZE; // wrap around the end of the table
        if (hashtable[slot] == NULL) {
            hashtable[slot] = opcode; // first free slot on the probe path
            return;
        }
        step++;
    }
}

// Reports whether token is present in the instruction table (hashtable).
// Probing starts at the token's hash slot and moves forward with wrap-around;
// the search stops at the first empty slot or after TABLESIZE slots.
bool isValid(char* token) {
    int home = hash(token);
    int offset = 0;

    while (offset < TABLESIZE) {
        int slot = (home + offset) % TABLESIZE;
        if (hashtable[slot] == NULL) {
            break; // an empty slot ends the probe path: the token is not stored
        }
        if (strncmp(hashtable[slot]->instr, token, TABLESIZE) == 0) {
            return true;
        }
        offset++;
    }

    return false;
}

// Looks token up in the instruction table (hashtable) and returns the `type`
// stored in its entry (see the DIRECTIVE/OPCODE/... definitions).
// Returns 0 when the token is not in the table, so the result can also be
// used as a validity check.
int getType(char* token) {
    const int home = hash(token);
    int category = 0; // 0 means "not found"

    for (int offset = 0; offset < TABLESIZE; offset++) {
        const int slot = (home + offset) % TABLESIZE; // linear probing with wrap-around
        if (hashtable[slot] == NULL) {
            break; // an empty slot ends the probe path
        }
        if (strncmp(hashtable[slot]->instr, token, TABLESIZE) == 0) {
            category = hashtable[slot]->type;
            break;
        }
    }

    return category;
}

// Adds a symbol to the symbol table (symbolTable) using linear probing on the
// hash of its name.
//
// Symbol: entry to store. A NULL pointer, or an entry whose name is NULL, is
//         ignored.
//
// The table stores the pointer itself, not a copy: the symbol object and the
// string its name points to must stay valid and unchanged for as long as the
// table is searched. The entry is silently dropped when every slot of the
// table is already occupied.
void insertSymbol(symbol *Symbol) {
    // Check the pointer before touching any of its fields.
    if (Symbol == NULL || Symbol->name == NULL) {
        return;
    }

    int home = hash(Symbol->name); // preferred slot for this name
    for (int step = 0; step < TABLESIZE; step++) {
        int slot = (home + step) % TABLESIZE; // wrap around the end of the table
        if (symbolTable[slot] == NULL) {
            symbolTable[slot] = Symbol; // first free slot on the probe path
            return;
        }
    }
}

opcode* getOpcode(char* token) {
    int i = hash(token);
    for (int j = 0; j < TABLESIZE; j++) {
        int try = (i + j) % TABLESIZE; //for loop and this line is part of the collision handling logic
        if (hashtable[try] == NULL) { //automatically return false if there is nothing there
            return NULL;
            //this indicates the instruction is invalid
        }
        if (strncmp(hashtable[try]->instr, token, TABLESIZE) == 0) {
            //int op = hashtable[try]->opcode;
            return hashtable[try]; //returns the opcode
        }
    }
    
    return 0;
}

// Looks token up in the symbol table (symbolTable) and returns the address
// stored in its entry.
// Probing starts at the token's hash slot and moves forward with wrap-around.
// Returns 99999 when the symbol is absent, i.e. when an empty slot is reached
// or all TABLESIZE slots were examined without a match.
// NOTE(review): 99999 is an in-band sentinel; callers have to compare the
// result against it before treating it as an address.
int getAddress(char* token)
{
    enum { NOT_FOUND = 99999 };

    int home = hash(token);
    for (int step = 0; step < TABLESIZE; step++) {
        int slot = (home + step) % TABLESIZE;
        if (symbolTable[slot] == NULL) {
            return NOT_FOUND; // an empty slot ends the probe path
        }
        if (strncmp(symbolTable[slot]->name, token, TABLESIZE) == 0) {
            return symbolTable[slot]->address;
        }
    }

    return NOT_FOUND; // table completely full and no entry matched
}

// Reinterprets the decimal digits of num as hexadecimal digits and returns the
// resulting value, e.g. 1000 -> 0x1000 (4096) and 25 -> 0x25 (37).
// main() needs this because the operand of START is written in hex but is
// read with atoi(); the address counter is later printed with %X.
//
// num: non-negative integer whose decimal digits are the hex digits to use.
//      Zero and negative values yield 0.
//
// Limitations: the hex digits A-F cannot be expressed in an int argument, and
// results that do not fit in an int are not handled.
int hex2dec(int num){
    int value = 0;
    int place = 1; // weight of the current digit: 1, 16, 256, ...

    while (num > 0) {
        value += (num % 10) * place;
        num /= 10;
        if (num > 0) {
            place *= 16; // only advance when another digit follows
        }
    }

    return value;
}

// Integer exponentiation: returns x multiplied by itself y times.
// Stand-in for pow() from <math.h>, which the author could not get to link
// (pow() normally requires linking the math library, e.g. -lm).
//
// x: base.
// y: exponent, expected to be non-negative. For y <= 0 the loop does not run
//    and 1 is returned, so power(x, 0) == 1.
//
// Overflow of int is not detected.
int power(int x, int y){
    int result = 1;
    for (int i = 0; i < y; i++) {
        result *= x;
    }
    return result;
}



int main(int argc, char* argv[]) { //argc = # of arguments. argv[1] is the assembly file to be opened

    //clear the memory for the hash table as soon as the program starts
    for (int i = 0; i < TABLESIZE; i++) {
        hashtable[i] = NULL;
        symbolTable[i] = NULL;
    }

    //check for incorrect command usage
    if (argc != 2) {
        printf("USAGE: %s <filename>\n", argv[0]);
        return 1;
    }

    FILE* inputFile;
    FILE* outputFile;
    inputFile = fopen(argv[1], "r"); //"r" parameter is for read-only
    
    
    outputFile = fopen("pass1.txt", "w");
    //check if file is valid
    if (!inputFile) {
        printf("ASSEMBLY ERROR:\n\n%s could not be opened for reading.", argv[1]);
        return 1;
        
    }
    
    if(true){ //this exists so I can easily collapse this section in the editor
    //==================================================================================
    //TABLE OF OPCODES (appropriately hardcoded)
    //block selection mode and find and replace are my best friends
    //I tried to have a separate function for this, but it did not work out. It must be in main()
    
    //Directives
    opcode START = {.instr="START",  .type=1};
    opcode END = {.instr="END", .type=1};
    opcode BYTE = {.instr="BYTE", .type=1};
    opcode WORD = {.instr="WORD", .type=1};
    opcode RESB = {.instr="RESB", .type=1};
    opcode RESW = {.instr="RESW", .type=1};
    opcode RESR = {.instr="RESR", .type=1};
    opcode EXPORTS = {.instr="EXPORTS", .type=1};
    //insert into hash table
    insert(&START);
    insert(&END);
    insert(&BYTE);
    insert(&WORD);
    insert(&RESB);
    insert(&RESW);
    insert(&RESR);
    insert(&EXPORTS);

    
    //Opcodes
    opcode ADD   = {.instr="ADD", .opcode=0x18, .type=2};
    opcode ADDF  = {.instr="ADDF", .opcode=0x58, .type=2};
    opcode ADDR  = {.instr="ADDR", .opcode=0x90, .type=2};
    opcode AND   = {.instr="AND", .opcode=0x40, .type=2};
    opcode CLEAR = {.instr="CLEAR", .opcode=0xB4, .type=2};
    opcode COMP  = {.instr="COMP", .opcode=0x28, .type=2};
    opcode COMPF = {.instr="COMPF", .opcode=0x88, .type=2};
    opcode COMPR = {.instr="COMPR", .opcode=0xA0, .type=2};
    opcode DIV   = {.instr="DIV", .opcode=0x24, .type=2};
    opcode DIVF  = {.instr="DIVF", .opcode=0x64, .type=2};
    opcode DIVR  = {.instr="DIVR", .opcode=0x9C, .type=2};
    opcode FIX   = {.instr="FIX", .opcode=0xC4, .type=2};
    opcode FLOAT = {.instr="FLOAT", .opcode=0xC0, .type=2};
    opcode HIO   = {.instr="HIO", .opcode=0xF4, .type=2};
    opcode J     = {.instr="J", .opcode=0x3C, .type=2};
    opcode JEQ   = {.instr="JEQ", .opcode=0x30, .type=2};
    opcode JGT   = {.instr="JGT", .opcode=0x34, .type=2};
    opcode JLT   = {.instr="JLT", .opcode=0x38, .type=2};
    opcode JSUB  = {.instr="JSUB", .opcode=0x48, .type=2};
    opcode LDA   = {.instr="LDA", .opcode=0x00, .type=2};
    opcode LDB   = {.instr="LDB", .opcode=0x68, .type=2};
    opcode LDCH  = {.instr="LDCH", .opcode=0x50, .type=2};
    opcode LDF   = {.instr="LDF", .opcode=0x70, .type=2};
    opcode LDL   = {.instr="LDL", .opcode=0x08, .type=2};
    opcode LDS   = {.instr="LDS", .opcode=0x6C, .type=2};
    opcode LDT   = {.instr="LDT", .opcode=0x74, .type=2};
    opcode LDX   = {.instr="LDX", .opcode=0x04, .type=2};
    opcode LPS   = {.instr="LPS", .opcode=0xD0, .type=2};
    opcode MUL   = {.instr="MUL", .opcode=0x20, .type=2};
    opcode MULF  = {.instr="MULF", .opcode=0x60, .type=2};
    opcode MULR  = {.instr="MULR", .opcode=0x98, .type=2};
    opcode NORM  = {.instr="NORM", .opcode=0xC8, .type=2};
    opcode OR    = {.instr="OR", .opcode=0x44, .type=2};
    opcode RD    = {.instr="RD", .opcode=0xD8, .type=2};
    opcode RMO   = {.instr="RMO", .opcode=0xAC, .type=2};
    opcode RSUB  = {.instr="RSUB", .opcode=0x4C, .type=2};
    opcode SHIFTL= {.instr="SHIFTL", .opcode=0xA4, .type=2};
    opcode SHIFTR= {.instr="SHIFTR", .opcode=0xA8, .type=2};
    opcode SIO   = {.instr="SIO", .opcode=0xF0, .type=2};
    opcode SSK   = {.instr="SSK", .opcode=0xEC, .type=2};
    opcode STA   = {.instr="STA", .opcode=0x0C, .type=2};
    opcode STB   = {.instr="STB", .opcode=0x78, .type=2};
    opcode STCH  = {.instr="STCH", .opcode=0x54, .type=2};
    opcode STF   = {.instr="STF", .opcode=0x80, .type=2};
    opcode STI   = {.instr="STI", .opcode=0xD4, .type=2};
    opcode STL   = {.instr="STL", .opcode=0x14, .type=2};
    opcode STS   = {.instr="STS", .opcode=0x7C, .type=2};
    opcode STSW  = {.instr="STSW", .opcode=0xE8, .type=2};
    opcode STT   = {.instr="STT", .opcode=0x84, .type=2};
    opcode STX   = {.instr="STX", .opcode=0x10, .type=2};
    opcode SUB   = {.instr="SUB", .opcode=0x1C, .type=2};
    opcode SUBF  = {.instr="SUBF", .opcode=0x5C, .type=2};
    opcode SUBR  = {.instr="SUBR", .opcode=0x94, .type=2};
    opcode SVC   = {.instr="SVC", .opcode=0xB0, .type=2};
    opcode TD    = {.instr="TD", .opcode=0xE0, .type=2};
    opcode TIO   = {.instr="TIO", .opcode=0xF8, .type=2};
    opcode TIX   = {.instr="TIX", .opcode=0x2C, .type=2};
    opcode TIXR  = {.instr="TIXR", .opcode=0xB8, .type=2};
    opcode WD    = {.instr="WD", .opcode=0xDC, .type=2};
    //insert into hash table
    insert(&ADD);
    insert(&ADDF);
    insert(&ADDR);
    insert(&AND);
    insert(&CLEAR);
    insert(&COMP);
    insert(&COMPF);
    insert(&COMPR);
    insert(&DIV);
    insert(&DIVF);
    insert(&DIVR);
    insert(&FIX);
    insert(&FLOAT);
    insert(&HIO);
    insert(&J);
    insert(&JEQ);
    insert(&JGT);
    insert(&JLT);
    insert(&JSUB);
    insert(&LDA);
    insert(&LDB);
    insert(&LDCH);
    insert(&LDF);
    insert(&LDL);
    insert(&LDS);
    insert(&LDT);
    insert(&LDX);
    insert(&LPS);
    insert(&MUL);
    insert(&MULF);
    insert(&MULR);
    insert(&NORM);
    insert(&OR);
    insert(&RD);
    insert(&RMO);
    insert(&RSUB);
    insert(&SHIFTL);
    insert(&SHIFTR);
    insert(&SIO);
    insert(&SSK);
    insert(&STA);
    insert(&STB);
    insert(&STCH);
    insert(&STF);
    insert(&STI);
    insert(&STL);
    insert(&STS);
    insert(&STSW);
    insert(&STT);
    insert(&STX);
    insert(&SUB);
    insert(&SUBF);
    insert(&SUBR);
    insert(&SVC);
    insert(&TD);
    insert(&TIO);
    insert(&TIX);
    insert(&TIXR);
    insert(&WD);
    
    //Registers
    opcode A = {.instr="A", .opcode=0, .type=4};
    opcode X = {.instr="X", .opcode=0, .type=4};
    opcode L = {.instr="L", .opcode=0, .type=4};
    opcode PC = {.instr="PC", .opcode=0, .type=4};
    opcode SW = {.instr="SW", .opcode=0, .type=4};
    //insert into hash table
    insert(&A);
    insert(&X);
    insert(&L);
    insert(&PC);
    insert(&SW);
    }//==================================================================================
    
    //symbol array for temporarily storing symbols for insertion into the hash table
    //symbol* symtable[TABLESIZE];
    
    int lineCounter = 1;
    int bytesUsed[TABLESIZE]; //keeping track of the space used per instruction/line. Useful for pass 2
    
    //ADDRESS COUNTER
    //Starts at 0, will be stored in decimal, then converted to hex
    int addressCounter = 0;
    int addressCounterArray[TABLESIZE]; //useful for pass 2
    //keep track of whether a start token has been encountered already
    bool start = false;
    //Token array for storing tokens 
    //also can serve to help verify that opcodes are followed by operands instead of other opcodes or directives
    int n=0;
    char* tokenA[4];

    //start reading the file
    char line[1024];
        while (fgets(line, 1024, inputFile)) {
            //printf("READ: %s", line);
            int length;
            length = strlen(line);
            if (length > 0) { //not a blank line
                if (line[0] == 35) { //#
                    //printf("--was a comment\n");
                }
                else if(((line[0] >= 65) && (line[0] <= 90)) || (line[0] == 9)) { //this is A-Z or a tab
                    //printf("--symbol definition: %s", line);
                    char* token = strtok(line, " \t\n"); //tokenize the line. Tokens are separated by spaces or tabs or newline characters
                    while (token) {
                        tokenA[n] = token;
                        //printf("\t %d---->%s\n", n, tokenA[n]);
                        token = strtok(NULL, " \t\n"); 
                        
                        n++;
                        
                        
                        /* //check if the token is valid
                        if (getType(token) == 0) {
                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid token: %s", token);
                            printf("Aborting...");
                            printf("ASSEMBLY ABORTED DUE TO INVALID TOKEN");
                            return 1;
                        }
                        if (strncmp(token, "END", TABLESIZE) == 0) { //EXITING ON END DIRECTIVE WILL CAUSE 2 TESTS TO FAIL
                            printf("END token found...");
                            //return 0;
                        }*/
                        
                    }
                    
                    //BEGIN ANALYSING THE LINE AND APPLYING THE LOGIC
                    if (getType(tokenA[0]) == DIRECTIVE) {
                            if (getType(tokenA[1]) == DIRECTIVE) {
                                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Duplicate directive detected! Did you define a symbol with a name that matches an assembler directive?\n ", line, lineCounter);
                                return 1;
                            }
                            if (strncmp(tokenA[0], "START", TABLESIZE) == 0){
                                if(start){
                                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d duplicate START token encountered!\n", line, lineCounter);
                                    return 1;
                                }
                                start = true;
                                int startnum = atoi(tokenA[1]); //if casting fails here then I let the assembly fail
                                if (addressCounter == 0) {addressCounter = hex2dec(startnum);}
                                else{
                                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d You can't have the START directive in the middle of the file!\n", line, lineCounter);
                                }
                            }
                            if (strncmp(tokenA[0], "BYTE", TABLESIZE) == 0){
                                //do whatever needs to be done
                                if (tokenA[1][0] == 'X'){
                                    int bytenum = (strlen(tokenA[1]) - 3); //length of string minus the X and quotes
                                    addressCounter += (bytenum / 2);
                                    bytesUsed[lineCounter] = bytenum / 2;
                                }
                                else if (tokenA[1][0] == 'C'){
                                    addressCounter += (strlen(tokenA[1]) - 3); //length of string minus the C and quotes
                                    bytesUsed[lineCounter] = (strlen(tokenA[1]) - 3);
                                }
                                else {
                                    printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Must specify X or C after byte directive", line, lineCounter); 
                                    addressCounter += strlen(tokenA[1]); bytesUsed[lineCounter] = strlen(tokenA[1]);}
                            }
                            if (strncmp(tokenA[0], "WORD", TABLESIZE) == 0){
                                //do whatever needs to be done
                                addressCounter += 3;
                                bytesUsed[lineCounter] = 3;
                            }
                            if (strncmp(tokenA[0], "RESB", TABLESIZE) == 0){
                                //do whatever needs to be done
                                addressCounter += atoi(tokenA[1]);
                                bytesUsed[lineCounter] = atoi(tokenA[1]);
                            }
                            if (strncmp(tokenA[0], "RESW", TABLESIZE) == 0){
                                //do whatever needs to be done
                                addressCounter += (3 * atoi(tokenA[1]));
                                bytesUsed[lineCounter] = (3 * atoi(tokenA[1]));
                            }
                            else {addressCounter += 3; bytesUsed[lineCounter] = 3;}
                    }
                    else if (getType(tokenA[0]) == OPCODE) {
                        if (getType(tokenA[1]) == OPCODE) {
                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter);
                            return 1;
                        }
                        if (getType(tokenA[1]) == DIRECTIVE) {
                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter);
                            return 1;
                        }
                        else {addressCounter += 3; bytesUsed[lineCounter] = 3;}
                    }
                    //else if (strncmp(tokenA[0], "RSUB", TABLESIZE) == 0) addressCounter += 3; //RSUB wasn't being detected as opcode for some reason even though getType returns 2, so I hardcoded this... <- should be fixed, but the hardcode remains just in case
                    else if (tokenA[0] == NULL) return 2;
                    
                    
                    //SYMBOL
                    else { //this means the first token is a symbol
                        //printf("Symbol encountered!\n"); 
                        if (strlen(tokenA[0]) > 6){
                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Symbols cannot be longer than six characters! \n", line, lineCounter);
                            return 1;
                        }
                        if ((tokenA[0][0] <= 65) || (tokenA[0][0] > 91)){ //im hoping that tokenA[0][0] means the first character of the first token
                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Symbols must start with an alpha character! \n ", line, lineCounter);
                            return 1;
                        }
                        
                        //record the symbol name and the current value of addressCounter (as hex) into new file
                        //if token is START, update address counter before recording
                        
                        if (strncmp(tokenA[1], "START", TABLESIZE) == 0){
                                if(start){
                                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d duplicate START token encountered!\n", line, lineCounter);
                                    return 1;
                                }
                                start = true;
                                int startnum = atoi(tokenA[2]); //if casting fails here then I let the assembly fail
                                if (addressCounter == 0) {addressCounter = hex2dec(startnum);}
                                else{
                                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d You can't have the START directive in the middle of the file!\n", line, lineCounter);
                                    return 1;
                                }
                            }
                            
                        //printf("Recording token %s and address %X and writing to output file...\n\n", tokenA[0], addressCounter);
                        fprintf(outputFile, "%s     %X\n", tokenA[0], addressCounter);
                        
                        
                        //insertSymbol(tokenA[0], addressCounter);
                        symbol inserttt = {.name=tokenA[0], .address=addressCounter};
                        
                        
                        insertSymbol(&inserttt);
                   
                      
                        
                        //copy directive and opcode logic here, add 1 to all array positions
                        if (getType(tokenA[1]) == DIRECTIVE) {
                            if (getType(tokenA[2]) == DIRECTIVE) {
                                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Duplicate directive detected! Did you define a symbol with a name that matches an assembler directive?\n ", line, lineCounter);
                                return 1;
                            }
                            
                            if (strncmp(tokenA[1], "BYTE", TABLESIZE) == 0){
                                //do whatever needs to be done
                                if (tokenA[2][0] == 'X'){
                                    //printf("\nByte operand recognized.");
                                    int bytenum = (strlen(tokenA[2]) - 3); //length of string minus the X and quotes
                                    //printf("\nIncreasing counter by %d\n", (bytenum / 2));
                                    addressCounter += (bytenum / 2);
                                    bytesUsed[lineCounter] = (bytenum / 2);
                                }
                                else if (tokenA[2][0] == 'C'){
                                    
                                    //printf("\nCharacter operand recognized.\nIncreasing counter by %lu\n", strlen(tokenA[2] - 3));
                                    addressCounter += (strlen(tokenA[2]) - 3); //length of string minus the C and quotes
                                    //printf("addressCounter is %X at this point\n", addressCounter);
                                    bytesUsed[lineCounter] = (strlen(tokenA[2]) - 3);
                                }
                                else {printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Must specify X or C after byte directive", line, lineCounter); addressCounter += strlen(tokenA[2]); bytesUsed[lineCounter] = strlen(tokenA[2]);}
                            }
                            if (strncmp(tokenA[1], "WORD", TABLESIZE) == 0){
                                //do whatever needs to be done
                                addressCounter += 3;
                                bytesUsed[lineCounter] = 3;
                            }
                            if (strncmp(tokenA[1], "RESB", TABLESIZE) == 0){
                                //do whatever needs to be done
                                addressCounter += atoi(tokenA[2]);
                                bytesUsed[lineCounter] = atoi(tokenA[2]);
                            }
                            if (strncmp(tokenA[1], "RESW", TABLESIZE) == 0){
                                //do whatever needs to be done
                                addressCounter += (3 * atoi(tokenA[2]));
                                bytesUsed[lineCounter] = (3 * atoi(tokenA[2]));
                            }
                            else {
                                if(strncmp(tokenA[1], "START", TABLESIZE) != 0) {addressCounter += 0; bytesUsed[lineCounter] = 0;}
                                //addressCounter += 3;
                            }//increase by 3 only if token is not START because that was taken care of earlier
                        }
                        else if (getType(tokenA[1]) == OPCODE) {
                            if (getType(tokenA[2]) == OPCODE) {
                                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter);
                                return 1;
                            }
                            if (getType(tokenA[2]) == DIRECTIVE) {
                                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter);
                                return 1;
                            }
                            else {addressCounter += 3; bytesUsed[lineCounter] = 3;}
                        }
                        else if (tokenA[1] == NULL){
                            //printf("tokenA[1] is null");
                            //do nothing
                        }
                        else{
                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid token or duplicate symbol! ", line, lineCounter);
                            //printf("Developers note: tokenA[0] = %sToken[1] = %sToken[2] = %s",tokenA[0], tokenA[1],tokenA[2]);
                            return 1;
                        }
                        
                        //printf("addressCounter is %X at this point #2\n", addressCounter);
                        
                    }
                    
                    n = 0; //reset token array counter
                    addressCounterArray[lineCounter] = addressCounter;
                    lineCounter++;
                    
                    //causes seg fault 
                    tokenA[0] = "empty";
                    tokenA[1] = "empty";
                    tokenA[2] = "empty";
                    tokenA[3] = "empty";


                }
                else{
                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid line!", line, lineCounter);
                    return 1;
                }
                

            }




        }
        fclose(outputFile);
        fclose(inputFile);
        
        /* ==========================================================================
        * =================================PASS 2===================================
        * ==========================================================================
        */
        
    
    FILE* objectFile;
    char pass2file[0x100];
    snprintf(pass2file, sizeof(pass2file), "%s.obj", argv[1]);
    objectFile = fopen(pass2file, "w");
    
    
    FILE* pass2;
    pass2 = fopen(argv[1], "r");
    
    char* tokenB[4] = {"empty2", "empty2", "empty2", "empty2"};
    n = 0;
    int lastLine = lineCounter; //record the number of lines in the file
    lineCounter = 1;
    //start reading the file
    //char line2[1024];
        while (fgets(line, 1024, pass2)) {
            
            int length;
            length = strlen(line);
            if (length > 0) { //not a blank line
                if (line[0] == 35) { //#
                    //do nothing
                }
                else if(true){ //(((line[0] >= 65) && (line[0] <= 90)) || (line[0] == 9)) { //this is A-Z or a tab
                    
                    char* token = strtok(line, " \t\n"); //tokenize the line. Tokens are separated by spaces or tabs or newline characters
                    while (token) {
                        //char* tokenB;
                        tokenB[n] = token;
                        //printf("\t %d---->%s\n", n, tokenB[n]);
                        token = strtok(NULL, " \t\n"); 
                        
                        n++;
                        
                    }
                    
                    //printf("\n %s %s %s\n", tokenB[0], tokenB[1], tokenB[2]);
                    
                    if(lineCounter == 1){ //print the header record
                        fprintf(objectFile, "H"); //file must begin with header record
                        int tokenlen = strlen(tokenB[0]);
                        if (tokenlen > 6){
                            printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Symbol name too long!", line, lineCounter);
                            return 2;
                        }
                        fprintf(objectFile, "%s", tokenB[0]);
                        for(int l = 0; l < (6 - tokenlen); l++){
                            fprintf(objectFile, " ");
                        }
                        fprintf(objectFile, "00"); //printing 2 zeros before starting address
                        fprintf(objectFile, "%X%X\n", addressCounterArray[1], (addressCounterArray[(lastLine-1)] - addressCounterArray[1]));
                        //printf("\nlast line is %d\n", lastLine);
                    }
                    else if(lineCounter == lastLine-1){
                        //print the E, the leading zeros, and the first memory address
                        fprintf(objectFile, "E00%X", addressCounterArray[1]);    
                        
                    }
                    else if ((getType(tokenB[0]) == OPCODE) || ((getType(tokenB[0]) == DIRECTIVE) && (strncmp(tokenB[0], "RESB", TABLESIZE) != 0) && (strncmp(tokenB[0], "RESW", TABLESIZE) != 0))){
                        //if RESB or RESW is encountered, do nothing
                        fprintf(objectFile, "T00"); //print start of Text record (& 2 leading 0s)
                        fprintf(objectFile, "%X", addressCounterArray[lineCounter]);
                        fprintf(objectFile, "%02X", bytesUsed[lineCounter]);
                        if(strncmp(tokenB[0], "RSUB", TABLESIZE) == 0){
                            //no address recorded for RSUB
                            fprintf(objectFile, "%X0000\n", getOpcode(tokenB[0])->opcode);
                        }
                    else if (getType(tokenB[0]) == DIRECTIVE){
                        
                        if (strncmp(tokenB[0], "BYTE", TABLESIZE) == 0){
                                //do whatever needs to be done
                                if (tokenB[1][0] == 'X'){
                                    bool notEnd = true;
                                    for(int p = 2; notEnd; p++){
                                        if(tokenB[1][p] == '\''){notEnd = false;}
                                        if(tokenB[1][p] == '\''){break;}
                                        //I know this is an amateur move but I'm doing it anyway because I don't have time to mess around with regular expression formatting in c
                                        if((tokenB[1][p] != '1') || (tokenB[1][p] != '2') || (tokenB[1][p] != '3') || (tokenB[1][p] != '4') || (tokenB[1][p] != '5') || (tokenB[1][p] != '6') || (tokenB[1][p] != '7') || (tokenB[1][p] != '8') || (tokenB[1][p] != '9') || (tokenB[1][p] != 'A') || (tokenB[1][p] != 'B') || (tokenB[1][p] != 'C') || (tokenB[1][p] != 'D') || (tokenB[1][p] != 'E') || (tokenB[1][p] != 'F')){
                                            printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Invalid byte constant!", line, lineCounter);
                                        }
                                        fprintf(objectFile, "%c", tokenB[1][p]);
                                    }
                                }
                                else if (tokenB[1][0] == 'C'){
                                    bool notEnd = true;
                                    for(int p = 2; notEnd; p++){
                                        if(tokenB[1][p] == '\''){notEnd = false;}
                                        fprintf(objectFile, "%d", tokenB[1][p]);
                                    }
                                }
                                else {printf("possible error");}
                        }
                        if (strncmp(tokenB[0], "WORD", TABLESIZE) == 0){
                                //print 6-strlen zeros, then print the hex version of the int
                                for(int a = 0; a < (6 - strlen(tokenB[0])); a++){
                                    fprintf(objectFile, "0");
                                }
                                fprintf(objectFile, "%X", atoi(tokenB[0]));
                        }
                            
                        fprintf(objectFile, "\n"); //print a newline after all is done
                    }
                    else{
                            fprintf(objectFile, "%X%X\n", getOpcode(tokenB[0])->opcode, getAddress(tokenB[1]));
                            
                        }
                    }
                    
                    else{ //this means the first token is a symbol
                        //copy the above opcode and directive logic here, add 1 to all tokenB positions
                        
                        if ((getType(tokenB[1]) == OPCODE) || ((getType(tokenB[1]) == DIRECTIVE) && (strncmp(tokenB[1], "RESB", TABLESIZE) != 0) && (strncmp(tokenB[1], "RESW", TABLESIZE) != 0))){
                        //if RESB or RESW is encountered, do nothing
                        fprintf(objectFile, "T00"); //print start of Text record (& 2 leading 0s)
                        fprintf(objectFile, "%X", addressCounterArray[lineCounter]);
                        fprintf(objectFile, "%02X", bytesUsed[lineCounter]);
                        if(strncmp(tokenB[1], "RSUB", TABLESIZE) == 0){
                            //no address recorded for RSUB
                            fprintf(objectFile, "%X0000\n", getOpcode(tokenB[1])->opcode);
                        }
                    else if (getType(tokenB[1]) == DIRECTIVE){
                        
                        if (strncmp(tokenB[1], "BYTE", TABLESIZE) == 0){
                                //do whatever needs to be done
                                if (tokenB[2][0] == 'X'){
                                    bool notEnd = true;
                                    for(int p = 2; notEnd; p++){
                                        if(tokenB[2][p] == '\''){notEnd = false;}
                                        if(tokenB[2][p] == '\''){break;}
                                        fprintf(objectFile, "%c", tokenB[2][p]);
                                    }
                                }
                                else if (tokenB[2][0] == 'C'){
                                    bool notEnd = true;
                                    for(int p = 2; notEnd; p++){
                                        if(tokenB[2][p] == '\''){notEnd = false;}
                                        fprintf(objectFile, "%d", tokenB[1][p]);
                                    }
                                }
                                else {printf("possible error");}
                        }
                        if (strncmp(tokenB[1], "WORD", TABLESIZE) == 0){
                                //print 6-strlen zeros, then print the hex version of the int
                                for(int a = 0; a < (6 - strlen(tokenB[1])); a++){
                                    fprintf(objectFile, "0");
                                }
                                fprintf(objectFile, "%X", atoi(tokenB[1]));
                        }
                            
                        fprintf(objectFile, "\n"); //print a newline after all is done
                    }
                    else{
                            fprintf(objectFile, "%X%X\n", getOpcode(tokenB[1])->opcode, getAddress(tokenB[2]));
                            //printf("\n%d\n", getAddress(tokenB[2]));
                        }
                    }
                        
                    }
                    
                    lineCounter++;
                    //n = 0; //reset token array counter
                }
                
            }
            
            //lineCounter++;
            n = 0; //reset token array counter
        }
        
        
        



    fclose(objectFile);
    fclose(pass2);
    
    //for(int test = 0; test < 200; test++){printf("\n%d", symbolTable[test]->address);}
    
    return 0;
}