From eb4fbe2ebdd44dbf9c604553f2ef053d0360dd41 Mon Sep 17 00:00:00 2001
From: Dominic Matarese <dominicmatarese@gmail.com>
Date: Mon, 12 Jul 2021 16:21:43 +0000
Subject: required first commit

---
 main.c | 842 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 842 insertions(+)
 create mode 100644 main.c

(limited to 'main.c')

diff --git a/main.c b/main.c
new file mode 100644
index 0000000..172a353
--- /dev/null
+++ b/main.c
@@ -0,0 +1,842 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+//#include <math.h>
+
+#define TABLESIZE 200
+
+#define DIRECTIVE 1
+#define OPCODE 2
+#define OPERAND 3
+#define REGISTER 4
+#define SYMBOL 5
+
+typedef struct { //one entry of the opcode hash table: a mnemonic together with its opcode value and kind
+    char* instr; //mnemonic text; this is the string insert() passes to hash()
+    int opcode; //opcode value associated with the mnemonic
+    int type; //kind of entry, compared against the DIRECTIVE/OPCODE/OPERAND/REGISTER/SYMBOL defines above
+} opcode;
+
+typedef struct { //one entry of the symbol hash table
+    char* name; //symbol text; the string insertSymbol() and getAddress() pass to hash()
+    int address; //value returned by getAddress() for this symbol
+    int opcode; //NOTE(review): not read or written by any function visible in this part of the file - confirm it is used
+} symbol;
+
+opcode* hashtable[TABLESIZE]; //array of pointers for the hashtable of opcodes
+
+symbol* symbolTable[TABLESIZE]; //array of pointers for the hashtable of symbols
+
+
+unsigned int hash(char*);
+void insert(opcode *opcode);
+bool isValid(char*);
+int getType(char*);
+void insertSymbol(symbol *Symbol);
+opcode* getOpcode(char*);
+int getAddress(char* token);
+int hex2dec(int);
+int power(int, int);
+
+//Hash a NUL-terminated string to a slot index in the range 0..TABLESIZE-1.
+//Each character is folded in by adding its code, multiplying by its code,
+//and then reducing modulo TABLESIZE so the running value stays a valid index.
+unsigned int hash(char* opcode) {
+    unsigned int slot = 0;
+
+    for (char* cursor = opcode; *cursor != '\0'; cursor++) {
+        slot = (slot + *cursor) * *cursor;
+        slot %= TABLESIZE;
+    }
+
+    return slot;
+}
+
+//Store an opcode entry in the opcode hash table using linear probing.
+//The table keeps the pointer itself (no copy), so the entry must stay alive for as
+//long as the table is used. A NULL entry is ignored; a full table drops it silently.
+void insert(opcode *opcode) {
+    if (opcode == NULL) {
+        return;
+    }
+
+    unsigned int home = hash(opcode->instr); //preferred slot for this mnemonic
+    int step = 0;
+    while (step < TABLESIZE) {
+        int slot = (home + step) % TABLESIZE; //wrap around the end of the table
+        if (hashtable[slot] == NULL) {
+            hashtable[slot] = opcode; //first free slot wins
+            return;
+        }
+        step++; //occupied: try the next slot
+    }
+}
+
+//Report whether a token is present in the opcode hash table.
+//Probing starts at the token's hash and moves forward one slot at a time, wrapping
+//around the end of the table, until a match or an empty slot is reached.
+bool isValid(char* token) {
+    unsigned int home = hash(token);
+    bool found = false;
+
+    for (int step = 0; step < TABLESIZE && !found; step++) {
+        opcode* entry = hashtable[(home + step) % TABLESIZE];
+        if (entry == NULL) {
+            break; //an empty slot ends the probe chain: the token was never inserted
+        }
+        found = (strncmp(entry->instr, token, TABLESIZE) == 0);
+    }
+
+    return found;
+}
+
+//Return the kind of a token (the type field of its opcode-table entry: DIRECTIVE, OPCODE, ...),
+//or 0 if the token is not in the table. A NULL token returns 0 rather than crashing in hash().
+int getType(char* token) {
+    if (token == NULL) return 0; //main() compares its token slots with NULL only after calling getType() on them
+
+    unsigned int home = hash(token);
+    for (int j = 0; j < TABLESIZE; j++) {
+        opcode* entry = hashtable[(home + j) % TABLESIZE]; //linear probing with wrap-around
+        if (entry == NULL) {
+            return 0; //an empty slot ends the probe chain: unknown token
+        }
+        if (strncmp(entry->instr, token, TABLESIZE) == 0) {
+            return entry->type;
+        }
+    }
+    return 0; //every slot was occupied and none matched
+}
+
+//Insert a heap copy of *Symbol (struct and name) into the symbol hash table using linear probing.
+//Copying matters: main() passes a loop-local struct whose name points into the reused line buffer,
+//so storing the caller's pointer dangles. NULL input, a full table or a failed malloc store nothing.
+void insertSymbol(symbol *Symbol) {
+    if (Symbol == NULL || Symbol->name == NULL) { //check before touching any field
+        return;
+    }
+    size_t nameSize = strlen(Symbol->name) + 1; //include the terminating NUL
+    unsigned int home = hash(Symbol->name);
+    for (int j = 0; j < TABLESIZE; j++) {
+        int slot = (home + j) % TABLESIZE; //wrap around the end of the table
+        if (symbolTable[slot] != NULL) continue; //occupied: keep probing
+        symbol *copy = malloc(sizeof *copy);
+        char *nameCopy = malloc(nameSize);
+        if (copy == NULL || nameCopy == NULL) { //out of memory: store nothing
+            free(copy);
+            free(nameCopy);
+            return;
+        }
+        *copy = *Symbol;
+        copy->name = memcpy(nameCopy, Symbol->name, nameSize);
+        symbolTable[slot] = copy; //copies live until the program exits
+        return;
+    }
+}
+
+//Find the opcode-table entry whose mnemonic equals token. Returns the stored entry, or
+//NULL when the probe reaches an empty slot or has visited every slot without a match.
+opcode* getOpcode(char* token) {
+    unsigned int home = hash(token);
+    opcode* match = NULL;
+    for (int step = 0; step < TABLESIZE && match == NULL; step++) {
+        opcode* entry = hashtable[(home + step) % TABLESIZE];
+        if (entry == NULL) {
+            break; //gap in the probe chain: the token is not in the table
+        }
+        if (strncmp(entry->instr, token, TABLESIZE) == 0) {
+            match = entry;
+        }
+    }
+    return match;
+}
+
+//Look up a symbol in the symbol hash table and return its address field.
+//Probing starts at hash(token) and moves forward one slot at a time with wrap-around,
+//mirroring how insertSymbol() places entries.
+//
+//Return values:
+//  - the stored address when an entry with the same name is found;
+//  - 99999 when the probe reaches an empty slot (symbol not defined) or token is NULL;
+//  - 99 when all TABLESIZE slots are occupied and none of them matches.
+//
+//NOTE(review): both "not found" codes are kept unchanged because callers outside this
+//function may test for them, but 99 and 99999 can also be genuine addresses, so a caller
+//cannot tell them apart from a real result - consider an out-of-band error signal.
+//Names are compared with strncmp() limited to TABLESIZE characters, so two names that
+//share their first TABLESIZE characters are treated as equal.
+int getAddress(char* token)
+{
+    if (token == NULL) {
+        return 99999; //nothing to look up; also keeps hash() from dereferencing NULL
+    }
+
+    unsigned int home = hash(token);
+    for (int j = 0; j < TABLESIZE; j++) {
+        symbol* entry = symbolTable[(home + j) % TABLESIZE];
+        if (entry == NULL) {
+            return 99999; //an empty slot ends the probe chain
+        }
+        if (strncmp(entry->name, token, TABLESIZE) == 0) {
+            return entry->address;
+        }
+    }
+    return 99; //table completely full and no entry matched
+}
+
+//Reinterpret the decimal digits of num as hexadecimal digits, e.g. 1000 -> 0x1000 (4096).
+//Needed because the operand of START is written in hex but read with atoi() as decimal.
+//Zero or negative input yields 0; a result too large for int is implementation-defined.
+int hex2dec(int num){
+    long long dec = 0;
+    long long place = 1; //weight of the current digit: 1, 16, 256, ... from the right
+    while (num > 0){
+        dec += (num % 10) * place; //the rightmost digit gets weight 16^0 == 1
+        num /= 10;
+        place *= 16;
+    }
+    return (int)dec;
+}
+
+//Integer x raised to the power y; a stand-in for pow() from math.h, which would not link. y <= 0 gives 1.
+int power(int x, int y){
+    int result = 1; //empty product, so power(x, 0) == 1
+    for(int i = 0; i < y; i++){
+        result *= x;
+    }
+    return result;
+}
+
+
+
+int main(int argc, char* argv[]) { //argc = # of arguments. argv[1] is the assembly file to be opened
+
+    //clear the memory for the hash table as soon as the program starts
+    for (int i = 0; i < TABLESIZE; i++) {
+        hashtable[i] = NULL;
+        symbolTable[i] = NULL;
+    }
+
+    //check for incorrect command usage
+    if (argc != 2) {
+        printf("USAGE: %s <filename>\n", argv[0]);
+        return 1;
+    }
+
+    FILE* inputFile;
+    FILE* outputFile;
+    inputFile = fopen(argv[1], "r"); //"r" parameter is for read-only
+    
+    
+    outputFile = fopen("pass1.txt", "w");
+    //check if file is valid
+    if (!inputFile) {
+        printf("ASSEMBLY ERROR:\n\n%s could not be opened for reading.", argv[1]);
+        return 1;
+        
+    }
+    
+    if(true){ //this exists so I can easily collapse this section in the editor
+    //==================================================================================
+    //TABLE OF OPCODES (appropriately hardcoded)
+    //block selection mode and find and replace are my best friends
+    //I tried to have a separate function for this, but it did not work out. It must be in main()
+    
+    //Directives
+    opcode START = {.instr="START",  .type=1};
+    opcode END = {.instr="END", .type=1};
+    opcode BYTE = {.instr="BYTE", .type=1};
+    opcode WORD = {.instr="WORD", .type=1};
+    opcode RESB = {.instr="RESB", .type=1};
+    opcode RESW = {.instr="RESW", .type=1};
+    opcode RESR = {.instr="RESR", .type=1};
+    opcode EXPORTS = {.instr="EXPORTS", .type=1};
+    //insert into hash table
+    insert(&START);
+    insert(&END);
+    insert(&BYTE);
+    insert(&WORD);
+    insert(&RESB);
+    insert(&RESW);
+    insert(&RESR);
+    insert(&EXPORTS);
+
+    
+    //Opcodes
+    opcode ADD   = {.instr="ADD", .opcode=0x18, .type=2};
+    opcode ADDF  = {.instr="ADDF", .opcode=0x58, .type=2};
+    opcode ADDR  = {.instr="ADDR", .opcode=0x90, .type=2};
+    opcode AND   = {.instr="AND", .opcode=0x40, .type=2};
+    opcode CLEAR = {.instr="CLEAR", .opcode=0xB4, .type=2};
+    opcode COMP  = {.instr="COMP", .opcode=0x28, .type=2};
+    opcode COMPF = {.instr="COMPF", .opcode=0x88, .type=2};
+    opcode COMPR = {.instr="COMPR", .opcode=0xA0, .type=2};
+    opcode DIV   = {.instr="DIV", .opcode=0x24, .type=2};
+    opcode DIVF  = {.instr="DIVF", .opcode=0x64, .type=2};
+    opcode DIVR  = {.instr="DIVR", .opcode=0x9C, .type=2};
+    opcode FIX   = {.instr="FIX", .opcode=0xC4, .type=2};
+    opcode FLOAT = {.instr="FLOAT", .opcode=0xC0, .type=2};
+    opcode HIO   = {.instr="HIO", .opcode=0xF4, .type=2};
+    opcode J     = {.instr="J", .opcode=0x3C, .type=2};
+    opcode JEQ   = {.instr="JEQ", .opcode=0x30, .type=2};
+    opcode JGT   = {.instr="JGT", .opcode=0x34, .type=2};
+    opcode JLT   = {.instr="JLT", .opcode=0x38, .type=2};
+    opcode JSUB  = {.instr="JSUB", .opcode=0x48, .type=2};
+    opcode LDA   = {.instr="LDA", .opcode=0x00, .type=2};
+    opcode LDB   = {.instr="LDB", .opcode=0x68, .type=2};
+    opcode LDCH  = {.instr="LDCH", .opcode=0x50, .type=2};
+    opcode LDF   = {.instr="LDF", .opcode=0x70, .type=2};
+    opcode LDL   = {.instr="LDL", .opcode=0x08, .type=2};
+    opcode LDS   = {.instr="LDS", .opcode=0x6C, .type=2};
+    opcode LDT   = {.instr="LDT", .opcode=0x74, .type=2};
+    opcode LDX   = {.instr="LDX", .opcode=0x04, .type=2};
+    opcode LPS   = {.instr="LPS", .opcode=0xD0, .type=2};
+    opcode MUL   = {.instr="MUL", .opcode=0x20, .type=2};
+    opcode MULF  = {.instr="MULF", .opcode=0x60, .type=2};
+    opcode MULR  = {.instr="MULR", .opcode=0x98, .type=2};
+    opcode NORM  = {.instr="NORM", .opcode=0xC8, .type=2};
+    opcode OR    = {.instr="OR", .opcode=0x44, .type=2};
+    opcode RD    = {.instr="RD", .opcode=0xD8, .type=2};
+    opcode RMO   = {.instr="RMO", .opcode=0xAC, .type=2};
+    opcode RSUB  = {.instr="RSUB", .opcode=0x4C, .type=2};
+    opcode SHIFTL= {.instr="SHIFTL", .opcode=0xA4, .type=2};
+    opcode SHIFTR= {.instr="SHIFTR", .opcode=0xA8, .type=2};
+    opcode SIO   = {.instr="SIO", .opcode=0xF0, .type=2};
+    opcode SSK   = {.instr="SSK", .opcode=0xEC, .type=2};
+    opcode STA   = {.instr="STA", .opcode=0x0C, .type=2};
+    opcode STB   = {.instr="STB", .opcode=0x78, .type=2};
+    opcode STCH  = {.instr="STCH", .opcode=0x54, .type=2};
+    opcode STF   = {.instr="STF", .opcode=0x80, .type=2};
+    opcode STI   = {.instr="STI", .opcode=0xD4, .type=2};
+    opcode STL   = {.instr="STL", .opcode=0x14, .type=2};
+    opcode STS   = {.instr="STS", .opcode=0x7C, .type=2};
+    opcode STSW  = {.instr="STSW", .opcode=0xE8, .type=2};
+    opcode STT   = {.instr="STT", .opcode=0x84, .type=2};
+    opcode STX   = {.instr="STX", .opcode=0x10, .type=2};
+    opcode SUB   = {.instr="SUB", .opcode=0x1C, .type=2};
+    opcode SUBF  = {.instr="SUBF", .opcode=0x5C, .type=2};
+    opcode SUBR  = {.instr="SUBR", .opcode=0x94, .type=2};
+    opcode SVC   = {.instr="SVC", .opcode=0xB0, .type=2};
+    opcode TD    = {.instr="TD", .opcode=0xE0, .type=2};
+    opcode TIO   = {.instr="TIO", .opcode=0xF8, .type=2};
+    opcode TIX   = {.instr="TIX", .opcode=0x2C, .type=2};
+    opcode TIXR  = {.instr="TIXR", .opcode=0xB8, .type=2};
+    opcode WD    = {.instr="WD", .opcode=0xDC, .type=2};
+    //insert into hash table
+    insert(&ADD);
+    insert(&ADDF);
+    insert(&ADDR);
+    insert(&AND);
+    insert(&CLEAR);
+    insert(&COMP);
+    insert(&COMPF);
+    insert(&COMPR);
+    insert(&DIV);
+    insert(&DIVF);
+    insert(&DIVR);
+    insert(&FIX);
+    insert(&FLOAT);
+    insert(&HIO);
+    insert(&J);
+    insert(&JEQ);
+    insert(&JGT);
+    insert(&JLT);
+    insert(&JSUB);
+    insert(&LDA);
+    insert(&LDB);
+    insert(&LDCH);
+    insert(&LDF);
+    insert(&LDL);
+    insert(&LDS);
+    insert(&LDT);
+    insert(&LDX);
+    insert(&LPS);
+    insert(&MUL);
+    insert(&MULF);
+    insert(&MULR);
+    insert(&NORM);
+    insert(&OR);
+    insert(&RD);
+    insert(&RMO);
+    insert(&RSUB);
+    insert(&SHIFTL);
+    insert(&SHIFTR);
+    insert(&SIO);
+    insert(&SSK);
+    insert(&STA);
+    insert(&STB);
+    insert(&STCH);
+    insert(&STF);
+    insert(&STI);
+    insert(&STL);
+    insert(&STS);
+    insert(&STSW);
+    insert(&STT);
+    insert(&STX);
+    insert(&SUB);
+    insert(&SUBF);
+    insert(&SUBR);
+    insert(&SVC);
+    insert(&TD);
+    insert(&TIO);
+    insert(&TIX);
+    insert(&TIXR);
+    insert(&WD);
+    
+    //Registers
+    opcode A = {.instr="A", .opcode=0, .type=4};
+    opcode X = {.instr="X", .opcode=0, .type=4};
+    opcode L = {.instr="L", .opcode=0, .type=4};
+    opcode PC = {.instr="PC", .opcode=0, .type=4};
+    opcode SW = {.instr="SW", .opcode=0, .type=4};
+    //insert into hash table
+    insert(&A);
+    insert(&X);
+    insert(&L);
+    insert(&PC);
+    insert(&SW);
+    }//==================================================================================
+    
+    //symbol array for temporarily storing symbols for insertion into the hash table
+    //symbol* symtable[TABLESIZE];
+    
+    int lineCounter = 1;
+    int bytesUsed[TABLESIZE]; //keeping track of the space used per instruction/line. Useful for pass 2
+    
+    //ADDRESS COUNTER
+    //Starts at 0, will be stored in decimal, then converted to hex
+    int addressCounter = 0;
+    int addressCounterArray[TABLESIZE]; //useful for pass 2
+    //keep track of whether a start token has been encountered already
+    bool start = false;
+    //Token array for storing tokens 
+    //also can serve to help verify that opcodes are followed by operands instead of other opcodes or directives
+    int n=0;
+    char* tokenA[4];
+
+    //start reading the file
+    char line[1024];
+        while (fgets(line, 1024, inputFile)) {
+            //printf("READ: %s", line);
+            int length;
+            length = strlen(line);
+            if (length > 0) { //not a blank line
+                if (line[0] == 35) { //#
+                    //printf("--was a comment\n");
+                }
+                else if(((line[0] >= 65) && (line[0] <= 90)) || (line[0] == 9)) { //this is A-Z or a tab
+                    //printf("--symbol definition: %s", line);
+                    char* token = strtok(line, " \t\n"); //tokenize the line. Tokens are separated by spaces or tabs or newline characters
+                    while (token) {
+                        tokenA[n] = token;
+                        //printf("\t %d---->%s\n", n, tokenA[n]);
+                        token = strtok(NULL, " \t\n"); 
+                        
+                        n++;
+                        
+                        
+                        /* //check if the token is valid
+                        if (getType(token) == 0) {
+                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid token: %s", token);
+                            printf("Aborting...");
+                            printf("ASSEMBLY ABORTED DUE TO INVALID TOKEN");
+                            return 1;
+                        }
+                        if (strncmp(token, "END", TABLESIZE) == 0) { //EXITING ON END DIRECTIVE WILL CAUSE 2 TESTS TO FAIL
+                            printf("END token found...");
+                            //return 0;
+                        }*/
+                        
+                    }
+                    
+                    //BEGIN ANALYSING THE LINE AND APPLYING THE LOGIC
+                    if (getType(tokenA[0]) == DIRECTIVE) {
+                            if (getType(tokenA[1]) == DIRECTIVE) {
+                                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Duplicate directive detected! Did you define a symbol with a name that matches an assembler directive?\n ", line, lineCounter);
+                                return 1;
+                            }
+                            if (strncmp(tokenA[0], "START", TABLESIZE) == 0){
+                                if(start){
+                                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d duplicate START token encountered!\n", line, lineCounter);
+                                    return 1;
+                                }
+                                start = true;
+                                int startnum = atoi(tokenA[1]); //if casting fails here then I let the assembly fail
+                                if (addressCounter == 0) {addressCounter = hex2dec(startnum);}
+                                else{
+                                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d You can't have the START directive in the middle of the file!\n", line, lineCounter);
+                                }
+                            }
+                            if (strncmp(tokenA[0], "BYTE", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                if (tokenA[1][0] == 'X'){
+                                    int bytenum = (strlen(tokenA[1]) - 3); //length of string minus the X and quotes
+                                    addressCounter += (bytenum / 2);
+                                    bytesUsed[lineCounter] = bytenum / 2;
+                                }
+                                else if (tokenA[1][0] == 'C'){
+                                    addressCounter += (strlen(tokenA[1]) - 3); //length of string minus the C and quotes
+                                    bytesUsed[lineCounter] = (strlen(tokenA[1]) - 3);
+                                }
+                                else {
+                                    printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Must specify X or C after byte directive", line, lineCounter); 
+                                    addressCounter += strlen(tokenA[1]); bytesUsed[lineCounter] = strlen(tokenA[1]);}
+                            }
+                            if (strncmp(tokenA[0], "WORD", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                addressCounter += 3;
+                                bytesUsed[lineCounter] = 3;
+                            }
+                            if (strncmp(tokenA[0], "RESB", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                addressCounter += atoi(tokenA[1]);
+                                bytesUsed[lineCounter] = atoi(tokenA[1]);
+                            }
+                            if (strncmp(tokenA[0], "RESW", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                addressCounter += (3 * atoi(tokenA[1]));
+                                bytesUsed[lineCounter] = (3 * atoi(tokenA[1]));
+                            }
+                            else {addressCounter += 3; bytesUsed[lineCounter] = 3;}
+                    }
+                    else if (getType(tokenA[0]) == OPCODE) {
+                        if (getType(tokenA[1]) == OPCODE) {
+                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter);
+                            return 1;
+                        }
+                        if (getType(tokenA[1]) == DIRECTIVE) {
+                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter);
+                            return 1;
+                        }
+                        else {addressCounter += 3; bytesUsed[lineCounter] = 3;}
+                    }
+                    //else if (strncmp(tokenA[0], "RSUB", TABLESIZE) == 0) addressCounter += 3; //RSUB wasn't being detected as opcode for some reason even though getType returns 2, so I hardcoded this... <- should be fixed, but the hardcode remains just in case
+                    else if (tokenA[0] == NULL) return 2;
+                    
+                    
+                    //SYMBOL
+                    else { //this means the first token is a symbol
+                        //printf("Symbol encountered!\n"); 
+                        if (strlen(tokenA[0]) > 6){
+                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Symbols cannot be longer than six characters! \n", line, lineCounter);
+                            return 1;
+                        }
+                        if ((tokenA[0][0] <= 65) || (tokenA[0][0] > 91)){ //im hoping that tokenA[0][0] means the first character of the first token
+                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Symbols must start with an alpha character! \n ", line, lineCounter);
+                            return 1;
+                        }
+                        
+                        //record the symbol name and the current value of addressCounter (as hex) into new file
+                        //if token is START, update address counter before recording
+                        
+                        if (strncmp(tokenA[1], "START", TABLESIZE) == 0){
+                                if(start){
+                                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d duplicate START token encountered!\n", line, lineCounter);
+                                    return 1;
+                                }
+                                start = true;
+                                int startnum = atoi(tokenA[2]); //if casting fails here then I let the assembly fail
+                                if (addressCounter == 0) {addressCounter = hex2dec(startnum);}
+                                else{
+                                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d You can't have the START directive in the middle of the file!\n", line, lineCounter);
+                                    return 1;
+                                }
+                            }
+                            
+                        //printf("Recording token %s and address %X and writing to output file...\n\n", tokenA[0], addressCounter);
+                        fprintf(outputFile, "%s     %X\n", tokenA[0], addressCounter);
+                        
+                        
+                        //insertSymbol(tokenA[0], addressCounter);
+                        symbol inserttt = {.name=tokenA[0], .address=addressCounter};
+                        
+                        
+                        insertSymbol(&inserttt);
+                   
+                      
+                        
+                        //copy directive and opcode logic here, add 1 to all array positions
+                        if (getType(tokenA[1]) == DIRECTIVE) {
+                            if (getType(tokenA[2]) == DIRECTIVE) {
+                                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Duplicate directive detected! Did you define a symbol with a name that matches an assembler directive?\n ", line, lineCounter);
+                                return 1;
+                            }
+                            
+                            if (strncmp(tokenA[1], "BYTE", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                if (tokenA[2][0] == 'X'){
+                                    //printf("\nByte operand recognized.");
+                                    int bytenum = (strlen(tokenA[2]) - 3); //length of string minus the X and quotes
+                                    //printf("\nIncreasing counter by %d\n", (bytenum / 2));
+                                    addressCounter += (bytenum / 2);
+                                    bytesUsed[lineCounter] = (bytenum / 2);
+                                }
+                                else if (tokenA[2][0] == 'C'){
+                                    
+                                    //printf("\nCharacter operand recognized.\nIncreasing counter by %lu\n", strlen(tokenA[2] - 3));
+                                    addressCounter += (strlen(tokenA[2]) - 3); //length of string minus the C and quotes
+                                    //printf("addressCounter is %X at this point\n", addressCounter);
+                                    bytesUsed[lineCounter] = (strlen(tokenA[2]) - 3);
+                                }
+                                else {printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Must specify X or C after byte directive", line, lineCounter); addressCounter += strlen(tokenA[2]); bytesUsed[lineCounter] = strlen(tokenA[2]);}
+                            }
+                            if (strncmp(tokenA[1], "WORD", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                addressCounter += 3;
+                                bytesUsed[lineCounter] = 3;
+                            }
+                            if (strncmp(tokenA[1], "RESB", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                addressCounter += atoi(tokenA[2]);
+                                bytesUsed[lineCounter] = atoi(tokenA[2]);
+                            }
+                            if (strncmp(tokenA[1], "RESW", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                addressCounter += (3 * atoi(tokenA[2]));
+                                bytesUsed[lineCounter] = (3 * atoi(tokenA[2]));
+                            }
+                            else {
+                                if(strncmp(tokenA[1], "START", TABLESIZE) != 0) {addressCounter += 0; bytesUsed[lineCounter] = 0;}
+                                //addressCounter += 3;
+                            }//increase by 3 only if token is not START because that was taken care of earlier
+                        }
+                        else if (getType(tokenA[1]) == OPCODE) {
+                            if (getType(tokenA[2]) == OPCODE) {
+                                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter);
+                                return 1;
+                            }
+                            if (getType(tokenA[2]) == DIRECTIVE) {
+                                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineCounter);
+                                return 1;
+                            }
+                            else {addressCounter += 3; bytesUsed[lineCounter] = 3;}
+                        }
+                        else if (tokenA[1] == NULL){
+                            //printf("tokenA[1] is null");
+                            //do nothing
+                        }
+                        else{
+                            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid token or duplicate symbol! ", line, lineCounter);
+                            //printf("Developers note: tokenA[0] = %sToken[1] = %sToken[2] = %s",tokenA[0], tokenA[1],tokenA[2]);
+                            return 1;
+                        }
+                        
+                        //printf("addressCounter is %X at this point #2\n", addressCounter);
+                        
+                    }
+                    
+                    n = 0; //reset token array counter
+                    addressCounterArray[lineCounter] = addressCounter;
+                    lineCounter++;
+                    
+                    //causes seg fault 
+                    tokenA[0] = "empty";
+                    tokenA[1] = "empty";
+                    tokenA[2] = "empty";
+                    tokenA[3] = "empty";
+
+
+                }
+                else{
+                    printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid line!", line, lineCounter);
+                    return 1;
+                }
+                
+
+            }
+
+
+
+
+        }
+        fclose(outputFile);
+        fclose(inputFile);
+        
+        /* ==========================================================================
+        * =================================PASS 2===================================
+        * ==========================================================================
+        */
+        
+    
+    FILE* objectFile;
+    char pass2file[0x100];
+    snprintf(pass2file, sizeof(pass2file), "%s.obj", argv[1]);
+    objectFile = fopen(pass2file, "w");
+    
+    
+    FILE* pass2;
+    pass2 = fopen(argv[1], "r");
+    
+    char* tokenB[4] = {"empty2", "empty2", "empty2", "empty2"};
+    n = 0;
+    int lastLine = lineCounter; //record the number of lines in the file
+    lineCounter = 1;
+    //start reading the file
+    //char line2[1024];
+        while (fgets(line, 1024, pass2)) {
+            
+            int length;
+            length = strlen(line);
+            if (length > 0) { //not a blank line
+                if (line[0] == 35) { //#
+                    //do nothing
+                }
+                else if(true){ //(((line[0] >= 65) && (line[0] <= 90)) || (line[0] == 9)) { //this is A-Z or a tab
+                    
+                    char* token = strtok(line, " \t\n"); //tokenize the line. Tokens are separated by spaces or tabs or newline characters
+                    while (token) {
+                        //char* tokenB;
+                        tokenB[n] = token;
+                        //printf("\t %d---->%s\n", n, tokenB[n]);
+                        token = strtok(NULL, " \t\n"); 
+                        
+                        n++;
+                        
+                    }
+                    
+                    //printf("\n %s %s %s\n", tokenB[0], tokenB[1], tokenB[2]);
+                    
+                    if(lineCounter == 1){ //print the header record
+                        fprintf(objectFile, "H"); //file must begin with header record
+                        int tokenlen = strlen(tokenB[0]);
+                        if (tokenlen > 6){
+                            printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Symbol name too long!", line, lineCounter);
+                            return 2;
+                        }
+                        fprintf(objectFile, "%s", tokenB[0]);
+                        for(int l = 0; l < (6 - tokenlen); l++){
+                            fprintf(objectFile, " ");
+                        }
+                        fprintf(objectFile, "00"); //printing 2 zeros before starting address
+                        fprintf(objectFile, "%X%X\n", addressCounterArray[1], (addressCounterArray[(lastLine-1)] - addressCounterArray[1]));
+                        //printf("\nlast line is %d\n", lastLine);
+                    }
+                    else if(lineCounter == lastLine-1){
+                        //print the E, the leading zeros, and the first memory address
+                        fprintf(objectFile, "E00%X", addressCounterArray[1]);    
+                        
+                    }
+                    else if ((getType(tokenB[0]) == OPCODE) || ((getType(tokenB[0]) == DIRECTIVE) && (strncmp(tokenB[0], "RESB", TABLESIZE) != 0) && (strncmp(tokenB[0], "RESW", TABLESIZE) != 0))){
+                        //if RESB or RESW is encountered, do nothing
+                        fprintf(objectFile, "T00"); //print start of Text record (& 2 leading 0s)
+                        fprintf(objectFile, "%X", addressCounterArray[lineCounter]);
+                        fprintf(objectFile, "%02X", bytesUsed[lineCounter]);
+                        if(strncmp(tokenB[0], "RSUB", TABLESIZE) == 0){
+                            //no address recorded for RSUB
+                            fprintf(objectFile, "%X0000\n", getOpcode(tokenB[0])->opcode);
+                        }
+                    else if (getType(tokenB[0]) == DIRECTIVE){
+                        
+                        if (strncmp(tokenB[0], "BYTE", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                if (tokenB[1][0] == 'X'){
+                                    bool notEnd = true;
+                                    for(int p = 2; notEnd; p++){
+                                        if(tokenB[1][p] == '\''){notEnd = false;}
+                                        if(tokenB[1][p] == '\''){break;}
+                                        //I know this is an amateur move but I'm doing it anyway because I don't have time to mess around with regular expression formatting in c
+                                        if((tokenB[1][p] != '1') || (tokenB[1][p] != '2') || (tokenB[1][p] != '3') || (tokenB[1][p] != '4') || (tokenB[1][p] != '5') || (tokenB[1][p] != '6') || (tokenB[1][p] != '7') || (tokenB[1][p] != '8') || (tokenB[1][p] != '9') || (tokenB[1][p] != 'A') || (tokenB[1][p] != 'B') || (tokenB[1][p] != 'C') || (tokenB[1][p] != 'D') || (tokenB[1][p] != 'E') || (tokenB[1][p] != 'F')){
+                                            printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Invalid byte constant!", line, lineCounter);
+                                        }
+                                        fprintf(objectFile, "%c", tokenB[1][p]);
+                                    }
+                                }
+                                else if (tokenB[1][0] == 'C'){
+                                    bool notEnd = true;
+                                    for(int p = 2; notEnd; p++){
+                                        if(tokenB[1][p] == '\''){notEnd = false;}
+                                        fprintf(objectFile, "%d", tokenB[1][p]);
+                                    }
+                                }
+                                else {printf("possible error");}
+                        }
+                        if (strncmp(tokenB[0], "WORD", TABLESIZE) == 0){
+                                //print 6-strlen zeros, then print the hex version of the int
+                                for(int a = 0; a < (6 - strlen(tokenB[0])); a++){
+                                    fprintf(objectFile, "0");
+                                }
+                                fprintf(objectFile, "%X", atoi(tokenB[0]));
+                        }
+                            
+                        fprintf(objectFile, "\n"); //print a newline after all is done
+                    }
+                    else{
+                            fprintf(objectFile, "%X%X\n", getOpcode(tokenB[0])->opcode, getAddress(tokenB[1]));
+                            
+                        }
+                    }
+                    
+                    else{ //this means the first token is a symbol
+                        //copy the above opcode and directive logic here, add 1 to all tokenB positions
+                        
+                        if ((getType(tokenB[1]) == OPCODE) || ((getType(tokenB[1]) == DIRECTIVE) && (strncmp(tokenB[1], "RESB", TABLESIZE) != 0) && (strncmp(tokenB[1], "RESW", TABLESIZE) != 0))){
+                        //if RESB or RESW is encountered, do nothing
+                        fprintf(objectFile, "T00"); //print start of Text record (& 2 leading 0s)
+                        fprintf(objectFile, "%X", addressCounterArray[lineCounter]);
+                        fprintf(objectFile, "%02X", bytesUsed[lineCounter]);
+                        if(strncmp(tokenB[1], "RSUB", TABLESIZE) == 0){
+                            //no address recorded for RSUB
+                            fprintf(objectFile, "%X0000\n", getOpcode(tokenB[1])->opcode);
+                        }
+                    else if (getType(tokenB[1]) == DIRECTIVE){
+                        
+                        if (strncmp(tokenB[1], "BYTE", TABLESIZE) == 0){
+                                //do whatever needs to be done
+                                if (tokenB[2][0] == 'X'){
+                                    bool notEnd = true;
+                                    for(int p = 2; notEnd; p++){
+                                        if(tokenB[2][p] == '\''){notEnd = false;}
+                                        if(tokenB[2][p] == '\''){break;}
+                                        fprintf(objectFile, "%c", tokenB[2][p]);
+                                    }
+                                }
+                                else if (tokenB[2][0] == 'C'){
+                                    bool notEnd = true;
+                                    for(int p = 2; notEnd; p++){
+                                        if(tokenB[2][p] == '\''){notEnd = false;}
+                                        fprintf(objectFile, "%d", tokenB[1][p]);
+                                    }
+                                }
+                                else {printf("possible error");}
+                        }
+                        if (strncmp(tokenB[1], "WORD", TABLESIZE) == 0){
+                                //print 6-strlen zeros, then print the hex version of the int
+                                for(int a = 0; a < (6 - strlen(tokenB[1])); a++){
+                                    fprintf(objectFile, "0");
+                                }
+                                fprintf(objectFile, "%X", atoi(tokenB[1]));
+                        }
+                            
+                        fprintf(objectFile, "\n"); //print a newline after all is done
+                    }
+                    else{
+                            fprintf(objectFile, "%X%X\n", getOpcode(tokenB[1])->opcode, getAddress(tokenB[2]));
+                            //printf("\n%d\n", getAddress(tokenB[2]));
+                        }
+                    }
+                        
+                    }
+                    
+                    lineCounter++;
+                    //n = 0; //reset token array counter
+                }
+                
+            }
+            
+            //lineCounter++;
+            n = 0; //reset token array counter
+        }
+        
+        
+        
+
+
+
+    fclose(objectFile);
+    fclose(pass2);
+    
+    //for(int test = 0; test < 200; test++){printf("\n%d", symbolTable[test]->address);}
+    
+    return 0;
+}
-- 
cgit v1.2.1