aboutsummaryrefslogtreecommitdiff
path: root/main.c
diff options
context:
space:
mode:
author Dominic Matarese <dominicmatarese@gmail.com> 2021-07-12 16:21:43 +0000
committer Dominic Matarese <dominicmatarese@gmail.com> 2021-07-12 16:21:43 +0000
commit eb4fbe2ebdd44dbf9c604553f2ef053d0360dd41 (patch)
tree 6bf8a5ed4a3e892a5155edab06899a354085627e /main.c
required first commit HEAD master
Diffstat (limited to 'main.c')
-rw-r--r-- main.c 842
1 files changed, 842 insertions, 0 deletions
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..172a353
--- /dev/null
+++ b/main.c
@@ -0,0 +1,842 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+//#include <math.h>
+
+//Number of slots in each of the two hash tables (hashtable and symbolTable); hash() reduces modulo this value
+#define TABLESIZE 200
+
+//Token kinds. These are the values stored in opcode.type and returned by getType();
+//getType() returns 0 for a token that is not in the opcode table.
+#define DIRECTIVE 1
+#define OPCODE 2
+#define OPERAND 3
+#define REGISTER 4
+#define SYMBOL 5
+
+//One entry of the opcode hash table: a mnemonic, its machine opcode and its token kind
+typedef struct { //definition of opcode which includes the mnemonic and the opcode
+ char* instr; //mnemonic text; this is the key that gets hashed and compared
+ int opcode; //machine opcode value; left at 0 for directives and registers
+ int type; //token kind: DIRECTIVE=1, OPCODE=2, OPERAND=3, REGISTER=4 or SYMBOL=5 (see the #defines)
+} opcode;
+
+//One entry of the symbol hash table: a label and the address recorded for it during pass 1
+typedef struct {
+ char* name; //label text; this is the key that gets hashed and compared
+ int address; //value of the address counter when the label was recorded
+ int opcode; //not read anywhere in this file
+} symbol;
+
+//Both tables use open addressing with linear probing; an empty slot is NULL.
+//The tables store the pointers they are given, not copies of the entries.
+opcode* hashtable[TABLESIZE]; //array of pointers for the hashtable of opcodes
+
+symbol* symbolTable[TABLESIZE]; //array of pointers for the hashtable of symbols
+
+
+//maps a string to a slot index below TABLESIZE
+unsigned int hash(char*);
+//adds an entry to the opcode table
+void insert(opcode *opcode);
+//true if the token is a key of the opcode table
+bool isValid(char*);
+//token kind of an opcode-table key, or 0 if the token is not in the table
+int getType(char*);
+//adds an entry to the symbol table
+void insertSymbol(symbol *Symbol);
+//opcode-table entry for the token, or NULL if the token is not in the table
+opcode* getOpcode(char*);
+//address stored for a symbol; 99999 when probing reaches an empty slot without finding it
+int getAddress(char* token);
+//reads the decimal digits of the argument as if they were hexadecimal digits
+int hex2dec(int);
+//integer exponentiation helper used by hex2dec
+int power(int, int);
+
+//Hash function shared by the opcode table and the symbol table.
+//For every character of the string, the character code is first added to and then
+//multiplied into the running value, which is reduced modulo TABLESIZE after each
+//character. The result is therefore always a valid slot index (0 for an empty string).
+unsigned int hash(char* opcode) {
+    unsigned int slot = 0;
+    for (char *cursor = opcode; *cursor != '\0'; cursor++) {
+        int code = *cursor; //same integer promotion the compound assignments performed
+        slot = ((slot + code) * code) % TABLESIZE;
+    }
+    return slot;
+}
+
+//Adds an entry to the opcode hash table.
+//The search for a free slot starts at hash(opcode->instr) and moves forward one slot at a
+//time, wrapping around at TABLESIZE (linear probing). The pointer itself is stored, so the
+//entry must stay alive for as long as the table is used.
+//A NULL argument is ignored; if no slot is free the entry is silently not stored.
+void insert(opcode *opcode) {
+    if (opcode == NULL) {
+        return;
+    }
+
+    unsigned int home = hash(opcode->instr);
+    for (unsigned int step = 0; step < TABLESIZE; step++) {
+        unsigned int slot = (home + step) % TABLESIZE;
+        if (hashtable[slot] != NULL) {
+            continue; //occupied: try the next slot
+        }
+        hashtable[slot] = opcode;
+        return;
+    }
+}
+
+//Reports whether a token is a key of the opcode hash table (directive, opcode or register).
+//  token: NUL-terminated string, may be NULL
+//  returns: true if the token is in the table, false otherwise (including for NULL)
+//The lookup is delegated to getOpcode() so that the probing logic exists in one place.
+bool isValid(char* token) {
+    if (token == NULL) { //hash() would call strlen(NULL)
+        return false;
+    }
+    return getOpcode(token) != NULL;
+}
+
+//Returns the kind of a token according to the opcode hash table.
+//  token: NUL-terminated string, may be NULL
+//  returns: the type field of the matching entry (see the preprocessor definitions),
+//           or 0 if the token is NULL or not in the table
+//Probing starts at hash(token) and walks forward with wrap-around, exactly like insert().
+//Entries are never removed, so reaching an empty slot proves the token was never inserted.
+int getType(char* token) {
+    if (token == NULL) { //hash() would call strlen(NULL)
+        return 0;
+    }
+
+    unsigned int home = hash(token);
+    for (unsigned int step = 0; step < TABLESIZE; step++) {
+        opcode *entry = hashtable[(home + step) % TABLESIZE];
+        if (entry == NULL) {
+            return 0;
+        }
+        if (strcmp(entry->instr, token) == 0) {
+            return entry->type;
+        }
+    }
+
+    return 0; //every slot was occupied and none matched
+}
+
+//Adds a symbol to the symbol hash table.
+//  Symbol: entry to record; NULL, or an entry whose name is NULL, is ignored
+//The table stores a private heap copy of the entry and of its name, so the caller may pass
+//a temporary struct whose name points into a buffer that is reused afterwards (this is how
+//main() calls it). The copies are never freed; they live until the program exits.
+//A free slot is searched from hash(name) forward with wrap-around (linear probing).
+//If the table is full or memory runs out, the symbol is not stored.
+void insertSymbol(symbol *Symbol) {
+    if (Symbol == NULL || Symbol->name == NULL) { //check before touching Symbol->name
+        return;
+    }
+
+    unsigned int home = hash(Symbol->name);
+    for (unsigned int step = 0; step < TABLESIZE; step++) {
+        unsigned int slot = (home + step) % TABLESIZE;
+        if (symbolTable[slot] != NULL) {
+            continue; //occupied: try the next slot
+        }
+
+        size_t nameSize = strlen(Symbol->name) + 1;
+        symbol *copy = malloc(sizeof *copy);
+        char *nameCopy = malloc(nameSize);
+        if (copy == NULL || nameCopy == NULL) {
+            free(copy);
+            free(nameCopy);
+            fprintf(stderr, "insertSymbol: out of memory, symbol %s was not recorded\n", Symbol->name);
+            return;
+        }
+        memcpy(nameCopy, Symbol->name, nameSize);
+        *copy = *Symbol;
+        copy->name = nameCopy;
+        symbolTable[slot] = copy;
+        return;
+    }
+}
+
+//Finds the opcode-table entry for a token.
+//  token: NUL-terminated string, may be NULL
+//  returns: pointer to the entry stored by insert(), or NULL if the token is NULL or is
+//           not in the table (which means it is not a known directive, opcode or register)
+//Probing starts at hash(token) and walks forward with wrap-around, exactly like insert().
+opcode* getOpcode(char* token) {
+    if (token == NULL) { //hash() would call strlen(NULL)
+        return NULL;
+    }
+
+    unsigned int home = hash(token);
+    for (unsigned int step = 0; step < TABLESIZE; step++) {
+        opcode *entry = hashtable[(home + step) % TABLESIZE];
+        if (entry == NULL) {
+            return NULL; //an empty slot ends the probe sequence
+        }
+        if (strcmp(entry->instr, token) == 0) {
+            return entry;
+        }
+    }
+
+    return NULL; //every slot was occupied and none matched
+}
+
+//Looks up the address recorded for a symbol.
+//  token: symbol name, may be NULL
+//  returns: the address field of the matching symbol-table entry, or 99999 if the name is
+//           NULL or is not in the table
+//Probing starts at hash(token) and walks forward with wrap-around, exactly like
+//insertSymbol(). The same 99999 sentinel is used for every "not found" outcome, including
+//the case where all slots are occupied and none matches.
+int getAddress(char* token)
+{
+    const int notFound = 99999;
+
+    if (token == NULL) { //hash() would call strlen(NULL)
+        return notFound;
+    }
+
+    unsigned int home = hash(token);
+    for (unsigned int step = 0; step < TABLESIZE; step++) {
+        symbol *entry = symbolTable[(home + step) % TABLESIZE];
+        if (entry == NULL) {
+            return notFound; //an empty slot ends the probe sequence
+        }
+        if (strcmp(entry->name, token) == 0) {
+            return entry->address;
+        }
+    }
+
+    return notFound;
+}
+
+//Reinterprets the decimal digits of num as hexadecimal digits, e.g. 1000 -> 0x1000 = 4096.
+//This is needed because the integer after START is written in hex but is read with atoi();
+//the address counter is kept as a plain int and displayed as hex at the end with %X.
+//  num: value whose decimal digits are the hex digits to convert (so A-F cannot be expressed)
+//  returns: the converted value; 0 when num is zero or negative
+//The place value is tracked directly instead of calling power(), so the rightmost digit is
+//weighted by 1. Intended for inputs of at most seven digits; longer inputs overflow int.
+int hex2dec(int num){
+    int dec = 0;
+    int place = 1; //16^0 for the rightmost digit
+    while (num > 0){
+        dec += (num % 10) * place;
+        num /= 10;
+        if (num > 0){
+            place *= 16; //only advance when another digit follows
+        }
+    }
+    return dec;
+}
+
+//Integer exponentiation: returns x raised to the power y.
+//Defined here because linking against pow() from math.h gave an undefined reference.
+//  x: base
+//  y: exponent; for y <= 0 the loop does not run and 1 is returned
+//  returns: x multiplied by itself y times (1 for y == 0); large results overflow int
+int power(int x, int y){
+    int result = 1; //x^0
+    for(int i = 0; i < y; i++){
+        result *= x;
+    }
+    return result;
+}
+
+
+
+//Number of tokens kept per statement: [label] mnemonic operand [one extra]; further tokens are ignored
+#define MAXTOKENS 4
+//Longest label the assembler accepts
+#define MAXSYMBOL 6
+//Size of the buffer used to read one source line
+#define LINESIZE 1024
+
+//Everything pass 1 learns that pass 2 needs. The per-statement arrays are indexed by
+//statement number, which starts at 1 and does not count comment lines.
+typedef struct {
+    int startAddress[TABLESIZE];          //address counter before the statement
+    int endAddress[TABLESIZE];            //address counter after the statement
+    int bytesUsed[TABLESIZE];             //bytes generated or reserved by the statement
+    int lineCount;                        //number the next statement would have received
+    symbol symbols[TABLESIZE];            //storage for the entries handed to insertSymbol()
+    char names[TABLESIZE][MAXSYMBOL + 1]; //storage for the label text of those entries
+    int symbolCount;                      //number of entries of symbols/names in use
+} passData;
+
+//TABLE OF OPCODES (appropriately hardcoded) as {mnemonic, opcode, kind}.
+//The table has static storage duration because the hash table keeps pointers to its elements.
+static opcode opcodeTable[] = {
+    //Directives
+    {"START", 0, DIRECTIVE}, {"END", 0, DIRECTIVE}, {"BYTE", 0, DIRECTIVE}, {"WORD", 0, DIRECTIVE},
+    {"RESB", 0, DIRECTIVE}, {"RESW", 0, DIRECTIVE}, {"RESR", 0, DIRECTIVE}, {"EXPORTS", 0, DIRECTIVE},
+    //Opcodes
+    {"ADD", 0x18, OPCODE}, {"ADDF", 0x58, OPCODE}, {"ADDR", 0x90, OPCODE}, {"AND", 0x40, OPCODE},
+    {"CLEAR", 0xB4, OPCODE}, {"COMP", 0x28, OPCODE}, {"COMPF", 0x88, OPCODE}, {"COMPR", 0xA0, OPCODE},
+    {"DIV", 0x24, OPCODE}, {"DIVF", 0x64, OPCODE}, {"DIVR", 0x9C, OPCODE}, {"FIX", 0xC4, OPCODE},
+    {"FLOAT", 0xC0, OPCODE}, {"HIO", 0xF4, OPCODE}, {"J", 0x3C, OPCODE}, {"JEQ", 0x30, OPCODE},
+    {"JGT", 0x34, OPCODE}, {"JLT", 0x38, OPCODE}, {"JSUB", 0x48, OPCODE}, {"LDA", 0x00, OPCODE},
+    {"LDB", 0x68, OPCODE}, {"LDCH", 0x50, OPCODE}, {"LDF", 0x70, OPCODE}, {"LDL", 0x08, OPCODE},
+    {"LDS", 0x6C, OPCODE}, {"LDT", 0x74, OPCODE}, {"LDX", 0x04, OPCODE}, {"LPS", 0xD0, OPCODE},
+    {"MUL", 0x20, OPCODE}, {"MULF", 0x60, OPCODE}, {"MULR", 0x98, OPCODE}, {"NORM", 0xC8, OPCODE},
+    {"OR", 0x44, OPCODE}, {"RD", 0xD8, OPCODE}, {"RMO", 0xAC, OPCODE}, {"RSUB", 0x4C, OPCODE},
+    {"SHIFTL", 0xA4, OPCODE}, {"SHIFTR", 0xA8, OPCODE}, {"SIO", 0xF0, OPCODE}, {"SSK", 0xEC, OPCODE},
+    {"STA", 0x0C, OPCODE}, {"STB", 0x78, OPCODE}, {"STCH", 0x54, OPCODE}, {"STF", 0x80, OPCODE},
+    {"STI", 0xD4, OPCODE}, {"STL", 0x14, OPCODE}, {"STS", 0x7C, OPCODE}, {"STSW", 0xE8, OPCODE},
+    {"STT", 0x84, OPCODE}, {"STX", 0x10, OPCODE}, {"SUB", 0x1C, OPCODE}, {"SUBF", 0x5C, OPCODE},
+    {"SUBR", 0x94, OPCODE}, {"SVC", 0xB0, OPCODE}, {"TD", 0xE0, OPCODE}, {"TIO", 0xF8, OPCODE},
+    {"TIX", 0x2C, OPCODE}, {"TIXR", 0xB8, OPCODE}, {"WD", 0xDC, OPCODE},
+    //Registers
+    {"A", 0, REGISTER}, {"X", 0, REGISTER}, {"L", 0, REGISTER}, {"PC", 0, REGISTER}, {"SW", 0, REGISTER},
+};
+
+//Inserts every element of opcodeTable into the opcode hash table.
+static void loadOpcodeTable(void) {
+    size_t count = sizeof opcodeTable / sizeof opcodeTable[0];
+    for (size_t i = 0; i < count; i++) {
+        insert(&opcodeTable[i]);
+    }
+}
+
+//Splits a line in place into at most MAXTOKENS tokens separated by spaces, tabs or newlines.
+//Slots without a token are set to placeholder so that later lookups never see a stale or
+//uninitialised pointer. Returns the number of tokens found on the line.
+static int tokenize(char *line, char *tokens[], char *placeholder) {
+    int count = 0;
+    for (int i = 0; i < MAXTOKENS; i++) {
+        tokens[i] = placeholder;
+    }
+    for (char *token = strtok(line, " \t\n"); token != NULL; token = strtok(NULL, " \t\n")) {
+        if (count < MAXTOKENS) {
+            tokens[count] = token;
+        }
+        count++;
+    }
+    return count;
+}
+
+//Applies a START directive: the operand is the starting address written in hexadecimal.
+//Returns 0 on success, 1 (after printing the error) for a second START or a START that
+//appears after the address counter has already moved.
+static int handleStart(char *operand, bool *started, int *addressCounter, char *line, int lineNo) {
+    if (*started) {
+        printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d duplicate START token encountered!\n", line, lineNo);
+        return 1;
+    }
+    *started = true;
+    if (*addressCounter != 0) {
+        printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d You can't have the START directive in the middle of the file!\n", line, lineNo);
+        return 1;
+    }
+    *addressCounter = (int)strtol(operand, NULL, 16); //an unparsable operand yields 0
+    return 0;
+}
+
+//Checks the token that follows a directive or opcode.
+//A directive must not be followed by another directive; an opcode must not be followed by
+//an opcode or a directive. Returns 0 if acceptable, 1 after printing the error.
+static int checkOperand(int kind, char *operand, char *line, int lineNo) {
+    int operandKind = getType(operand);
+    if (kind == DIRECTIVE && operandKind == DIRECTIVE) {
+        printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Duplicate directive detected! Did you define a symbol with a name that matches an assembler directive?\n ", line, lineNo);
+        return 1;
+    }
+    if (kind == OPCODE && (operandKind == OPCODE || operandKind == DIRECTIVE)) {
+        printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Opcode not followed by valid operand! \n", line, lineNo);
+        return 1;
+    }
+    return 0;
+}
+
+//Number of bytes a directive contributes to the address counter.
+//BYTE X'..' takes one byte per two hex digits, BYTE C'..' one byte per character, WORD 3,
+//RESB its operand, RESW three times its operand; every other directive takes no space.
+static int directiveSize(char *directive, char *operand, char *line, int lineNo) {
+    if (strcmp(directive, "BYTE") == 0) {
+        int length = (int)strlen(operand);
+        int payload = length > 3 ? length - 3 : 0; //length minus the X or C and the two quotes
+        if (operand[0] == 'X') {
+            return payload / 2;
+        }
+        if (operand[0] == 'C') {
+            return payload;
+        }
+        printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Must specify X or C after byte directive", line, lineNo);
+        return length;
+    }
+    if (strcmp(directive, "WORD") == 0) {
+        return 3;
+    }
+    if (strcmp(directive, "RESB") == 0) {
+        return atoi(operand);
+    }
+    if (strcmp(directive, "RESW") == 0) {
+        return 3 * atoi(operand);
+    }
+    return 0;
+}
+
+//PASS 1: assigns an address to every statement, records the labels in the symbol table
+//and writes "label address" lines to outputFile. Returns 0 on success or 1 after printing
+//an assembly error.
+static int runPass1(FILE *inputFile, FILE *outputFile, passData *data) {
+    char line[LINESIZE];
+    char *tok[MAXTOKENS];
+    int lineCounter = 1;
+    int addressCounter = 0; //kept as a plain int, displayed as hex
+    bool start = false;     //whether a START directive has been seen
+
+    while (fgets(line, sizeof line, inputFile)) {
+        if (line[0] == '#') { //comment line: not counted as a statement
+            continue;
+        }
+        bool startsStatement = ((line[0] >= 'A') && (line[0] <= 'Z')) || (line[0] == '\t');
+        if (!startsStatement) {
+            printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid line!", line, lineCounter);
+            return 1;
+        }
+        if (lineCounter >= TABLESIZE) { //the per-statement arrays have TABLESIZE entries
+            printf("ASSEMBLY ERROR:\n\nLine %d Too many lines! At most %d statements are supported.\n", lineCounter, TABLESIZE - 1);
+            return 1;
+        }
+
+        tokenize(line, tok, "empty");
+
+        int kind = getType(tok[0]);
+        int at = 0; //index of the directive/opcode token: 0 without a label, 1 with one
+
+        if (kind == DIRECTIVE || kind == OPCODE) {
+            if (checkOperand(kind, tok[1], line, lineCounter) != 0) {
+                return 1;
+            }
+            if (strcmp(tok[0], "START") == 0 && handleStart(tok[1], &start, &addressCounter, line, lineCounter) != 0) {
+                return 1;
+            }
+        }
+        else { //this means the first token is a symbol
+            at = 1;
+            if (strlen(tok[0]) > MAXSYMBOL) {
+                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Symbols cannot be longer than six characters! \n", line, lineCounter);
+                return 1;
+            }
+            if ((tok[0][0] < 'A') || (tok[0][0] > 'Z')) {
+                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Symbols must start with an alpha character! \n ", line, lineCounter);
+                return 1;
+            }
+
+            //START moves the address counter before the label is recorded
+            if (strcmp(tok[1], "START") == 0 && handleStart(tok[2], &start, &addressCounter, line, lineCounter) != 0) {
+                return 1;
+            }
+
+            fprintf(outputFile, "%s %X\n", tok[0], (unsigned int)addressCounter);
+
+            //the symbol table keeps the pointer it is given, and tok[0] points into the line
+            //buffer, so both the entry and its name are stored in data
+            symbol *entry = &data->symbols[data->symbolCount];
+            char *name = data->names[data->symbolCount];
+            snprintf(name, MAXSYMBOL + 1, "%s", tok[0]);
+            entry->name = name;
+            entry->address = addressCounter;
+            entry->opcode = 0;
+            data->symbolCount++;
+            insertSymbol(entry);
+
+            kind = getType(tok[1]);
+            if (kind != DIRECTIVE && kind != OPCODE) {
+                printf("ASSEMBLY ERROR:\n\n%s\n\nLine %d Invalid token or duplicate symbol! ", line, lineCounter);
+                return 1;
+            }
+            if (checkOperand(kind, tok[2], line, lineCounter) != 0) {
+                return 1;
+            }
+        }
+
+        int size = (kind == OPCODE) ? 3 : directiveSize(tok[at], tok[at + 1], line, lineCounter);
+        data->startAddress[lineCounter] = addressCounter;
+        data->bytesUsed[lineCounter] = size;
+        addressCounter += size;
+        data->endAddress[lineCounter] = addressCounter;
+        lineCounter++;
+    }
+
+    data->lineCount = lineCounter;
+    return 0;
+}
+
+//True if pass 2 writes a text record for this mnemonic: any opcode, or any directive
+//other than RESB and RESW.
+static bool emitsText(char *token) {
+    int kind = getType(token);
+    if (kind == OPCODE) {
+        return true;
+    }
+    return (kind == DIRECTIVE) && (strcmp(token, "RESB") != 0) && (strcmp(token, "RESW") != 0);
+}
+
+//Writes the object code of a BYTE operand. X'..' digits are copied as they are (anything
+//that is not 0-9 or A-F is reported), C'..' characters are written as two hex digits each.
+//Copying stops at the closing quote or at the end of the token, whichever comes first.
+//NOTE(review): a C'..' constant containing a space is split by the tokenizer and truncated.
+static void writeByteConstant(FILE *objectFile, char *operand, char *line, int lineNo) {
+    bool isHex = (operand[0] == 'X');
+    if (!isHex && operand[0] != 'C') {
+        printf("possible error");
+        return;
+    }
+    if (operand[1] == '\0') { //nothing follows the X or C
+        return;
+    }
+    for (int p = 2; operand[p] != '\0' && operand[p] != '\''; p++) {
+        char c = operand[p];
+        if (!isHex) {
+            fprintf(objectFile, "%02X", (unsigned int)(unsigned char)c);
+            continue;
+        }
+        bool hexDigit = ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F'));
+        if (!hexDigit) {
+            printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Invalid byte constant!", line, lineNo);
+        }
+        fprintf(objectFile, "%c", c);
+    }
+}
+
+//Writes one text record: T, the 6-digit address of the statement, its 2-digit length and
+//the object code (opcode plus 4-digit operand address, or the BYTE/WORD constant).
+//NOTE(review): an operand that is not in the symbol table (for example an indexed operand
+//such as BUFFER,X) makes getAddress() return its not-found value, which is printed as is.
+static void writeTextRecord(FILE *objectFile, char *mnemonic, char *operand, const passData *data, char *line, int lineNo) {
+    fprintf(objectFile, "T%06X%02X", (unsigned int)data->startAddress[lineNo], (unsigned int)data->bytesUsed[lineNo]);
+
+    opcode *entry = getOpcode(mnemonic);
+    unsigned int code = (entry != NULL) ? (unsigned int)entry->opcode : 0u;
+
+    if (strcmp(mnemonic, "RSUB") == 0) {
+        //no address recorded for RSUB
+        fprintf(objectFile, "%02X0000\n", code);
+        return;
+    }
+    if (getType(mnemonic) == DIRECTIVE) {
+        if (strcmp(mnemonic, "BYTE") == 0) {
+            writeByteConstant(objectFile, operand, line, lineNo);
+        }
+        if (strcmp(mnemonic, "WORD") == 0) {
+            //a word is three bytes: six hex digits
+            fprintf(objectFile, "%06X", (unsigned int)atoi(operand) & 0xFFFFFFu);
+        }
+        fprintf(objectFile, "\n"); //print a newline after all is done
+        return;
+    }
+    fprintf(objectFile, "%02X%04X\n", code, (unsigned int)getAddress(operand));
+}
+
+//PASS 2: reads the source again and writes the object program: a header record for the
+//first statement, an end record for the last one and text records in between.
+//Returns 0 on success or 2 if the program name on the first statement is too long.
+static int runPass2(FILE *source, FILE *objectFile, const passData *data) {
+    char line[LINESIZE];
+    char *tok[MAXTOKENS];
+    int lineCounter = 1;
+    int lastLine = data->lineCount; //one more than the number of statements
+
+    while (fgets(line, sizeof line, source)) {
+        if (line[0] == '#') { //comment line: not counted as a statement
+            continue;
+        }
+        if (lineCounter >= TABLESIZE) { //pass 1 rejects such files; never index past the arrays
+            break;
+        }
+
+        tokenize(line, tok, "empty2");
+
+        if (lineCounter == 1) { //file must begin with header record
+            if (strlen(tok[0]) > MAXSYMBOL) {
+                printf("ASSEMBLY ERROR\n\n%s\n\nLine %d Symbol name too long!", line, lineCounter);
+                return 2;
+            }
+            unsigned int first = (unsigned int)data->endAddress[1];
+            unsigned int length = (unsigned int)(data->endAddress[lastLine - 1] - data->endAddress[1]);
+            //name padded to six columns, then starting address and program length
+            fprintf(objectFile, "H%-6s%06X%06X\n", tok[0], first, length);
+        }
+        else if (lineCounter == lastLine - 1) {
+            //print the E and the first memory address
+            fprintf(objectFile, "E%06X", (unsigned int)data->endAddress[1]);
+        }
+        else {
+            //without a label the mnemonic is the first token, otherwise the second
+            int at = emitsText(tok[0]) ? 0 : 1;
+            if (emitsText(tok[at])) {
+                writeTextRecord(objectFile, tok[at], tok[at + 1], data, line, lineCounter);
+            }
+        }
+
+        lineCounter++;
+    }
+
+    return 0;
+}
+
+//Entry point of the two-pass assembler.
+//  argv[1] is the assembly file to be opened.
+//Pass 1 writes the symbol listing to pass1.txt; pass 2 writes the object program to
+//<argv[1]>.obj. Returns 0 on success, 1 for usage, file and pass-1 errors, 2 for a pass-2
+//error. Every stream that was opened is closed again on all paths.
+int main(int argc, char* argv[]) { //argc = # of arguments
+
+    //clear the memory for the hash tables as soon as the program starts
+    for (int i = 0; i < TABLESIZE; i++) {
+        hashtable[i] = NULL;
+        symbolTable[i] = NULL;
+    }
+
+    //check for incorrect command usage
+    if (argc != 2) {
+        printf("USAGE: %s <filename>\n", argv[0]);
+        return 1;
+    }
+
+    //the input is opened first so that a bad file name does not leave an empty pass1.txt behind
+    FILE* inputFile = fopen(argv[1], "r"); //"r" parameter is for read-only
+    if (!inputFile) {
+        printf("ASSEMBLY ERROR:\n\n%s could not be opened for reading.", argv[1]);
+        return 1;
+    }
+    FILE* outputFile = fopen("pass1.txt", "w");
+    if (!outputFile) {
+        printf("ASSEMBLY ERROR:\n\npass1.txt could not be opened for writing.");
+        fclose(inputFile);
+        return 1;
+    }
+
+    loadOpcodeTable();
+
+    static passData data; //static: zero-initialised and too large to be comfortable on the stack
+
+    int status = runPass1(inputFile, outputFile, &data);
+    fclose(outputFile);
+    fclose(inputFile);
+    if (status != 0) {
+        return status;
+    }
+
+    /* ==========================================================================
+     * =================================PASS 2===================================
+     * ==========================================================================
+     */
+
+    char pass2file[0x100];
+    snprintf(pass2file, sizeof(pass2file), "%s.obj", argv[1]);
+
+    FILE* pass2 = fopen(argv[1], "r");
+    if (!pass2) {
+        printf("ASSEMBLY ERROR:\n\n%s could not be opened for reading.", argv[1]);
+        return 1;
+    }
+    FILE* objectFile = fopen(pass2file, "w");
+    if (!objectFile) {
+        printf("ASSEMBLY ERROR:\n\n%s could not be opened for writing.", pass2file);
+        fclose(pass2);
+        return 1;
+    }
+
+    status = runPass2(pass2, objectFile, &data);
+
+    fclose(objectFile);
+    fclose(pass2);
+
+    return status;
+}