From f1f5bf70a9392974fd13cf7ed32c05a011a46e54 Mon Sep 17 00:00:00 2001 From: SinghCoder Date: Fri, 21 Feb 2020 09:55:18 +0530 Subject: [PATCH 1/8] First follow implementation started --- Code/driver.c | 7 ++- Code/parser.c | 157 ++++++++++++++++++++++++++++++++++++++++++++--- Code/parser.h | 9 +++ Code/parserDef.h | 7 ++- 4 files changed, 169 insertions(+), 11 deletions(-) diff --git a/Code/driver.c b/Code/driver.c index e9785be..bee8764 100644 --- a/Code/driver.c +++ b/Code/driver.c @@ -56,8 +56,11 @@ int main() } grammar_fill(fptr); - print_grammar(); - + // print_grammar(); + // get_nullable_set(); + populate_first_sets(); + + print_first_sets(); } // end of main diff --git a/Code/parser.c b/Code/parser.c index eaa1334..b269451 100644 --- a/Code/parser.c +++ b/Code/parser.c @@ -67,18 +67,28 @@ void parser_init() strcpy(non_terminal_string[i], non_terminal_string_copy[i]); } + + //initialize all first sets to be null + for(int i=0; iflag; + if(flag == NT) + { + sym = (int)( (grammar[i].head)->s ).nt; + continue; + } + else + { + sym = (int)( (grammar[i].head)->s ).t; + if( sym == EPSILON ) + { + set |= (mask << grammar[i].sym); + // printf("%d", sym); + // printf("%s\n", non_terminal_string[grammar[i].sym]); + } + } + } + return set; +} + +// ull + +void print_first_sets() +{ + for( int i = 0; i < NUM_OF_NONTERMINALS; i++) + { + printf("FIRST(%s) = { " , non_terminal_string[i] ); + for(int j = 0; j< BITSTRING_PART_NUM ; j++) + { + for(int k = 0; k < sz(ull); k++) + { + if(first_set[i][j] & (1 << k) != 0) + { + printf("%s, ", terminal_string[j*sz(ull) + k]); + } + } + } + printf(" }\n"); + } +} + +ull* firstOf(nonterminal nt) +{ + return first_set[nt]; +} + +bool is_superset(ull a[BITSTRING_PART_NUM], ull b[BITSTRING_PART_NUM]) +{ + for(int i=0; i a cannot be superset of b + { + return false; + } + } + } + return true; +} + +void populate_first_sets() +{ + // grammar[i].head + bool is_changed = true; + int lhs; + rhsnode_ptr rhs_ptr; + while(is_changed == true) + { + is_changed = false; + for(int i=0; i flag == T) + { + token_name t = (rhs_ptr -> s).t; + if( ( first_set[lhs][t / sz(ull)] & ( 1 << (t % sz(ull)) ) ) == 0) //check if terminal already there in the first set + { + // printf("Adding term %s to first(%s)\n", terminal_string[t], non_terminal_string[lhs]); + first_set[lhs][t / sz(ull)] |= ( 1 << t % sz(ull) ); + is_changed = true; + } + } + else + { + rhsnode_ptr temp = rhs_ptr; + ull* first_rhs = firstOf( (temp -> s).nt ); + while(temp != NULL) + { + //first_set[lhs]|=first_set + if(is_superset(first_set[lhs], first_rhs) == false) + { + is_changed = true; + // printf("Adding first(%s) to first(%s)\n", non_terminal_string[ (temp->s).nt ], non_terminal_string[lhs]); + for(int j = 0; j < BITSTRING_PART_NUM; j++) + { + first_set[lhs][j] |= first_rhs[j]; + } + } + if( first_rhs[ EPSILON / sz(ull) ] & ( 1 << (EPSILON % sz(ull)) ) == 0)//firstOf( this nt ) does nto contain epsilon + { + printf("first(%s) does not contain epsilon yet\n", non_terminal_string[ (temp->s).nt ]); + break; + } + else + { + // check if the current nt on rhs is last nt of the rule, if no, remove epsilon from first_set(lhs) + if(temp->next != NULL) //isn't last node + { + first_set[lhs][ EPSILON / sz(ull) ] &= (~ ( 1 << (EPSILON % sz(ull)) ) ); + // printf("Removing term %s from first(%s)\n", terminal_string[ EPSILON ], non_terminal_string[lhs]); + } + } + temp = temp -> next; + } // end of rule linked list traversal while loop + } // end of else (non-terminal branch) + } // end of for - grammar traversal + } // end of while - infinite loop until convergence +} // end of function + diff --git a/Code/parser.h b/Code/parser.h index df7192f..c036b91 100644 --- a/Code/parser.h +++ b/Code/parser.h @@ -13,4 +13,13 @@ void print_grammar(); void parser_init(); +void populate_first_sets(); + +unsigned long long int get_nullable_set(); + +void print_first_sets(); + +ull* firstOf(nonterminal nt); + +bool is_superset(ull a[BITSTRING_PART_NUM], ull b[BITSTRING_PART_NUM]); #endif \ No newline at end of file diff --git a/Code/parserDef.h b/Code/parserDef.h index 595b6d7..11af84f 100644 --- a/Code/parserDef.h +++ b/Code/parserDef.h @@ -5,6 +5,10 @@ #define MAX_LENGTH 100 #define NUM_OF_RULES 99 #define NUM_OF_NONTERMINALS 53 +#define BITSTRING_PART_NUM 3 +#define sz(x) sizeof(x) +typedef unsigned long long int ull; + typedef enum { MAINPROGRAM, @@ -89,6 +93,7 @@ typedef struct rhsnode_ptr tail; } cell; +unsigned long long int first_set[NUM_OF_NONTERMINALS][BITSTRING_PART_NUM]; //f_set for each nt calculated by considering bits of 3 unsigned long long int // typedef cell *grammar_t; cell grammar[NUM_OF_RULES]; -#endif \ No newline at end of file +#endif From ad702d438b3155f7c3e1dd9902927f1e3b97ae7f Mon Sep 17 00:00:00 2001 From: SinghCoder Date: Fri, 21 Feb 2020 13:34:55 +0530 Subject: [PATCH 2/8] Convergence achieved, results are wrong though --- Code/parser.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/Code/parser.c b/Code/parser.c index b269451..4cf1d50 100644 --- a/Code/parser.c +++ b/Code/parser.c @@ -298,12 +298,16 @@ ull* firstOf(nonterminal nt) bool is_superset(ull a[BITSTRING_PART_NUM], ull b[BITSTRING_PART_NUM]) { + // for(int i=0; i a cannot be superset of b + if( ((a[i] & (1 << j ) ) == 0) && ( (b[i] & (1 << j )) != 0 ) ) //jth bit of a[i] is not set but of b[i] is set => a cannot be superset of b { + // printf("diff is in %s.\n", terminal_string[i*sz(ull) + j]); + // printf("Differing in : %d, %d\n", a[i], b[i]); return false; } } @@ -319,6 +323,7 @@ void populate_first_sets() rhsnode_ptr rhs_ptr; while(is_changed == true) { + printf("Iterating over grammar.\n"); is_changed = false; for(int i=0; i s).t; if( ( first_set[lhs][t / sz(ull)] & ( 1 << (t % sz(ull)) ) ) == 0) //check if terminal already there in the first set { - // printf("Adding term %s to first(%s)\n", terminal_string[t], non_terminal_string[lhs]); - first_set[lhs][t / sz(ull)] |= ( 1 << t % sz(ull) ); + printf("Adding term %s to first(%s) - \n", terminal_string[t], non_terminal_string[lhs]); + // printf("B4 adding : %d\n", first_set[lhs][t / sz(ull)] & ( 1 << t % sz(ull) )); + first_set[lhs][t / sz(ull)] |= ( 1 << (t % sz(ull)) ); + // printf("After adding: %d\n", first_set[lhs][t / sz(ull)] & ( 1 << t % sz(ull) )); + // printf("sizeof(llu) : %lu\n", sz(ull)); + // printf("EPSILON : = %d\n", EPSILON); is_changed = true; + // break; } } else @@ -344,26 +354,31 @@ void populate_first_sets() if(is_superset(first_set[lhs], first_rhs) == false) { is_changed = true; - // printf("Adding first(%s) to first(%s)\n", non_terminal_string[ (temp->s).nt ], non_terminal_string[lhs]); + printf("Adding first(%s) to first(%s)\n", non_terminal_string[ (temp->s).nt ], non_terminal_string[lhs]); for(int j = 0; j < BITSTRING_PART_NUM; j++) { first_set[lhs][j] |= first_rhs[j]; } + // printf("now is_superset = %d\n", is_superset(first_set[lhs], first_rhs)); } - if( first_rhs[ EPSILON / sz(ull) ] & ( 1 << (EPSILON % sz(ull)) ) == 0)//firstOf( this nt ) does nto contain epsilon + else { - printf("first(%s) does not contain epsilon yet\n", non_terminal_string[ (temp->s).nt ]); - break; + // printf("not adding first(%s) to first(%s)\n", non_terminal_string[ (temp->s).nt ], non_terminal_string[lhs]); } - else + if( first_rhs[ EPSILON / sz(ull) ] & ( 1 << (EPSILON % sz(ull)) ) == 0)//firstOf( this nt ) does not contain epsilon { - // check if the current nt on rhs is last nt of the rule, if no, remove epsilon from first_set(lhs) - if(temp->next != NULL) //isn't last node - { - first_set[lhs][ EPSILON / sz(ull) ] &= (~ ( 1 << (EPSILON % sz(ull)) ) ); - // printf("Removing term %s from first(%s)\n", terminal_string[ EPSILON ], non_terminal_string[lhs]); - } + // printf("first(%s) does not contain epsilon yet\n", non_terminal_string[ (temp->s).nt ]); + break; } + // else + // { + // // check if the current nt on rhs is last nt of the rule, if no, remove epsilon from first_set(lhs) + // if(temp->next != NULL) //isn't last node + // { + // first_set[lhs][ EPSILON / sz(ull) ] &= (~ ( 1 << (EPSILON % sz(ull)) ) ); + // printf("Removing term %s from first(%s)\n", terminal_string[ EPSILON ], non_terminal_string[lhs]); + // } + // } temp = temp -> next; } // end of rule linked list traversal while loop } // end of else (non-terminal branch) From a5ed1cb369180e4ede94b1e392914b5c212ea927 Mon Sep 17 00:00:00 2001 From: SinghCoder Date: Fri, 21 Feb 2020 15:03:33 +0530 Subject: [PATCH 3/8] sz(ull) resolved --- Code/.vscode/settings.json | 5 +++ Code/exe | Bin 0 -> 27256 bytes Code/f1.txt | 55 ++++++++++++++++++++++++++ Code/f2.txt | 55 ++++++++++++++++++++++++++ Code/parser.c | 77 ++++++++++++++++++++++--------------- Code/parserDef.h | 1 + Header Files/lexer.h | 47 ++++++++++++++++++++++ grammar.txt | 60 +++++++++++++++++++++++++++++ 8 files changed, 268 insertions(+), 32 deletions(-) create mode 100644 Code/.vscode/settings.json create mode 100755 Code/exe create mode 100644 Code/f1.txt create mode 100644 Code/f2.txt create mode 100644 Header Files/lexer.h create mode 100644 grammar.txt diff --git a/Code/.vscode/settings.json b/Code/.vscode/settings.json new file mode 100644 index 0000000..0f06797 --- /dev/null +++ b/Code/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "stdio.h": "c" + } +} \ No newline at end of file diff --git a/Code/exe b/Code/exe new file mode 100755 index 0000000000000000000000000000000000000000..d2c0cf948c7200da5c778fd538ca2e6b0aec5190 GIT binary patch literal 27256 zcmeHwdwf$x`uC*mfr7Oy3W|yn1e6LCP=qR=4ecq3_GX(RE{mb`LMy#(njomKtO1u8 z!m_*S;$>I%r~4CK{ds*6UBx0u(4VXIURQjrE^1=yT5nht$?yBjB|SMLh4=Hm?|(ab z&dhx0d7gRZnd_NJhhOT&MJW~wQ!bTV!YJ03Es%VfkXIuE@bXz68;19pYz!NMcpAq< zdcN?GDqO!5t^9O}r@|+`yc7YL{V|eG`AS3bHJO&kvpNK-e8#LCAU@J{QVEa3xAK7U zyGR+h@L9rFe54-n z_SW3^+X+c)X=g51yQjKlg{!7vg{!{0v5C-{hH5u)cv@Gm`sHp%gm=UdQ^?F!y zbyHJo4MfyIEbR#m&|8(XTI8n3J43N^O@ zB~?iWx5w37-Ppnw6g%b@xX#bLAQqY#3!R^PA#*vLC9c{!cir;Fc2Aw#SyIr{+EVAN zUe;6xY0H~iTck9XAWcXJTT$OlG5nWm2*;;TEDaC!^He;Hsd4G##>NwA1bs$gBY8ZP z?UQlu>5apPyAyG8Y8+Sf)HXuk)8%-r@aYmyFSl|6g{!#Lggb3Q&Lk7QNaE8?xJ%-B zCcIwa`6j$g;&v0hM&jir+$ZrxCfqOar6zo{#2ZZb?spA4@rEd34dAQ9VYxuiSIYz?@GMWgnuCM zE)yP>c()1fm3Y{Me<|@k6aKx#`%O5TFWMoyBEH=Ymw37f&z5+$2_Gr((I)&fiCazh z1c^^F;pa(wx(Uyfc%BKLA@O_@ev!oOCj3%~mz(e+i7ztY4v8-{;l&bfFyU87yv>AP zDe+Y%+$HgKCcIYS8%_8%65nFNTO_{Kgu5la&4ha;zTJeclK3Mg{CbJ+G~sI{-eJOj zCGq_xe7(dwP54HMcbV{j#Jf%Str8EL@IOnu&xGG6@qQEju*AE{MEj?;Zi?&@XF>%J+Is-mDhBCI%fM*!+Ee0GDbj)ov;F&R$ zv26xC+kkI3;3Ew9BL>{KF579qPd3nZ7;yTgP;S2gAEiL}odz7MyO^{4deiLz?X7$( zvv26|q(nRIzP;&tSoFf-fT9yK@Xi{Wj}XNhh%eF|g`1c{aSO#edHjb<5vQgZ>EQ9N zC{9fv@(7QAMsce8$TlATh~iY`ku5y_4~kP&N7nK9>lCLdjVJboR;sp=vfJWl-eTM*}N#^(zdw(>d7pqPI;sdf-(-!&;vdjeHY=;L2em8U+b`<3`#<)ahN?OpG*ne5E{S&6 z=Vpyv&*d-KL$x#z2mfir7rgK!E+*#)`0cxoq}u&`_TAx2?UsG^*N%8bL*R#(2!RlobEdms|!A@U`W zdQeaZt**S*9!Q%w2_$yE-an%Qnq~-7422hNf(?Y)@gJDl920 zjFdH?>?wq9BGq5D^Q)|EeJ2FYkYv=S!gGo2%othl!l6bXXCZYxQiAh7gcm*y2{37w-M6NTc{2dNP4Gzs2LQuPhmf}@(CfogXh^yY*#jm0 zwe|}#c%^x?@KuMS(HHmd>iku2IePL@HF|5u50D+yeuI3T`U_dn|Bl_i?`!*Cs1^Eu z2&DUm+k+Kp_F#Tm){gXz6tN&;32k@+oMiW&W%gi@!~{5Guj#b=JMFu>Q|uO0!pIFo zLqW^#V-N&KHBvhVss9x!>YN{=Q6HCS@jXkq&0KqHXv0pG?V`|zXH)4Y=N{C4CG$>V1#L$aBz!sEr2js4HZ#=vGO@mI(+%} zqAGm)ItzOIMpVTLDCOhEx7Y)>QXNzE?WzSi)z1C>05)`FZLspe!_+!gAXPLI+d^BF z%~?rfI2!48V>{`Z(Gk)+D1X><`+#YAkeik#l4(20#=bQj%)2L#k4$6lI5^I6gt6xCxFkdKM@%Lkv{zyi*L;{GuJ*INkP=^?{@h23YJj#58shZ zp11oxv)KJKIiT@|^sSV+!~cS97X_ZC?!_rAoqwb7(}dc;``W&uBio*{7ZUBh4hY61 zKodcrN@){06pe;-K797%0PhT7Sl^npBP}IsR|o9-F4FzGV+k8GQhfSO3lY_HaZ#Z8 z7Bu#Y_PAljy_9dy?0~-4zSS_qq!wCXTsaGg*^QxCFhe>>VTPXJK7B82uy@YdJ;nZm z0sRRK++D_jdmCjEtm|HE`?+n2?dm0B4EObBhxBhK)5sWDE}(ytxQi~+tqd&}W=uE` z2S-^hf`&BC(!9F&Ts4nej9iln9R;DhKVV3UtcMOTNPi$d&DU@7&f{U0Ji{${h75le z%kaf-F@;W(%pC?3cFb3qk%l=jhOJ;Aa(f^@osX@)HT~!|zd%QMZa8!bfAAlpfLwcWxg~^LNffo(7V(t>tMr_18gV$$T3JUAjzky_n zcT!T~f6@E`;|G{u@9SBdly0Aq&bOxTV(+x1w7&x_iLC1N?|$PW)ah3HFumWWhf&Nh ziaB2ll>J9Hlb?9GR5LkxP7aPbG|Nykdnc-!3jxt#M$I-koo({&_L2 zZvUEU_b__oVPF5HSvR>+aG%}}GxVoqZC=EO%Xue*JUo_D@^&yYYxDPpc7RFS?)P@f z!u70`CF^MvIw>WnGN4l+J8==bv&`h(gOPiNia}@Wfc7hYGobk@Xugx|6sy7MA-$7; z$nWuehA@Kg-Xt%w5>P;YGjU_Bw~4;5A4p1aIY?wr?n7tlL%XIqh4SE0v|Nly2X}{u zU?rd*h|4FgmVcmOgxB&xU-zX6DmrV!z7*1x%i8Dva!_dx#z~_U3^kii{b(uvLw5gb z_F+eB(`RTjO~j8mR$1sDi0s@2S%{qD5n~GFRSgb)~20Vts5D~H_6 zXjG=MCFk#19nkKZP6bHwoDk6N;=v)_Q;;+5IYP2VlDGW}3ner+?Rrj-PPA=2nBg4* zT4YSL?_xa|e&X|JwBDYzw&NICqX_%OTkkH7rq3gw9}UpB~Yu6eQ|z6 zBL`Y>EGcly3nVxeR~;m%y~s&NT6|mzK14~_oV9=4NjZfz>=tf;mEt~0SN5t!KUVuz z`Xmi#|H_TEzz+~47tr2Ch=%f@_6CK5b7_|Z&1d)1v~c2scli4rs@B84HHU|LP793s zPY`1I!!*eG?nD&L($L~%eru6{7Eg_!od<_|PeDIG5SW~2T=_&!*ShP13*RUgm zXQL8VCS|~GfM6I822AlR@*o)2t@$9=3n`fZ&mr&+0-QtO_fxEF5`i}ga0-D30p<|c zEWpJCz6uZ@o-DFRfJ+E`8X&d>SfoIJlL-6^z$pag3NVMjEdZwyc$xsCUK)$?fDAI0 zM83@8`dd92D8+Ek5WI(ZCui-_Kfu^QkQd$gAuK?7Mc#`*NIM5Pok%%7ESM)#_9Fly z$|83Na4La^CtLZMd}Ma89WdB zb1=Bl?IExa0efT?Xu_|4ddw2u00w^ep;2LuUD^N4Kq0A)!lTWF{2T3hBrW7vWaQUQ z#5<%7#hcteSeNi=<)5HF7$VLfVap zrc?9;DiV!tVV$bUSZ`t4!3gNv64zTXB>1(9@eXM-@g_G|k?uQTph)XrJyCCyxrTf& z_~KkE8xA3nRiF%yICg1wfdQl}vH4UV9&WYh;fKBiHxRM~VxyW_}a7syoI z{Xu3^aO`aQ&XJ{AP1$H4B8sYEk!uhY6?8A(Q9^yF(O`ktN=Z^de}x3Uc01k}|L`U^ zSOpC+==}j&v#6l>k$(aj6(91S<|{bN-H-AW+y#)2F2o|w3?t-bzDvhm(y?@a^6GNQC0i|YDJie&t5rTrz7OZW1tCXrz6UbEAK&6SmJ%YFVj6$ zC~*r`grYD%PN2d>@D6GJ#GBk;Mp|BYdT!~u|r4?@EECb77RER=vLinXCB2s^2|YYoi72_q0gCF~xd@{s-i1`5{3RgpYg6#X{4a?IQ~q|yQ5JN^ zG~sjgr{k^sM-Y)V(4f8Xss43~*PbgE)HcR){p4)Y{!P5`{ew3rNdD~?u^DnbJ(erC>ucD)Z0Vb}ACEU5i8 zCMS){`Dz@=>BXDeU~*iLqwJc8Iu~~3g?VV4l~MgiX2rB``;yx=g@gvRyjZTALF(6T zz&oU^#GBk;xkl0ka(x~a5xH`^&K_rFRDaW8TI;K(sS7{+vsr)!p+LKBui_fdO zV0nX;Mu2?+sdNWuLRt!<6Db;?sNPL$dt|*0-zLHupN}PLyoYy4dkb%JgIU84Iif4A z`GQwD%|DQMMNHzIT%w;#^c{!9LufRrFt7Hh!th0KA=uA_{K(%ya~xIvS3x#jsML|A z3YpGTI&>BlvJY=^gB7wy$T^M3g4*>lIUV5jYyZXNY(EY;X9zh~A?L!FoK;*-J(sie zIOM!x;l;-oO!|BQlZ(om&gGoN<%~WKIRPODvlGd=KPD%9CY9@LyhBR;3A_lEsd2XRb;`e8je z*iFLrFFrCeu`!hOu-<$7cWdw)jKcxFcOcgL+L|zJ;QsP85Q&XJIt`zMtmz=Pi?(p4 zAQ!*h8`2Mw1w(q6yx55y5n(fD2g};kg)E<@@y5UJxohEtYp|1*^>CN}-KX!{zx;jw z{_j2urVZcSulb+%@88`&mUghPw`{>H#nPFx+y9cK>L8ugLCze#_l7?PN)8?`Cl333 zdnYZ9oeNoMhYvqGK_U32+SyPA2eQgKMZzYB=~S&Bzj;WW4VcLA@rv#TDo&_y!q+KI zk5lbA;@3PlSqN&s#EKoeVmS2m+(PFCM^Obw6@o_ZseOv`Qk@3BQ3IF>>2@jY%$-1@3Qa**p1tT>6w5#2zZ46tb+*HOu(}QU^FLS z0|5^bfU%2!YYDiM0DQ#|&_uw^1Ry{_H36#$pwB7*P68STAiV)xPQaA}&`J}4jesHo z<_KUm0T&WLJ4~R+C13&pV+3$60Vfl1rU1qekV3#&0>~oZOPopU8Y_TQ0=fyHH9Ca; zgd&7rC4d&>0KOz(7Xet;5by~B4-tS>8v*YTa2Emi!XcoOfB*p(3Lr$lbp*^30PPuv z8wnUEfCmX!M8MesxSN0l1W?a`(BBd;i-1W2_%#6&3CIz^^#qJ0;5-4e5s*p%P5zLv zoPe+B&?UY@sNOIVNdvthatF8)HmTsKyQf-BjdaOd{3jaY6EcXl4<5Ar9`FC}PIkJ; z2%Riz{aiX4#>mKC+%xdgQT}^P+m*J34*xs!OP}K4#6u(aPf@3mUupCIyr+IYTI~O!*ngVV|&Umr8{|Sx*owNAb{?77ZoL6(v|9eIm*L@T zxx2c#x!TQ1#9yVMV98vG%~4ujQMRDMRuWHAQdU@1tQYD9#kLBY(@|DhDbtTiP<&-h zyIvtu6|G1rtZ*z;DTnf!QE4JDB)AG~mG&5W zS(P&}i!mXNrO@WIIj<_$VN$;yb@eg0tQyR&{kZfI|`3V z6A8A83Ui91v`}Akv`(tdEA*;LlWJw<)cg_@-BEaSb#uugA+GL3j{2hV3LS+kD>Z2V z)R{1`LNDg6-&Rb?1}@@Am{?r4z)@f;E-N<>^WXrrDXps1R&kZB!r`=+=uSt$Kt_~D z!op+nSed^;(ynOgyo!|_3eQbly&%^TuCbRbly7ul+co$>N1nRh5Bh9+}T zg{^eKQSynyI-Ft5oq7od-F7Q0a-eZfn%JIWooBtCpefj8SOYTTC`T}+mlje?jDQ0n z2|I0<>y?a;P^|Kb`DOGfU>M`7Dlpt)@ThQ-1PG*2SZCO8sJ>ztN%Uw`NoiFHtDskgb!e)lF8N{@~ntv2|kmaE8AI zuUn2kBFEo}H?d`{txa{+EsXwJ-0iMjjlT=WUxKr`mReR@SJPDOu4B!uwO;&fcdff| zCAiw$t+aGt__K7^%IYR>9S}%l-u60nZKJ1wwN)dz4u2Wm&OB};*HVeR0qX<#+gpk4;uRzQLJYHt&kGYyT%-*QIkoNm+$t=R8+ zDH@%E)&7D1hTY-!!aofB(96+i6XGwt5{+(xKdLhteGvX8_%FkM{Pk$G7ry_^Xmlj@ zPWEA;ITQXPZ$l3JSMYV@f$#2$MsIIpX~}o*QA;SBc-2nSE1A!HGjsU$Q_YuFDO8XZ$@H zEssmTJeGbJ(yLJq^qC&xPjM4F=YIz9PNbg;d&bgJ&rTtN34jjZc@FhPKjn+1U%ojt zn6^pt4N2Wc8I3_w8q%9luk(y_sd+IyCIG(~OYYA=8>AEj9 zv(eC)jlxL34db2$e?UjZ+Y0584%=UkzV?+dkCe|v2EeF|PXWy$^us!$nV*^co0R#P zqc^AOnbu(1{LD$4G<)Xsn}*mk^L#@~GV|S;dA7{yw#-TMGp(Ql{rt>y{u>wS-|HZU zRQb32w*~%ffqz>-Ss-?w42`95G@OxBeujv$blUBNqs=68x6r;M932ReQ}@)+T8EsP zn`u9joVu@u#%FRW{o5naR*G+xgOf%Pxal%LyNu*IWIFcnxEm)G#11BRbRmf@ z|DV#|A^m;Qe_Q&WNdJ)Zhhz)+r%3-C>CcjWk@OczUr-BV6(|ACJ0!N8Puh?$=TG=?tGQ%BSwjQTOAh`*0@6^z)>z?zd6**{J(#)O|HYlFlK0 zb^naIZ${lOqwbTbm2}rgU)>L*?t@YHzo`3O)cr2%J{NU=%X-PL?q>+IZ_xznf4@bcJZJK z|I3+TL_|0hzg_ht#M5HqoRKRPKWkOvTRc7_HV($)Lzx;M;_-B*#+7({7*pd{JU%=& z&c)*=#K!x0JcC(fzm3OFWJ?qD&s6*pPu;f}PoEW=KjQIhwoT3}@%V_?y^-0mCc_+~@w;y|Zx|C(X_iPZuB6f|P?1QEnk5IMSP)EO zUq%NI5u*7v1$LvKqZl222*B}f6FyGj>i##yGaCU5^huZcP=4f!@L2E_t>$A6;?<0Y z@>S~rg|9}ylE9u@fhW@c&q?qHlHiXe!IRl%7w9ecUE>%B0w$M8pO-n!$w!sejiKI8TpYJ(-bUUQ)KTCpkayIajV9zbGJ*(m1e2LGY4Ffp-4GaHLj&msPMSDfo zI7CF+fG4`g@l#~P|1`nWKG%bu^to%i;OF1g2yWu|(HU$@g8#21cu2~hDHnizZvtYw zlF;`6C;hFmeWpnj`jgNPN4bc8f{8vGcsBJ%Q@=YEIF)y{T+s178RT(}lrzaB=R!$8 z_F|z3|1L*x3fJf8@oE95mr5JF#YymMrTok33>>iFAC5!O$N$~m5hqtQ+D<{m574w$Nce>11t(y;_x ztQhtp_c3r@FQ#_*Rg!#%a64qO*XTzBaMOjyvZOvsWxiA8cft51cFU7;PR|np`2Gum z^dmj1lxg&?l=LUcA)4>WfuIifh?G;AdHv`n93@lplxnxX;`nH`?MjhLzf7PX8B#m6 z%5k7hW_~Bfjhcw?!%1-ZeIv;^|7=0W_tp^n6Swndmfd6(3G&^;?L095sy*0Oq#TQE zhkOqVJnu@pLVlp#E9HC)JR9xJ+`qn*^w*6LgnZ8x!9xX7u^vb;+qzU^u&*TgPA6gP4?DDOC-;9RNa7Rndx&1~npfC~wdPnPmNDs#|nd4-U#`nf4SmXAzd%X4axivV|P&eYbJk72ex;&R| z0j_OzEpKXFR^8;P^|ZR%UDe)ItfsZOtqDgdwYjrrT{Jr}5#8w9=;CKLuDTYF8+Q!T z`HQR8+uXbwEQXj1l%6zJ92dr?h;t;D3s-l$bi0&bFRWm$1;u6aZN)gcE2_k?qSH3NSSP9c9^#sI zFE0=sFO}A<@;K>tEb2<+{Cssc@v-h97WX5^(?eo`yjK}FF~_ILdzIrsx}MpD&`B5c zbGLh3?Q|_OziT<3h71VvFT?f7E&LYd_+*&HgL{(eYO6if{0yyqb@Q^;COS%E*LWcp zhhy&Q8q_&!ud8;~G!(bCuJE>r>y^bV(D4Gz_eT$;&SX@H>W4m1yQvBHPrI+JcGtSv zJap!UDo1eLZG(UNC$zL)dv9`U{)ll71OE*zF${@M6 zvBiah$yyexGgVG%Xe3?Up&g&CxU)MR;~hh=)K|BAN?L36Rc)-X-PP{JjofGrki4uN zQGR~PZ_*Y9#$Dhob*`3H*4EnQZ9?rE8xV|F>A_)Vv$z>NK1(FJTD(n7)D5Y<$$P`& ziC{kb*ef0rZIEC89iJc@7gTkY6sZ18_j#)!39#`tZ{v;O@si^193M!!K#nBBP%Ia% z8#MI@7mgQ26H{{&f)h&?6 zxR3VBQ*b2R-q_j_7jYrYUDs4igfi5IW8_?3-?<3pE^kHDgYkvs((nnQR^B0V>l);M z*-#51DkfOP$SF8f7&%loH`YKW>d(1o04x`yb~8p|F$4VHy=nPRQy`Wf+^KyN<*WT) znapWbn0%Vlyr%AYBTmHvuO`74l4d(-OPF1U0NGUWt9>Wst9^5ozq$PD zCI5UWQ0>DjU+voy|0#IP{F{N1T}eN3>fA#4@^erm;!V05ogWb+zuNCoem*4{oZ602 z{w+u|@-LMM%2)a-5h||yKbrW}{+aU6RttG4K;^6C-DBcc=QPTf%TPsrRQd0h{Hp%c zcaHK8Dn+FPYfO>|JPMD>ulSSwJ1>gOZ2#v>{Azzs`8%m|7@SER_9D_~uS}7teH``g zz?kV(T={Q-m$o<+zuG68UIGwK(TUhVT;U%gf!dzpSNluq-`VMt?I+egr65(0w68`a zieK$NPAU_GU6NjTDqrQ(caM=jwoh3uIaI~Ss8#yP>qCHKsr+N#BNc)oTYg_DdaDXZ z|6BN!|8x`1B)_Toa#|vlX5qd`MJ{Q&%AW>TIGSg2@u+gE@srwjOm~Ep%U>cw- a cannot be superset of b { - // printf("diff is in %s.\n", terminal_string[i*sz(ull) + j]); + // printf("diff is in %s.\n", terminal_string[i*NUM_BITS + j]); // printf("Differing in : %d, %d\n", a[i], b[i]); return false; } @@ -323,22 +324,23 @@ void populate_first_sets() rhsnode_ptr rhs_ptr; while(is_changed == true) { - printf("Iterating over grammar.\n"); + // printf("Iterating over grammar.\n"); is_changed = false; for(int i=0; i flag == T) + + if(rhs_ptr->flag == T) { token_name t = (rhs_ptr -> s).t; - if( ( first_set[lhs][t / sz(ull)] & ( 1 << (t % sz(ull)) ) ) == 0) //check if terminal already there in the first set + if( ( first_set[lhs][t / NUM_BITS] & ( 1 << (t % NUM_BITS) ) ) == 0) //check if terminal already there in the first set or not { - printf("Adding term %s to first(%s) - \n", terminal_string[t], non_terminal_string[lhs]); - // printf("B4 adding : %d\n", first_set[lhs][t / sz(ull)] & ( 1 << t % sz(ull) )); - first_set[lhs][t / sz(ull)] |= ( 1 << (t % sz(ull)) ); - // printf("After adding: %d\n", first_set[lhs][t / sz(ull)] & ( 1 << t % sz(ull) )); - // printf("sizeof(llu) : %lu\n", sz(ull)); + // printf("Adding term %s to first(%s) - \n", terminal_string[t], non_terminal_string[lhs]); + // printf("B4 adding : %d\n", first_set[lhs][t / NUM_BITS] & ( 1 << t % NUM_BITS )); + first_set[lhs][t / NUM_BITS] |= ( 1 << (t % NUM_BITS) ); + // printf("After adding: %d\n", first_set[lhs][t / NUM_BITS] & ( 1 << t % NUM_BITS )); + // printf("sizeof(llu) : %lu\n", NUM_BITS); // printf("EPSILON : = %d\n", EPSILON); is_changed = true; // break; @@ -347,38 +349,49 @@ void populate_first_sets() else { rhsnode_ptr temp = rhs_ptr; - ull* first_rhs = firstOf( (temp -> s).nt ); + ull* rhs_symbol_fset; while(temp != NULL) { + if(temp->flag == T) + { + token_name t = (temp -> s).t; + if( ( first_set[lhs][t / NUM_BITS] & ( 1 << (t % NUM_BITS) ) ) == 0) //check if terminal already there in the first set or not + { + first_set[lhs][t / NUM_BITS] |= ( 1 << (t % NUM_BITS) ); + is_changed = true; + } + break; + } + rhs_symbol_fset = first_set[ (temp -> s).nt ]; //first_set[lhs]|=first_set - if(is_superset(first_set[lhs], first_rhs) == false) + if(is_superset(first_set[lhs], rhs_symbol_fset) == false) { is_changed = true; - printf("Adding first(%s) to first(%s)\n", non_terminal_string[ (temp->s).nt ], non_terminal_string[lhs]); + // printf("Adding first(%s) to first(%s)\n", non_terminal_string[ (temp->s).nt ], non_terminal_string[lhs]); for(int j = 0; j < BITSTRING_PART_NUM; j++) { - first_set[lhs][j] |= first_rhs[j]; + first_set[lhs][j] |= rhs_symbol_fset[j]; } - // printf("now is_superset = %d\n", is_superset(first_set[lhs], first_rhs)); + // printf("now is_superset = %d\n", is_superset(first_set[lhs], rhs_symbol_fset)); } else { // printf("not adding first(%s) to first(%s)\n", non_terminal_string[ (temp->s).nt ], non_terminal_string[lhs]); } - if( first_rhs[ EPSILON / sz(ull) ] & ( 1 << (EPSILON % sz(ull)) ) == 0)//firstOf( this nt ) does not contain epsilon + if( rhs_symbol_fset[ EPSILON / NUM_BITS ] & ( 1 << (EPSILON % NUM_BITS) ) == 0)//firstOf( this nt ) does not contain epsilon { // printf("first(%s) does not contain epsilon yet\n", non_terminal_string[ (temp->s).nt ]); break; } - // else - // { - // // check if the current nt on rhs is last nt of the rule, if no, remove epsilon from first_set(lhs) - // if(temp->next != NULL) //isn't last node - // { - // first_set[lhs][ EPSILON / sz(ull) ] &= (~ ( 1 << (EPSILON % sz(ull)) ) ); - // printf("Removing term %s from first(%s)\n", terminal_string[ EPSILON ], non_terminal_string[lhs]); - // } - // } + else + { + // check if the current nt on rhs is last nt of the rule, if no, remove epsilon from first_set(lhs) + if(temp->next != NULL) //isn't last node + { + first_set[lhs][ EPSILON / NUM_BITS ] &= (~ ( 1 << (EPSILON % NUM_BITS) ) ); + // printf("Removing term %s from first(%s)\n", terminal_string[ EPSILON ], non_terminal_string[lhs]); + } + } temp = temp -> next; } // end of rule linked list traversal while loop } // end of else (non-terminal branch) diff --git a/Code/parserDef.h b/Code/parserDef.h index 11af84f..ba90df4 100644 --- a/Code/parserDef.h +++ b/Code/parserDef.h @@ -8,6 +8,7 @@ #define BITSTRING_PART_NUM 3 #define sz(x) sizeof(x) typedef unsigned long long int ull; +#define NUM_BITS (8 * sz(ull)) typedef enum { diff --git a/Header Files/lexer.h b/Header Files/lexer.h new file mode 100644 index 0000000..629c2af --- /dev/null +++ b/Header Files/lexer.h @@ -0,0 +1,47 @@ +#ifndef LEXER_H +#define LEXER_H + +#include + +enum token_name{ + DEF, MODULE, ENDDEF, + DRIVERDEF, DRIVERENDDEF, + TAKES, INPUT, + SQBO, SQBC, + RETURNS, + START, END, + DECLARE, ID, COLON, + ARRAY, OF, INTEGER, REAL, BOOLEAN, + TRUE, FALSE, + ASSIGNOP, NUM, RNUM, SEMICOL, + DRIVER, PROGRAM, + GET_VALUE, PRINT, + USE, WITH, PARAMETERS, COMMA, + FOR, IN, RANGEOP, WHILE, + SWITCH, BO, BC, CASE, BREAK, DEFAULT, + PLUS, MINUS, + MUL, DIV, + LT, LE, GT, GE, EQ, NE, + AND, OR +} + +typedef enum { false, true } bool; // boolean type + +char look; // lookahaead character + +FILE *getStream(FILE *fp); + +TOKEN getNextToken(); + +void removeComments(char *testcaseFile, char *cleanFile); + +/* + report error +*/ +void lexError(char *errStr); + +/* + Initialization (if any) +*/ +void init(); +#endif diff --git a/grammar.txt b/grammar.txt new file mode 100644 index 0000000..4491f43 --- /dev/null +++ b/grammar.txt @@ -0,0 +1,60 @@ +ABC -> PROGRAM dollar. +PROGRAM -> MODULEDECLARATIONS OTHERMODULES DRIVERMODULE OTHERMODULES. +MODULEDECLARATIONS -> MODULEDECLARATION MODULEDECLARATIONS | . +MODULEDECLARATION -> declare module id semicol. +OTHERMODULES -> MODULE OTHERMODULES | . +DRIVERMODULE -> driverdef program driverenddef MODULEDEF. +MODULE -> def module id enddef takes input sqbo INPUT_PLIST sqbc semicol RET MODULEDEF. +MODULEDEF -> start STATEMENTS end. +RET -> returns sqbo OUTPUT_PLIST sqbc semicol | . +INPUT_PLIST -> id colon DATATYPE INPUT_PLIST_DASH. +INPUT_PLIST_DASH -> comma id colon DATATYPE INPUT_PLIST_DASH | . +OUTPUT_PLIST -> id colon TYPE OUTPUT_PLIST_DASH. +OUTPUT_PLIST_DASH -> comma id colon TYPE OUTPUT_PLIST_DASH | . +DATATYPE ->integer| real | boolean | array sqbo RANGE sqbc of TYPE. +TYPE -> integer | real | boolean. +STATEMENTS -> STATEMENT STATEMENTS | . +STATEMENT -> IOSTMT | SIMPLESTMT | DECLARESTMT | CONDITIONALSTMT | ITERATIVESTMT. +IOSTMT -> get_value bo id bc semicol | print bo VAR bc semicol. +VAR -> id WHICHID | num | rnum. +WHICHID -> sqbo INDEX sqbc | . +SIMPLESTMT -> ASSIGNMENTSTMT | MODULEREUSESTMT. +ASSIGNMENTSTMT -> id WHICHSTMT. +WHICHSTMT -> LVALUEIDSTMT | LVALUEARRSTMT. +LVALUEIDSTMT -> assignop EXPRESSION semicol. +LVALUEARRSTMT -> sqbo INDEX sqbc assignop EXPRESSION semicol. +INDEX -> num | id. +MODULEREUSESTMT -> OPTIONAL use module id with parameters IDLIST semicol. +OPTIONAL -> sqbo IDLIST sqbc assignop | . +IDLIST -> id IDLIST_DASH. +IDLIST_DASH -> comma id IDLIST_DASH | . + + +EXPRESSION -> UNARYEXPR | NONUNARY. +UNARYEXPR -> minus NEGUNARY| plus POSUNARY. +NEGUNARY -> bo NONUNARY bc | VAR. +POSUNARY -> bo NONUNARY bc | VAR. +NONUNARY -> RELATIONALEXPR RESTNONUNARY. +RESTNONUNARY -> LOGICALOP NONUNARY | . +RELATIONALEXPR -> ARITHMETICEXPR RESTRELATIONALEXPR. +RESTRELATIONALEXPR -> RELATIONALOP RELATIONALEXPR | . + + + +LOGICALOP -> and | or. +RELATIONALOP -> lt | le | gt | ge | eq | ne. +ARITHMETICEXPR -> TERM ARITHMETICEXPRDASH. +ARITHMETICEXPRDASH -> ADDSUBOP TERM ARITHMETICEXPRDASH | . +TERM -> FACTOR TERM_DASH. +TERM_DASH -> MULDIVOP FACTOR TERM_DASH | . +FACTOR -> bo ARITHMETICEXPR bc | VAR. +ADDSUBOP -> plus | minus. +MULDIVOP -> mul | div. +DECLARESTMT -> declare IDLIST colon DATATYPE semicol. +CONDITIONALSTMT -> switch bo id bc start CASESTMT DEFAULT end. +CASESTMT -> case VALUE colon STATEMENTS break semicol OTHERCASE. +OTHERCASE -> CASESTMT | . +VALUE -> num | true | false. +DEFAULT -> default colon STATEMENTS break semicol | . +ITERATIVESTMT -> for bo id in num rangeop num bc start STATEMENTS end | while bo BOOLEANEXPR bc start STATEMENTS end. +RANGE -> INDEX rangeop INDEX. \ No newline at end of file From 469277dd4db511412db56a55f353aeb24ae9aa47 Mon Sep 17 00:00:00 2001 From: SinghCoder Date: Sat, 22 Feb 2020 08:28:26 +0530 Subject: [PATCH 4/8] First set erroneous results --- Code/driver.c | 4 +- Code/parser.c | 114 ++++++++++++++++++++++++++++++-------------------- Code/parser.h | 2 + 3 files changed, 73 insertions(+), 47 deletions(-) diff --git a/Code/driver.c b/Code/driver.c index bee8764..5356986 100644 --- a/Code/driver.c +++ b/Code/driver.c @@ -6,7 +6,7 @@ #include "parser.h" -int main() +int main(int argc, char *argv[]) { FILE *source = fopen("test.txt", "r"); FILE *token_file = fopen("tokens.txt", "w"); @@ -58,6 +58,8 @@ int main() // print_grammar(); + // printf("dr: %d mp : %d\n", DRIVER, MAINPROGRAM); + // get_nullable_set(); populate_first_sets(); diff --git a/Code/parser.c b/Code/parser.c index 023de20..3182981 100644 --- a/Code/parser.c +++ b/Code/parser.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "parser.h" void parser_init() @@ -281,8 +282,9 @@ void print_first_sets() { for(int k = 0; k < NUM_BITS; k++) { - if((first_set[i][j] & (1 << k)) != 0) + if((first_set[i][j] & (1ULL << k)) != 0) { + // printf(" %d ", j); printf("%s, ", terminal_string[j*NUM_BITS + k]); } } @@ -297,24 +299,24 @@ void print_first_sets() // return first_set[nt]; // } -bool is_superset(ull a[BITSTRING_PART_NUM], ull b[BITSTRING_PART_NUM]) -{ - // for(int i=0; i a cannot be superset of b - { - // printf("diff is in %s.\n", terminal_string[i*NUM_BITS + j]); - // printf("Differing in : %d, %d\n", a[i], b[i]); - return false; - } - } - } - return true; -} +// bool is_superset(ull a[BITSTRING_PART_NUM], ull b[BITSTRING_PART_NUM]) +// { +// // for(int i=0; i a cannot be superset of b +// { +// // printf("diff is in %s.\n", terminal_string[i*NUM_BITS + j]); +// // printf("Differing in : %d, %d\n", a[i], b[i]); +// return false; +// } +// } +// } +// return true; +// } void populate_first_sets() { @@ -334,12 +336,12 @@ void populate_first_sets() if(rhs_ptr->flag == T) { token_name t = (rhs_ptr -> s).t; - if( ( first_set[lhs][t / NUM_BITS] & ( 1 << (t % NUM_BITS) ) ) == 0) //check if terminal already there in the first set or not + if( ( first_set[lhs][t / NUM_BITS] & ( 1ULL << (t % NUM_BITS) ) ) == 0) //check if terminal already there in the first set or not { // printf("Adding term %s to first(%s) - \n", terminal_string[t], non_terminal_string[lhs]); - // printf("B4 adding : %d\n", first_set[lhs][t / NUM_BITS] & ( 1 << t % NUM_BITS )); - first_set[lhs][t / NUM_BITS] |= ( 1 << (t % NUM_BITS) ); - // printf("After adding: %d\n", first_set[lhs][t / NUM_BITS] & ( 1 << t % NUM_BITS )); + // printf("B4 adding : %d\n", first_set[lhs][t / NUM_BITS] & ( 1ULL << t % NUM_BITS )); + first_set[lhs][t / NUM_BITS] |= ( 1ULL << (t % NUM_BITS) ); + // printf("After adding: %d\n", first_set[lhs][t / NUM_BITS] & ( 1ULL << t % NUM_BITS )); // printf("sizeof(llu) : %lu\n", NUM_BITS); // printf("EPSILON : = %d\n", EPSILON); is_changed = true; @@ -350,48 +352,68 @@ void populate_first_sets() { rhsnode_ptr temp = rhs_ptr; ull* rhs_symbol_fset; + ull* lhs_symbol_fset = first_set[lhs]; while(temp != NULL) { if(temp->flag == T) { token_name t = (temp -> s).t; - if( ( first_set[lhs][t / NUM_BITS] & ( 1 << (t % NUM_BITS) ) ) == 0) //check if terminal already there in the first set or not + if( ( first_set[lhs][t / NUM_BITS] & ( 1ULL << (t % NUM_BITS) ) ) == 0) //check if terminal already there in the first set or not { - first_set[lhs][t / NUM_BITS] |= ( 1 << (t % NUM_BITS) ); + first_set[lhs][t / NUM_BITS] |= ( 1ULL << (t % NUM_BITS) ); is_changed = true; } break; } + + rhs_symbol_fset = first_set[ (temp -> s).nt ]; - //first_set[lhs]|=first_set - if(is_superset(first_set[lhs], rhs_symbol_fset) == false) + + bool is_diff = false; + for(int j = 0; j < BITSTRING_PART_NUM; j++) { - is_changed = true; - // printf("Adding first(%s) to first(%s)\n", non_terminal_string[ (temp->s).nt ], non_terminal_string[lhs]); - for(int j = 0; j < BITSTRING_PART_NUM; j++) + if( (lhs_symbol_fset[j] & rhs_symbol_fset[j]) != rhs_symbol_fset[j] ) { - first_set[lhs][j] |= rhs_symbol_fset[j]; + is_diff = true; + break; } - // printf("now is_superset = %d\n", is_superset(first_set[lhs], rhs_symbol_fset)); - } - else - { - // printf("not adding first(%s) to first(%s)\n", non_terminal_string[ (temp->s).nt ], non_terminal_string[lhs]); - } - if( rhs_symbol_fset[ EPSILON / NUM_BITS ] & ( 1 << (EPSILON % NUM_BITS) ) == 0)//firstOf( this nt ) does not contain epsilon - { - // printf("first(%s) does not contain epsilon yet\n", non_terminal_string[ (temp->s).nt ]); - break; } - else + + if( is_diff == true ) { - // check if the current nt on rhs is last nt of the rule, if no, remove epsilon from first_set(lhs) - if(temp->next != NULL) //isn't last node + is_changed = true; + bool eps_in_rhs = false; + bool already_pres = false; + + if( ( rhs_symbol_fset[ EPSILON / NUM_BITS ] & ( 1ULL << (EPSILON % NUM_BITS) ) ) != 0) + { + // printf("hi"); + eps_in_rhs = true; + if(( lhs_symbol_fset[ EPSILON / NUM_BITS ] & ( 1ULL << (EPSILON % NUM_BITS) ) ) != 0) + already_pres = true; + // ull a = 0x00; + // printf("%llu\n", (( ( 1ULL << (EPSILON % NUM_BITS)) ))); + // rhs_symbol_fset[ EPSILON / NUM_BITS ] &= ((ull )( ~( 1ULL << (EPSILON % NUM_BITS)) )) ; + } + + for(int j = 0; j < BITSTRING_PART_NUM; j++) + { + lhs_symbol_fset[j] |= rhs_symbol_fset[j]; + } + + if( eps_in_rhs) { - first_set[lhs][ EPSILON / NUM_BITS ] &= (~ ( 1 << (EPSILON % NUM_BITS) ) ); - // printf("Removing term %s from first(%s)\n", terminal_string[ EPSILON ], non_terminal_string[lhs]); + printf("eps pres"); + if(eps_in_rhs && (!already_pres)) + lhs_symbol_fset[ EPSILON / NUM_BITS ] &= (~((ull)pow(2,EPSILON))); + + if(temp->next == NULL) + { + lhs_symbol_fset[ EPSILON / NUM_BITS ] |= ( 1ULL << (EPSILON % NUM_BITS) ); + } } } + temp = temp -> next; } // end of rule linked list traversal while loop } // end of else (non-terminal branch) diff --git a/Code/parser.h b/Code/parser.h index c036b91..c63a6a6 100644 --- a/Code/parser.h +++ b/Code/parser.h @@ -19,6 +19,8 @@ unsigned long long int get_nullable_set(); void print_first_sets(); +void print_first(nonterminal a); + ull* firstOf(nonterminal nt); bool is_superset(ull a[BITSTRING_PART_NUM], ull b[BITSTRING_PART_NUM]); From 58d098551721e8b310d5f21303ddca97ffb90d0f Mon Sep 17 00:00:00 2001 From: SinghCoder Date: Sat, 22 Feb 2020 11:00:22 +0530 Subject: [PATCH 5/8] A->BCD rule only add epsilon to F(A) if B, C , D all are nullable thing is remaining --- Code/parser.c | 59 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/Code/parser.c b/Code/parser.c index 3182981..80d35fe 100644 --- a/Code/parser.c +++ b/Code/parser.c @@ -2,7 +2,6 @@ #include #include #include -#include #include "parser.h" void parser_init() @@ -317,13 +316,39 @@ void print_first_sets() // } // return true; // } - +void print_first(nonterminal nt) +{ + // printf("\n{"); + printf("FIRST(%s) = { " , non_terminal_string[nt] ); + for(int j = 0; j< BITSTRING_PART_NUM ; j++) + { + for(int k = 0; k < NUM_BITS; k++) + { + if((first_set[nt][j] & (1ULL << k)) != 0) + { + // printf(" %d ", j); + printf("%s, ", terminal_string[j*NUM_BITS + k]); + } + } + } + printf(" }\n"); +} void populate_first_sets() { // grammar[i].head bool is_changed = true; int lhs; rhsnode_ptr rhs_ptr; + // for(int i=0; iflag == T) + // { + // token_name t = (rhs_ptr -> s).t; + // first_set[lhs][t / NUM_BITS] |= ( 1ULL << (t % NUM_BITS) ); + // } + // } while(is_changed == true) { // printf("Iterating over grammar.\n"); @@ -332,13 +357,13 @@ void populate_first_sets() { lhs = grammar[i].sym; rhs_ptr = grammar[i].head; - + printf("calculating first of %s\n", non_terminal_string[lhs]); if(rhs_ptr->flag == T) { token_name t = (rhs_ptr -> s).t; if( ( first_set[lhs][t / NUM_BITS] & ( 1ULL << (t % NUM_BITS) ) ) == 0) //check if terminal already there in the first set or not { - // printf("Adding term %s to first(%s) - \n", terminal_string[t], non_terminal_string[lhs]); + printf("Adding term %s to first(%s) - \n", terminal_string[t], non_terminal_string[lhs]); // printf("B4 adding : %d\n", first_set[lhs][t / NUM_BITS] & ( 1ULL << t % NUM_BITS )); first_set[lhs][t / NUM_BITS] |= ( 1ULL << (t % NUM_BITS) ); // printf("After adding: %d\n", first_set[lhs][t / NUM_BITS] & ( 1ULL << t % NUM_BITS )); @@ -383,35 +408,45 @@ void populate_first_sets() { is_changed = true; bool eps_in_rhs = false; - bool already_pres = false; if( ( rhs_symbol_fset[ EPSILON / NUM_BITS ] & ( 1ULL << (EPSILON % NUM_BITS) ) ) != 0) { // printf("hi"); eps_in_rhs = true; - if(( lhs_symbol_fset[ EPSILON / NUM_BITS ] & ( 1ULL << (EPSILON % NUM_BITS) ) ) != 0) - already_pres = true; // ull a = 0x00; // printf("%llu\n", (( ( 1ULL << (EPSILON % NUM_BITS)) ))); - // rhs_symbol_fset[ EPSILON / NUM_BITS ] &= ((ull )( ~( 1ULL << (EPSILON % NUM_BITS)) )) ; + // rhs_symbol_fset[ EPSILON / NUM_BITS ] &= (( ~( 1ULL << (EPSILON % NUM_BITS)) )) ; + // printf("Epsilon not removed? : %llu\n", rhs_symbol_fset[ EPSILON / NUM_BITS ] & ((( 1ULL << (EPSILON % NUM_BITS)) ))); } + // printf("Adding first(%s) to first(%s)\n", non_terminal_string[(temp->s).t] ,non_terminal_string[lhs]); + // print_first((temp->s).t); + // print_first(lhs); for(int j = 0; j < BITSTRING_PART_NUM; j++) { lhs_symbol_fset[j] |= rhs_symbol_fset[j]; } + + + if( eps_in_rhs) { - printf("eps pres"); - if(eps_in_rhs && (!already_pres)) - lhs_symbol_fset[ EPSILON / NUM_BITS ] &= (~((ull)pow(2,EPSILON))); - + rhs_symbol_fset[ EPSILON / NUM_BITS ] |= ( 1ULL << (EPSILON % NUM_BITS) ); + printf("Epsilon added back? : %llu\n", rhs_symbol_fset[ EPSILON / NUM_BITS ] & ((( 1ULL << (EPSILON % NUM_BITS)) ))); if(temp->next == NULL) { lhs_symbol_fset[ EPSILON / NUM_BITS ] |= ( 1ULL << (EPSILON % NUM_BITS) ); } } + else // if a current nt does not contain epsilon , break + { + break; + } + } + else // if is_diff == false, break; + { + break; } temp = temp -> next; From 0f192d6a36ba0aa84d18d30aa39be7341278f156 Mon Sep 17 00:00:00 2001 From: SinghCoder Date: Sat, 22 Feb 2020 15:12:21 +0530 Subject: [PATCH 6/8] First sets done, follow on progress --- Code/driver.c | 8 +- Code/hashtable.c | 1 + Code/lexer.c | 3 +- Code/lexerDef.h | 5 +- Code/parser.c | 300 +++++++++++++++++++++++++++++++---------------- Code/parser.h | 8 ++ Code/parserDef.h | 1 + 7 files changed, 219 insertions(+), 107 deletions(-) diff --git a/Code/driver.c b/Code/driver.c index 5356986..d8af533 100644 --- a/Code/driver.c +++ b/Code/driver.c @@ -63,6 +63,12 @@ int main(int argc, char *argv[]) // get_nullable_set(); populate_first_sets(); - print_first_sets(); + populate_follow_sets(); + + // ull *fset = get_rule_first_set(grammar[0].head); + // print_rule_fset(fset); + + // print_first_sets(); + print_follow_sets(); } // end of main diff --git a/Code/hashtable.c b/Code/hashtable.c index d922b5c..c1e354f 100644 --- a/Code/hashtable.c +++ b/Code/hashtable.c @@ -78,6 +78,7 @@ void init_hash_table() { insert( "break" , BREAK); insert( "default" , DEFAULT); insert( "while" , WHILE); + insert( "dollar" , DOLLAR); // left TAGGED, UNION and RECORD // printf("Values inserted"); diff --git a/Code/lexer.c b/Code/lexer.c index 5cd3681..e7315d2 100644 --- a/Code/lexer.c +++ b/Code/lexer.c @@ -90,7 +90,8 @@ void lexer_init() "END_OF_FILE", "DELIM", "EPSILON", - "PROGRAM" + "PROGRAM", + "DOLLAR" }; for(int i = 0; i < NUM_OF_TERMINALS; i++) diff --git a/Code/lexerDef.h b/Code/lexerDef.h index 2baecef..fe48edb 100644 --- a/Code/lexerDef.h +++ b/Code/lexerDef.h @@ -2,7 +2,7 @@ #define LEXERDEF_H #define BUFFER_SIZE 20 -#define NUM_OF_TERMINALS 60 +#define NUM_OF_TERMINALS 61 typedef enum { DEF, MODULE, ENDDEF, @@ -28,7 +28,8 @@ typedef enum { END_OF_FILE, DELIM, EPSILON, - PROGRAM + PROGRAM, + DOLLAR } token_name; typedef struct token { diff --git a/Code/parser.c b/Code/parser.c index 80d35fe..4a8718c 100644 --- a/Code/parser.c +++ b/Code/parser.c @@ -241,50 +241,42 @@ void print_grammar() } } -unsigned long long int get_nullable_set() -{ - unsigned long long int set = 0; - int mask = 1; - int sym = 0; - int flag; +// ull - for(int i=0; iflag; - if(flag == NT) - { - sym = (int)( (grammar[i].head)->s ).nt; - continue; - } - else + printf("FIRST(%s) = { " , non_terminal_string[i] ); + for(int j = 0; j< BITSTRING_PART_NUM ; j++) { - sym = (int)( (grammar[i].head)->s ).t; - if( sym == EPSILON ) + for(int k = 0; k < NUM_BITS; k++) { - set |= (mask << grammar[i].sym); - // printf("%d", sym); - // printf("%s\n", non_terminal_string[grammar[i].sym]); + if((first_set[i][j] & (1ULL << k)) != 0) + { + // printf(" %d ", j); + printf("%s ", terminal_string[j*NUM_BITS + k]); + } } } + printf(" }\n"); + // printf("%llu\n", first_set[i][0]); } - return set; } -// ull - -void print_first_sets() +void print_follow_sets() { for( int i = 0; i < NUM_OF_NONTERMINALS; i++) { - printf("FIRST(%s) = { " , non_terminal_string[i] ); + printf("FOLLOW(%s) = { " , non_terminal_string[i] ); for(int j = 0; j< BITSTRING_PART_NUM ; j++) { for(int k = 0; k < NUM_BITS; k++) { - if((first_set[i][j] & (1ULL << k)) != 0) + if((follow_set[i][j] & (1ULL << k)) != 0) { // printf(" %d ", j); - printf("%s, ", terminal_string[j*NUM_BITS + k]); + printf("%s ", terminal_string[j*NUM_BITS + k]); } } } @@ -293,29 +285,6 @@ void print_first_sets() } } -// ull* firstOf(nonterminal nt) -// { -// return first_set[nt]; -// } - -// bool is_superset(ull a[BITSTRING_PART_NUM], ull b[BITSTRING_PART_NUM]) -// { -// // for(int i=0; i a cannot be superset of b -// { -// // printf("diff is in %s.\n", terminal_string[i*NUM_BITS + j]); -// // printf("Differing in : %d, %d\n", a[i], b[i]); -// return false; -// } -// } -// } -// return true; -// } void print_first(nonterminal nt) { // printf("\n{"); @@ -333,54 +302,118 @@ void print_first(nonterminal nt) } printf(" }\n"); } + +void print_rule_fset(ull *fset) +{ + for(int j = 0; j< BITSTRING_PART_NUM ; j++) + { + for(int k = 0; k < NUM_BITS; k++) + { + if((fset[j] & (1ULL << k)) != 0) + { + // printf(" %d ", j); + printf("%s, ", terminal_string[j*NUM_BITS + k]); + } + } + } + printf("\n"); +} + +void populate_follow_sets() +{ + bool is_changed = true; + int lhs; + int rhs_sym; + rhsnode_ptr rhs_ptr; + + follow_set[MAINPROGRAM][DOLLAR / NUM_BITS] |= (1ULL << (DOLLAR % NUM_BITS) ); + + while(is_changed == true) //traverse until convergence + { + is_changed = false; + for(int i=0; iflag == NT) + { + rhs_sym = ( (temp->s).nt ); + ull *rhs_rule_set = get_rule_first_set(temp->next); + ull *tmp_follow = (ull*)malloc(sizeof(ull) * BITSTRING_PART_NUM); + + for(int j = 0; j < BITSTRING_PART_NUM ; j++) + { + tmp_follow[j] = follow_set[rhs_sym][j]; + } + + bool eps_in_rhs = false; + + if(rhs_rule_set[EPSILON / NUM_BITS] & ( (1ULL << (EPSILON % NUM_BITS) ) ) != 0)//eps present in this rule + { + eps_in_rhs = true; + } + + rhs_rule_set[EPSILON / NUM_BITS] &= (~ (1ULL << (EPSILON % NUM_BITS) ) ); + + for(int j = 0; j < BITSTRING_PART_NUM ; j++) + { + follow_set[rhs_sym][j] |= rhs_rule_set[j]; + } + + if(eps_in_rhs == true) + { + for(int j = 0; j < BITSTRING_PART_NUM ; j++) + { + follow_set[rhs_sym][j] |= follow_set[lhs][j]; + } + } + + for(int j = 0; j < BITSTRING_PART_NUM ; j++) + { + if(follow_set[rhs_sym][j] != tmp_follow[j]) + is_changed = true; + } + // } + // follow_set[rhs_sym][EPSILON / NUM_BITS] &= (~ (1ULL << (EPSILON % NUM_BITS) ) ); + } + temp = temp -> next; + } // end of rule linked list traversal while loop + } // end of for - grammar traversal + } // end of while - infinite loop until convergence +} + void populate_first_sets() { - // grammar[i].head bool is_changed = true; int lhs; rhsnode_ptr rhs_ptr; - // for(int i=0; iflag == T) - // { - // token_name t = (rhs_ptr -> s).t; - // first_set[lhs][t / NUM_BITS] |= ( 1ULL << (t % NUM_BITS) ); - // } - // } - while(is_changed == true) + while(is_changed == true) //traverse until convergence { - // printf("Iterating over grammar.\n"); is_changed = false; for(int i=0; iflag == T) + if(rhs_ptr->flag == T) //if terminal, add it and move ahead { token_name t = (rhs_ptr -> s).t; if( ( first_set[lhs][t / NUM_BITS] & ( 1ULL << (t % NUM_BITS) ) ) == 0) //check if terminal already there in the first set or not { - printf("Adding term %s to first(%s) - \n", terminal_string[t], non_terminal_string[lhs]); - // printf("B4 adding : %d\n", first_set[lhs][t / NUM_BITS] & ( 1ULL << t % NUM_BITS )); first_set[lhs][t / NUM_BITS] |= ( 1ULL << (t % NUM_BITS) ); - // printf("After adding: %d\n", first_set[lhs][t / NUM_BITS] & ( 1ULL << t % NUM_BITS )); - // printf("sizeof(llu) : %lu\n", NUM_BITS); - // printf("EPSILON : = %d\n", EPSILON); is_changed = true; - // break; } } - else + else //nonterminal { rhsnode_ptr temp = rhs_ptr; ull* rhs_symbol_fset; ull* lhs_symbol_fset = first_set[lhs]; - while(temp != NULL) + while(temp != NULL) //traverse till end of the rule { - if(temp->flag == T) + if(temp->flag == T) // if terminal add and move to next rule { token_name t = (temp -> s).t; if( ( first_set[lhs][t / NUM_BITS] & ( 1ULL << (t % NUM_BITS) ) ) == 0) //check if terminal already there in the first set or not @@ -395,6 +428,20 @@ void populate_first_sets() rhs_symbol_fset = first_set[ (temp -> s).nt ]; bool is_diff = false; + bool eps_in_rhs = false; + bool eps_in_lhs = false; + + if( ( rhs_symbol_fset[ EPSILON / NUM_BITS ] & ( 1ULL << (EPSILON % NUM_BITS) ) ) != 0) //remove epsilon from current nt before checking things + { + eps_in_rhs = true; + rhs_symbol_fset[ EPSILON / NUM_BITS ] &= (( ~( 1ULL << (EPSILON % NUM_BITS)) )) ; + } + + if( ( lhs_symbol_fset[ EPSILON / NUM_BITS ] & ( 1ULL << (EPSILON % NUM_BITS) ) ) != 0) + { + eps_in_lhs = true; + } + for(int j = 0; j < BITSTRING_PART_NUM; j++) { if( (lhs_symbol_fset[j] & rhs_symbol_fset[j]) != rhs_symbol_fset[j] ) @@ -404,51 +451,47 @@ void populate_first_sets() } } - if( is_diff == true ) + if( is_diff == true ) //rhs nt has a terminal which lhs nt does not have in it's fset { is_changed = true; - bool eps_in_rhs = false; - - if( ( rhs_symbol_fset[ EPSILON / NUM_BITS ] & ( 1ULL << (EPSILON % NUM_BITS) ) ) != 0) - { - // printf("hi"); - eps_in_rhs = true; - // ull a = 0x00; - // printf("%llu\n", (( ( 1ULL << (EPSILON % NUM_BITS)) ))); - // rhs_symbol_fset[ EPSILON / NUM_BITS ] &= (( ~( 1ULL << (EPSILON % NUM_BITS)) )) ; - // printf("Epsilon not removed? : %llu\n", rhs_symbol_fset[ EPSILON / NUM_BITS ] & ((( 1ULL << (EPSILON % NUM_BITS)) ))); - } - - // printf("Adding first(%s) to first(%s)\n", non_terminal_string[(temp->s).t] ,non_terminal_string[lhs]); - // print_first((temp->s).t); - // print_first(lhs); for(int j = 0; j < BITSTRING_PART_NUM; j++) { lhs_symbol_fset[j] |= rhs_symbol_fset[j]; } - - - - if( eps_in_rhs) + if( eps_in_rhs == false ) //if rhs nt does not have eps, no need to go further { - rhs_symbol_fset[ EPSILON / NUM_BITS ] |= ( 1ULL << (EPSILON % NUM_BITS) ); - printf("Epsilon added back? : %llu\n", rhs_symbol_fset[ EPSILON / NUM_BITS ] & ((( 1ULL << (EPSILON % NUM_BITS)) ))); - if(temp->next == NULL) - { - lhs_symbol_fset[ EPSILON / NUM_BITS ] |= ( 1ULL << (EPSILON % NUM_BITS) ); - } + break; } - else // if a current nt does not contain epsilon , break + else { - break; + rhs_symbol_fset[ EPSILON / NUM_BITS ] |= ( 1ULL << (EPSILON % NUM_BITS) ); // set eps back to rhs nt + if(eps_in_lhs == false) + { + if(temp->next == NULL) // only add eps to lhs nt if rhs is last nt in the rule + { + lhs_symbol_fset[ EPSILON / NUM_BITS ] |= ( 1ULL << (EPSILON % NUM_BITS) ); + is_changed = true; + } + } } } else // if is_diff == false, break; { - break; + if(eps_in_rhs == true) + { + rhs_symbol_fset[ EPSILON / NUM_BITS ] |= ( 1ULL << (EPSILON % NUM_BITS) ); // set eps back to rhs nt + if(eps_in_lhs == false) + { + if(temp->next == NULL) // only add eps to lhs nt if rhs is last nt in the rule + { + lhs_symbol_fset[ EPSILON / NUM_BITS ] |= ( 1ULL << (EPSILON % NUM_BITS) ); + is_changed = true; + } + } + } + break; // } - temp = temp -> next; } // end of rule linked list traversal while loop } // end of else (non-terminal branch) @@ -456,3 +499,54 @@ void populate_first_sets() } // end of while - infinite loop until convergence } // end of function +ull *get_rule_first_set(rhsnode_ptr node) +{ + ull *fset = malloc(sizeof(ull) * BITSTRING_PART_NUM); + + for(int i = 0; i < BITSTRING_PART_NUM; i++) + { + fset[i] = 0; + } + + rhsnode_ptr temp = node; + + int sym; + while(temp != NULL) + { + if( temp->flag == T) + { + sym = (temp->s).t; + fset[sym / NUM_BITS] |= (1ULL << (sym % NUM_BITS)); + return fset; + } + + else + { + sym = (temp->s).nt; + + if( ( first_set[sym][ EPSILON / NUM_BITS ] & ( 1ULL << (EPSILON % NUM_BITS) ) ) == 0) //eps not in the nt + { + for(int j = 0; j < BITSTRING_PART_NUM; j++) + { + fset[j] |= first_set[sym][j]; + } + return fset; + } + else + { + for(int j = 0; j < BITSTRING_PART_NUM; j++) + { + fset[j] |= first_set[sym][j]; + } + + if( temp->next != NULL) + { + fset[ EPSILON / NUM_BITS ] &= (~ ( 1ULL << (EPSILON % NUM_BITS) )); + } + } // end of else - eps present in fset + } // end of else - is nt + + temp = temp -> next; + } // end of while - ll traversal + return fset; +} diff --git a/Code/parser.h b/Code/parser.h index c63a6a6..bbebd22 100644 --- a/Code/parser.h +++ b/Code/parser.h @@ -15,13 +15,21 @@ void parser_init(); void populate_first_sets(); +void populate_follow_sets(); + unsigned long long int get_nullable_set(); void print_first_sets(); +void print_follow_sets(); + void print_first(nonterminal a); +void print_rule_fset(ull *fset); + ull* firstOf(nonterminal nt); +ull *get_rule_first_set(rhsnode_ptr node); + bool is_superset(ull a[BITSTRING_PART_NUM], ull b[BITSTRING_PART_NUM]); #endif \ No newline at end of file diff --git a/Code/parserDef.h b/Code/parserDef.h index ba90df4..d8886bb 100644 --- a/Code/parserDef.h +++ b/Code/parserDef.h @@ -95,6 +95,7 @@ typedef struct } cell; unsigned long long int first_set[NUM_OF_NONTERMINALS][BITSTRING_PART_NUM]; //f_set for each nt calculated by considering bits of 3 unsigned long long int +unsigned long long int follow_set[NUM_OF_NONTERMINALS][BITSTRING_PART_NUM]; //f_set for each nt calculated by considering bits of 3 unsigned long long int // typedef cell *grammar_t; cell grammar[NUM_OF_RULES]; #endif From 4a20c23ad246f1f0a4b1c73e3dead6c0159e0bdd Mon Sep 17 00:00:00 2001 From: SinghCoder Date: Sat, 22 Feb 2020 15:46:15 +0530 Subject: [PATCH 7/8] Follow Sets successfully done --- Code/parser.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Code/parser.c b/Code/parser.c index 4a8718c..61f0ad0 100644 --- a/Code/parser.c +++ b/Code/parser.c @@ -341,9 +341,11 @@ void populate_follow_sets() if(temp->flag == NT) { rhs_sym = ( (temp->s).nt ); + // printf("Calculating follow for %s\n", non_terminal_string[rhs_sym]); ull *rhs_rule_set = get_rule_first_set(temp->next); + // print_rule_fset(rhs_rule_set); ull *tmp_follow = (ull*)malloc(sizeof(ull) * BITSTRING_PART_NUM); - + for(int j = 0; j < BITSTRING_PART_NUM ; j++) { tmp_follow[j] = follow_set[rhs_sym][j]; @@ -351,7 +353,7 @@ void populate_follow_sets() bool eps_in_rhs = false; - if(rhs_rule_set[EPSILON / NUM_BITS] & ( (1ULL << (EPSILON % NUM_BITS) ) ) != 0)//eps present in this rule + if((rhs_rule_set[EPSILON / NUM_BITS] & ( (1ULL << (EPSILON % NUM_BITS) ) )) != 0)//eps present in this rule { eps_in_rhs = true; } @@ -363,8 +365,9 @@ void populate_follow_sets() follow_set[rhs_sym][j] |= rhs_rule_set[j]; } - if(eps_in_rhs == true) + if( (eps_in_rhs == true) || (temp -> next == NULL)) { + // printf("eps present\n"); for(int j = 0; j < BITSTRING_PART_NUM ; j++) { follow_set[rhs_sym][j] |= follow_set[lhs][j]; @@ -374,7 +377,10 @@ void populate_follow_sets() for(int j = 0; j < BITSTRING_PART_NUM ; j++) { if(follow_set[rhs_sym][j] != tmp_follow[j]) + { + // printf("is changed....\n"); is_changed = true; + } } // } // follow_set[rhs_sym][EPSILON / NUM_BITS] &= (~ (1ULL << (EPSILON % NUM_BITS) ) ); @@ -501,6 +507,8 @@ void populate_first_sets() ull *get_rule_first_set(rhsnode_ptr node) { + // if(node && node->flag == NT) + // printf("get_rule start at nt : %s\n", non_terminal_string[(node->s).nt]); ull *fset = malloc(sizeof(ull) * BITSTRING_PART_NUM); for(int i = 0; i < BITSTRING_PART_NUM; i++) From 980fe17cf061bd26f8385a8fbc56e923d77a404f Mon Sep 17 00:00:00 2001 From: SinghCoder Date: Sat, 22 Feb 2020 15:58:01 +0530 Subject: [PATCH 8/8] FIRST and FOLLOW sets implementation --- Code/.vscode/settings.json | 5 - Code/driver.c | 2 +- Code/exe | Bin 27256 -> 0 bytes Code/f1.txt | 55 --- Code/f2.txt | 55 --- Code/junkfile2.c | 624 --------------------------- Code/junkfile3.c | 849 ------------------------------------- Code/myalloc_myfree.c | 18 - 8 files changed, 1 insertion(+), 1607 deletions(-) delete mode 100644 Code/.vscode/settings.json delete mode 100755 Code/exe delete mode 100644 Code/f1.txt delete mode 100644 Code/f2.txt delete mode 100644 Code/junkfile2.c delete mode 100644 Code/junkfile3.c delete mode 100644 Code/myalloc_myfree.c diff --git a/Code/.vscode/settings.json b/Code/.vscode/settings.json deleted file mode 100644 index 0f06797..0000000 --- a/Code/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "files.associations": { - "stdio.h": "c" - } -} \ No newline at end of file diff --git a/Code/driver.c b/Code/driver.c index d8af533..5fe597e 100644 --- a/Code/driver.c +++ b/Code/driver.c @@ -68,7 +68,7 @@ int main(int argc, char *argv[]) // ull *fset = get_rule_first_set(grammar[0].head); // print_rule_fset(fset); - // print_first_sets(); + print_first_sets(); print_follow_sets(); } // end of main diff --git a/Code/exe b/Code/exe deleted file mode 100755 index d2c0cf948c7200da5c778fd538ca2e6b0aec5190..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 27256 zcmeHwdwf$x`uC*mfr7Oy3W|yn1e6LCP=qR=4ecq3_GX(RE{mb`LMy#(njomKtO1u8 z!m_*S;$>I%r~4CK{ds*6UBx0u(4VXIURQjrE^1=yT5nht$?yBjB|SMLh4=Hm?|(ab z&dhx0d7gRZnd_NJhhOT&MJW~wQ!bTV!YJ03Es%VfkXIuE@bXz68;19pYz!NMcpAq< zdcN?GDqO!5t^9O}r@|+`yc7YL{V|eG`AS3bHJO&kvpNK-e8#LCAU@J{QVEa3xAK7U zyGR+h@L9rFe54-n z_SW3^+X+c)X=g51yQjKlg{!7vg{!{0v5C-{hH5u)cv@Gm`sHp%gm=UdQ^?F!y zbyHJo4MfyIEbR#m&|8(XTI8n3J43N^O@ zB~?iWx5w37-Ppnw6g%b@xX#bLAQqY#3!R^PA#*vLC9c{!cir;Fc2Aw#SyIr{+EVAN zUe;6xY0H~iTck9XAWcXJTT$OlG5nWm2*;;TEDaC!^He;Hsd4G##>NwA1bs$gBY8ZP z?UQlu>5apPyAyG8Y8+Sf)HXuk)8%-r@aYmyFSl|6g{!#Lggb3Q&Lk7QNaE8?xJ%-B zCcIwa`6j$g;&v0hM&jir+$ZrxCfqOar6zo{#2ZZb?spA4@rEd34dAQ9VYxuiSIYz?@GMWgnuCM zE)yP>c()1fm3Y{Me<|@k6aKx#`%O5TFWMoyBEH=Ymw37f&z5+$2_Gr((I)&fiCazh z1c^^F;pa(wx(Uyfc%BKLA@O_@ev!oOCj3%~mz(e+i7ztY4v8-{;l&bfFyU87yv>AP zDe+Y%+$HgKCcIYS8%_8%65nFNTO_{Kgu5la&4ha;zTJeclK3Mg{CbJ+G~sI{-eJOj zCGq_xe7(dwP54HMcbV{j#Jf%Str8EL@IOnu&xGG6@qQEju*AE{MEj?;Zi?&@XF>%J+Is-mDhBCI%fM*!+Ee0GDbj)ov;F&R$ zv26xC+kkI3;3Ew9BL>{KF579qPd3nZ7;yTgP;S2gAEiL}odz7MyO^{4deiLz?X7$( zvv26|q(nRIzP;&tSoFf-fT9yK@Xi{Wj}XNhh%eF|g`1c{aSO#edHjb<5vQgZ>EQ9N zC{9fv@(7QAMsce8$TlATh~iY`ku5y_4~kP&N7nK9>lCLdjVJboR;sp=vfJWl-eTM*}N#^(zdw(>d7pqPI;sdf-(-!&;vdjeHY=;L2em8U+b`<3`#<)ahN?OpG*ne5E{S&6 z=Vpyv&*d-KL$x#z2mfir7rgK!E+*#)`0cxoq}u&`_TAx2?UsG^*N%8bL*R#(2!RlobEdms|!A@U`W zdQeaZt**S*9!Q%w2_$yE-an%Qnq~-7422hNf(?Y)@gJDl920 zjFdH?>?wq9BGq5D^Q)|EeJ2FYkYv=S!gGo2%othl!l6bXXCZYxQiAh7gcm*y2{37w-M6NTc{2dNP4Gzs2LQuPhmf}@(CfogXh^yY*#jm0 zwe|}#c%^x?@KuMS(HHmd>iku2IePL@HF|5u50D+yeuI3T`U_dn|Bl_i?`!*Cs1^Eu z2&DUm+k+Kp_F#Tm){gXz6tN&;32k@+oMiW&W%gi@!~{5Guj#b=JMFu>Q|uO0!pIFo zLqW^#V-N&KHBvhVss9x!>YN{=Q6HCS@jXkq&0KqHXv0pG?V`|zXH)4Y=N{C4CG$>V1#L$aBz!sEr2js4HZ#=vGO@mI(+%} zqAGm)ItzOIMpVTLDCOhEx7Y)>QXNzE?WzSi)z1C>05)`FZLspe!_+!gAXPLI+d^BF z%~?rfI2!48V>{`Z(Gk)+D1X><`+#YAkeik#l4(20#=bQj%)2L#k4$6lI5^I6gt6xCxFkdKM@%Lkv{zyi*L;{GuJ*INkP=^?{@h23YJj#58shZ zp11oxv)KJKIiT@|^sSV+!~cS97X_ZC?!_rAoqwb7(}dc;``W&uBio*{7ZUBh4hY61 zKodcrN@){06pe;-K797%0PhT7Sl^npBP}IsR|o9-F4FzGV+k8GQhfSO3lY_HaZ#Z8 z7Bu#Y_PAljy_9dy?0~-4zSS_qq!wCXTsaGg*^QxCFhe>>VTPXJK7B82uy@YdJ;nZm z0sRRK++D_jdmCjEtm|HE`?+n2?dm0B4EObBhxBhK)5sWDE}(ytxQi~+tqd&}W=uE` z2S-^hf`&BC(!9F&Ts4nej9iln9R;DhKVV3UtcMOTNPi$d&DU@7&f{U0Ji{${h75le z%kaf-F@;W(%pC?3cFb3qk%l=jhOJ;Aa(f^@osX@)HT~!|zd%QMZa8!bfAAlpfLwcWxg~^LNffo(7V(t>tMr_18gV$$T3JUAjzky_n zcT!T~f6@E`;|G{u@9SBdly0Aq&bOxTV(+x1w7&x_iLC1N?|$PW)ah3HFumWWhf&Nh ziaB2ll>J9Hlb?9GR5LkxP7aPbG|Nykdnc-!3jxt#M$I-koo({&_L2 zZvUEU_b__oVPF5HSvR>+aG%}}GxVoqZC=EO%Xue*JUo_D@^&yYYxDPpc7RFS?)P@f z!u70`CF^MvIw>WnGN4l+J8==bv&`h(gOPiNia}@Wfc7hYGobk@Xugx|6sy7MA-$7; z$nWuehA@Kg-Xt%w5>P;YGjU_Bw~4;5A4p1aIY?wr?n7tlL%XIqh4SE0v|Nly2X}{u zU?rd*h|4FgmVcmOgxB&xU-zX6DmrV!z7*1x%i8Dva!_dx#z~_U3^kii{b(uvLw5gb z_F+eB(`RTjO~j8mR$1sDi0s@2S%{qD5n~GFRSgb)~20Vts5D~H_6 zXjG=MCFk#19nkKZP6bHwoDk6N;=v)_Q;;+5IYP2VlDGW}3ner+?Rrj-PPA=2nBg4* zT4YSL?_xa|e&X|JwBDYzw&NICqX_%OTkkH7rq3gw9}UpB~Yu6eQ|z6 zBL`Y>EGcly3nVxeR~;m%y~s&NT6|mzK14~_oV9=4NjZfz>=tf;mEt~0SN5t!KUVuz z`Xmi#|H_TEzz+~47tr2Ch=%f@_6CK5b7_|Z&1d)1v~c2scli4rs@B84HHU|LP793s zPY`1I!!*eG?nD&L($L~%eru6{7Eg_!od<_|PeDIG5SW~2T=_&!*ShP13*RUgm zXQL8VCS|~GfM6I822AlR@*o)2t@$9=3n`fZ&mr&+0-QtO_fxEF5`i}ga0-D30p<|c zEWpJCz6uZ@o-DFRfJ+E`8X&d>SfoIJlL-6^z$pag3NVMjEdZwyc$xsCUK)$?fDAI0 zM83@8`dd92D8+Ek5WI(ZCui-_Kfu^QkQd$gAuK?7Mc#`*NIM5Pok%%7ESM)#_9Fly z$|83Na4La^CtLZMd}Ma89WdB zb1=Bl?IExa0efT?Xu_|4ddw2u00w^ep;2LuUD^N4Kq0A)!lTWF{2T3hBrW7vWaQUQ z#5<%7#hcteSeNi=<)5HF7$VLfVap zrc?9;DiV!tVV$bUSZ`t4!3gNv64zTXB>1(9@eXM-@g_G|k?uQTph)XrJyCCyxrTf& z_~KkE8xA3nRiF%yICg1wfdQl}vH4UV9&WYh;fKBiHxRM~VxyW_}a7syoI z{Xu3^aO`aQ&XJ{AP1$H4B8sYEk!uhY6?8A(Q9^yF(O`ktN=Z^de}x3Uc01k}|L`U^ zSOpC+==}j&v#6l>k$(aj6(91S<|{bN-H-AW+y#)2F2o|w3?t-bzDvhm(y?@a^6GNQC0i|YDJie&t5rTrz7OZW1tCXrz6UbEAK&6SmJ%YFVj6$ zC~*r`grYD%PN2d>@D6GJ#GBk;Mp|BYdT!~u|r4?@EECb77RER=vLinXCB2s^2|YYoi72_q0gCF~xd@{s-i1`5{3RgpYg6#X{4a?IQ~q|yQ5JN^ zG~sjgr{k^sM-Y)V(4f8Xss43~*PbgE)HcR){p4)Y{!P5`{ew3rNdD~?u^DnbJ(erC>ucD)Z0Vb}ACEU5i8 zCMS){`Dz@=>BXDeU~*iLqwJc8Iu~~3g?VV4l~MgiX2rB``;yx=g@gvRyjZTALF(6T zz&oU^#GBk;xkl0ka(x~a5xH`^&K_rFRDaW8TI;K(sS7{+vsr)!p+LKBui_fdO zV0nX;Mu2?+sdNWuLRt!<6Db;?sNPL$dt|*0-zLHupN}PLyoYy4dkb%JgIU84Iif4A z`GQwD%|DQMMNHzIT%w;#^c{!9LufRrFt7Hh!th0KA=uA_{K(%ya~xIvS3x#jsML|A z3YpGTI&>BlvJY=^gB7wy$T^M3g4*>lIUV5jYyZXNY(EY;X9zh~A?L!FoK;*-J(sie zIOM!x;l;-oO!|BQlZ(om&gGoN<%~WKIRPODvlGd=KPD%9CY9@LyhBR;3A_lEsd2XRb;`e8je z*iFLrFFrCeu`!hOu-<$7cWdw)jKcxFcOcgL+L|zJ;QsP85Q&XJIt`zMtmz=Pi?(p4 zAQ!*h8`2Mw1w(q6yx55y5n(fD2g};kg)E<@@y5UJxohEtYp|1*^>CN}-KX!{zx;jw z{_j2urVZcSulb+%@88`&mUghPw`{>H#nPFx+y9cK>L8ugLCze#_l7?PN)8?`Cl333 zdnYZ9oeNoMhYvqGK_U32+SyPA2eQgKMZzYB=~S&Bzj;WW4VcLA@rv#TDo&_y!q+KI zk5lbA;@3PlSqN&s#EKoeVmS2m+(PFCM^Obw6@o_ZseOv`Qk@3BQ3IF>>2@jY%$-1@3Qa**p1tT>6w5#2zZ46tb+*HOu(}QU^FLS z0|5^bfU%2!YYDiM0DQ#|&_uw^1Ry{_H36#$pwB7*P68STAiV)xPQaA}&`J}4jesHo z<_KUm0T&WLJ4~R+C13&pV+3$60Vfl1rU1qekV3#&0>~oZOPopU8Y_TQ0=fyHH9Ca; zgd&7rC4d&>0KOz(7Xet;5by~B4-tS>8v*YTa2Emi!XcoOfB*p(3Lr$lbp*^30PPuv z8wnUEfCmX!M8MesxSN0l1W?a`(BBd;i-1W2_%#6&3CIz^^#qJ0;5-4e5s*p%P5zLv zoPe+B&?UY@sNOIVNdvthatF8)HmTsKyQf-BjdaOd{3jaY6EcXl4<5Ar9`FC}PIkJ; z2%Riz{aiX4#>mKC+%xdgQT}^P+m*J34*xs!OP}K4#6u(aPf@3mUupCIyr+IYTI~O!*ngVV|&Umr8{|Sx*owNAb{?77ZoL6(v|9eIm*L@T zxx2c#x!TQ1#9yVMV98vG%~4ujQMRDMRuWHAQdU@1tQYD9#kLBY(@|DhDbtTiP<&-h zyIvtu6|G1rtZ*z;DTnf!QE4JDB)AG~mG&5W zS(P&}i!mXNrO@WIIj<_$VN$;yb@eg0tQyR&{kZfI|`3V z6A8A83Ui91v`}Akv`(tdEA*;LlWJw<)cg_@-BEaSb#uugA+GL3j{2hV3LS+kD>Z2V z)R{1`LNDg6-&Rb?1}@@Am{?r4z)@f;E-N<>^WXrrDXps1R&kZB!r`=+=uSt$Kt_~D z!op+nSed^;(ynOgyo!|_3eQbly&%^TuCbRbly7ul+co$>N1nRh5Bh9+}T zg{^eKQSynyI-Ft5oq7od-F7Q0a-eZfn%JIWooBtCpefj8SOYTTC`T}+mlje?jDQ0n z2|I0<>y?a;P^|Kb`DOGfU>M`7Dlpt)@ThQ-1PG*2SZCO8sJ>ztN%Uw`NoiFHtDskgb!e)lF8N{@~ntv2|kmaE8AI zuUn2kBFEo}H?d`{txa{+EsXwJ-0iMjjlT=WUxKr`mReR@SJPDOu4B!uwO;&fcdff| zCAiw$t+aGt__K7^%IYR>9S}%l-u60nZKJ1wwN)dz4u2Wm&OB};*HVeR0qX<#+gpk4;uRzQLJYHt&kGYyT%-*QIkoNm+$t=R8+ zDH@%E)&7D1hTY-!!aofB(96+i6XGwt5{+(xKdLhteGvX8_%FkM{Pk$G7ry_^Xmlj@ zPWEA;ITQXPZ$l3JSMYV@f$#2$MsIIpX~}o*QA;SBc-2nSE1A!HGjsU$Q_YuFDO8XZ$@H zEssmTJeGbJ(yLJq^qC&xPjM4F=YIz9PNbg;d&bgJ&rTtN34jjZc@FhPKjn+1U%ojt zn6^pt4N2Wc8I3_w8q%9luk(y_sd+IyCIG(~OYYA=8>AEj9 zv(eC)jlxL34db2$e?UjZ+Y0584%=UkzV?+dkCe|v2EeF|PXWy$^us!$nV*^co0R#P zqc^AOnbu(1{LD$4G<)Xsn}*mk^L#@~GV|S;dA7{yw#-TMGp(Ql{rt>y{u>wS-|HZU zRQb32w*~%ffqz>-Ss-?w42`95G@OxBeujv$blUBNqs=68x6r;M932ReQ}@)+T8EsP zn`u9joVu@u#%FRW{o5naR*G+xgOf%Pxal%LyNu*IWIFcnxEm)G#11BRbRmf@ z|DV#|A^m;Qe_Q&WNdJ)Zhhz)+r%3-C>CcjWk@OczUr-BV6(|ACJ0!N8Puh?$=TG=?tGQ%BSwjQTOAh`*0@6^z)>z?zd6**{J(#)O|HYlFlK0 zb^naIZ${lOqwbTbm2}rgU)>L*?t@YHzo`3O)cr2%J{NU=%X-PL?q>+IZ_xznf4@bcJZJK z|I3+TL_|0hzg_ht#M5HqoRKRPKWkOvTRc7_HV($)Lzx;M;_-B*#+7({7*pd{JU%=& z&c)*=#K!x0JcC(fzm3OFWJ?qD&s6*pPu;f}PoEW=KjQIhwoT3}@%V_?y^-0mCc_+~@w;y|Zx|C(X_iPZuB6f|P?1QEnk5IMSP)EO zUq%NI5u*7v1$LvKqZl222*B}f6FyGj>i##yGaCU5^huZcP=4f!@L2E_t>$A6;?<0Y z@>S~rg|9}ylE9u@fhW@c&q?qHlHiXe!IRl%7w9ecUE>%B0w$M8pO-n!$w!sejiKI8TpYJ(-bUUQ)KTCpkayIajV9zbGJ*(m1e2LGY4Ffp-4GaHLj&msPMSDfo zI7CF+fG4`g@l#~P|1`nWKG%bu^to%i;OF1g2yWu|(HU$@g8#21cu2~hDHnizZvtYw zlF;`6C;hFmeWpnj`jgNPN4bc8f{8vGcsBJ%Q@=YEIF)y{T+s178RT(}lrzaB=R!$8 z_F|z3|1L*x3fJf8@oE95mr5JF#YymMrTok33>>iFAC5!O$N$~m5hqtQ+D<{m574w$Nce>11t(y;_x ztQhtp_c3r@FQ#_*Rg!#%a64qO*XTzBaMOjyvZOvsWxiA8cft51cFU7;PR|np`2Gum z^dmj1lxg&?l=LUcA)4>WfuIifh?G;AdHv`n93@lplxnxX;`nH`?MjhLzf7PX8B#m6 z%5k7hW_~Bfjhcw?!%1-ZeIv;^|7=0W_tp^n6Swndmfd6(3G&^;?L095sy*0Oq#TQE zhkOqVJnu@pLVlp#E9HC)JR9xJ+`qn*^w*6LgnZ8x!9xX7u^vb;+qzU^u&*TgPA6gP4?DDOC-;9RNa7Rndx&1~npfC~wdPnPmNDs#|nd4-U#`nf4SmXAzd%X4axivV|P&eYbJk72ex;&R| z0j_OzEpKXFR^8;P^|ZR%UDe)ItfsZOtqDgdwYjrrT{Jr}5#8w9=;CKLuDTYF8+Q!T z`HQR8+uXbwEQXj1l%6zJ92dr?h;t;D3s-l$bi0&bFRWm$1;u6aZN)gcE2_k?qSH3NSSP9c9^#sI zFE0=sFO}A<@;K>tEb2<+{Cssc@v-h97WX5^(?eo`yjK}FF~_ILdzIrsx}MpD&`B5c zbGLh3?Q|_OziT<3h71VvFT?f7E&LYd_+*&HgL{(eYO6if{0yyqb@Q^;COS%E*LWcp zhhy&Q8q_&!ud8;~G!(bCuJE>r>y^bV(D4Gz_eT$;&SX@H>W4m1yQvBHPrI+JcGtSv zJap!UDo1eLZG(UNC$zL)dv9`U{)ll71OE*zF${@M6 zvBiah$yyexGgVG%Xe3?Up&g&CxU)MR;~hh=)K|BAN?L36Rc)-X-PP{JjofGrki4uN zQGR~PZ_*Y9#$Dhob*`3H*4EnQZ9?rE8xV|F>A_)Vv$z>NK1(FJTD(n7)D5Y<$$P`& ziC{kb*ef0rZIEC89iJc@7gTkY6sZ18_j#)!39#`tZ{v;O@si^193M!!K#nBBP%Ia% z8#MI@7mgQ26H{{&f)h&?6 zxR3VBQ*b2R-q_j_7jYrYUDs4igfi5IW8_?3-?<3pE^kHDgYkvs((nnQR^B0V>l);M z*-#51DkfOP$SF8f7&%loH`YKW>d(1o04x`yb~8p|F$4VHy=nPRQy`Wf+^KyN<*WT) znapWbn0%Vlyr%AYBTmHvuO`74l4d(-OPF1U0NGUWt9>Wst9^5ozq$PD zCI5UWQ0>DjU+voy|0#IP{F{N1T}eN3>fA#4@^erm;!V05ogWb+zuNCoem*4{oZ602 z{w+u|@-LMM%2)a-5h||yKbrW}{+aU6RttG4K;^6C-DBcc=QPTf%TPsrRQd0h{Hp%c zcaHK8Dn+FPYfO>|JPMD>ulSSwJ1>gOZ2#v>{Azzs`8%m|7@SER_9D_~uS}7teH``g zz?kV(T={Q-m$o<+zuG68UIGwK(TUhVT;U%gf!dzpSNluq-`VMt?I+egr65(0w68`a zieK$NPAU_GU6NjTDqrQ(caM=jwoh3uIaI~Ss8#yP>qCHKsr+N#BNc)oTYg_DdaDXZ z|6BN!|8x`1B)_Toa#|vlX5qd`MJ{Q&%AW>TIGSg2@u+gE@srwjOm~Ep%U>cw- -#include -#include -#include -#include -#include "lexerDef.h" - -void lexError(char *errStr, int line_no) -{ - printf("%d ) Lexical Error : %s\n", line_no, errStr); -token_name searchLookupTable(char* lexeme){ - char *lookupTable[30][2]; - return ID; - - -} - -TOKEN getToken(){ - - if(lexeme_begin == BUFFER_SIZE){ - lexeme_begin = 0; - } - TOKEN t; - - int lex_size = forward_ptr - lexeme_begin; - if(lex_size <0){ - lex_size+= BUFFER_SIZE; - } - lexeme[lex_size] = '\0'; - - if(2 == state){ - - if(lex_size >20){ - t.name = LEX_ERROR; - return t; - } - - token_name name = searchLookupTable(lexeme); - t.name = name; - t.str = lexeme; - return t; - } - - if(4 == state || 6 == state){ - t.name = NUM; - t.num = atoi(lexeme); - } - - if( 8 == state || 12 == state){ - t.name = RNUM; - t.rnum = atof(lexeme); - } - - return t; -} - -void retract(int num){ - forward_ptr -= num; - if(forward_ptr < 0){ - forward_ptr += BUFFER_SIZE; - } - just_retracted = true; -} - -void init() -{ - state = 0; - buffer[BUFFER_SIZE] - lexeme_begin = forward_ptr= 0; - just_retracted = false; - line_no = 1; -} - -FILE *getStream(FILE *fp) -void getStream(FILE *fp) -{ - return fp; - //printf("stream: "); - //for( int i=0; i' == c){ - state = 30; - } - else{ - state = 28; - } - break; - - case 28: ; - retract(1); - t.name = GT; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 29: ; - t.name = GE; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 30: ; - t.name = ENDDEF; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 31: ; - c = getChar(fp); - if('=' == c){ - state = 32; - } - else{ - t.name = LEX_ERROR; - //printf("%d", state); - return t; - } - break; - - case 32: ; - t.name = EQ; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 33: ; - c = getChar(fp); - if('=' == c){ - state = 34; - } - else{ - t.name = LEX_ERROR; - //printf("%d", state); - return t; - } - break; - - case 34: ; - t.name = NE; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 35: ; - c = getChar(fp); - if('=' == c){ - state = 36; - } - else{ - state = 37; - } - break; - - case 36: ; - t.name = ASSIGNOP; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 37: ; - retract(1); - t.name = COLON; - lexeme_begin = forward_ptr; state =0; - //printf("colon %c %d", buffer[forward_ptr], forward_ptr); - return t; - break; - - case 38: ; - c = getChar(fp); - if('.' == c){ - state = 39; - } - else{ - t.name = LEX_ERROR; - //printf("%d", state); - return t; - } - break; - - case 39: ; - t.name = RANGEOP; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 40: ; - t.name = SEMICOL; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 41: ; - t.name = COMMA; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 42: ; - t.name = SQBO; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 43: ; - t.name = SQBC; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 44: ; - t.name = BO; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - case 45: ; - t.name = BC; - lexeme_begin = forward_ptr; state =0; - return t; - break; - - default: ; - t.name = LEX_ERROR; - //printf("%d", state); - return t; - } - - - c = getchar1() - } - - return t; -} - -void removeComments(FILE *ip, FILE *op){ - while(!feof(ip)){ - c = getchar(fp); - - } -} -void lexError(char *errStr,FILE* fp) -{ - -} - printf("%d ) Lexical Error : %s\n", line_no, errStr); - char c; - while(1){ - if(getChar(fp) == '\n'){ - line_no++; - break; - } - } -} - -void removeComments(FILE *ifp, FILE* ofp) -{ - state = 0; - int x = get_char(ifp); - while(x!=EOF){ - switch(state) - { - case 0: - if('*' == x) - state = 1; - else - fputc(c, ofp); - break; - case 1: - if('*' == x) - state = 2; - else - { - state = 0; - fputc(x, ofp) - } - break; - case 2: - if('*' == x) - state = 3; - else {} //nothing - break; - case 3: - if('*' == x) - state = 0; - else - state = 2; - break; - } - x = get_char(ifp); - } -} - - while(c != EOF) - { - if(!incomment && '*' != c) { - putchar(c, ofp) - } - if('*' == c && 0 == onestar) - { - onestar = 1; - } - else if('*' == c && 1 == onestar) - { - twostar = 1; - } - } -} diff --git a/Code/junkfile3.c b/Code/junkfile3.c deleted file mode 100644 index 61c505a..0000000 --- a/Code/junkfile3.c +++ /dev/null @@ -1,849 +0,0 @@ -#include -#include -#include -#include -#include "lexerDef.h" - -void lex_error(int line_no, char *errStr) -{ - printf("%d ) Lexical Error : %s\n", line_no, errStr); -} - -void init() -{ - state = 0; - c = '$'; - id_count = 0; - line_count = 0; - lex_len = 0; - la_str[1] = '\0'; - forward_ptr = -1; - get_stream(fp); - //buffer[BUFFER_SIZE] -} - -//Initialize/Refill buffer -void get_stream(FILE *fp) -{ - // return fp; - int num; - - //Single buffer treated as two buffers. - //Forward pointer = -1 at the beginning - if( -1 == forward_ptr) - { - forward_ptr = 0; - num = fread(&buffer[0], 1, BUFFER_SIZE, fp); - } - //Second buffer filled up - if( BUFFER_SIZE == forward_ptr) - { - forward_ptr = 0; - num = fread(&buffer[forward_ptr + BUFFER_SIZE/2], 1, BUFFER_SIZE/2, fp); - } - //First buffer filled up - else if( BUFFER_SIZE/2 == forward_ptr) - { - num = fread(&buffer[0], 1, BUFFER_SIZE/2, fp); - } - //If not enough characters left to read, append EOF - if(BUFFER_SIZE/2 != num) - { - buffer[num+forward_ptr] = EOF; - } -} - -void removeComments(FILE *ifp, FILE* ofp) -{ - state = 0; - char x = get_char(ifp); - switch(state) - { - case 0: if('*' == x) { - - } - } - - while(c != EOF) - { - if(!incomment && '*' != c) { - putchar(c, ofp) - } - if('*' == c && 0 == onestar) - { - onestar = 1; - } - else if('*' == c && 1 == onestar) - { - twostar = 1; - } - } -} - -/* - look for atoi() and atof() - EOF == c? case 0: state 46 - while ( (c= getchar()) != EOF) me c should be int -*/ - -void add_char(char next_char) { - if(lex_len <= MAX_LEXEME_LENGTH) - { - lexeme[lex_len] = next_char; - lex_len++; - } - else - { - lex_error(line_count + 1, "Identifier length exceeds 20"); - } -} - -char get_char(FILE* fp) { - /*if(){ // if buffer is read completely - getStream(fp); - }*/ - //c = fgetc(fp); - //forward_ptr++; - ////add_char(c); - if((forward_ptr == BUFFER_SIZE || forward_ptr == BUFFER_SIZE/2)) // && just_retracted == false - { - get_stream(fp); - } - char c = buffer[forward_ptr]; - int lex_index = forward_ptr - lexeme_begin; - if(lex_index<0) - { - lex_index += BUFFER_SIZE; - } - lexeme[lex_index] = c; - forward_ptr++; - //printf("%c %d", c, forward_ptr-1); - // just_retracted = false; - //return c; - return c; -} - -bool is_alphabet (char c) -{ - if( (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ) - { - return true; - } - else - { - return false; - } -} -bool is_digit (char c) -{ - if(c >= '0' && c <= '9') - { - return true; - } - else - { - return false; - } -} - -bool is_delim (char c) -{ - if( (c == ' ') || (c == '\t') || (c == '\r') || (c == '\n') ) - { - return true; - } - else - { - return false; - } -} - - -TOKEN getNextToken() -{ - TOKEN t; - //use the feof instead o - while(true){ - //for identifer and num - la_str[0] = c; - //perror(la_str); - switch(state){ - case 0: - { - c = get_char(fp); - if(is_alphabet(c)) - { - //add_char(c); - id_count++; - state = 1; - } - else if( is_digit(c) ) - { - //add_char(c); - state = 3; - } - else if( is_delim(c) ) - { - if('\n' == c) - { - line_count++; - } - state = 13; - } - else if( '.' == c) - { - state = 38; - } - else if( ';' == c ) - { - state = 40; - } - else if( ',' == c ) - { - state = 41; - } - else if( '[' == c) - { - state = 42; - } - else if( ']' == c ) - { - state = 43; - } - else if( '(' == c ) - { - state = 44; - } - else if( ')' == c ) - { - state = 45; - } - else if( '+' == c ) - { - state = 15; - } - else if( '-' == c ) - { - state = 16; - } - else if( '*' == c ) - { - state = 17; - } - else if( '/' == c ) - { - state = 22; - } - else if( '<' == c ) - { - state = 23; - } - else if( '>' == c ) - { - state = 27; - } - else if( '=' == c ) - { - state = 31; - } - else if( '!' == c ) - { - state = 33; - } - else if( ':' == c ) - { - state = 35; - } - else if( EOF == c) - { - state = 46; - } - else - { - state = TRAP_STATE; - la_str[0] = c; - err_msg = strcat("Invalid character read : ", la_str); - } - c = get_char(fp); - } - break; - - case 1: - { - if( (is_alphabet(c) || is_digit(c) || ('_' == c) ) && id_count<20) - { - //add_char(c); - c = get_char(fp); - id_count++; - state = 1; - } - else - { - //add_char('\0'); - state = 2; - } - } - break; - case 2: - { - //la_str[0] = c; - //perror(la_str); - ungetc(c, fp); - //lookup till end_ptr-1 - retract(1); - lexeme_begin = forward_ptr; - printf("ID (%s)",lexeme); // or KW - lex_len = 0; - id_count = 0; - state = 0; - } - break; - case 3: - { - if(is_digit(c)) - { - //add_char(c); - c = get_char(fp); - state = 3; - } - else if('.' == c) - { - //add_char(c); - c = get_char(fp); - state = 5; - } - else - { - //add_char('\0'); - state = 4; - } - } - break; - case 4: - { - ungetc(c, fp); - // do atoi() to get value till end_ptr-1 - retract(1); - lexeme_begin = forward_ptr; - printf("NUM(%d) ", atoi(lexeme)); - lex_len = 0; - state = 0; - } - break; - case 5: - { - if(is_digit(c)) - { - //add_char(c); - c = get_char(fp); - state = 7; - } - else if( '.' == c) - { - state = 6; - } - else - { - state = TRAP_STATE; - la_str[0] = c; - err_msg = strcat("Expected a digit after ., found : ", la_str); - } - } - break; - case 6: - { - ungetc('.', fp); - ungetc('.', fp); - // atoi( ( f_ptr .. end_ptr - 2 ) - lex_len--; //remove one . which was added - //add_char('\0'); - retract(2); - lexeme_begin = forward_ptr; - printf("NUM(%d) ", atoi(lexeme)); - lex_len = 0; - state = 0; - } - case 7: - { - if(is_digit(c)) - { - //add_char(c); - c = get_char(fp); - state = 7; - } - else if('E' == c || 'e' == c) - { - //add_char(c); - c = getc(fp); - state = 9; - } - else - { - //add_char('\0'); - state = 8; - } - } - break; - case 8: - { - ungetc(c, fp); - // do atof() till end_ptr-1 - lexeme[forward_ptr-1] = '\0'; - printf("RNUM(%f) ", atof(lexeme)); - state=0; - } - break; - case 9: - { - if('+' == c || '-' == c) - { - //add_char(c); - c = get_char(fp); - state = 10; - } - else if(is_digit(c)) - { - //add_char(c); - c = get_char(fp); - state = 11; - } - else - { - state = TRAP_STATE; - la_str[0] = c; - err_msg = strcat( "Expected +/-/digit after e/E, found :", la_str); - // invalidate the token read - } - } - break; - case 10: - { - if(is_digit(c)) - { - //add_char(c); - c = get_char(fp); - state = 11; - } - else - { - state = TRAP_STATE; - la_str[0] = c; - err_msg = strcat( "Expected digit after +/-, found :", la_str); - // invalidate token read - } - } - break; - case 11: - { - if(is_digit(c)) - { - //add_char(c); - c = get_char(fp); - state = 11; - } - else - { - //add_char('\0'); - state = 12; - } - } - break; - case 12: - { - ungetc(c,fp); - //atof() end_ptr-1 - retract(1); - printf("RNUM(%f) ", atof(lexeme)); - lex_len = 0; - state = 0; - } - break; - case 13: - { - if ( is_delim(c) ) - { - if('\n' == c) - { - line_count++; - } - c = get_char(fp); - state = 13; - } - else - { - state = 14; - } - } - break; - case 14: - { - retract(1); - ungetc(c, fp); - state = 0; - } - break; - case 15: - { - //arith_op , PLUS - lexeme[forward_ptr] = '\0'; - printf("PLUS "); - state = 0; - } - break; - case 16: - { - //arith_op , MINUS - lexeme[forward_ptr] = '\0'; - printf("MINUS "); - state = 0; - } - break; - case 17: - { - if( '*' == c) - { - c = get_char(fp); - state = 19; - } - else - { - state = 18; - } - } - break; - case 18: - { - ungetc(c, fp); - //arith_op, mul - retract(1); - printf("MUL "); - state = 0; - } - break; - case 19: - { - if( '*' == c) - { - c = get_char(fp); - state = 20; - } - else - { - state = 19; - } - } - break; - case 20: - { - if( '*' == c) - { - state = 21; - } - else - { - c = get_char(fp); - state = 19; - } - } - break; - case 21: - { - printf("COMMENT "); //ignore this token - state = 0; - } - break; - case 22: - { - lexeme[forward_ptr] = '\0'; - printf("DIV "); - state = 0; - } - break; - case 23: - { - if('=' == c) - { - state = 25; - } - else if('<' == c) - { - state = 26; - } - else - { - state = 24; - } - } - break; - case 24: - { - ungetc(c, fp); - lexeme[forward_ptr - 1] = '\0'; - printf("LT "); - } - break; - case 25: - { - lexeme[forward_ptr] = '\0'; - printf("LE "); - state = 0; - } - break; - case 26: - { - lexeme[forward_ptr] = '\0'; - printf("DEF "); - state = 0; - } - break; - case 27: - { - if('=' == c) - { - state = 29; - } - else if('>' == c) - { - state = 30; - } - else - { - state = 28; - } - } - break; - case 28: - { - lexeme[forward_ptr - 1] = '\0'; - ungetc(c, fp); - printf("GT "); - } - break; - case 29: - { - lexeme[forward_ptr] = '\0'; - printf("GE "); - state = 0; - } - break; - case 30: - { - lexeme[forward_ptr] = '\0'; - printf("ENDDEF "); - state = 0; - } - break; - case 31: - { - if('=' == c) - { - state = 32; - } - else - { - state = TRAP_STATE; - la_str[0] = c; - err_msg = strcat( "Expected = after =, found :", la_str ); - } - } - break; - case 32: - { - lexeme[forward_ptr] = '\0'; - printf("EQ "); - state = 0; - } - break; - case 33: - { - if('=' == c) - { - state = 34; - } - else - { - state = TRAP_STATE; - la_str[0] = c; - err_msg = strcat( "Expected = after !, found :", la_str ); - } - } - break; - case 34: - { - lexeme[forward_ptr] = '\0'; - printf("NE "); - state = 0; - } - break; - case 35: - { - if('=' == c) - { - state = 36; - } - else - { - state = 37; - } - } - break; - case 36: - { - lexeme[forward_ptr] = '\0'; - printf("ASSIGNOP "); - state = 0; - } - break; - case 37: - { - ungetc( c, fp ); - retract(1); - printf("COLON "); - state = 0; - } - break; - case 38: - { - if( '.' == c) - { - state = 39; - } - else - { - state = TRAP_STATE; - la_str[0] = c; - err_msg = strcat( "Expected . after ., found :", la_str ); - } - } - break; - case 39: - { - retract(1); - printf("RANGEOP "); - state = 0; - } - break; - case 40: - { - retract(1); - ungetc(c, fp); - printf("SEMICOL "); - state = 0; - } - break; - case 41: - { - retract(1); - ungetc(c, fp); - printf("COMMA "); - state = 0; - } - break; - case 42: - { - retract(1); - ungetc(c, fp); - printf("SQBO "); - state = 0; - } - break; - case 43: - { - retract(1); - ungetc(c, fp); - printf("SQBC "); - state = 0; - } - break; - case 44: - { - retract(1); - ungetc(c, fp); - printf("BO "); - state = 0; - } - break; - case 45: - { - retract(1); - ungetc(c, fp); - printf("BC "); - state = 0; - } - break; - case 46: - { - // ungetc(c, fp); - printf("LEXING DONE... "); - state = 0; - // exit(0); - } - break; - case TRAP_STATE: - default: - { - // ungetc() the character read - ungetc(c, fp); - lex_len = 0; //ignore whatever u read rn - lex_error(line_count + 1, err_msg); //line_no, msg - state = 0; - } - } // End of switch - } // End of while - /* - if(BUFFER_SIZE == lexeme_begin) - { - lexeme_begin = 0; - } - TOKEN t; - - lex_len = forward_ptr - lexeme_begin; - - if(lex_len <0) - { - lex_len += BUFFER_SIZE; - } - lexeme[lex_len] = '\0'; - */ -} - -/* - do getc() in each non-accepting state - ungetc() in each retracting state - and nothin in (accepting + non-retracting) states -*/ - -int main ( int argc, char *argv[] ) { - FILE* fp; - fp = fopen(argv[1], "r"); - if(fp == NULL){ - printf("FILE OPEN ERROR\n"); - exit(1); - } - init(); -// #if 0 - FILE *source = fopen("input.txt", "r"); - TOKEN t; - while(1){ - t = getNextToken(source); - if(t.name == END_OF_FILE){ - break; - } - else{ - if(t.name == LEX_ERROR){ - lexError("Error..."); - } - else{ - if(t.name != DELIM){ - printf("%s ",terminal_string[ t.name ]); - } - else{ - if(t.num == '\n'){ - printf("\n"); - } - } - } - } - } -// #endif -} // End of main \ No newline at end of file diff --git a/Code/myalloc_myfree.c b/Code/myalloc_myfree.c deleted file mode 100644 index cefcceb..0000000 --- a/Code/myalloc_myfree.c +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include - -int totalspacealloc = 0; - -void* myalloc(int n) -{ - void *a = malloc(n + sizeof(int)); - totalspacealloc += n; - *(int*) a = n; - return (a + sizeof(int)); -} - -void myfree(void *ptr) -{ - totalspacealloc -= *(int*)(ptr-sizeof(int)); - free(ptr-sizeof(int)); -}