From 080bc8287123f5f6be7ccb585940ac8c6e6d1767 Mon Sep 17 00:00:00 2001 From: MFilipe777 Date: Wed, 3 Nov 2021 19:29:04 -0300 Subject: [PATCH] Update: vision --- .../env/__pycache__/grid.cpython-38.pyc | Bin 0 -> 3186 bytes .../env/__pycache__/player.cpython-38.pyc | Bin 0 -> 1036 bytes .../env/__pycache__/plot.cpython-38.pyc | Bin 0 -> 1128 bytes .../env/__pycache__/utils.cpython-38.pyc | Bin 0 -> 493 bytes player_game/env/grid.py | 14 +++++------ player_game/env/rat_game_env.py | 23 +++++++----------- 6 files changed, 16 insertions(+), 21 deletions(-) create mode 100644 player_game/env/__pycache__/grid.cpython-38.pyc create mode 100644 player_game/env/__pycache__/player.cpython-38.pyc create mode 100644 player_game/env/__pycache__/plot.cpython-38.pyc create mode 100644 player_game/env/__pycache__/utils.cpython-38.pyc diff --git a/player_game/env/__pycache__/grid.cpython-38.pyc b/player_game/env/__pycache__/grid.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c53078b78e11b45b23b4f52883335a0db07d615b GIT binary patch literal 3186 zcmai0Pmdc(6|d@Uw|g9WGRcPBNm!tT5MX4+vl|wqRTO2EY?ej410>mtm#9_KRpV)o zyUSDEon$QMlze~{;=l=Q!~yaZ5aN;xpP_HSg}EakapL!?ZO1diE^XKAs`p;K_v-zt z{ zo^0G_UH=69tjnv0Fh(3>(ljEINmj%}SbaP!Zeir_11L7&k_{Zm2d>g!I*i96&sAtttoBq`rXox-;IlXpp;X~7#^P9+P^DpB5-9V5 z^j59z=V#J?mqdXM{@Ty%nOf773QkULp4?@_;vnzFZRh~ zUm2hk7_uUfLSnWs-=5VTp_T=qul2&bFoU#-GHFu484`x;Oc-DviB)mJ>?MT}CJ|(S z3S*r@&R7)5BN6V4I2l{L^TWOq{_tjaZ+Sd86|RI+jkV_8b5y-IQjnxA`T7}hnTC1~ zzK+Xr&F~{)_+_b*k*Ebe5|jd@jb|ef0;p19>qwu@_Es0?*i_P$ssi0zC^0^SKK?d#AVE zyWZP=XwUT7_Nmo%P)^gfd+$E1%|D@*QaYvDn9Q@vLET1>-hGehaBQ<1-eq znG48g4xlsh0Nq&w(33=M6uxxM|AwveIO$ldi-L7QUA${7tw~Nz!E7#JfvxwP#m;wq zg|>y<<@=P}FUMNF!`86%jE|Z)XHcw3XNFd@$)1rXT6XO$*B54N6kxQjGQbO0nQR=o zpL6q@b9m>_0e&Lk!_^%(SQh?29iQ`~-VpMmwuPNyUHZ@X9rp0oPyN-sPn)aOJtez> zSre88u&W8X921_xkxh;P?KKwmy!}-&Vas2^-Tva@toZ^>8S@V20nI`Cqh#od!R}py zEMq6s%TMeKX~j+&1gQCTH!6Q(yIbXFQLM)~y^VgHSM5ADN@daGL>32CXTCa6$?%}4 zJgGBfNLIxkR325NAsqvMsKq#XGJhWr*5U(&E|>$goE}#VTnIx|`C>9rne47rUarRb zrUMBER}nMo1U3j<0;qU02x>u)vzQ&fWacWU|GCpY(5dRDs9crz@94OsE|&L&Q6eh# z`VV$K`nZ4Z<9@pH(Ny*8<3>ZYy1vRD^~v_F$#E4#5juMjMc;tT{HFlS@m-(W;rPxb zfAj3`->vor-vDLPx#Dc_E5z-z`8s}A@Y`_K`DNbb(+*uuTMPLyIco9HoW^a8{AU1E z^CMigo9xibdsOu!zFcwX95TYv+iXksKE>UcapabR-0}_`bCQf2%;#(xbAQhJt@aR;{HRwUv0#pE^7{57NOn`_jH-b+)#tE`E=I=m_F#bb)am> zsB^t^&0Ba?{7J=+ZE$LEjgXg;JbHwCQ$}P;ft%19^8bR3Y0dfN*-yIZPJ<(DKHtEH=6&|k*|jOx<@GV%2Gq+b$GQ&}u;62Brys28r%OQHY|d6-B9*<(TH}C^|yk%qh(%k~)qe^L5(zEdt*o zK<+VJ0@N;70G-@WA<@VwjRMDetMy*c3f>4@OWEx8nu5GE(o2iPMsp5aG#Dmes|Qz(^0$KBrpHU< zxSL^nkpW{5<#1pU)Vpy6C zd*R=0#~bNDCHgiJdU_2wb0Y8SOu9yx!@8@Jmt(aFp6gS>0=UL4`j~Fe`@AMZpF?!I zzPMs!_d*{#0^3vw+Y}-%Ib-P z(M(o6nWS2=2?T^^aO55!g%loH6wfV1Vw)mxVfAZ#6*#qBR;;M4Qi3aXS&TWaiJZvV zm5P>Q^$m$OI56nPHG8rC~={RSFl3^lP601RYc6fXezCH=_!{aFnby`fs z=LtMv#U;q4I;bXfcfiZ^U0jK*P};ZDeL`4t0smKqzB6U`z$i4r3dNQw0QUnTTA-~v zaoi*u$HD`}zlB(Du0INn6_`D z8KRv#Nc9>z$4g@3XbZfuz$*(w*A}BK+NJ)=-!|=;C_B0i)4Z+z87z-|m1Jt#eqPCp zm&L>VDLHsBoNo8W`GAT2a=8BzW+x-(G%W=a7+MkF$e!d`aifKsKL9Vlk7Fi!{`r%J z?@xkVGfSao7x^h?iHKp9s_RWg=L=-CA%a-~jCM%|nxMcfE=i#26!Y=d3|aSPBc9C;TI z!Y=ml7WVKS?yNo^--FKw$=C+nXg5!kPKjp&SYVT!+fbqZuBdhVYN9U6!c=T_QaD;q oGSKO@Hi;T<8`+ru%Iv21C@<-lv&Xt)Qg6Bs@pmkbc(!N%1@RgJHUIzs literal 0 HcmV?d00001 diff --git a/player_game/env/__pycache__/utils.cpython-38.pyc b/player_game/env/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e2e6569c372c3a0a453d18da3e2b94330538a1d GIT binary patch literal 493 zcmb7Bu};G<5VajAq!r~85Ca2Q@W6^HDqyHob)c|hiA-(MRB;m7E>ciNKBOJ_2Y!Ra zD-*xaiE|NB2Nq7ccX#sB-MjO{V9-Zk?;kBc!w7x)V&4KNuE5PPfItG5DE)~zB_b4o zpszR$1fHQJ`T`D`ZZFuPR+6c@Fw(G_dcIP!vW9Ujd1fEk{b)R4cN3;Y;}^-eG18j3 zAM|TJkHN3O%@Cj?`qZK&UK7AzO>MZ0)(0K#(1vVqM>b@J{(6~#R1k)zN$g0IyPnm& zGEcQuE;MsqN*5N2=hD%#=E9NMINET#aCBK0RT89eq4KQaO6nfSaDkTQ4e3M3x3~Y= zvMG5hb(TXbCaY(*vPEgmo7VOH tuple: - discretazed_state = [] - for i in range(len(state)): - if state[i] <= self.state_bounds[i][0]: - new_state = 0 - elif state[i] >= self.state_bounds[i][1]: - new_state = (10,10)[i] - 1 - else: - new_state = int(round(state[i])) - discretazed_state.append(new_state) - return tuple(discretazed_state) + return tuple(state) def decide_action(self, state) -> int: @@ -188,6 +179,10 @@ def decide_action(self, state) -> int: return action def update_q(self, current_state, action, reward, next_state): + print("current:",current_state) + print("next:",next_state) + print("action:", action) + print("sum",tuple(current_state) + (action,)) self.Q[tuple(current_state) + (action,)] = self.Q[tuple(current_state) + (action,)] + self.learning_rate * (reward + self.discount * np.max(self.Q[tuple(next_state)]) - self.Q[tuple(current_state) + (action,)]) @@ -206,7 +201,7 @@ def update_epsilon(self, episode) -> float: def train(self): for episode in range(EPISODES): current_state = self.env._reset() - #current_state = self.discretize_state(current_state) + current_state = self.discretize_state(current_state) done = False