My project partner and I are currently running into a problem with our latest university project.
Our task is to implement a neural network that plays Pong. We feed the ball's speed and the paddle positions into the network, and it has three outputs: UP, DOWN, DO_NOTHING. Once a player reaches 11 points, we train the network on all the recorded states, the decisions that were made, and the rewards for those decisions (see reward_cal()). The problem we are facing is that the loss stays stuck at a value that seems to depend only on the learning rate. As a result, the network always makes the same decision, even though we give it a heavy penalty for it.
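To make the reward scheme clearer, this is the behaviour we are aiming for with reward_cal() (a simplified standalone sketch, not our exact code): every move before a point receives that point's reward, attenuated by a discount factor the further the move is from the point.

import numpy as np

def discount_rewards_sketch(r, gamma=0.99):
    # walk backwards through the per-move rewards and spread each point's
    # reward (+1 or -1) over the moves that led up to it
    discounted = np.zeros_like(r, dtype=float)
    running_add = 0.0
    for t in reversed(range(len(r))):
        if r[t] != 0:          # a point was scored here: start a new running sum
            running_add = 0.0
        running_add = running_add * gamma + r[t]
        discounted[t] = running_add
    return discounted

print(discount_rewards_sketch(np.array([0.0, 0.0, 0.0, -1.0])))
# -> [-0.970299 -0.9801   -0.99     -1.      ]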
Please help us figure out what we are doing wrong, we appreciate every suggestion! Our code is below, feel free to ask if anything is unclear. We are still quite new to this topic, so please don't be rude if something is completely stupid :D
Here is our code:
import sys, pygame, time
import numpy as np
import random
from os.path import isfile
import keras
from keras.optimizers import SGD
from keras.layers import Dense
from keras.layers.core import Flatten

pygame.init()
pygame.mixer.init()

# surface of the game
width = 400
height = 600
black = 0, 0, 0  # RGB value
screen = pygame.display.set_mode((width, height), 32)  # (resolution (x, y), flags, colour depth)
font = pygame.font.SysFont('arial', 36, bold=True)
pygame.display.set_caption('PyPong')  # title of the window

# consts for the game
acceleration = 0.0025  # ball becomes faster during the game
mousematch = 1
delay_time = 0
paddleP = pygame.image.load("schlaeger.gif")
playerRect = paddleP.get_rect(center=(200, 550))
paddleC = pygame.image.load("schlaeger.gif")
comrect = paddleC.get_rect(center=(200, 50))
ball = pygame.image.load("ball.gif")
ballRect = ball.get_rect(center=(200, 300))

# variables for the game
pointsPlayer = [0]
pointsCom = [0]
playermove = [0, 0]
speedbar = [0, 0]
speed = [6, 6]
hitX = 0

# neural consts
learning_rate = 0.01
number_of_actions = 3
filehandler = open('logfile.log', 'a')
filename = sys.argv[1]

# neural variables
states, action_prob_grads, rewards, action_probs = [], [], [], []
reward_sum = 0
episode_number = 0
reward_sums = []

pygame.display.flip()


def pointcontrol():  # having a look at the points in the game and restart()
    if pointsPlayer[0] >= 11:
        print('Player Won ', pointsPlayer[0], '/', pointsCom[0])
        restart(1)
        return 1
    if pointsCom[0] >= 11:
        print('Computer Won ', pointsPlayer[0], '/', pointsCom[0])
        restart(1)
        return 1
    elif pointsCom[0] < 11 and pointsPlayer[0] < 11:
        restart(0)
        return 0


def restart(finished):  # resetting the positions, the ball speed and (if the point limit was reached) the points
    ballRect.center = 200, 300
    comrect.center = 200, 50
    playerRect.center = 200, 550
    speed[0] = 6
    speed[1] = 6
    screen.blit(paddleC, comrect)
    screen.blit(paddleP, playerRect)
    pygame.display.flip()
    if finished:
        pointsPlayer[0] = 0
        pointsCom[0] = 0


def reward_cal(r, gamma=0.99):  # rewarding every move
    discounted_r = np.zeros_like(r)  # making a zero array with the size of the reward array
    running_add = 0
    for t in range(r.size - 1, -1):  # iterating beginning at the end
        if r[t] != 0:  # if reward -1 or 1 (point made or lost)
            running_add = 0
        running_add = running_add * gamma + r[t]  # giving every move before the point the same reward, but a little bit smaller
        discounted_r[t] = running_add  # putting the value in the new reward array
    # e.g. r = 0 0 0 0 0 1 0 0 0 -1 -> discounted_r = 0.5 0.6 0.7 0.8 0.9 1 -0.7 -0.8 -0.9 -1
    # (values are not really correct, just to make it clear)
    return discounted_r


# neural net
model = keras.models.Sequential()
model.add(Dense(16, input_dim=8, kernel_initializer='glorot_normal', activation='relu'))
model.add(Dense(32, kernel_initializer='glorot_normal', activation='relu'))
model.add(Dense(number_of_actions, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary()

if isfile(filename):
    model.load_weights(filename)

# one ball movement before the AI gets to make a decision
ballRect = ballRect.move(speed)
reward_temp = 0.0
if ballRect.left < 0 or ballRect.right > width:
    speed[0] = -speed[0]
if ballRect.top < 0:
    pointsPlayer[0] += 1
    reward_temp = 1.0
    done = pointcontrol()
if ballRect.bottom > height:
    pointsCom[0] += 1
    done = pointcontrol()
    reward_temp = -1.0
if ballRect.colliderect(playerRect):
    speed[1] = -speed[1]
if ballRect.colliderect(comrect):
    speed[1] = -speed[1]
if speed[0] < 0:
    speed[0] -= acceleration
if speed[0] > 0:
    speed[0] += acceleration
if speed[1] < 0:
    speed[1] -= acceleration
if speed[1] > 0:
    speed[1] += acceleration

while True:  # game loop
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()

    # current state: ball position, ball speed, both paddle positions
    state = np.array([ballRect.center[0], ballRect.center[1], speed[0], speed[1],
                      playerRect.center[0], playerRect.center[1], comrect.center[0], comrect.center[1]])
    states.append(state)
    action_prob = model.predict_on_batch(state.reshape(1, 8))[0, :]
    action_probs.append(action_prob)
    action = np.random.choice(number_of_actions, p=action_prob)
    if action == 0:
        playermove = [0, 0]
    elif action == 1:
        playermove = [5, 0]
    elif action == 2:
        playermove = [-5, 0]
    playerRect = playerRect.move(playermove)

    y = np.array([-1, -1, -1])
    y[action] = 1
    action_prob_grads.append(y - action_prob)

    # enemy move
    comrect = comrect.move(speedbar)
    ballY = ballRect.left + 5
    comrectY = comrect.left + 30
    if comrect.top <= (height / 1.5):
        if comrectY - ballY > 0:
            speedbar[0] = -7
        elif comrectY - ballY < 0:
            speedbar[0] = 7
    if comrect.top > (height / 1.5):
        speedbar[0] = 0

    if mousematch == 1:
        done = 0
        reward_temp = 0.0

        ballRect = ballRect.move(speed)
        if ballRect.left < 0 or ballRect.right > width:
            speed[0] = -speed[0]
        if ballRect.top < 0:
            pointsPlayer[0] += 1
            done = pointcontrol()
            reward_temp = 1.0
        if ballRect.bottom > height:
            pointsCom[0] += 1
            done = pointcontrol()
            reward_temp = -1.0
        if ballRect.colliderect(playerRect):
            speed[1] = -speed[1]
        if ballRect.colliderect(comrect):
            speed[1] = -speed[1]
        if speed[0] < 0:
            speed[0] -= acceleration
        if speed[0] > 0:
            speed[0] += acceleration
        if speed[1] < 0:
            speed[1] -= acceleration
        if speed[1] > 0:
            speed[1] += acceleration

        rewards.append(reward_temp)

        if done:
            episode_number += 1
            reward_sums.append(np.sum(rewards))
            if len(reward_sums) > 40:
                reward_sums.pop(0)
            s = 'Episode %d Total Episode Reward: %f, Mean %f' % (episode_number, np.sum(rewards), np.mean(reward_sums))
            print(s)
            filehandler.write(s + '\n')
            filehandler.flush()

            # Propagate the rewards back to actions where no reward was given.
            # Rewards for earlier actions are attenuated
            rewards = np.vstack(rewards)
            action_prob_grads = np.vstack(action_prob_grads)
            rewards = reward_cal(rewards)

            X = np.vstack(states).reshape(-1, 8)
            Y = action_probs + learning_rate * rewards * y
            print('loss: ', model.train_on_batch(X, Y))
            model.save_weights(filename)

            # reset the episode buffers
            states, action_prob_grads, rewards, action_probs = [], [], [], []
            reward_sum = 0

    screen.fill(black)
    screen.blit(paddleP, playerRect)
    screen.blit(ball, ballRect)
    screen.blit(paddleC, comrect)
    pygame.display.flip()
    pygame.time.delay(delay_time)
And this is our output:
pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html
Using TensorFlow backend.
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_1 (Dense)              (None, 16)                144
_________________________________________________________________
dense_2 (Dense)              (None, 32)                544
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 99
=================================================================
Total params: 787
Trainable params: 787
Non-trainable params: 0
_________________________________________________________________
2019-02-14 11:18:10.543401: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2019-02-14 11:18:10.666634: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties:
name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.6705
pciBusID: 0000:17:00.0
totalMemory: 10.92GiB freeMemory: 10.76GiB
2019-02-14 11:18:10.775144: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 1 with properties:
name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.6705
pciBusID: 0000:65:00.0
totalMemory: 10.91GiB freeMemory: 10.73GiB
2019-02-14 11:18:10.776037: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0, 1
2019-02-14 11:18:11.176560: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-02-14 11:18:11.176590: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988]      0 1
2019-02-14 11:18:11.176596: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0:   N Y
2019-02-14 11:18:11.176600: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 1:   Y N
2019-02-14 11:18:11.176914: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10403 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:17:00.0, compute capability: 6.1)
2019-02-14 11:18:11.177216: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10382 MB memory) -> physical GPU (device: 1, pci bus id: 0000:65:00.0, compute capability: 6.1)
Computer Won 0 / 11
Episode 1 Total Episode Reward: -11.000000, Mean -11.000000
loss:  0.254405
Computer Won 0 / 11
Episode 2 Total Episode Reward: -11.000000, Mean -11.000000
loss:  0.254304
Computer Won 0 / 11
Episode 3 Total Episode Reward: -11.000000, Mean -11.000000
loss:  0.254304
Computer Won 0 / 11
Episode 4 Total Episode Reward: -11.000000, Mean -11.000000
loss:  0.254304
Computer Won 0 / 11
Episode 5 Total Episode Reward: -11.000000, Mean -11.000000
loss:  0.254304
Computer Won 0 / 11
Episode 6 Total Episode Reward: -11.000000, Mean -11.000000
loss:  0.254304
Best answer: That is the evil "relu" showing its power. ReLU has a flat "zero" region with no gradient. When all of a layer's outputs become negative, ReLU turns them all into zeros and kills backpropagation.
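You can see the effect in isolation with a tiny numpy sketch (illustrative only, not taken from your code): the derivative of ReLU is exactly zero wherever the input is negative, so a layer whose pre-activations are all negative passes no gradient back at all.

import numpy as np

def relu(x):
    return np.maximum(0.0, x)

def relu_grad(x):
    # ReLU derivative: 1 where the input is positive, exactly 0 elsewhere
    return (x > 0).astype(float)

pre_activations = np.array([-2.3, -0.7, -1.5, -0.1])  # every unit is negative
print(relu(pre_activations))       # [0. 0. 0. 0.] -> the layer outputs nothing but zeros
print(relu_grad(pre_activations))  # [0. 0. 0. 0.] -> no gradient flows back, learning stops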
The simplest way to use ReLUs safely is to add a BatchNormalization layer before them:
from keras.layers import BatchNormalization, Activation

model = keras.models.Sequential()
model.add(Dense(16, input_dim=8, kernel_initializer='glorot_normal'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(32, kernel_initializer='glorot_normal'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(number_of_actions, activation='softmax'))
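The idea behind putting BatchNormalization before the activation is that it re-centres the pre-activations on every batch, so it becomes much less likely that all units end up in ReLU's flat zero region at once. (An activation with a non-zero gradient for negative inputs, such as LeakyReLU, is another common way around the same problem, though that is a different change from the one shown here.)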
This keeps roughly half of the layer's outputs "zeroed" while the other half stays trainable.
Other solutions involve carefully controlling your learning rate and optimizer, which can be a headache for beginners.
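For example (just a sketch of the idea, the value 1e-4 is not a tuned recommendation), you could compile with an explicit optimizer instance instead of the string 'adam', which lets you lower the learning rate:

from keras.optimizers import Adam

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=1e-4))  # explicit, smaller learning rate than the Keras default of 0.001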