基于tensorflow的躲避障碍物的ai训练

import pygame
import random
from pygame.locals import *
import numpy as np
from collections import deque
import tensorflow as tf  # http://blog.topspeedsnail.com/archives/10116
import cv2  # http://blog.topspeedsnail.com/archives/4755
score = 0
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)

SCREEN_SIZE = [320, 400]
BAR_SIZE = [100, 5]
BALL_SIZE = [20, 20]

# 神经网络的输出
MOVE_STAY = [1, 0, 0]
MOVE_LEFT = [0, 1, 0]
MOVE_RIGHT = [0, 0, 1]

class Game(object):
    def __init__(self):
        pygame.init()
        self.clock = pygame.time.Clock()
        self.screen = pygame.display.set_mode(SCREEN_SIZE)
        pygame.display.set_caption('Simple Game')

        self.ball_pos_x = SCREEN_SIZE[0] // 2 - BALL_SIZE[0] / 2
        self.ball_pos_y = SCREEN_SIZE[1] // 2 - BALL_SIZE[1] / 2

        self.ball_dir_x = -1  # -1 = left 1 = right
        self.ball_dir_y = -1  # -1 = up   1 = down
        self.ball_pos = pygame.Rect(self.ball_pos_x, self.ball_pos_y, BALL_SIZE[0], BALL_SIZE[1])

        self.bar_pos_x = SCREEN_SIZE[0] // 2 - BAR_SIZE[0] // 2
        self.bar_pos = pygame.Rect(self.bar_pos_x, SCREEN_SIZE[1] - BAR_SIZE[1], BAR_SIZE[0], BAR_SIZE[1])

        self.ball2_pos_x = SCREEN_SIZE[0] // 2 - BALL_SIZE[0] / 2
        self.ball2_pos_y = SCREEN_SIZE[1] // 2 - BALL_SIZE[1] / 2

        self.ball2_dir_x = -1  # -1 = left 1 = right
        self.ball2_dir_y = -1  # -1 = up   1 = down
        self.ball2_pos = pygame.Rect(self.ball2_pos_x, self.ball2_pos_y, BALL_SIZE[0], BALL_SIZE[1])

    # action是MOVE_STAY、MOVE_LEFT、MOVE_RIGHT
    # ai控制棒子左右移动；返回游戏界面像素数和对应的奖励。(像素->奖励->强化棒子往奖励高的方向移动)
    def step(self, action):

        if action == MOVE_LEFT:
            self.bar_pos_x = self.bar_pos_x - 2
        elif action == MOVE_RIGHT:
            self.bar_pos_x = self.bar_pos_x + 2
        else:
            pass
        if self.bar_pos_x < 0:
            self.bar_pos_x = 0
        if self.bar_pos_x > SCREEN_SIZE[0] - BAR_SIZE[0]:
            self.bar_pos_x = SCREEN_SIZE[0] - BAR_SIZE[0]

        self.screen.fill(BLACK)
        self.bar_pos.left = self.bar_pos_x
        pygame.draw.rect(self.screen, WHITE, self.bar_pos)

        # if random.randint(0, 2) < 1:
        #     self.ball_pos.left += self.ball_dir_x * random.randint(2, 10)
        # else:
        #     self.ball2_pos.left -= self.ball2_dir_x * random.randint(2, 10)
        self.ball_pos.left += self.ball_dir_x * random.randint(2, 10)
        self.ball_pos.bottom += self.ball_dir_y * random.randint(2, 10)
        # if self.ball_pos.left < 0:
        #     self.ball_pos.left = 1
        #
        # if self.ball_pos.left > 320:
        #     self.ball_pos.left = 305
        pygame.draw.rect(self.screen, WHITE, self.ball_pos)

        if self.ball_pos.top <= 0 or self.ball_pos.bottom >= (SCREEN_SIZE[1] - BAR_SIZE[1] + 1):
            self.ball_dir_y = self.ball_dir_y * -1
        if self.ball_pos.left <= 0 or self.ball_pos.right >= (SCREEN_SIZE[0]):
            self.ball_dir_x = self.ball_dir_x * -1

        # if random.randint(0, 2) < 1:
        #     self.ball2_pos.left += self.ball2_dir_x * random.randint(2, 10)
        # else:
        #     self.ball2_pos.left -= self.ball2_dir_x * random.randint(2, 10)
        self.ball2_pos.left += self.ball2_dir_x * random.randint(2, 10)
        self.ball2_pos.bottom += self.ball2_dir_y * random.randint(2, 10)

        # if self.ball2_pos.left < 0:
        #     self.ball2_pos.left = 1
        # if self.ball2_pos.left > 320:
        #     self.ball2_pos.left = 305

        pygame.draw.rect(self.screen, WHITE, self.ball2_pos)

        if self.ball2_pos.top <= 0 or self.ball2_pos.bottom >= (SCREEN_SIZE[1] - BAR_SIZE[1] + 1):
            self.ball2_dir_y = self.ball2_dir_y * -1
        if self.ball2_pos.left <= 0 or self.ball2_pos.right >= (SCREEN_SIZE[0]):
            self.ball2_dir_x = self.ball2_dir_x * -1

        reward = 0

        if (self.bar_pos.top <= self.ball_pos.bottom and (self.bar_pos.left < self.ball_pos.right and self.bar_pos.right > self.ball_pos.left)) or (self.bar_pos.top <= self.ball2_pos.bottom and (self.bar_pos.left < self.ball2_pos.right and self.bar_pos.right > self.ball2_pos.left)) :
            reward = - 10  # 击中惩罚
            score = +1
            print(score)
        elif self.bar_pos.top <= self.ball_pos.bottom and (
                self.bar_pos.left > self.ball_pos.right or self.bar_pos.right < self.ball_pos.left):
            reward = +1  # 躲避奖励

        # 获得游戏界面像素
        screen_image = pygame.surfarray.array3d(pygame.display.get_surface())
        pygame.display.update()
        # 返回游戏界面像素和对应的奖励
        return reward, screen_image

# learning_rate
LEARNING_RATE = 0.99
# 更新梯度
INITIAL_EPSILON = 1.0
FINAL_EPSILON = 0.05
# 测试观测次数
EXPLORE = 500000
OBSERVE = 50000
# 存储过往经验大小
REPLAY_MEMORY = 500000

BATCH = 100

output = 3  # 输出层神经元数。代表3种操作-MOVE_STAY:[1, 0, 0]  MOVE_LEFT:[0, 1, 0]  MOVE_RIGHT:[0, 0, 1]
input_image = tf.placeholder("float", [None, 80, 100, 4])  # 游戏像素
action = tf.placeholder("float", [None, output])  # 操作

# 定义CNN-卷积神经网络 参考:http://blog.topspeedsnail.com/archives/10451
def convolutional_neural_network(input_image):
    weights = {'w_conv1': tf.Variable(tf.zeros([8, 8, 4, 32])),
               'w_conv2': tf.Variable(tf.zeros([4, 4, 32, 64])),
               'w_conv3': tf.Variable(tf.zeros([3, 3, 64, 64])),
               'w_fc4': tf.Variable(tf.zeros([3456, 784])),
               'w_out': tf.Variable(tf.zeros([784, output]))}

    biases = {'b_conv1': tf.Variable(tf.zeros([32])),
              'b_conv2': tf.Variable(tf.zeros([64])),
              'b_conv3': tf.Variable(tf.zeros([64])),
              'b_fc4': tf.Variable(tf.zeros([784])),
              'b_out': tf.Variable(tf.zeros([output]))}

    conv1 = tf.nn.relu(
        tf.nn.conv2d(input_image, weights['w_conv1'], strides=[1, 4, 4, 1], padding="VALID") + biases['b_conv1'])
    conv2 = tf.nn.relu(
        tf.nn.conv2d(conv1, weights['w_conv2'], strides=[1, 2, 2, 1], padding="VALID") + biases['b_conv2'])
    conv3 = tf.nn.relu(
        tf.nn.conv2d(conv2, weights['w_conv3'], strides=[1, 1, 1, 1], padding="VALID") + biases['b_conv3'])
    conv3_flat = tf.reshape(conv3, [-1, 3456])
    fc4 = tf.nn.relu(tf.matmul(conv3_flat, weights['w_fc4']) + biases['b_fc4'])

    output_layer = tf.matmul(fc4, weights['w_out']) + biases['b_out']
    return output_layer

# 深度强化学习入门: https://www.nervanasys.com/demystifying-deep-reinforcement-learning/
# 训练神经网络
def train_neural_network(input_image):
    predict_action = convolutional_neural_network(input_image)

    argmax = tf.placeholder("float", [None, output])
    gt = tf.placeholder("float", [None])

    action = tf.reduce_sum(tf.multiply(predict_action, argmax), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(action - gt))
    optimizer = tf.train.AdamOptimizer(1e-6).minimize(cost)

    game = Game()
    D = deque()

    _, image = game.step(MOVE_STAY)
    # 转换为灰度值
    image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
    # 转换为二值
    ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
    input_image_data = np.stack((image, image, image, image), axis=2)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        saver = tf.train.Saver()

        n = 0
        epsilon = INITIAL_EPSILON
        while True:
            action_t = predict_action.eval(feed_dict={input_image: [input_image_data]})[0]

            argmax_t = np.zeros([output], dtype=np.int)
            if (random.random() <= INITIAL_EPSILON):
                maxIndex = random.randrange(output)
            else:
                maxIndex = np.argmax(action_t)
            argmax_t[maxIndex] = 1
            if epsilon > FINAL_EPSILON:
                epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

            # for event in pygame.event.get():  macOS需要事件循环，否则白屏
            #  if event.type == QUIT:
            #     pygame.quit()
            #     sys.exit()
            reward, image = game.step(list(argmax_t))

            image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
            ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
            image = np.reshape(image, (80, 100, 1))
            input_image_data1 = np.append(image, input_image_data[:, :, 0:3], axis=2)

            D.append((input_image_data, argmax_t, reward, input_image_data1))

            if len(D) > REPLAY_MEMORY:
                D.popleft()

            if n > OBSERVE:
                minibatch = random.sample(D, BATCH)
                input_image_data_batch = [d[0] for d in minibatch]
                argmax_batch = [d[1] for d in minibatch]
                reward_batch = [d[2] for d in minibatch]
                input_image_data1_batch = [d[3] for d in minibatch]

                gt_batch = []

                out_batch = predict_action.eval(feed_dict={input_image: input_image_data1_batch})

                for i in range(0, len(minibatch)):
                    gt_batch.append(reward_batch[i] + LEARNING_RATE * np.max(out_batch[i]))

                optimizer.run(feed_dict={gt: gt_batch, argmax: argmax_batch, input_image: input_image_data_batch})

            input_image_data = input_image_data1
            n = n + 1

            if n % 10000 == 0:
                saver.save(sess, 'game.cpk', global_step=n)  # 保存模型

            print(n, "epsilon:", epsilon, " ", "action:", maxIndex, " ", "reward:", reward)

train_neural_network(input_image)

基于tensorflow的躲避障碍物的ai训练的更多相关文章

AI学习---基于TensorFlow的案例[实现线性回归的训练]
线性回归原理复习 1)构建模型 |_> y = w1x1 + w2x2 + -- + wnxn + b 2)构造损失函数 | ...
云原生的弹性 AI 训练系列之一：基于 AllReduce 的弹性分布式训练实践
引言随着模型规模和数据量的不断增大,分布式训练已经成为了工业界主流的 AI 模型训练方式.基于 Kubernetes 的 Kubeflow 项目,能够很好地承载分布式训练的工作负载,业已成为了云原生 ...
手写数字识别 ----在已经训练好的数据上根据28*28的图片获取识别概率（基于Tensorflow,Python）
通过: 手写数字识别 ----卷积神经网络模型官方案例详解(基于Tensorflow,Python) 手写数字识别 ----Softmax回归模型官方案例详解(基于Tensorflow,Pytho ...
基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型（二）
前言已完成数据预处理工作,具体参照: 基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一) 设置配置文件新建目录face_faster_rcn ...
【实践】如何利用tensorflow的object_detection api开源框架训练基于自己数据集的模型（Windows10系统）
如何利用tensorflow的object_detection api开源框架训练基于自己数据集的模型(Windows10系统) 一.环境配置 1. Python3.7.x(注:我用的是3.7.3.安 ...
ChatGirl 一个基于 TensorFlow Seq2Seq 模型的聊天机器人[中文文档]
ChatGirl 一个基于 TensorFlow Seq2Seq 模型的聊天机器人[中文文档] 简介简单地说就是该有的都有了,但是总体跑起来效果还不好. 还在开发中,它工作的效果还不好.但是你可以直 ...
基于TensorFlow Serving的深度学习在线预估
一.前言随着深度学习在图像.语言.广告点击率预估等各个领域不断发展,很多团队开始探索深度学习技术在业务层面的实践与应用.而在广告CTR预估方面,新模型也是层出不穷: Wide and Deep[1] ...
大前端技术系列：TWA技术+TensorFlow.js => 集成原生和AI功能的app
大前端技术系列:TWA技术+TensorFlow.js => 集成原生和AI功能的app ( 本文内容为melodyWxy原作,git地址:https://github.com/melodyWx ...
在OpenShift平台上验证NVIDIA DGX系统的分布式多节点自动驾驶AI训练
在OpenShift平台上验证NVIDIA DGX系统的分布式多节点自动驾驶AI训练自动驾驶汽车的深度神经网络(DNN)开发是一项艰巨的工作.本文验证了DGX多节点,多GPU,分布式训练在DXC机器 ...

随机推荐

jQuery中bind() live() delegate() on() 的区别
实例 bind(type,[data],fn) 为每个匹配元素的特定事件绑定事件处理函数 $("a").bind("click",function(){aler ...
WebLogic 11g的安装与配置详谈配置详谈
之前以weblogic8.1为例介绍了其具体安装,但是由于现在weblogic 11g毕竟使用越来越广泛 ,因此,下面将介绍weblogic 11g的具体安装以及配置: 一.安装步骤(基本跟之前we ...
Testlink Testlink在Windows下的安装
Testlink在Windows下的安装 by:授客 QQ:1033553122 测试环境 testlink-1.9.14 下载地址:http://pan.baidu.com/s/1pLrcu ...
Linux 学习笔记之超详细基础linux命令 Part 13
Linux学习笔记之超详细基础linux命令 by:授客 QQ:1033553122 ---------------------------------接Part 12---------------- ...
Python:BeautifulSoup移除某些不需要的属性
很久之前,我看到一个问题,大概是: 他爬了一段html,他获取下了所需的部分(img标签部分),但是不想保留img标签的某些属性, 比如 <img width="147" h ...
ipa的plist文件查看
1.ipa包解压缩:右键.ipa包,使用[归档实用工具/unarchiver]打开 2.进入解压缩后的payload目录,右键ipa包-显示包内容 3.找到info.plist文件,直接拖拽出来 4. ...
Postgresql_根据执行计划优化SQL
执行计划路径选择 postgresql查询规划过程中,查询请求的不同执行方案是通过建立不同的路径来表达的,在生成许多符合条件的路径之后,要从中选择出代价最小的路径,把它转化为一个计划,传递给执行器执行 ...
Django 项目连接数据库Mysql要安装mysqlclient驱动出错 : Failed building wheel for mysqlclient：
1,如果直接用 CMD命令:pip install mysqlclient ,会安装出错. 2,解决问题,参考了这个博友的帖子:https://blog.csdn.net/qq_29784441/ar ...
[ISE 14.7]Fail to Link the designer导致无法仿真问题
一.当前配置操作系统:WIN 8.1 64位软件:Xilinx ISE 14.7 二.解决方法首先,似乎64位的binary都有些问题,所以先把ISE Design Suite 14.7这个快捷 ...
SSM搭建一个后台管理系统
看一下效果图: 登陆界面: 图片上传页面: 我也把项目放到服务器上了,可以直接查看项目内容: http://codingcoge.cn/ssm-demo/login.html 1 我也放到github ...

基于tensorflow的躲避障碍物的ai训练

基于tensorflow的躲避障碍物的ai训练的更多相关文章

随机推荐

热门专题