输入命令在 ''' 和 ''' 之间,然后按下 Ctrl+Enter 就可以运行了。(太恐怖了,我的后半句话甚至是它给我自动生成的。)
'''
写一个简单的模仿学习
'''
import gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow.keras import metrics
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
import random
import os
import time
# 1. Environment
# Create the CartPole-v0 task and seed it with a fixed value.
env = gym.make('CartPole-v0')
env.seed(1)
# Rebind `env` to the object exposed by `.unwrapped`.
# NOTE(review): presumably this drops gym's wrappers (including any
# episode step limit) -- confirm against the installed gym version.
env = env.unwrapped
# 2. Neural network
class Actor(keras.Model):
    """Policy network: two 20-unit ReLU layers and a softmax output layer.

    Args:
        state_dim: Accepted by the constructor but not used by it.
        action_dim: Number of units in the softmax output layer.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        hidden_units = 20
        self.fc1 = layers.Dense(hidden_units, activation='relu')
        self.fc2 = layers.Dense(hidden_units, activation='relu')
        self.out = layers.Dense(action_dim, activation='softmax')

    def call(self, inputs):
        """Pass ``inputs`` through fc1, fc2 and the softmax output layer."""
        return self.out(self.fc2(self.fc1(inputs)))
# 3. Model
# Size the network from the environment's observation and action spaces.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
actor = Actor(state_dim, action_dim)
# Build with an unspecified (None) batch dimension and state_dim features.
actor.build(input_shape=(None, state_dim))
# 4. Model training
def _collect_demonstrations(policy, episodes, max_episode_steps):
    """Roll out ``policy`` in the module-level ``env`` and record its choices.

    Args:
        policy: Callable mapping an observation to an action.
        episodes: Number of episodes to roll out.
        max_episode_steps: Upper bound on the steps recorded per episode.

    Returns:
        A ``(states, actions)`` pair of NumPy arrays with one row/entry per
        recorded step.
    """
    states, actions = [], []
    for _ in range(episodes):
        observation = env.reset()
        # Bounded loop: the module-level env is rebound to `env.unwrapped`,
        # so an episode is only guaranteed to stop on the task's own `done`.
        for _step in range(max_episode_steps):
            action = policy(observation)
            next_observation, _reward, done, _info = env.step(action)
            # Pair the action with the observation it was chosen for.
            states.append(observation)
            actions.append(action)
            observation = next_observation
            if done:
                break
    return np.array(states), np.array(actions)


def _run_greedy_episodes(episodes, max_episode_steps):
    """Render ``episodes`` rollouts of ``actor`` taking its arg-max action."""
    try:
        for _ in range(episodes):
            observation = env.reset()
            for _step in range(max_episode_steps):
                env.render()
                batch = np.asarray(observation, dtype=np.float32)[np.newaxis, :]
                # Call the model directly instead of `predict`: for a single
                # small batch this avoids predict's per-call overhead.
                probabilities = actor(batch, training=False)
                action = int(np.argmax(probabilities))
                observation, _reward, done, _info = env.step(action)
                if done:
                    break
    finally:
        # Release the render window even if an episode raises.
        env.close()


def train(expert_policy=None, expert_episodes=20, epochs=10,
          eval_episodes=5, max_episode_steps=200):
    """Behaviour-clone a policy into ``actor`` and then render it acting.

    The function collects (observation, action) pairs, fits the module-level
    ``actor`` on them with sparse categorical cross-entropy, and finally
    renders a few greedy rollouts of the fitted model.

    Args:
        expert_policy: Optional callable mapping an observation to an action.
            When omitted, actions are drawn with ``env.action_space.sample()``
            exactly as before.
        expert_episodes: Number of demonstration episodes to collect.
        epochs: Number of passes over the demonstrations during fitting.
        eval_episodes: Number of rendered evaluation episodes.
        max_episode_steps: Per-episode step cap for both collection and
            evaluation. The default of 200 is intended to match the
            CartPole-v0 time limit -- confirm if another task is used.

    Raises:
        ValueError: If no demonstration steps were collected.
    """
    if expert_policy is None:
        # NOTE(review): the default "expert" is uniformly random, so the
        # cloned policy has nothing meaningful to imitate. Pass a real
        # controller via `expert_policy` to get a useful result.
        def expert_policy(_observation):
            return env.action_space.sample()

    states, actions = _collect_demonstrations(
        expert_policy, expert_episodes, max_episode_steps
    )
    if len(states) == 0:
        raise ValueError("no demonstration data collected; "
                         "expert_episodes and max_episode_steps must be > 0")

    # Supervised fit: integer action labels against the softmax output.
    actor.compile(
        optimizer=optimizers.Adam(0.001),
        loss=losses.sparse_categorical_crossentropy,
    )
    actor.fit(states, actions, epochs=epochs, verbose=0)

    _run_greedy_episodes(eval_episodes, max_episode_steps)
# Run the training/evaluation pipeline only when executed as a script.
if __name__ == "__main__":
    train()
PREVIOUS: RL学习随记-深度确定性策略梯度
NEXT: 尝试命令GPT写一个GAN