OpenAI Gym의 CartPole-v0을 DQN으로 해결
코드
DQN 버전
import numpy as np
import gym
from gym import wrappers
from keras.layers import Flatten, Dense, Input
from keras.models import Model
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
def build_model(input_dim, output_dim, hidden_units=16, n_hidden_layers=3):
    """Build a simple feed-forward Q-network for a discrete-action task.

    Args:
        input_dim: Size of the flattened observation vector.
        output_dim: Number of discrete actions (one Q-value per action).
        hidden_units: Width of each hidden layer (default 16, as in the
            original hard-coded network).
        n_hidden_layers: Number of hidden ReLU layers (default 3).

    Returns:
        An uncompiled keras ``Model`` mapping a ``(1, input_dim)`` input
        to ``output_dim`` linear Q-value outputs.
    """
    # keras-rl prepends a window_length axis to observations, hence the
    # (1, input_dim) shape followed by an immediate Flatten.
    x_input = Input(shape=(1, input_dim))
    x = Flatten()(x_input)
    for _ in range(n_hidden_layers):
        x = Dense(hidden_units, activation="relu")(x)
    # Linear output head: raw (unbounded) Q-value estimates.
    x = Dense(output_dim, activation="linear")(x)
    return Model(inputs=x_input, outputs=x)
def run():
    """Train a DQN agent on CartPole-v0, recording episodes via Monitor."""
    environment = gym.make("CartPole-v0")
    environment = wrappers.Monitor(environment, '/tmp/cartpole-v0-dqn', force=True)

    n_actions = environment.action_space.n
    obs_dim = environment.observation_space.shape[0]
    q_network = build_model(obs_dim, n_actions)

    # Replay buffer and Boltzmann (softmax over Q-values) exploration.
    replay_buffer = SequentialMemory(limit=50000, window_length=1)
    exploration_policy = BoltzmannQPolicy()

    agent = DQNAgent(
        model=q_network,
        nb_actions=n_actions,
        memory=replay_buffer,
        policy=exploration_policy,
    )
    agent.compile("adam", metrics=["mae"])
    agent.fit(environment, nb_steps=50000, visualize=False, verbose=2)


if __name__ == "__main__":
    run()
Duel-DQN 버전
import numpy as np
import gym
from gym import wrappers
from keras.layers import Flatten, Dense, Input
from keras.models import Model
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
def build_model(input_dim, output_dim, hidden_units=16, n_hidden_layers=3):
    """Build a simple feed-forward Q-network for a discrete-action task.

    Args:
        input_dim: Size of the flattened observation vector.
        output_dim: Number of discrete actions (one Q-value per action).
        hidden_units: Width of each hidden layer (default 16, as in the
            original hard-coded network).
        n_hidden_layers: Number of hidden ReLU layers (default 3).

    Returns:
        An uncompiled keras ``Model`` mapping a ``(1, input_dim)`` input
        to ``output_dim`` linear Q-value outputs.
    """
    # keras-rl prepends a window_length axis to observations, hence the
    # (1, input_dim) shape followed by an immediate Flatten.
    x_input = Input(shape=(1, input_dim))
    x = Flatten()(x_input)
    for _ in range(n_hidden_layers):
        x = Dense(hidden_units, activation="relu")(x)
    # Linear output head: raw (unbounded) Q-value estimates.
    # Note: when DQNAgent is built with enable_dueling_network=True, keras-rl
    # rewires this final layer into value/advantage streams itself.
    x = Dense(output_dim, activation="linear")(x)
    return Model(inputs=x_input, outputs=x)
def run():
    """Train a Dueling-DQN agent on CartPole-v0, recording via Monitor."""
    environment = gym.make("CartPole-v0")
    environment = wrappers.Monitor(environment, '/tmp/cartpole-v0-duel-dqn', force=True)

    n_actions = environment.action_space.n
    obs_dim = environment.observation_space.shape[0]
    q_network = build_model(obs_dim, n_actions)

    # Replay buffer and Boltzmann (softmax over Q-values) exploration.
    replay_buffer = SequentialMemory(limit=50000, window_length=1)
    exploration_policy = BoltzmannQPolicy()

    # Dueling head with average-advantage aggregation:
    # Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a').
    agent = DQNAgent(
        model=q_network,
        nb_actions=n_actions,
        memory=replay_buffer,
        policy=exploration_policy,
        enable_dueling_network=True,
        dueling_type="avg",
    )
    agent.compile("adam", metrics=["mae"])
    agent.fit(environment, nb_steps=50000, visualize=False, verbose=2)


if __name__ == "__main__":
    run()
점수
Keras==2.0.6에서는 제대로 동작하지 않는 것으로 보입니다.
DQN: 32.98 ± 2.91
Duel-DQN: 42.46 ± 3.83
References
Plappert, Keras-RL , 2016. ↩
Mnih, Playing Atari with Deep Reinforcement Learning , 2013. ↩
Wang, Dueling Network Architectures for Deep Reinforcement Learning , 2016. ↩
Reference
이 문제에 관하여(OpenAI Gym의 CartPole-v0을 DQN으로 해결), 우리는 이곳에서 더 많은 자료를 발견하였습니다: https://qiita.com/namakemono/items/aed23d6fff1e314d3de1 . 텍스트는 자유롭게 공유하거나 복사할 수 있습니다. 다만 이 문서의 URL은 참조 URL로 남겨 두십시오.
우수한 개발자 콘텐츠 발견에 전념 (Collection and Share based on the CC Protocol.)