Solving OpenAI Gym's CartPole-v0 with DQN

I tried to solve OpenAI Gym's CartPole-v0 with the Keras-RL sample DQN [1].

  • There are a DQN version [2] and a Duel-DQN version [3]; the variant is configured through the DQNAgent constructor



  • Code



    DQN version


    import numpy as np
    import gym
    from gym import wrappers
    from keras.layers import Flatten, Dense, Input
    from keras.models import Model
    from rl.agents.dqn import DQNAgent
    from rl.policy import BoltzmannQPolicy
    from rl.memory import SequentialMemory
    
    def build_model(input_dim, output_dim):
        # keras-rl feeds observations with shape (window_length, obs_dim);
        # window_length is 1 below, hence the leading 1.
        x_input = Input(shape=(1, input_dim))
        x = Flatten()(x_input)
        x = Dense(16, activation="relu")(x)
        x = Dense(16, activation="relu")(x)
        x = Dense(16, activation="relu")(x)
        # Linear output head: one Q-value per action.
        x = Dense(output_dim, activation="linear")(x)
        return Model(inputs=x_input, outputs=x)
    
    def run():
        env = gym.make("CartPole-v0")
        # Record episode statistics and videos under /tmp.
        env = wrappers.Monitor(env, '/tmp/cartpole-v0-dqn', force=True)
        model = build_model(env.observation_space.shape[0], env.action_space.n)
        # Experience replay buffer; window_length must match the model input.
        memory = SequentialMemory(limit=50000, window_length=1)
        # Boltzmann (softmax over Q-values) exploration policy.
        policy = BoltzmannQPolicy()
        dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory, policy=policy)
        dqn.compile("adam", metrics=["mae"])
        dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
    
    if __name__ == "__main__":
        run()
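
    To put a number on the trained agent, keras-rl's test method can run evaluation episodes after fit and log each episode's reward. A minimal sketch appended to run() (nb_episodes=10 is an arbitrary choice, not from the original script):

    # Run evaluation episodes with the learned policy and print episode rewards.
    dqn.test(env, nb_episodes=10, visualize=False)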
    

    Duel-DQN version


    import numpy as np
    import gym
    from gym import wrappers
    from keras.layers import Flatten, Dense, Input
    from keras.models import Model
    from rl.agents.dqn import DQNAgent
    from rl.policy import BoltzmannQPolicy
    from rl.memory import SequentialMemory
    
    def build_model(input_dim, output_dim):
        x_input = Input(shape=(1, input_dim))
        x = Flatten()(x_input)
        x = Dense(16, activation="relu")(x)
        x = Dense(16, activation="relu")(x)
        x = Dense(16, activation="relu")(x)
        x = Dense(output_dim, activation="linear")(x)
        return Model(inputs=x_input, outputs=x)
    
    def run():
        env = gym.make("CartPole-v0")
        env = wrappers.Monitor(env, '/tmp/cartpole-v0-duel-dqn', force=True)
        model = build_model(env.observation_space.shape[0], env.action_space.n)
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        # enable_dueling_network splits the output layer into state-value and
        # advantage streams; dueling_type="avg" subtracts the mean advantage.
        dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory, policy=policy, enable_dueling_network=True, dueling_type="avg")
        dqn.compile("adam", metrics=["mae"])
        dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
    
    if __name__ == "__main__":
        run()
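
    For context, enable_dueling_network rebuilds the model's output head into a state-value stream V(s) and an advantage stream A(s, a), and with dueling_type="avg" recombines them as Q(s, a) = V(s) + A(s, a) - mean(A(s, ·)). The sketch below shows roughly how that aggregation looks in plain Keras; build_dueling_head is an illustrative helper, not keras-rl's exact code:

    from keras.layers import Dense, Lambda
    import keras.backend as K

    def build_dueling_head(x, nb_actions):
        # One linear layer emits V(s) at index 0 and A(s, a) at indices 1..nb_actions.
        y = Dense(nb_actions + 1, activation="linear")(x)
        # "avg" aggregation: Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a')
        return Lambda(
            lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(a[:, 1:], axis=1, keepdims=True),
            output_shape=(nb_actions,),
        )(y)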
    

    Scores


  • I ran both for roughly 50,000 steps, but neither variant solved the environment.
  • Tuning the DQNAgent constructor parameters might make it solvable, but under Keras==2.0.6 they reportedly no longer work correctly (see the sketch after this list).
  • DQN: 32.98 ± 2.91
    Duel-DQN: 42.46 ± 3.83
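
    For reference, keras-rl's own dqn_cartpole.py example passes a couple of constructor parameters explicitly; a sketch of that tuning is below (the values follow the upstream example, and whether they behave correctly under Keras==2.0.6 is exactly what is in question here):

    # Constructor parameters as in keras-rl's dqn_cartpole.py example;
    # untested here against Keras==2.0.6.
    dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
                   policy=policy,
                   nb_steps_warmup=10,         # steps collected before learning starts
                   target_model_update=1e-2)   # soft update rate of the target network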
    

    References





    [1] Plappert, Keras-RL, 2016.

    [2] Mnih et al., Playing Atari with Deep Reinforcement Learning, 2013.

    [3] Wang et al., Dueling Network Architectures for Deep Reinforcement Learning, 2016.
