Getting Started

Install CoppeliaSim from https://www.coppeliarobotics.com/downloads
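Once CoppeliaSim is installed (and PyRep itself, following the instructions in the PyRep repository), a quick way to confirm the setup is to launch and immediately shut down a headless simulator instance. The snippet below is a minimal sketch, not part of the example that follows; it assumes PyRep's launch() can be called without a scene file (it then opens an empty scene) and that CoppeliaSim's environment variables are already configured.

from pyrep import PyRep

# Smoke test: launch an empty, headless CoppeliaSim instance, step it once, close it.
pr = PyRep()
pr.launch(headless=True)   # no scene file -> empty scene, no GUI
pr.start()                 # start the physics simulation
pr.step()                  # advance the simulation by one step
pr.stop()                  # stop the simulation
pr.shutdown()              # close the simulator process
print('PyRep and CoppeliaSim are working.')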

Minimal RL example

Here we show a minimal RL example from https://github.com/stepjam/PyRep/blob/master/examples/example_reinforcement_learning_env.py

Before running, make sure the scene_reinforcement_learning_env.ttt file is in the same directory as the script: the script builds the scene path relative to its own location.
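If you are unsure whether the scene file is where the script expects it, a small check like the one below (a sketch that mirrors the script's own path resolution) fails early with a clear message instead of letting the simulator fail to load the scene.

from os.path import dirname, join, abspath, isfile

# Resolve the scene path exactly as the example script does, and verify it exists.
SCENE_FILE = join(dirname(abspath(__file__)),
                  'scene_reinforcement_learning_env.ttt')
if not isfile(SCENE_FILE):
    raise FileNotFoundError(
        'Scene not found at %s; copy scene_reinforcement_learning_env.ttt '
        'from the PyRep examples directory next to this script.' % SCENE_FILE)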


"""
An example of how one might use PyRep to create their RL environments.
In this case, the Franka Panda must reach a randomly placed target.
This script contains examples of:
- RL environment example.
- Scene manipulation.
- Environment resets.
- Setting joint properties (control loop disabled, motor locked at 0 vel)
"""
from os.path import dirname, join, abspath
from pyrep import PyRep
from pyrep.robots.arms.panda import Panda
from pyrep.objects.shape import Shape
import numpy as np

SCENE_FILE = join(dirname(abspath(__file__)),
                  'scene_reinforcement_learning_env.ttt')
POS_MIN, POS_MAX = [0.8, -0.2, 1.0], [1.0, 0.2, 1.4]
EPISODES = 5
EPISODE_LENGTH = 200


class ReacherEnv(object):

    def __init__(self):
        self.pr = PyRep()
        self.pr.launch(SCENE_FILE, headless=False)
        self.pr.start()
        self.agent = Panda()
        self.agent.set_control_loop_enabled(False)
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')
        self.agent_ee_tip = self.agent.get_tip()
        self.initial_joint_positions = self.agent.get_joint_positions()

    def _get_state(self):
        # Return state containing arm joint angles/velocities & target position
        return np.concatenate([self.agent.get_joint_positions(),
                               self.agent.get_joint_velocities(),
                               self.target.get_position()])

    def reset(self):
        # Get a random position within a cuboid and set the target position
        pos = list(np.random.uniform(POS_MIN, POS_MAX))
        self.target.set_position(pos)
        self.agent.set_joint_positions(self.initial_joint_positions)
        return self._get_state()

    def step(self, action):
        self.agent.set_joint_target_velocities(action)  # Execute action on arm
        self.pr.step()  # Step the physics simulation
        ax, ay, az = self.agent_ee_tip.get_position()
        tx, ty, tz = self.target.get_position()
        # Reward is negative distance to target
        reward = -np.sqrt((ax - tx) ** 2 + (ay - ty) ** 2 + (az - tz) ** 2)
        return reward, self._get_state()

    def shutdown(self):
        self.pr.stop()
        self.pr.shutdown()


class Agent(object):

    def act(self, state):
        del state
        return list(np.random.uniform(-1.0, 1.0, size=(7,)))

    def learn(self, replay_buffer):
        del replay_buffer
        pass


env = ReacherEnv()
agent = Agent()
replay_buffer = []

for e in range(EPISODES):

    print('Starting episode %d' % e)
    state = env.reset()
    for i in range(EPISODE_LENGTH):
        action = agent.act(state)
        reward, next_state = env.step(action)
        replay_buffer.append((state, action, reward, next_state))
        state = next_state
        agent.learn(replay_buffer)

print('Done!')
env.shutdown()