Camera Sensors
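
All snippets below assume a SAPIEN engine, renderer, and scene have already been created, plus the imports listed here; the module paths follow the SAPIEN 2.x tutorials:

    import numpy as np
    from PIL import Image, ImageColor
    import open3d as o3d
    from transforms3d.euler import mat2euler

    import sapien.core as sapien
    from sapien.utils.viewer import Viewer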

Create a camera

    near, far = 0.1, 100
    width, height = 640, 480
    camera = scene.add_camera(
        name="camera",
        width=width,
        height=height,
        fovy=np.deg2rad(35),
        near=near,
        far=far,
    )
    camera.set_pose(sapien.Pose(p=[1, 0, 0]))
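
Since the camera is specified by fovy, width, and height, the pinhole intrinsics follow directly: fy = height / (2 * tan(fovy / 2)), with fx = fy for square pixels and the principal point at the image center. A quick NumPy sketch (not a SAPIEN call) of the implied intrinsic matrix:

    fovy = np.deg2rad(35)
    fy = height / (2 * np.tan(fovy / 2))  # focal length in pixels
    fx = fy  # square pixels
    K = np.array([
        [fx, 0.0, width / 2],
        [0.0, fy, height / 2],
        [0.0, 0.0, 1.0],
    ])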

Mount a camera

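To mount the camera, the SAPIEN 2.x tutorial pattern is to attach it to a kinematic actor and then pose that mount; the camera then follows the actor. A sketch of that pattern (`set_parent` and `Pose.from_transformation_matrix` are SAPIEN 2.x calls):

    camera_mount_actor = scene.create_actor_builder().build_kinematic()
    camera.set_parent(parent=camera_mount_actor, keep_pose=False)

    # Compute the camera pose by specifying forward(x), left(y) and up(z)
    cam_pos = np.array([-2, -2, 3])
    forward = -cam_pos / np.linalg.norm(cam_pos)
    left = np.cross([0, 0, 1], forward)
    left = left / np.linalg.norm(left)
    up = np.cross(forward, left)

    mat44 = np.eye(4)
    mat44[:3, :3] = np.stack([forward, left, up], axis=1)
    mat44[:3, 3] = cam_pos
    camera_mount_actor.set_pose(sapien.Pose.from_transformation_matrix(mat44))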

Render an image

    scene.step()  # step the simulation once so everything is in place
    scene.update_render()
    camera.take_picture()

    # RGBA
    rgba = camera.get_float_texture('Color')  # [H, W, 4]
    # An alias is also provided
    # rgba = camera.get_color_rgba()  # [H, W, 4]
    rgba_img = (rgba * 255).clip(0, 255).astype("uint8")
    rgba_pil = Image.fromarray(rgba_img)
    rgba_pil.save('color.png')


    # Point Cloud
    # Each pixel is (x, y, z, render_depth) in camera space (OpenGL/Blender convention)
    position = camera.get_float_texture('Position')  # [H, W, 4]

    # OpenGL/Blender camera convention: y up, -z forward
    points_opengl = position[..., :3][position[..., 3] < 1]
    points_color = rgba[position[..., 3] < 1][..., :3]
    # The model matrix is the transformation from OpenGL camera space to SAPIEN world space.
    # camera.get_model_matrix() must be called after scene.update_render()!
    model_matrix = camera.get_model_matrix()
    points_world = points_opengl @ model_matrix[:3, :3].T + model_matrix[:3, 3]

    # SAPIEN camera convention: z up, x forward
    # points_camera = points_opengl[..., [2, 0, 1]] * [-1, -1, 1]

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points_world)
    pcd.colors = o3d.utility.Vector3dVector(points_color)
    coord_frame = o3d.geometry.TriangleMesh.create_coordinate_frame()
    o3d.visualization.draw_geometries([pcd, coord_frame])
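
    # Optionally write the point cloud to disk (standard Open3D I/O)
    o3d.io.write_point_cloud('cloud.pcd', pcd)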

    # Depth
    depth = -position[..., 2]
    # Save as a 16-bit PNG in millimeters
    depth_image = (depth * 1000.0).astype(np.uint16)
    depth_pil = Image.fromarray(depth_image)
    depth_pil.save('depth.png')
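
    # Round-trip sanity check (added sketch): reload the PNG and undo the
    # millimeter scaling; truncation to uint16 costs at most 1 mm per pixel
    depth_loaded = np.asarray(Image.open('depth.png'), dtype=np.float32) / 1000.0
    assert np.allclose(depth_loaded, depth, atol=1e-3)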

    # Segmentation
    # Each pixel is (visual_id, actor_id/link_id, 0, 0)
    # visual_id is the unique id of each visual shape
    seg_labels = camera.get_uint32_texture('Segmentation')  # [H, W, 4]
    colormap = sorted(set(ImageColor.colormap.values()))
    color_palette = np.array([ImageColor.getrgb(color) for color in colormap],
                             dtype=np.uint8)
    label0_image = seg_labels[..., 0].astype(np.uint8)  # mesh-level
    label1_image = seg_labels[..., 1].astype(np.uint8)  # actor-level
    # Or you can use the aliases below
    # label0_image = camera.get_visual_segmentation()
    # label1_image = camera.get_actor_segmentation()
    label0_pil = Image.fromarray(color_palette[label0_image])
    label0_pil.save('label0.png')
    label1_pil = Image.fromarray(color_palette[label1_image])
    label1_pil.save('label1.png')
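
The actor-level labels double as per-actor masks. Below is a minimal sketch of cutting one actor out of the RGB image; the id 1 is an arbitrary placeholder, so substitute the id of the actor you actually want:

    target_id = 1  # placeholder: use the id of the actor of interest
    mask = seg_labels[..., 1] == target_id
    masked = rgba_img.copy()
    masked[~mask] = 0  # zero out everything that is not the target actor
    Image.fromarray(masked).save('masked_rgba.png')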

Viewer and Rendering

    viewer = Viewer(renderer)
    viewer.set_scene(scene)
    # Set the viewer according to the pose of the camera above
    # OpenGL camera -> SAPIEN world
    model_matrix = camera.get_model_matrix()
    # SAPIEN camera -> SAPIEN world
    # (you can also infer this directly from the camera pose)
    model_matrix = model_matrix[:, [2, 0, 1, 3]] * np.array([-1, -1, 1, 1])
    # The rotation of the viewer camera is represented as [roll(x), pitch(-y), yaw(-z)]
    rpy = mat2euler(model_matrix[:3, :3]) * np.array([1, -1, -1])
    viewer.set_camera_xyz(*model_matrix[0:3, 3])
    viewer.set_camera_rpy(*rpy)
    viewer.window.set_camera_parameters(near=0.05, far=100, fovy=1)
    while not viewer.closed:
        if viewer.window.key_down('p'):  # press 'p' to take a screenshot
            rgba = viewer.window.get_float_texture('Color')
            rgba_img = (rgba * 255).clip(0, 255).astype("uint8")
            rgba_pil = Image.fromarray(rgba_img)
            rgba_pil.save('screenshot.png')
        scene.step()
        scene.update_render()
        viewer.render()