Add Python version of vision code to estimate pose

Contains sample training images, and computes 3D pose based on the 3D
locations of feature points.
Includes a Python library for training on images and matching features,
along with example code to run it.
Helper functions to display position on the field.
Cleaned up a few items from Alex's comments.
Removed matplotlib import, since I'm not using it anymore.
Ran yapf to clean up python formatting.
Change-Id: Ieaa4beb6c381b65ff62db19d27a5952946573926
diff --git a/y2020/vision/tools/python_code/field_display.py b/y2020/vision/tools/python_code/field_display.py
new file mode 100644
index 0000000..a3f1024
--- /dev/null
+++ b/y2020/vision/tools/python_code/field_display.py
@@ -0,0 +1,143 @@
+import cv2
+import math
+import numpy as np
+
+
+def load_field_image():
+ field_image = cv2.imread('test_images/field_cropped_scaled.png')
+ return field_image
+
+
+def show_field(img_in=None):
+ if (img_in is None):
+        img_in = load_field_image()
+
+ cv2.imshow('Field', img_in), cv2.waitKey()
+ return
+
+
+# Convert field coordinates (meters) to image coordinates (pixels).
+# Field origin is (x,y) at center of red alliance driver station wall,
+# with x pointing into the field.
+# The default field image is 1598 x 821 pixels, so 1 cm per pixel on field
+# I.e., Field is 1598 x 821 pixels = 15.98 x 8.21 meters
+# Our origin in image coordinates is at (1598, 410) pixels, facing in the -x image direction
+
+
+def field_coord_to_image_coord(robot_position):
+    # Scale by 100 pixels / meter (1 cm per pixel)
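+    # For example (illustrative numbers): a robot at field coords (1.0, 2.0) m
+    # maps to pixel (1598, 410) + 100 * (-1.0, 2.0) = (1498, 610).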
+ robot_pos_img_coord = np.array([[1598, 410]]).T + np.int32(
+ 100.0 * np.array([[-robot_position[0][0], robot_position[1][0]]]).T)
+ return robot_pos_img_coord
+
+
+# Given a field image, plot the robot and an optional heading vector on it
+def plot_bot_on_field(img_in, color, robot_position, robot_heading=None):
+ if img_in is None:
+ img_out = load_field_image()
+ else:
+ img_out = img_in.copy()
+
+ robot_pos_img_coord = field_coord_to_image_coord(robot_position)
+
+ ROBOT_RADIUS = 10
+ img_out = cv2.circle(
+ img_out, (robot_pos_img_coord[0][0], robot_pos_img_coord[1][0]),
+ ROBOT_RADIUS, color, 3)
+
+ if (robot_heading is not None):
+ img_out = cv2.line(
+ img_out, (robot_pos_img_coord[0][0], robot_pos_img_coord[1][0]),
+ (int(robot_pos_img_coord[0][0] -
+ 3 * ROBOT_RADIUS * math.cos(robot_heading)),
+ int(robot_pos_img_coord[1][0] +
+ 3 * ROBOT_RADIUS * math.sin(robot_heading))), color, 3,
+ cv2.LINE_AA)
+
+ return img_out
+
+
+# Plot a line on the field, given starting and ending field positions
+def plot_line_on_field(img_in, color, start_position, end_position):
+ if img_in is None:
+ img_out = load_field_image()
+ else:
+ img_out = img_in.copy()
+
+ start_pos_img_coord = field_coord_to_image_coord(start_position)
+ end_pos_img_coord = field_coord_to_image_coord(end_position)
+
+ img_out = cv2.line(img_out,
+ (start_pos_img_coord[0][0], start_pos_img_coord[1][0]),
+ (end_pos_img_coord[0][0], end_pos_img_coord[1][0]),
+ color, 3, cv2.LINE_AA)
+
+ return img_out
+
+
+# Helper function to plot a few quantities like
+# Heading (angle of the target relative to the robot)
+# Distance (of the target to the robot)
+# Skew (angle of the target normal relative to the robot)
+def plot_camera_to_target(img_in, color, heading, distance, skew):
+ if img_in is None:
+ img_out = np.zeros((821, 1598, 3), np.uint8)
+ else:
+ img_out = img_in
+
+ # Bot location is at origin
+ bot_location = np.array([[0., 0.]]).T
+
+ # Target location is distance away along the heading
+ target_location = bot_location + np.array(
+ [[distance * math.cos(heading), distance * math.sin(heading)]]).T
+
+ # Create part of a line from the target location to the end of the target, based on the skew
+ skew_line_offset = np.array(
+ [[math.cos(skew + math.pi / 2),
+ math.sin(skew + math.pi / 2)]]).T
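+    # skew_line_offset is a unit vector rotated 90 degrees from the skew
+    # direction, so target_location +/- skew_line_offset spans a 2 m segment
+    # through the target, roughly indicating the target face orientation.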
+
+ # Plot bot origin
+ img_out = plot_bot_on_field(img_out, color, bot_location)
+
+ # Plot heading line
+ img_out = plot_line_on_field(img_out, (255, 255, 0), bot_location,
+ target_location)
+
+ # Plot target location
+ img_out = plot_bot_on_field(img_out, (255, 0, 0), target_location)
+
+ # Plot target rotation
+ img_out = plot_line_on_field(img_out, (0, 255, 0),
+ target_location - skew_line_offset,
+ target_location + skew_line_offset)
+
+ return img_out
+
+
+# Helper function to take camera rotation and bring it back to world coordinate frame
+def camera_rot_to_world(R_mat):
+ R_c_w_mat = np.array([[0., 0., 1.], [-1, 0, 0], [0, -1., 0]])
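+    # R_c_w_mat maps camera axes onto world axes: camera z (out of the lens)
+    # becomes world x (downfield), camera x (right) becomes world -y, and
+    # camera y (down) becomes world -z.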
+
+ return (R_mat).dot(R_c_w_mat.T)
+
+
+def camera_rot_to_world_Rodrigues(R):
+ R_mat = cv2.Rodrigues(R)[0]
+ R_mat_world = camera_rot_to_world(R_mat)
+ R_world = cv2.Rodrigues(R_mat_world)[0]
+ return R_world
+
+
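+# Invert a pose given as a Rodrigues rotation vector R and translation T.
+# For a transform p' = R_mat * p + T, the inverse is p = R_mat.T * p' - R_mat.T * T,
+# which is what both invert_pose functions below compute.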
+def invert_pose_Rodrigues(R, T):
+ R_inv_mat = cv2.Rodrigues(R)[0].T
+ T_inv = -R_inv_mat.dot(T)
+ R_inv = cv2.Rodrigues(R_inv_mat)[0]
+ return R_inv, T_inv
+
+
+def invert_pose_mat(R, T):
+ R_inv = R.T
+ T_inv = -R_inv.dot(T)
+ return R_inv, T_inv
diff --git a/y2020/vision/tools/python_code/image_match_test.py b/y2020/vision/tools/python_code/image_match_test.py
new file mode 100644
index 0000000..bcf610f
--- /dev/null
+++ b/y2020/vision/tools/python_code/image_match_test.py
@@ -0,0 +1,307 @@
+import cv2
+import math
+from matplotlib import pyplot as plt
+import numpy as np
+import time
+
+import field_display
+import train_and_match as tam
+
+### DEFINITIONS
+
+# List of images to train on
+training_image_names = [
+ 'test_images/train_power_port_blue.png',
+ # Using webcam captured image for testing-- best to use only one of the two
+ # training images for the power_port_red
+ 'test_images/train_power_port_red_webcam.png',
+ # 'test_images/train_power_port_red.png',
+ 'test_images/train_loading_bay_blue.png',
+ 'test_images/train_loading_bay_red.png'
+]
+
+# Target points on the image -- needs to match training images-- one per image
+# These are manually captured by examining the images, and entering the pixel
+# values from the target center for each image.
+# These are currently only used for visualization of the target
+target_pt_list = [
+ np.float32([[393, 147]]).reshape(-1, 1, 2), # train_power_port_blue.png
+ np.float32([[305, 97]]).reshape(-1, 1, 2), #train_power_port_red_webcam.png
+ # np.float32([[570,192]]).reshape(-1,1,2), # train_power_port_red.png
+ np.float32([[366, 236]]).reshape(-1, 1, 2), # train_loading_bay_blue.png
+ np.float32([[366, 236]]).reshape(-1, 1, 2), # train_loading_bay_red.png
+]
+
+# List of all possible images we want to test; we'll select one of them below
+# Note, if you query on the same image as training, only that image will match
+query_image_names = [
+ 'test_images/train_power_port_red.png', #0
+ 'test_images/train_power_port_blue.png', #1
+ 'test_images/test_game_manual.png', #2
+ 'test_images/test_train_shift_left_100.png', #3
+ 'test_images/test_train_shift_right_100.png', #4
+ 'test_images/test_train_down_2x.png', #5
+ 'test_images/test_train_down_4x.png', #6
+ 'test_images/test_raspi3_sample.jpg', #7
+ 'test_images/test_VR_sample1.png', #8
+ 'test_images/train_loading_bay_blue.png', #9
+    'test_images/train_loading_bay_red.png'  #10
+]
+
+training_image_index = 0
+# TODO: Should add argparse here to select this
+query_image_index = 0 # Use -1 to use camera capture; otherwise index above list
+
+##### Let's get to work!
+
+# Load the training and query images
+training_images = tam.load_images(training_image_names)
+
+# Create feature extractor
+feature_extractor = tam.load_feature_extractor()
+
+# Extract keypoints and descriptors for model
+train_keypoint_lists, train_descriptor_lists = tam.detect_and_compute(
+ feature_extractor, training_images)
+
+# Create the matcher. This only needs to be created once
+ts = time.monotonic() # Do some basic timing
+matcher = tam.train_matcher(train_descriptor_lists)
+tf = time.monotonic()
+print("Done training matcher, took ", tf - ts, " secs")
+for i in range(len(train_keypoint_lists)):
+ print("Model ", i, " has ", len(train_keypoint_lists[i]), " features: ")
+
+# Load the query image in. Based on index in our list, or using camera if index is -1
+query_images = []
+
+if (query_image_index == -1):
+ # Based on /dev/videoX setting
+ CAMERA_INDEX = 2
+ print("#### Using camera at /dev/video%d" % CAMERA_INDEX)
+ # Open the device at the ID X for /dev/videoX
+ cap = cv2.VideoCapture(CAMERA_INDEX)
+
+    # Check whether the user-selected camera opened successfully.
+ if not (cap.isOpened()):
+ print("Could not open video device")
+ quit()
+
+ # Capture frame-by-frame
+ ret, frame = cap.read()
+ query_images.append(frame)
+
+else:
+ # Load default images
+ query_images = tam.load_images([query_image_names[query_image_index]])
+
+# For storing our pose estimates, to measure noise
+log_file = open("pose.out", 'w')
+
+looping = True
+# Grab images until 'q' is pressed (or just once for canned images)
+while (looping):
+    if (query_image_index == -1):
+ # Capture frame-by-frame
+ ret, frame = cap.read()
+ query_images[0] = frame
+ else:
+ looping = False
+
+ # Extract features from query image
+ query_keypoint_lists, query_descriptor_lists = tam.detect_and_compute(
+ feature_extractor, query_images)
+ print("Query image has ", len(query_keypoint_lists[0]), " features")
+
+ ts = time.monotonic()
+ good_matches_list = tam.compute_matches(matcher, train_descriptor_lists,
+ query_descriptor_lists)
+ tf = time.monotonic()
+ print("Done finding matches (took ", tf - ts, " secs)")
+ for i in range(len(train_keypoint_lists)):
+ print("Model ", i, " has # good matches: ", len(good_matches_list[i]))
+
+ # TEMP: Use first list for what follows
+ # TODO: Should look for best match after homography and display that one
+ # OR: show results on all good matches
+ good_matches = good_matches_list[0]
+
+ # Next, find homography (map between training and query images)
+ homography_list, matches_mask_list = tam.compute_homographies(
+ train_keypoint_lists, query_keypoint_lists, good_matches_list)
+
+ if matches_mask_list[0].count(1) < tam.MIN_MATCH_COUNT:
+ print(
+ "Not continuing pose calc because not enough good matches after homography-- only had ",
+ matches_mask_list[0].count(1))
+ continue
+
+ # Next, let's go to compute pose
+ # I've chosen some coordinate frames to help track things. Using indices:
+ # w: world coordinate frame (at center wall of driver station)
+    # tarj: camera pose when jth target (e.g., power port) was captured
+ # ci: ith camera
+ # b: body frame of robot (robot location)
+ #
+ # And, T for translation, R for rotation, "est" for estimated
+ # So, T_w_b_est is the estimated translation from world to body coords
+ #
+ # A few notes:
+ # -- CV uses rotated frame, where z points out of camera, y points down
+ # -- This causes a lot of mapping around of points, but overall OK
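+    # For example, T_w_ci_estj below is the estimated translation from the
+    # world frame to the ith camera, computed from the match against target j.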
+
+ ### TODO: Still need to clean this rest up
+
+ # TODO: Need to generalize this for each target-- need to have 3D global locations of each target's keypoints
+ field_length = 15.98
+ target_center_offset_y = 1.67
+ target_center_height = 2.49
+    if training_image_index == 0:
+ # webcam
+ fx = 810.
+ fy = 810.
+ cx = 320.
+ cy = 240.
+ cy_above_ground = 1. # meters above ground
+ depth = 4.57 # meters
+ target_center_offset_y = 1.67
+    elif training_image_index == 1:
+        # Made up for screenshot from manual
+        cx = int(1165 / 2)
+        cy_above_ground = -0.5856  # meters above ground
+        cy = 776 / 2
+        fx = 1143  # pixels (or 0.00160m or 1143 pixels?)
+        fy = fx  # assume square pixels
+        depth = 4.57  # meters
+    elif training_image_index == 2:
+        cx = 732 / 2
+        cy_above_ground = 0.454  # meters above ground
+        cy = 436 / 2
+        target_width = 5 * 12 * 0.0254  # width in ft * in/ft * m/in
+        target_height = target_width * cy / cx  # scale width by image aspect ratio
+        FOV_x = 54 * math.pi / 180.  # camera FOV in radians (from 54 degree lens)
+        fx = cx / math.tan(FOV_x / 2)  # pixels
+        fy = fx  # assume square pixels
+        depth = (target_height / 2) / math.tan(FOV_x / 2)  # meters
+
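+    # Rough numbers for intuition (using the webcam intrinsics above): with
+    # fx = 810 and depth = 4.57 m, a keypoint 100 pixels right of the image
+    # center back-projects to roughly 100 * 4.57 / 810 ~= 0.56 m on the -y
+    # side of the target center.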
+ src_pts_3d = []
+ for m in good_matches:
+ # Project the matches all back to 3D, assuming they're all at the same depth
+ # Add in the distance across the field and any lateral offset of the target
+ src_pts_3d.append([(
+ field_length,
+ -(train_keypoint_lists[training_image_index][m.trainIdx].pt[0] - cx
+ ) * depth / fx - target_center_offset_y,
+ -(train_keypoint_lists[training_image_index][m.trainIdx].pt[1] - cy
+ ) * depth / fx + cy_above_ground)])
+
+ # Reshape 3d point list to work with computations to follow
+ src_pts_3d_array = np.asarray(np.float32(src_pts_3d)).reshape(-1, 3, 1)
+
+ cam_mat = np.asarray([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+ distortion_coeffs = np.zeros((5, 1))
+
+ # Create list of matching query point locations
+ dst_pts = np.float32([
+ query_keypoint_lists[0][m.queryIdx].pt for m in good_matches
+ ]).reshape(-1, 1, 2)
+
+ ts = time.monotonic()
+ # TODO: May want to supply it with estimated guess as starting point
+ # Find offset from camera to original location of camera relative to target
+ retval, R_ci_w_estj, T_ci_w_estj, inliers = cv2.solvePnPRansac(
+ src_pts_3d_array, dst_pts, cam_mat, distortion_coeffs)
+
+ tf = time.monotonic()
+ print("Solving Pose took ", tf - ts, " secs")
+ print("Found ", len(inliers), " inliers, out of ", len(dst_pts),
+ " matched points")
+ ### THIS is the estimate of the robot on the field!
+ R_w_ci_estj, T_w_ci_estj = field_display.invert_pose_Rodrigues(
+ R_ci_w_estj, T_ci_w_estj)
+ print("Pose from PnP is:\n", R_ci_w_estj, "\n", T_ci_w_estj)
+ # The rotation estimate is for camera with z pointing forwards.
+ # Compute the rotation as if x is forward
+ R_w_ci_estj_robot = field_display.camera_rot_to_world_Rodrigues(
+ R_w_ci_estj)
+ print("Pose in world coords is:\n", R_w_ci_estj, "\n", T_w_ci_estj,
+ "\nWith robot coord frame rotation as \n", R_w_ci_estj_robot)
+
+ # Use the rotational component about the z axis to define the heading
+ heading_est = R_w_ci_estj_robot[2][0]
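+    # (R_w_ci_estj_robot is a Rodrigues axis-angle vector, so its z component
+    # approximates the yaw when roll and pitch are small.)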
+ # Plot location of the robot, along with heading
+ img_ret = field_display.plot_bot_on_field(None, (0, 255, 255), T_w_ci_estj,
+ heading_est)
+
+ # Compute vector to target
+ T_w_target_pt = np.array(
+ [[field_length, -target_center_offset_y, target_center_height]]).T
+ vector_to_target = T_w_target_pt - T_w_ci_estj
+ # Compute distance to target (full 3D)
+ distance_to_target = np.linalg.norm(vector_to_target)
+ # Compute distance to target (x,y)
+ distance_to_target_ground = np.linalg.norm(vector_to_target[0:2])
+ #print("Distance comparison: all: ", distance_to_target, " vs. ground: ", distance_to_target_ground)
+
+ angle_to_target_abs = math.atan2(vector_to_target[1][0],
+ vector_to_target[0][0])
+ angle_to_target_robot = angle_to_target_abs - heading_est
+ img_ret = field_display.plot_line_on_field(img_ret, (255, 255, 0),
+ T_w_ci_estj, T_w_target_pt)
+ # THESE ARE OUR ESTIMATES OF HEADING, DISTANCE, AND SKEW TO TARGET
+ log_file.write('%lf, %lf, %lf\n' % (heading_est, distance_to_target_ground,
+ angle_to_target_robot))
+
+ # A bunch of code to visualize things...
+ #
+ # Draw target on the image
+ h, w, ch = query_images[0].shape
+ query_image_copy = query_images[0].copy()
+ # Map target_pt onto image, for display
+ transformed_target = cv2.perspectiveTransform(target_pt_list[0],
+ homography_list[0])
+ # Ballpark the size of the circle so it looks right on image
+ radius = int(32 * abs(homography_list[0][0][0] + homography_list[0][1][1])
+ / 2) # Average of scale factors
+ query_image_copy = cv2.circle(
+ query_image_copy,
+ (transformed_target.flatten()[0], transformed_target.flatten()[1]),
+ radius, (0, 255, 0), 3)
+
+ # If we're querying static images, show full results
+    if (query_image_index != -1):
+ homography_list, matches_mask_list = tam.show_results(
+ training_images, train_keypoint_lists, query_images,
+ query_keypoint_lists, target_pt_list, good_matches_list)
+
+ # Create empty image the size of the field
+ img_heading = field_display.load_field_image()
+ img_heading[:, :, :] = 0
+ f_h, f_w, f_ch = img_heading.shape
+
+ # Create heading view, and paste it to bottom of field
+ img_heading = field_display.plot_camera_to_target(
+ img_heading, (0, 255, 0), heading_est, distance_to_target_ground,
+ angle_to_target_robot)
+ vis = np.concatenate((img_ret, img_heading), axis=0)
+
+ # Paste query image to right of other views (scale to keep aspect ratio)
+ img_query_scaled = cv2.resize(query_image_copy,
+ (int(2 * w * f_h / h), 2 * f_h))
+ vis = np.concatenate((vis, img_query_scaled), axis=1)
+
+ # Scale down to make it fit on screen
+ vis = cv2.resize(vis, (int(vis.shape[1] / 4), int(vis.shape[0] / 4)))
+ cv2.imshow('field_display', vis)
+
+    # Wait for user input to quit the application
+    pause_time = 0
+    if (query_image_index == -1):
+ pause_time = 1
+ if cv2.waitKey(pause_time) & 0xFF == ord('q'):
+ break
+
+# Done. Clean things up
+if (query_image_index == -1):
+ # When everything done, release the capture
+ cap.release()
+
+cv2.destroyAllWindows()
diff --git a/y2020/vision/tools/python_code/test_images/field_cropped_scaled.png b/y2020/vision/tools/python_code/test_images/field_cropped_scaled.png
new file mode 100644
index 0000000..c971c19
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/field_cropped_scaled.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_VR_sample1.png b/y2020/vision/tools/python_code/test_images/test_VR_sample1.png
new file mode 100644
index 0000000..86495b2
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_VR_sample1.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_game_manual.png b/y2020/vision/tools/python_code/test_images/test_game_manual.png
new file mode 100644
index 0000000..825f1c3
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_game_manual.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_raspi3_sample.jpg b/y2020/vision/tools/python_code/test_images/test_raspi3_sample.jpg
new file mode 100644
index 0000000..376149c
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_raspi3_sample.jpg
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_train_down_2x.png b/y2020/vision/tools/python_code/test_images/test_train_down_2x.png
new file mode 100644
index 0000000..b004c51
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_train_down_2x.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_train_down_4x.png b/y2020/vision/tools/python_code/test_images/test_train_down_4x.png
new file mode 100644
index 0000000..e217280
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_train_down_4x.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_train_shift_left_100.png b/y2020/vision/tools/python_code/test_images/test_train_shift_left_100.png
new file mode 100644
index 0000000..42165f8
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_train_shift_left_100.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_train_shift_right_100.png b/y2020/vision/tools/python_code/test_images/test_train_shift_right_100.png
new file mode 100644
index 0000000..9e7c274
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_train_shift_right_100.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_loading_bay.xcf b/y2020/vision/tools/python_code/test_images/train_loading_bay.xcf
new file mode 100644
index 0000000..6bc62f5
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_loading_bay.xcf
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_loading_bay_blue.png b/y2020/vision/tools/python_code/test_images/train_loading_bay_blue.png
new file mode 100644
index 0000000..c3c0aea
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_loading_bay_blue.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_loading_bay_red.png b/y2020/vision/tools/python_code/test_images/train_loading_bay_red.png
new file mode 100644
index 0000000..42091a6
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_loading_bay_red.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_power_port_blue.png b/y2020/vision/tools/python_code/test_images/train_power_port_blue.png
new file mode 100644
index 0000000..6ade26f
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_power_port_blue.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_power_port_red.png b/y2020/vision/tools/python_code/test_images/train_power_port_red.png
new file mode 100644
index 0000000..9d2f0bf
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_power_port_red.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_power_port_red.xcf b/y2020/vision/tools/python_code/test_images/train_power_port_red.xcf
new file mode 100644
index 0000000..ad6bed1
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_power_port_red.xcf
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_power_port_red_webcam.png b/y2020/vision/tools/python_code/test_images/train_power_port_red_webcam.png
new file mode 100644
index 0000000..6115348
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_power_port_red_webcam.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/train_and_match.py b/y2020/vision/tools/python_code/train_and_match.py
new file mode 100644
index 0000000..8fed4c3
--- /dev/null
+++ b/y2020/vision/tools/python_code/train_and_match.py
@@ -0,0 +1,291 @@
+import cv2
+import math
+from matplotlib import pyplot as plt
+import numpy as np
+#import tkinter
+# pip install pillow
+#import PIL.Image, PIL.ImageTk
+import time
+
+import field_display
+
+### DEFINITIONS
+MIN_MATCH_COUNT = 10 # 10 is min; more gives better matches
+FEATURE_EXTRACTOR_NAME = 'SIFT'
+QUERY_INDEX = 0 # We use a list for both training and query info, but only ever have one query item
+
+
+# Calculates rotation matrix to euler angles
+# The result is the same as MATLAB except the order
+# of the euler angles ( x and z are swapped ).
+# Input: R: numpy 3x3 rotation matrix
+# Output: numpy 3x1 vector of Euler angles (x,y,z)
+def rotationMatrixToEulerAngles(R):
+ sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
+
+ singular = sy < 1e-6
+ if not singular:
+ x = math.atan2(R[2, 1], R[2, 2])
+ y = math.atan2(-R[2, 0], sy)
+ z = math.atan2(R[1, 0], R[0, 0])
+ else:
+ x = math.atan2(-R[1, 2], R[1, 1])
+ y = math.atan2(-R[2, 0], sy)
+ z = 0
+
+ return np.array([x, y, z])
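+# Example (illustrative): a pure 90 degree yaw,
+#   R = np.array([[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]])
+# gives rotationMatrixToEulerAngles(R) ~= [0., 0., pi/2].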
+
+
+# Load images based on list of image names
+# Return list containing loaded images
+def load_images(image_names):
+ image_list = []
+ for im in image_names:
+ # Load image (in color; let opencv convert to B&W for features)
+ img_data = cv2.imread(im)
+ if img_data is None:
+ print("Failed to load image: ", im)
+ exit()
+ else:
+ image_list.append(img_data)
+
+ return image_list
+
+
+# Load feature extractor based on extractor name
+# Returns feature extractor object
+def load_feature_extractor():
+    if FEATURE_EXTRACTOR_NAME == 'SIFT':
+ # Initiate SIFT detector
+ feature_extractor = cv2.xfeatures2d.SIFT_create()
+    elif FEATURE_EXTRACTOR_NAME == 'SURF':
+ # Initiate SURF detector
+ feature_extractor = cv2.xfeatures2d.SURF_create()
+    elif FEATURE_EXTRACTOR_NAME == 'ORB':
+ # Initiate ORB detector
+ feature_extractor = cv2.ORB_create()
+
+ return feature_extractor
+
+
+# Run feature detector and compute feature descriptions on image_list
+# Input: feature_extractor object
+# image_list: list containing images
+# Return: Lists of keypoints and lists of feature descriptors, one for each image
+def detect_and_compute(feature_extractor, image_list):
+ descriptor_lists = []
+ keypoint_lists = []
+ if (FEATURE_EXTRACTOR_NAME in ('SIFT', 'SURF')):
+ for i in range(len(image_list)):
+ # find the keypoints and descriptors with SIFT
+ kp, des = feature_extractor.detectAndCompute(image_list[i], None)
+ descriptor_lists.append(des)
+ keypoint_lists.append(kp)
+    elif FEATURE_EXTRACTOR_NAME == 'ORB':
+        # TODO: Check whether ORB extractor can do detectAndCompute.
+        # If so, we don't need to have this branch for ORB
+        for i in range(len(image_list)):
+            # find the keypoints and descriptors with ORB
+            kp = feature_extractor.detect(image_list[i], None)
+            kp, des = feature_extractor.compute(image_list[i], kp)
+            keypoint_lists.append(kp)
+            descriptor_lists.append(des)
+
+ return keypoint_lists, descriptor_lists
+
+
+# Given a keypoint descriptor list, create a matcher
+# Input: descriptor_lists: List of descriptors, one for each training image
+# Returns: a keypoint matcher trained on all descriptors, indexed by image
+def train_matcher(descriptor_lists):
+ if (FEATURE_EXTRACTOR_NAME in ('SIFT', 'SURF')):
+ # Use FLANN KD tree for SIFT & SURF
+ FLANN_INDEX_KDTREE = 0
+ index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
+ search_params = dict(checks=50)
+
+ matcher = cv2.FlannBasedMatcher(index_params, search_params)
+ matcher.add(descriptor_lists)
+ matcher.train()
+    elif FEATURE_EXTRACTOR_NAME == 'ORB':
+ # Use FLANN LSH for ORB
+ FLANN_INDEX_LSH = 6
+ index_params = dict(
+ algorithm=FLANN_INDEX_LSH,
+ table_number=6, # 12
+ key_size=12, # 20
+ multi_probe_level=2) #2
+ search_params = dict(checks=50)
+ matcher = cv2.FlannBasedMatcher(index_params, search_params)
+ # Other option: BFMatcher with default params
+ # NOTE: I think Brute Force matcher can only do 1 match at a time,
+ # rather than loading keypoints from all model images
+ #matcher = cv2.BFMatcher()
+
+ return matcher
+
+
+# Given a matcher and the query descriptors (and for ORB the training list),
+# Compute the best matches, filtering using the David Lowe magic ratio of 0.7
+# Return list of good match lists (one set of good matches for each training image for SIFT/SURF)
+def compute_matches(matcher, train_descriptor_lists, query_descriptor_lists):
+
+ # We'll create a match list for each of the training images
+ good_matches_list = []
+ if (FEATURE_EXTRACTOR_NAME in ('SIFT', 'SURF')):
+ # We're just doing one query at a time, so grab the first from the list
+ desc_query = query_descriptor_lists[QUERY_INDEX]
+ matches = matcher.knnMatch(desc_query, k=2)
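+        # With k=2, knnMatch returns the two nearest training descriptors for
+        # each query descriptor (searched across all added training images);
+        # the ratio test below keeps a match only when the best distance is
+        # clearly better than the second best.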
+
+ for train_index in range(len(train_descriptor_lists)):
+            # Store all the good matches, per Lowe's ratio test.
+            # (Lowe originally proposed a 0.7 ratio; 0.75 was later suggested as
+            # a better option. We'll go with the more conservative choice, i.e.,
+            # fewer but better matches, for now.)
+ good_matches = []
+ for m, n in matches:
+                if m.distance < 0.7 * n.distance and m.imgIdx == train_index:
+ good_matches.append(m)
+
+ good_matches_list.append(good_matches)
+
+    elif FEATURE_EXTRACTOR_NAME == 'ORB':
+        desc_query = query_descriptor_lists[QUERY_INDEX]
+        matches = matcher.knnMatch(desc_query, train_descriptor_lists[0], k=2)
+ print(matches)
+ good_matches = []
+ for m in matches:
+ if m:
+ if len(m) == 2:
+ print(m[0].distance, m[1].distance)
+ if m[0].distance < 0.7 * m[1].distance:
+ good_matches.append(m[0])
+ print(m[0].distance)
+
+ good_matches_list.append(good_matches)
+
+ return good_matches_list
+
+
+# Given a set of training keypoints lists, and a query keypoint list,
+# and the good matches for each training<->query match, returns the
+# homography for each pair and a mask list of matches considered good
+# by the homography
+# Inputs: train_keypoint_lists: List of keypoints lists from training images
+# query_keypoint_lists: Keypoint list (only 1) from query image
+# good_matches_list: List of matches for each training image -> query
+# Returns: homography_list: List of each homography between train->query
+# Returns [] if not enough matches / bad homography
+# matches_mask_list: Mask list of matches kept by homography for each
+# set of matches
+def compute_homographies(train_keypoint_lists, query_keypoint_lists,
+ good_matches_list):
+ homography_list = []
+ matches_mask_list = []
+ for i in range(len(train_keypoint_lists)):
+ good_matches = good_matches_list[i]
+ if len(good_matches) < MIN_MATCH_COUNT:
+ print("Not enough matches are for model ", i, ": ",
+ len(good_matches), " out of needed #: ", MIN_MATCH_COUNT)
+ homography_list.append([])
+ matches_mask_list.append([])
+ continue
+
+ print("Got good number of matches for model %d: %d (needed only %d)" %
+ (i, len(good_matches), MIN_MATCH_COUNT))
+ # Extract and bundle keypoint locations for computations
+ src_pts = np.float32([
+ train_keypoint_lists[i][m.trainIdx].pt for m in good_matches
+ ]).reshape(-1, 1, 2)
+ dst_pts = np.float32([
+ query_keypoint_lists[QUERY_INDEX][m.queryIdx].pt
+ for m in good_matches
+ ]).reshape(-1, 1, 2)
+ H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
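+        # 3.0 is the RANSAC reprojection threshold in pixels; matches whose
+        # reprojection error under H exceeds it are marked 0 in the mask.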
+ matches_mask = mask.ravel().tolist()
+ homography_list.append(H)
+ matches_mask_list.append(matches_mask)
+
+ return homography_list, matches_mask_list
+
+
+# Helper function to display images
+# Shows side-by-side panel with query on left, training on right
+# connects keypoints between two and draws target on query
+# Also shows image with query unwarped (to match training image) and target pt
+def show_results(training_images, train_keypoint_lists, query_images,
+ query_keypoint_lists, target_point_list, good_matches_list):
+ print("Showing results for ", len(training_images), " training images")
+
+ homography_list, matches_mask_list = compute_homographies(
+ train_keypoint_lists, query_keypoint_lists, good_matches_list)
+ for i in range(len(train_keypoint_lists)):
+ good_matches = good_matches_list[i]
+ if len(good_matches) < MIN_MATCH_COUNT:
+ continue
+ print("Showing results for model ", i)
+ matches_mask_count = matches_mask_list[i].count(1)
+ if matches_mask_count != len(good_matches):
+ print("Homography rejected some matches! From ",
+ len(good_matches), ", only ", matches_mask_count,
+ " were used")
+
+ if matches_mask_count < MIN_MATCH_COUNT:
+ print(
+ "Skipping match because homography rejected matches down to below ",
+ MIN_MATCH_COUNT)
+ continue
+
+ # Create a box based on the training image to map onto the query img
+ h, w, ch = training_images[i].shape
+ H = homography_list[i]
+ train_box_pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1],
+ [w - 1, 0]]).reshape(-1, 1, 2)
+ query_box_pts = cv2.perspectiveTransform(train_box_pts, H)
+
+ # Figure out where the training target goes on the query img
+ transformed_target = cv2.perspectiveTransform(target_point_list[i], H)
+ # Ballpark the size of the circle so it looks right on image
+ radius = int(
+ 32 * abs(H[0][0] + H[1][1]) / 2) # Average of scale factors
+ # We're only using one query image at this point
+ query_image = query_images[QUERY_INDEX].copy()
+
+ # Draw training box and target points on the query image
+ query_image = cv2.polylines(query_image, [np.int32(query_box_pts)],
+ True, (0, 255, 0), 3, cv2.LINE_AA)
+ query_image = cv2.circle(
+ query_image,
+ (transformed_target.flatten()[0], transformed_target.flatten()[1]),
+ radius, (0, 255, 0), 3)
+
+ # Draw the matches and show it
+ draw_params = dict(
+ matchColor=(0, 255, 0), # draw matches in green color
+ singlePointColor=None,
+ matchesMask=matches_mask_list[i], # draw only inliers
+ flags=2)
+
+ img3 = cv2.drawMatches(query_image, query_keypoint_lists[QUERY_INDEX],
+ training_images[i], train_keypoint_lists[i],
+ good_matches_list[i], None, **draw_params)
+ print("Drawing matches for model ", i,
+ ". Query on left, Training image on right")
+ cv2.imshow('Matches', img3), cv2.waitKey()
+
+ # Next, unwarp the query image so it looks like the training view
+ H_inv = np.linalg.inv(H)
+ query_image_warp = cv2.warpPerspective(query_image, H_inv, (w, h))
+ print("Showing unwarped query image for model ", i)
+ cv2.imshow('Unwarped Image', query_image_warp), cv2.waitKey()
+
+ # Go ahead and return these, for use elsewhere
+ return homography_list, matches_mask_list
+
+
+# Helper function to display keypoints
+# Input: image
+# keypoint_list: List of opencv keypoints
+def show_keypoints(image, keypoint_list):
+ ret_img = image.copy()
+ ret_img = cv2.drawKeypoints(ret_img, keypoint_list, ret_img)
+ cv2.imshow("Keypoints", ret_img)
+ cv2.waitKey(0)
+ return ret_img
diff --git a/y2020/vision/tools/python_code/transform_projection_test.py b/y2020/vision/tools/python_code/transform_projection_test.py
new file mode 100644
index 0000000..b8d94fd
--- /dev/null
+++ b/y2020/vision/tools/python_code/transform_projection_test.py
@@ -0,0 +1,111 @@
+import cv2
+import math
+from matplotlib import pyplot as plt
+import numpy as np
+
+import field_display
+
+# Assume image is 640x480 (VGA)
+num_pixels_x = 640
+num_pixels_y = 480
+
+# Camera center is 320, 240
+c_x = num_pixels_x / 2
+c_y = num_pixels_y / 2
+
+# Horiz. FOV is 54 degrees
+FOV_h = 54 * math.pi / 180.0 # (in radians)
+# Vert FOV is horizontal FOV scaled by aspect ratio (through tan function)
+FOV_v = math.atan(c_y / c_x * math.tan(FOV_h / 2.)) * 2
+
+# Assuming square pixels
+# focal length is (640/2)/tan(FOV_h/2)
+f_x = c_x / (math.tan(FOV_h / 2.))
+f_y = c_y / (math.tan(FOV_v / 2.))
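+# Sanity check (approximate): f_x = 320 / tan(27 deg) ~= 628 pixels.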
+
+# Height of camera on robot above ground
+cam_above_ground = 0.5
+
+# TODO: Should fix f_y entry below.
+cam_mat = np.array([[f_x, 0, c_x], [0, f_y, c_y], [0, 0, 1.]])
+
+# Use default distortion (i.e., none)
+distortion_coeffs = np.zeros((5, 1))
+
+depth = 5.0 # meters
+
+R_w_tarj_mat = np.array([[0., 0., 1.], [-1, 0, 0], [0, -1., 0]])
+R_w_tarj, jac = cv2.Rodrigues(R_w_tarj_mat)
+T_w_tarj = np.array([[15.98 - depth, -4.10 + 2.36, cam_above_ground]]).T
+
+img_ret = field_display.plot_bot_on_field(None, (0, 255, 0), T_w_tarj)
+#field_display.show_field(img_ret)
+
+# Create fake set of points relative to camera capture (target) frame
+# at +/- 1 meter in x, 5 meter depth, and every 1 meter in y from 0 to 4m (above the camera, so negative y values)
+pts_3d_T_t = np.array([[-1., 0., depth], [1., 0., depth], [-1., -1., depth], [
+ 1., -1., depth
+], [-1., -2., depth], [0., -2., depth], [1., -2., depth], [-1., -3., depth],
+ [1., -3., depth], [-1., -4., depth], [1., -4., depth]])
+
+# Ground truth shift of camera, to compute projections
+R_tarj_ci_gt = np.array([[0., 0.2, 0.2]]).T
+
+R_tarj_ci_gt_mat, jac = cv2.Rodrigues(R_tarj_ci_gt)
+
+T_tarj_ci_gt = np.array([[1., 2., -5.]]).T
+
+# To transform vectors, we apply the inverse transformation
+R_tarj_ci_gt_mat_inv = R_tarj_ci_gt_mat.T
+T_tarj_ci_gt_inv = -R_tarj_ci_gt_mat_inv.dot(T_tarj_ci_gt)
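+# (For a transform p_a = R_mat * p_b + T, the inverse mapping is
+# p_b = R_mat.T * p_a - R_mat.T * T; projectPoints below uses this inverse to
+# express the target-frame points in the shifted camera's frame.)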
+
+#pts_3d_T_t_shifted = (R_tarj_ci_gt_mat_inv.dot(pts_3d_T_t.T) + T_tarj_ci_gt_inv).T
+
+# Now project from new position
+# This was the manual way to do it-- use cv2.projectPoints instead
+#pts_proj_3d = cam_mat.dot(pts_3d_T_t_shifted.T).T
+#pts_proj_2d = np.divide(pts_proj_3d[:,0:2],(pts_proj_3d[:,2].reshape(-1,1)))
+
+pts_proj_2d_ci, jac_2d = cv2.projectPoints(pts_3d_T_t, R_tarj_ci_gt_mat_inv,
+ T_tarj_ci_gt_inv, cam_mat,
+ distortion_coeffs)
+#print(pts_proj_2d_ci)
+
+# Now, solve for the pose using the original 3d points (pts_3d_T_t) and the projections from the new location
+retval, R_ci_tarj_est, T_ci_tarj_est, inliers = cv2.solvePnPRansac(
+ pts_3d_T_t, pts_proj_2d_ci, cam_mat, distortion_coeffs)
+
+# This is the pose from camera to original target spot. We need to invert to get back to the pose we want
+R_tarj_ci_est_mat = cv2.Rodrigues(R_ci_tarj_est)[0].T
+T_tarj_ci_est = -R_tarj_ci_est_mat.dot(T_ci_tarj_est)
+R_tarj_ci_est = cv2.Rodrigues(R_tarj_ci_est_mat)[0]
+
+print("Check:\n", "Rot error:\n", R_tarj_ci_gt - R_tarj_ci_est,
+ "\nTrans error:\n", T_tarj_ci_gt - T_tarj_ci_est,
+ "\nError is < 0.001 in R & T: ",
+ np.linalg.norm(R_tarj_ci_gt - R_tarj_ci_est) < 0.001,
+ np.linalg.norm(T_tarj_ci_gt - T_tarj_ci_est) < 0.001)
+
+# Compute camera location in world coordinates
+R_w_ci, T_w_ci, d1, d2, d3, d4, d5, d6, d7, d8 = cv2.composeRT(
+ R_tarj_ci_est, T_tarj_ci_est, R_w_tarj, T_w_tarj)
+
+print("Estimate in world coordinates")
+print("R, T:\n", R_w_ci, "\n", T_w_ci)
+img_ret = field_display.plot_bot_on_field(img_ret, (0, 255, 255), T_w_ci)
+field_display.show_field(img_ret)
+
+# Compute vector to target
+# TODO: Likely better to do this from the homography, rather than the pose estimate...
+
+T_w_target_pt = np.array([[15.98, -4.10 + 2.36, 2.0 - cam_above_ground]]).T
+vector_to_target = T_w_target_pt - T_w_ci
+d_ci_target = np.linalg.norm(vector_to_target)
+phi_ci_target = math.atan2(vector_to_target[1][0], vector_to_target[0][0])
+print("Vector to target (from cam frame):\n", vector_to_target,
+ "\nDistance to target: ", d_ci_target, "\nAngle to target (deg): ",
+ phi_ci_target * 180. / math.pi)
+
+img_ret = field_display.plot_line_on_field(img_ret, (255, 255, 0), T_w_ci,
+ T_w_target_pt)
+field_display.show_field(img_ret)
diff --git a/y2020/vision/tools/python_code/usb_camera_stream.py b/y2020/vision/tools/python_code/usb_camera_stream.py
new file mode 100644
index 0000000..42cb1f1
--- /dev/null
+++ b/y2020/vision/tools/python_code/usb_camera_stream.py
@@ -0,0 +1,25 @@
+import cv2
+# Open the device at the ID X for /dev/videoX
+cap = cv2.VideoCapture(2)
+
+# Check whether the user-selected camera opened successfully.
+if not (cap.isOpened()):
+ print("Could not open video device")
+ quit()
+
+while (True):
+ # Capture frame-by-frame
+ ret, frame = cap.read()
+
+ exp = cap.get(cv2.CAP_PROP_EXPOSURE)
+ print("Exposure:", exp)
+ # Display the resulting frame
+ cv2.imshow('preview', frame)
+
+    # Wait for user input to quit the application
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+# When everything done, release the capture
+cap.release()
+cv2.destroyAllWindows()