Merge "Adding python version of vision code to estimate pose"
diff --git a/y2020/vision/tools/python_code/field_display.py b/y2020/vision/tools/python_code/field_display.py
new file mode 100644
index 0000000..a3f1024
--- /dev/null
+++ b/y2020/vision/tools/python_code/field_display.py
@@ -0,0 +1,143 @@
+import cv2
+import math
+from matplotlib import pyplot as plt
+import numpy as np
+
+
+def load_field_image():
+ field_image = cv2.imread('test_images/field_cropped_scaled.png')
+ return field_image
+
+
+def show_field(img_in=None):
+ if (img_in is None):
+        img_in = load_field_image()
+
+ cv2.imshow('Field', img_in), cv2.waitKey()
+ return
+
+
+# Convert field coordinates (meters) to image coordinates (pixels).
+# Field origin is (x,y) at center of red alliance driver station wall,
+# with x pointing into the field.
+# The default field image is 1598 x 821 pixels, so 1 cm per pixel on field
+# I.e., Field is 1598 x 821 pixels = 15.98 x 8.21 meters
+# Our origin in image coordinates is at (1598, 410) pixels, facing in the -x image direction
+
+
+def field_coord_to_image_coord(robot_position):
+    # Scale by 100 pixels / meter (i.e., 1 cm per pixel)
+ robot_pos_img_coord = np.array([[1598, 410]]).T + np.int32(
+ 100.0 * np.array([[-robot_position[0][0], robot_position[1][0]]]).T)
+ return robot_pos_img_coord
+
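+# For example (a quick check of the mapping above): the field origin (0, 0) m
+# maps to image pixel (1598, 410), and a point 1 m into the field along +x
+# maps to (1498, 410), since field +x points in the -x image direction:
+#   field_coord_to_image_coord(np.array([[0.], [0.]]))  # -> [[1598], [410]]
+#   field_coord_to_image_coord(np.array([[1.], [0.]]))  # -> [[1498], [410]]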
+
+# Given a field image, plot the robot and an optional heading vector on it
+def plot_bot_on_field(img_in, color, robot_position, robot_heading=None):
+ if img_in is None:
+ img_out = load_field_image()
+ else:
+ img_out = img_in.copy()
+
+ robot_pos_img_coord = field_coord_to_image_coord(robot_position)
+
+ ROBOT_RADIUS = 10
+ img_out = cv2.circle(
+ img_out, (robot_pos_img_coord[0][0], robot_pos_img_coord[1][0]),
+ ROBOT_RADIUS, color, 3)
+
+ if (robot_heading is not None):
+ img_out = cv2.line(
+ img_out, (robot_pos_img_coord[0][0], robot_pos_img_coord[1][0]),
+ (int(robot_pos_img_coord[0][0] -
+ 3 * ROBOT_RADIUS * math.cos(robot_heading)),
+ int(robot_pos_img_coord[1][0] +
+ 3 * ROBOT_RADIUS * math.sin(robot_heading))), color, 3,
+ cv2.LINE_AA)
+
+ return img_out
+
+
+# Plot a line on the field, given starting and ending field positions
+def plot_line_on_field(img_in, color, start_position, end_position):
+ if img_in is None:
+ img_out = load_field_image()
+ else:
+ img_out = img_in.copy()
+
+ start_pos_img_coord = field_coord_to_image_coord(start_position)
+ end_pos_img_coord = field_coord_to_image_coord(end_position)
+
+ img_out = cv2.line(img_out,
+ (start_pos_img_coord[0][0], start_pos_img_coord[1][0]),
+ (end_pos_img_coord[0][0], end_pos_img_coord[1][0]),
+ color, 3, cv2.LINE_AA)
+
+ return img_out
+
+
+# Helper function to plot a few quantities like
+# Heading (angle of the target relative to the robot)
+# Distance (of the target to the robot)
+# Skew (angle of the target normal relative to the robot)
+def plot_camera_to_target(img_in, color, heading, distance, skew):
+ if img_in is None:
+ img_out = np.zeros((821, 1598, 3), np.uint8)
+ else:
+ img_out = img_in
+
+ # Bot location is at origin
+ bot_location = np.array([[0., 0.]]).T
+
+ # Target location is distance away along the heading
+ target_location = bot_location + np.array(
+ [[distance * math.cos(heading), distance * math.sin(heading)]]).T
+
+ # Create part of a line from the target location to the end of the target, based on the skew
+ skew_line_offset = np.array(
+ [[math.cos(skew + math.pi / 2),
+ math.sin(skew + math.pi / 2)]]).T
+
+ # Plot bot origin
+ img_out = plot_bot_on_field(img_out, color, bot_location)
+
+ # Plot heading line
+ img_out = plot_line_on_field(img_out, (255, 255, 0), bot_location,
+ target_location)
+
+ # Plot target location
+ img_out = plot_bot_on_field(img_out, (255, 0, 0), target_location)
+
+ # Plot target rotation
+ img_out = plot_line_on_field(img_out, (0, 255, 0),
+ target_location - skew_line_offset,
+ target_location + skew_line_offset)
+
+ return img_out
+
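+# For example (a usage sketch): plot_camera_to_target(None, (0, 255, 0), 0.1, 5.0, 0.2)
+# draws the robot marker at the field origin, the target 5 m away along a
+# 0.1 rad heading, and a short green line through the target showing the 0.2 rad skew.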
+
+# Helper function to take camera rotation and bring it back to world coordinate frame
+def camera_rot_to_world(R_mat):
+ R_c_w_mat = np.array([[0., 0., 1.], [-1, 0, 0], [0, -1., 0]])
+
+ return (R_mat).dot(R_c_w_mat.T)
+
+
+def camera_rot_to_world_Rodrigues(R):
+ R_mat = cv2.Rodrigues(R)[0]
+ R_mat_world = camera_rot_to_world(R_mat)
+ R_world = cv2.Rodrigues(R_mat_world)[0]
+ return R_world
+
+
+def invert_pose_Rodrigues(R, T):
+ R_inv_mat = cv2.Rodrigues(R)[0].T
+ T_inv = -R_inv_mat.dot(T)
+ R_inv = cv2.Rodrigues(R_inv_mat)[0]
+ return R_inv, T_inv
+
+
+def invert_pose_mat(R, T):
+ R_inv = R.T
+ T_inv = -R_inv.dot(T)
+ return R_inv, T_inv
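+
+
+# A small self-check of the helpers above (an illustrative sketch, not called by
+# the other tools): inverting a pose twice should give back the original pose,
+# and the Rodrigues and matrix versions should agree.
+def check_pose_inversion():
+    R = np.array([[0.1, -0.2, 0.3]]).T  # an arbitrary Rodrigues rotation vector
+    T = np.array([[1.0, 2.0, 3.0]]).T  # an arbitrary translation
+    R_inv, T_inv = invert_pose_Rodrigues(R, T)
+    R_back, T_back = invert_pose_Rodrigues(R_inv, T_inv)
+    assert np.allclose(R_back, R) and np.allclose(T_back, T)
+    R_inv_mat, T_inv_mat = invert_pose_mat(cv2.Rodrigues(R)[0], T)
+    assert np.allclose(cv2.Rodrigues(R_inv)[0], R_inv_mat)
+    assert np.allclose(T_inv, T_inv_mat)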
diff --git a/y2020/vision/tools/python_code/image_match_test.py b/y2020/vision/tools/python_code/image_match_test.py
new file mode 100644
index 0000000..bcf610f
--- /dev/null
+++ b/y2020/vision/tools/python_code/image_match_test.py
@@ -0,0 +1,307 @@
+import cv2
+import math
+from matplotlib import pyplot as plt
+import numpy as np
+import time
+
+import field_display
+import train_and_match as tam
+
+### DEFINITIONS
+
+# List of images to train on
+training_image_names = [
+ 'test_images/train_power_port_blue.png',
+ # Using webcam captured image for testing-- best to use only one of the two
+ # training images for the power_port_red
+ 'test_images/train_power_port_red_webcam.png',
+ # 'test_images/train_power_port_red.png',
+ 'test_images/train_loading_bay_blue.png',
+ 'test_images/train_loading_bay_red.png'
+]
+
+# Target points on the images -- these need to match the training images, one
+# per image. They are manually captured by examining the images and entering
+# the pixel values of the target center for each image.
+# They are currently only used for visualization of the target.
+target_pt_list = [
+ np.float32([[393, 147]]).reshape(-1, 1, 2), # train_power_port_blue.png
+ np.float32([[305, 97]]).reshape(-1, 1, 2), #train_power_port_red_webcam.png
+ # np.float32([[570,192]]).reshape(-1,1,2), # train_power_port_red.png
+ np.float32([[366, 236]]).reshape(-1, 1, 2), # train_loading_bay_blue.png
+ np.float32([[366, 236]]).reshape(-1, 1, 2), # train_loading_bay_red.png
+]
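+# (The reshape(-1, 1, 2) above puts the points into the Nx1x2 float32 layout
+# that cv2.perspectiveTransform expects when the targets are mapped through a
+# homography below.)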
+
+# List of all possible images we want to test; we'll select one of them below.
+# Note: if you query on the same image as training, only that image will match.
+query_image_names = [
+ 'test_images/train_power_port_red.png', #0
+ 'test_images/train_power_port_blue.png', #1
+ 'test_images/test_game_manual.png', #2
+ 'test_images/test_train_shift_left_100.png', #3
+ 'test_images/test_train_shift_right_100.png', #4
+ 'test_images/test_train_down_2x.png', #5
+ 'test_images/test_train_down_4x.png', #6
+ 'test_images/test_raspi3_sample.jpg', #7
+ 'test_images/test_VR_sample1.png', #8
+ 'test_images/train_loading_bay_blue.png', #9
+    'test_images/train_loading_bay_red.png'  #10
+]
+
+training_image_index = 0
+# TODO: Should add argParser here to select this
+query_image_index = 0  # Use -1 for camera capture; otherwise an index into the list above
+
+##### Let's get to work!
+
+# Load the training and query images
+training_images = tam.load_images(training_image_names)
+
+# Create feature extractor
+feature_extractor = tam.load_feature_extractor()
+
+# Extract keypoints and descriptors for model
+train_keypoint_lists, train_descriptor_lists = tam.detect_and_compute(
+ feature_extractor, training_images)
+
+# Create the matcher. This only needs to be created once
+ts = time.monotonic() # Do some basic timing
+matcher = tam.train_matcher(train_descriptor_lists)
+tf = time.monotonic()
+print("Done training matcher, took ", tf - ts, " secs")
+for i in range(len(train_keypoint_lists)):
+ print("Model ", i, " has ", len(train_keypoint_lists[i]), " features: ")
+
+# Load the query image in. Based on index in our list, or using camera if index is -1
+query_images = []
+
+if (query_image_index == -1):
+ # Based on /dev/videoX setting
+ CAMERA_INDEX = 2
+ print("#### Using camera at /dev/video%d" % CAMERA_INDEX)
+ # Open the device at the ID X for /dev/videoX
+ cap = cv2.VideoCapture(CAMERA_INDEX)
+
+    # Check whether the user-selected camera opened successfully.
+ if not (cap.isOpened()):
+ print("Could not open video device")
+ quit()
+
+ # Capture frame-by-frame
+ ret, frame = cap.read()
+ query_images.append(frame)
+
+else:
+ # Load default images
+ query_images = tam.load_images([query_image_names[query_image_index]])
+
+# For writing out the pose estimates, to measure noise
+log_file = open("pose.out", 'w')
+
+looping = True
+# Grab images until 'q' is pressed (or just once for canned images)
+while (looping):
+    if (query_image_index == -1):
+ # Capture frame-by-frame
+ ret, frame = cap.read()
+ query_images[0] = frame
+ else:
+ looping = False
+
+ # Extract features from query image
+ query_keypoint_lists, query_descriptor_lists = tam.detect_and_compute(
+ feature_extractor, query_images)
+ print("Query image has ", len(query_keypoint_lists[0]), " features")
+
+ ts = time.monotonic()
+ good_matches_list = tam.compute_matches(matcher, train_descriptor_lists,
+ query_descriptor_lists)
+ tf = time.monotonic()
+ print("Done finding matches (took ", tf - ts, " secs)")
+ for i in range(len(train_keypoint_lists)):
+ print("Model ", i, " has # good matches: ", len(good_matches_list[i]))
+
+ # TEMP: Use first list for what follows
+ # TODO: Should look for best match after homography and display that one
+ # OR: show results on all good matches
+ good_matches = good_matches_list[0]
+
+ # Next, find homography (map between training and query images)
+ homography_list, matches_mask_list = tam.compute_homographies(
+ train_keypoint_lists, query_keypoint_lists, good_matches_list)
+
+ if matches_mask_list[0].count(1) < tam.MIN_MATCH_COUNT:
+ print(
+ "Not continuing pose calc because not enough good matches after homography-- only had ",
+ matches_mask_list[0].count(1))
+ continue
+
+ # Next, let's go to compute pose
+ # I've chosen some coordinate frames to help track things. Using indices:
+ # w: world coordinate frame (at center wall of driver station)
+ # tarj: camera pose when jth target (e.g., power panel) was captured
+ # ci: ith camera
+ # b: body frame of robot (robot location)
+ #
+ # And, T for translation, R for rotation, "est" for estimated
+ # So, T_w_b_est is the estimated translation from world to body coords
+ #
+ # A few notes:
+ # -- CV uses rotated frame, where z points out of camera, y points down
+ # -- This causes a lot of mapping around of points, but overall OK
+
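+    # For example (a notation sketch only, with the R terms as 3x3 matrices
+    # rather than Rodrigues vectors), transforms chain along the subscripts:
+    #   R_w_ci_est = R_w_tarj.dot(R_tarj_ci_est)
+    #   T_w_ci_est = R_w_tarj.dot(T_tarj_ci_est) + T_w_tarj
+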
+ ### TODO: Still need to clean this rest up
+
+ # TODO: Need to generalize this for each target-- need to have 3D global locations of each target's keypoints
+ field_length = 15.98
+ target_center_offset_y = 1.67
+ target_center_height = 2.49
+    if training_image_index == 0:
+ # webcam
+ fx = 810.
+ fy = 810.
+ cx = 320.
+ cy = 240.
+ cy_above_ground = 1. # meters above ground
+ depth = 4.57 # meters
+ target_center_offset_y = 1.67
+    elif training_image_index == 1:
+ # Made up for screenshot from manual
+ cx = int(1165 / 2)
+ cy_above_ground = -0.5856 # meters above ground
+ cy = 776 / 2
+ fx = 1143 # pixels (or 0.00160m or 1143 pixels?)
+        fy = fx  # assume square pixels (fy is not otherwise set for this case)
+        depth = 4.57  # meters
+    elif training_image_index == 2:
+ cx = 732 / 2
+ cy_above_ground = 0.454 # meters above ground
+ cy = 436 / 2
+ target_width = 5 * 12 * 0.0254 # width in ft * in/ft * m/in
+        target_height = target_width * cy / cx  # scale the width by the image aspect ratio (cy / cx)
+ FOV_x = 54 * math.pi / 180. # camera FOV in radians (from 54 degree lens)
+        fx = cx / math.tan(FOV_x / 2)  # pixels
+        fy = fx  # assume square pixels (fy is not otherwise set for this case)
+ depth = (target_height / 2) / math.tan(FOV_x / 2) # meters
+
+ src_pts_3d = []
+ for m in good_matches:
+ # Project the matches all back to 3D, assuming they're all at the same depth
+ # Add in the distance across the field and any lateral offset of the target
+ src_pts_3d.append([(
+ field_length,
+ -(train_keypoint_lists[training_image_index][m.trainIdx].pt[0] - cx
+ ) * depth / fx - target_center_offset_y,
+ -(train_keypoint_lists[training_image_index][m.trainIdx].pt[1] - cy
+ ) * depth / fx + cy_above_ground)])
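+    # For example (a sketch using the webcam numbers above: fx = 810, cx = 320,
+    # cy = 240, depth = 4.57): a keypoint at the principal point (320, 240)
+    # maps to roughly (15.98, -1.67, 1.0) m, i.e. on the target wall, at the
+    # target's lateral offset, and at the camera height above the ground.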
+
+ # Reshape 3d point list to work with computations to follow
+ src_pts_3d_array = np.asarray(np.float32(src_pts_3d)).reshape(-1, 3, 1)
+
+ cam_mat = np.asarray([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+ distortion_coeffs = np.zeros((5, 1))
+
+ # Create list of matching query point locations
+ dst_pts = np.float32([
+ query_keypoint_lists[0][m.queryIdx].pt for m in good_matches
+ ]).reshape(-1, 1, 2)
+
+ ts = time.monotonic()
+ # TODO: May want to supply it with estimated guess as starting point
+ # Find offset from camera to original location of camera relative to target
+ retval, R_ci_w_estj, T_ci_w_estj, inliers = cv2.solvePnPRansac(
+ src_pts_3d_array, dst_pts, cam_mat, distortion_coeffs)
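+    # (A possible refinement, per the TODO above but not done here: pass a prior
+    # rvec/tvec and set useExtrinsicGuess=True in cv2.solvePnPRansac.)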
+
+ tf = time.monotonic()
+ print("Solving Pose took ", tf - ts, " secs")
+ print("Found ", len(inliers), " inliers, out of ", len(dst_pts),
+ " matched points")
+ ### THIS is the estimate of the robot on the field!
+ R_w_ci_estj, T_w_ci_estj = field_display.invert_pose_Rodrigues(
+ R_ci_w_estj, T_ci_w_estj)
+ print("Pose from PnP is:\n", R_ci_w_estj, "\n", T_ci_w_estj)
+ # The rotation estimate is for camera with z pointing forwards.
+ # Compute the rotation as if x is forward
+ R_w_ci_estj_robot = field_display.camera_rot_to_world_Rodrigues(
+ R_w_ci_estj)
+ print("Pose in world coords is:\n", R_w_ci_estj, "\n", T_w_ci_estj,
+ "\nWith robot coord frame rotation as \n", R_w_ci_estj_robot)
+
+    # Use the z component of the Rodrigues rotation vector as the heading estimate
+ heading_est = R_w_ci_estj_robot[2][0]
+ # Plot location of the robot, along with heading
+ img_ret = field_display.plot_bot_on_field(None, (0, 255, 255), T_w_ci_estj,
+ heading_est)
+
+ # Compute vector to target
+ T_w_target_pt = np.array(
+ [[field_length, -target_center_offset_y, target_center_height]]).T
+ vector_to_target = T_w_target_pt - T_w_ci_estj
+ # Compute distance to target (full 3D)
+ distance_to_target = np.linalg.norm(vector_to_target)
+ # Compute distance to target (x,y)
+ distance_to_target_ground = np.linalg.norm(vector_to_target[0:2])
+ #print("Distance comparison: all: ", distance_to_target, " vs. ground: ", distance_to_target_ground)
+
+ angle_to_target_abs = math.atan2(vector_to_target[1][0],
+ vector_to_target[0][0])
+ angle_to_target_robot = angle_to_target_abs - heading_est
+ img_ret = field_display.plot_line_on_field(img_ret, (255, 255, 0),
+ T_w_ci_estj, T_w_target_pt)
+ # THESE ARE OUR ESTIMATES OF HEADING, DISTANCE, AND SKEW TO TARGET
+ log_file.write('%lf, %lf, %lf\n' % (heading_est, distance_to_target_ground,
+ angle_to_target_robot))
+
+ # A bunch of code to visualize things...
+ #
+ # Draw target on the image
+ h, w, ch = query_images[0].shape
+ query_image_copy = query_images[0].copy()
+ # Map target_pt onto image, for display
+ transformed_target = cv2.perspectiveTransform(target_pt_list[0],
+ homography_list[0])
+ # Ballpark the size of the circle so it looks right on image
+ radius = int(32 * abs(homography_list[0][0][0] + homography_list[0][1][1])
+ / 2) # Average of scale factors
+ query_image_copy = cv2.circle(
+ query_image_copy,
+ (transformed_target.flatten()[0], transformed_target.flatten()[1]),
+ radius, (0, 255, 0), 3)
+
+ # If we're querying static images, show full results
+    if (query_image_index != -1):
+ homography_list, matches_mask_list = tam.show_results(
+ training_images, train_keypoint_lists, query_images,
+ query_keypoint_lists, target_pt_list, good_matches_list)
+
+ # Create empty image the size of the field
+ img_heading = field_display.load_field_image()
+ img_heading[:, :, :] = 0
+ f_h, f_w, f_ch = img_heading.shape
+
+ # Create heading view, and paste it to bottom of field
+ img_heading = field_display.plot_camera_to_target(
+ img_heading, (0, 255, 0), heading_est, distance_to_target_ground,
+ angle_to_target_robot)
+ vis = np.concatenate((img_ret, img_heading), axis=0)
+
+ # Paste query image to right of other views (scale to keep aspect ratio)
+ img_query_scaled = cv2.resize(query_image_copy,
+ (int(2 * w * f_h / h), 2 * f_h))
+ vis = np.concatenate((vis, img_query_scaled), axis=1)
+
+ # Scale down to make it fit on screen
+ vis = cv2.resize(vis, (int(vis.shape[1] / 4), int(vis.shape[0] / 4)))
+ cv2.imshow('field_display', vis)
+
+    # Wait for a user input to quit the application
+    pause_time = 0
+    if (query_image_index == -1):
+        pause_time = 1
+ if cv2.waitKey(pause_time) & 0xFF == ord('q'):
+ break
+
+# Done. Clean things up
+if (query_image_index == -1):
+    # When everything is done, release the capture
+ cap.release()
+
+cv2.destroyAllWindows()
diff --git a/y2020/vision/tools/python_code/test_images/field_cropped_scaled.png b/y2020/vision/tools/python_code/test_images/field_cropped_scaled.png
new file mode 100644
index 0000000..c971c19
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/field_cropped_scaled.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_VR_sample1.png b/y2020/vision/tools/python_code/test_images/test_VR_sample1.png
new file mode 100644
index 0000000..86495b2
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_VR_sample1.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_game_manual.png b/y2020/vision/tools/python_code/test_images/test_game_manual.png
new file mode 100644
index 0000000..825f1c3
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_game_manual.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_raspi3_sample.jpg b/y2020/vision/tools/python_code/test_images/test_raspi3_sample.jpg
new file mode 100644
index 0000000..376149c
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_raspi3_sample.jpg
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_train_down_2x.png b/y2020/vision/tools/python_code/test_images/test_train_down_2x.png
new file mode 100644
index 0000000..b004c51
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_train_down_2x.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_train_down_4x.png b/y2020/vision/tools/python_code/test_images/test_train_down_4x.png
new file mode 100644
index 0000000..e217280
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_train_down_4x.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_train_shift_left_100.png b/y2020/vision/tools/python_code/test_images/test_train_shift_left_100.png
new file mode 100644
index 0000000..42165f8
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_train_shift_left_100.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/test_train_shift_right_100.png b/y2020/vision/tools/python_code/test_images/test_train_shift_right_100.png
new file mode 100644
index 0000000..9e7c274
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/test_train_shift_right_100.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_loading_bay.xcf b/y2020/vision/tools/python_code/test_images/train_loading_bay.xcf
new file mode 100644
index 0000000..6bc62f5
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_loading_bay.xcf
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_loading_bay_blue.png b/y2020/vision/tools/python_code/test_images/train_loading_bay_blue.png
new file mode 100644
index 0000000..c3c0aea
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_loading_bay_blue.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_loading_bay_red.png b/y2020/vision/tools/python_code/test_images/train_loading_bay_red.png
new file mode 100644
index 0000000..42091a6
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_loading_bay_red.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_power_port_blue.png b/y2020/vision/tools/python_code/test_images/train_power_port_blue.png
new file mode 100644
index 0000000..6ade26f
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_power_port_blue.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_power_port_red.png b/y2020/vision/tools/python_code/test_images/train_power_port_red.png
new file mode 100644
index 0000000..9d2f0bf
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_power_port_red.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_power_port_red.xcf b/y2020/vision/tools/python_code/test_images/train_power_port_red.xcf
new file mode 100644
index 0000000..ad6bed1
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_power_port_red.xcf
Binary files differ
diff --git a/y2020/vision/tools/python_code/test_images/train_power_port_red_webcam.png b/y2020/vision/tools/python_code/test_images/train_power_port_red_webcam.png
new file mode 100644
index 0000000..6115348
--- /dev/null
+++ b/y2020/vision/tools/python_code/test_images/train_power_port_red_webcam.png
Binary files differ
diff --git a/y2020/vision/tools/python_code/train_and_match.py b/y2020/vision/tools/python_code/train_and_match.py
new file mode 100644
index 0000000..8fed4c3
--- /dev/null
+++ b/y2020/vision/tools/python_code/train_and_match.py
@@ -0,0 +1,291 @@
+import cv2
+import math
+from matplotlib import pyplot as plt
+import numpy as np
+#import tkinter
+# pip install pillow
+#import PIL.Image, PIL.ImageTk
+import time
+
+import field_display
+
+### DEFINITIONS
+MIN_MATCH_COUNT = 10 # 10 is min; more gives better matches
+FEATURE_EXTRACTOR_NAME = 'SIFT'
+QUERY_INDEX = 0 # We use a list for both training and query info, but only ever have one query item
+
+
+# Calculates rotation matrix to euler angles
+# The result is the same as MATLAB except the order
+# of the euler angles ( x and z are swapped ).
+# Input: R: numpy 3x3 rotation matrix
+# Output: numpy 3x1 vector of Euler angles (x,y,z)
+def rotationMatrixToEulerAngles(R):
+ sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
+
+ singular = sy < 1e-6
+ if not singular:
+ x = math.atan2(R[2, 1], R[2, 2])
+ y = math.atan2(-R[2, 0], sy)
+ z = math.atan2(R[1, 0], R[0, 0])
+ else:
+ x = math.atan2(-R[1, 2], R[1, 1])
+ y = math.atan2(-R[2, 0], sy)
+ z = 0
+
+ return np.array([x, y, z])
+
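+# For example (a quick check; this helper is not called elsewhere in this diff):
+# a pure 90 degree rotation about z comes back as (x, y, z) = (0, 0, pi/2):
+#   rotationMatrixToEulerAngles(
+#       np.array([[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]]))
+#   # -> array([0., 0., 1.5707963])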
+
+# Load images based on list of image names
+# Return list containing loaded images
+def load_images(image_names):
+ image_list = []
+ for im in image_names:
+ # Load image (in color; let opencv convert to B&W for features)
+ img_data = cv2.imread(im)
+ if img_data is None:
+ print("Failed to load image: ", im)
+ exit()
+ else:
+ image_list.append(img_data)
+
+ return image_list
+
+
+# Load feature extractor based on extractor name
+# Returns feature extractor object
+def load_feature_extractor():
+    if FEATURE_EXTRACTOR_NAME == 'SIFT':
+ # Initiate SIFT detector
+ feature_extractor = cv2.xfeatures2d.SIFT_create()
+    elif FEATURE_EXTRACTOR_NAME == 'SURF':
+ # Initiate SURF detector
+ feature_extractor = cv2.xfeatures2d.SURF_create()
+    elif FEATURE_EXTRACTOR_NAME == 'ORB':
+ # Initiate ORB detector
+ feature_extractor = cv2.ORB_create()
+
+ return feature_extractor
+
+
+# Run feature detector and compute feature descriptions on image_list
+# Input: feature_extractor object
+# image_list: list containing images
+# Return: Lists of keypoints and lists of feature descriptors, one for each image
+def detect_and_compute(feature_extractor, image_list):
+ descriptor_lists = []
+ keypoint_lists = []
+ if (FEATURE_EXTRACTOR_NAME in ('SIFT', 'SURF')):
+ for i in range(len(image_list)):
+ # find the keypoints and descriptors with SIFT
+ kp, des = feature_extractor.detectAndCompute(image_list[i], None)
+ descriptor_lists.append(des)
+ keypoint_lists.append(kp)
+    elif FEATURE_EXTRACTOR_NAME == 'ORB':
+ # TODO: Check whether ORB extractor can do detectAndCompute.
+ # If so, we don't need to have this branch for ORB
+ for i in range(len(image_list)):
+ # find the keypoints and descriptors with ORB
+            kp = feature_extractor.detect(image_list[i], None)
+            kp, des = feature_extractor.compute(image_list[i], kp)
+            keypoint_lists.append(kp)
+            descriptor_lists.append(des)
+
+ return keypoint_lists, descriptor_lists
+
+
+# Given a keypoint descriptor list, create a matcher
+# Input: descriptor_lists: List of descriptors, one for each training image
+# Returns: a keypoint matcher trained on all descriptors, indexed by image
+def train_matcher(descriptor_lists):
+ if (FEATURE_EXTRACTOR_NAME in ('SIFT', 'SURF')):
+ # Use FLANN KD tree for SIFT & SURF
+ FLANN_INDEX_KDTREE = 0
+ index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
+ search_params = dict(checks=50)
+
+ matcher = cv2.FlannBasedMatcher(index_params, search_params)
+ matcher.add(descriptor_lists)
+ matcher.train()
+    elif FEATURE_EXTRACTOR_NAME == 'ORB':
+ # Use FLANN LSH for ORB
+ FLANN_INDEX_LSH = 6
+ index_params = dict(
+ algorithm=FLANN_INDEX_LSH,
+ table_number=6, # 12
+ key_size=12, # 20
+ multi_probe_level=2) #2
+ search_params = dict(checks=50)
+ matcher = cv2.FlannBasedMatcher(index_params, search_params)
+ # Other option: BFMatcher with default params
+ # NOTE: I think Brute Force matcher can only do 1 match at a time,
+ # rather than loading keypoints from all model images
+ #matcher = cv2.BFMatcher()
+
+ return matcher
+
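+# Typical use of the pieces above (a sketch mirroring image_match_test.py):
+#   images = load_images(['test_images/train_power_port_blue.png'])
+#   extractor = load_feature_extractor()
+#   keypoint_lists, descriptor_lists = detect_and_compute(extractor, images)
+#   matcher = train_matcher(descriptor_lists)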
+
+# Given a matcher and the query descriptors (and, for ORB, the training descriptors),
+# compute the best matches, filtering using the David Lowe magic ratio of 0.7.
+# Return list of good match lists (one set of good matches for each training image for SIFT/SURF)
+def compute_matches(matcher, train_descriptor_lists, query_descriptor_lists):
+
+ # We'll create a match list for each of the training images
+ good_matches_list = []
+ if (FEATURE_EXTRACTOR_NAME in ('SIFT', 'SURF')):
+ # We're just doing one query at a time, so grab the first from the list
+ desc_query = query_descriptor_lists[QUERY_INDEX]
+ matches = matcher.knnMatch(desc_query, k=2)
+
+ for train_index in range(len(train_descriptor_lists)):
+ # store all the good matches as per Lowe's ratio test.
+ # (Lowe originally proposed 0.7 ratio, but 0.75 was later proposed as a better option. We'll go with the more conservative (fewer, better matches) for now)
+ good_matches = []
+ for m, n in matches:
+                if m.distance < 0.7 * n.distance and m.imgIdx == train_index:
+ good_matches.append(m)
+
+ good_matches_list.append(good_matches)
+
+    elif FEATURE_EXTRACTOR_NAME == 'ORB':
+        desc_query = query_descriptor_lists[QUERY_INDEX]
+        matches = matcher.knnMatch(desc_query, train_descriptor_lists[0], k=2)
+ print(matches)
+ good_matches = []
+ for m in matches:
+ if m:
+ if len(m) == 2:
+ print(m[0].distance, m[1].distance)
+ if m[0].distance < 0.7 * m[1].distance:
+ good_matches.append(m[0])
+ print(m[0].distance)
+
+ good_matches_list.append(good_matches)
+
+ return good_matches_list
+
+
+# Given a set of training keypoints lists, and a query keypoint list,
+# and the good matches for each training<->query match, returns the
+# homography for each pair and a mask list of matches considered good
+# by the homography
+# Inputs: train_keypoint_lists: List of keypoints lists from training images
+# query_keypoint_lists: Keypoint list (only 1) from query image
+# good_matches_list: List of matches for each training image -> query
+# Returns: homography_list: List of each homography between train->query
+# Returns [] if not enough matches / bad homography
+# matches_mask_list: Mask list of matches kept by homography for each
+# set of matches
+def compute_homographies(train_keypoint_lists, query_keypoint_lists,
+ good_matches_list):
+ homography_list = []
+ matches_mask_list = []
+ for i in range(len(train_keypoint_lists)):
+ good_matches = good_matches_list[i]
+ if len(good_matches) < MIN_MATCH_COUNT:
+ print("Not enough matches are for model ", i, ": ",
+ len(good_matches), " out of needed #: ", MIN_MATCH_COUNT)
+ homography_list.append([])
+ matches_mask_list.append([])
+ continue
+
+ print("Got good number of matches for model %d: %d (needed only %d)" %
+ (i, len(good_matches), MIN_MATCH_COUNT))
+ # Extract and bundle keypoint locations for computations
+ src_pts = np.float32([
+ train_keypoint_lists[i][m.trainIdx].pt for m in good_matches
+ ]).reshape(-1, 1, 2)
+ dst_pts = np.float32([
+ query_keypoint_lists[QUERY_INDEX][m.queryIdx].pt
+ for m in good_matches
+ ]).reshape(-1, 1, 2)
+ H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
+ matches_mask = mask.ravel().tolist()
+ homography_list.append(H)
+ matches_mask_list.append(matches_mask)
+
+ return homography_list, matches_mask_list
+
+
+# Helper function to display images
+# Shows side-by-side panel with query on left, training on right
+# connects keypoints between the two, and draws the target on the query image
+# Also shows image with query unwarped (to match training image) and target pt
+def show_results(training_images, train_keypoint_lists, query_images,
+ query_keypoint_lists, target_point_list, good_matches_list):
+ print("Showing results for ", len(training_images), " training images")
+
+ homography_list, matches_mask_list = compute_homographies(
+ train_keypoint_lists, query_keypoint_lists, good_matches_list)
+ for i in range(len(train_keypoint_lists)):
+ good_matches = good_matches_list[i]
+ if len(good_matches) < MIN_MATCH_COUNT:
+ continue
+ print("Showing results for model ", i)
+ matches_mask_count = matches_mask_list[i].count(1)
+ if matches_mask_count != len(good_matches):
+ print("Homography rejected some matches! From ",
+ len(good_matches), ", only ", matches_mask_count,
+ " were used")
+
+ if matches_mask_count < MIN_MATCH_COUNT:
+ print(
+ "Skipping match because homography rejected matches down to below ",
+ MIN_MATCH_COUNT)
+ continue
+
+ # Create a box based on the training image to map onto the query img
+ h, w, ch = training_images[i].shape
+ H = homography_list[i]
+ train_box_pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1],
+ [w - 1, 0]]).reshape(-1, 1, 2)
+ query_box_pts = cv2.perspectiveTransform(train_box_pts, H)
+
+ # Figure out where the training target goes on the query img
+ transformed_target = cv2.perspectiveTransform(target_point_list[i], H)
+ # Ballpark the size of the circle so it looks right on image
+ radius = int(
+ 32 * abs(H[0][0] + H[1][1]) / 2) # Average of scale factors
+ # We're only using one query image at this point
+ query_image = query_images[QUERY_INDEX].copy()
+
+ # Draw training box and target points on the query image
+ query_image = cv2.polylines(query_image, [np.int32(query_box_pts)],
+ True, (0, 255, 0), 3, cv2.LINE_AA)
+ query_image = cv2.circle(
+ query_image,
+ (transformed_target.flatten()[0], transformed_target.flatten()[1]),
+ radius, (0, 255, 0), 3)
+
+ # Draw the matches and show it
+ draw_params = dict(
+ matchColor=(0, 255, 0), # draw matches in green color
+ singlePointColor=None,
+ matchesMask=matches_mask_list[i], # draw only inliers
+ flags=2)
+
+ img3 = cv2.drawMatches(query_image, query_keypoint_lists[QUERY_INDEX],
+ training_images[i], train_keypoint_lists[i],
+ good_matches_list[i], None, **draw_params)
+ print("Drawing matches for model ", i,
+ ". Query on left, Training image on right")
+ cv2.imshow('Matches', img3), cv2.waitKey()
+
+ # Next, unwarp the query image so it looks like the training view
+ H_inv = np.linalg.inv(H)
+ query_image_warp = cv2.warpPerspective(query_image, H_inv, (w, h))
+ print("Showing unwarped query image for model ", i)
+ cv2.imshow('Unwarped Image', query_image_warp), cv2.waitKey()
+
+ # Go ahead and return these, for use elsewhere
+ return homography_list, matches_mask_list
+
+
+# Helper function to display keypoints
+# Input: image
+# keypoint_list: List of opencv keypoints
+def show_keypoints(image, keypoint_list):
+ ret_img = image.copy()
+ ret_img = cv2.drawKeypoints(ret_img, keypoint_list, ret_img)
+ cv2.imshow("Keypoints", ret_img)
+ cv2.waitKey(0)
+ return ret_img
diff --git a/y2020/vision/tools/python_code/transform_projection_test.py b/y2020/vision/tools/python_code/transform_projection_test.py
new file mode 100644
index 0000000..b8d94fd
--- /dev/null
+++ b/y2020/vision/tools/python_code/transform_projection_test.py
@@ -0,0 +1,111 @@
+import cv2
+import math
+from matplotlib import pyplot as plt
+import numpy as np
+
+import field_display
+
+# Assume image is 640x480 (VGA)
+num_pixels_x = 640
+num_pixels_y = 480
+
+# Camera center is 320, 240
+c_x = num_pixels_x / 2
+c_y = num_pixels_y / 2
+
+# Horiz. FOV is 54 degrees
+FOV_h = 54 * math.pi / 180.0 # (in radians)
+# Vert FOV is horizontal FOV scaled by aspect ratio (through tan function)
+FOV_v = math.atan(c_y / c_x * math.tan(FOV_h / 2.)) * 2
+
+# Assuming square pixels
+# focal length is (640/2)/tan(FOV_h/2)
+f_x = c_x / (math.tan(FOV_h / 2.))
+f_y = c_y / (math.tan(FOV_v / 2.))
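+# (With these numbers f_x == f_y, roughly 628 pixels, as expected for square pixels.)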
+
+# Height of camera on robot above ground
+cam_above_ground = 0.5
+
+# TODO: Should fix f_y entry below.
+cam_mat = np.array([[f_x, 0, c_x], [0, f_y, c_y], [0, 0, 1.]])
+
+# Use default distortion (i.e., none)
+distortion_coeffs = np.zeros((5, 1))
+
+depth = 5.0 # meters
+
+R_w_tarj_mat = np.array([[0., 0., 1.], [-1, 0, 0], [0, -1., 0]])
+R_w_tarj, jac = cv2.Rodrigues(R_w_tarj_mat)
+T_w_tarj = np.array([[15.98 - depth, -4.10 + 2.36, cam_above_ground]]).T
+
+img_ret = field_display.plot_bot_on_field(None, (0, 255, 0), T_w_tarj)
+#field_display.show_field(img_ret)
+
+# Create fake set of points relative to camera capture (target) frame
+# at +/- 1 meter in x, 5 meter depth, and every 1 meter in y from 0 to 4m (above the camera, so negative y values)
+pts_3d_T_t = np.array([[-1., 0., depth], [1., 0., depth], [-1., -1., depth], [
+ 1., -1., depth
+], [-1., -2., depth], [0., -2., depth], [1., -2., depth], [-1., -3., depth],
+ [1., -3., depth], [-1., -4., depth], [1., -4., depth]])
+
+# Ground truth shift of camera, to compute projections
+R_tarj_ci_gt = np.array([[0., 0.2, 0.2]]).T
+
+R_tarj_ci_gt_mat, jac = cv2.Rodrigues(R_tarj_ci_gt)
+
+T_tarj_ci_gt = np.array([[1., 2., -5.]]).T
+
+# To transform vectors, we apply the inverse transformation
+R_tarj_ci_gt_mat_inv = R_tarj_ci_gt_mat.T
+T_tarj_ci_gt_inv = -R_tarj_ci_gt_mat_inv.dot(T_tarj_ci_gt)
+
+#pts_3d_T_t_shifted = (R_tarj_ci_gt_mat_inv.dot(pts_3d_T_t.T) + T_tarj_ci_gt_inv).T
+
+# Now project from new position
+# This was the manual way to do it-- use cv2.projectPoints instead
+#pts_proj_3d = cam_mat.dot(pts_3d_T_t_shifted.T).T
+#pts_proj_2d = np.divide(pts_proj_3d[:,0:2],(pts_proj_3d[:,2].reshape(-1,1)))
+
+pts_proj_2d_ci, jac_2d = cv2.projectPoints(pts_3d_T_t, R_tarj_ci_gt_mat_inv,
+ T_tarj_ci_gt_inv, cam_mat,
+ distortion_coeffs)
+#print(pts_proj_2d_ci)
+
+# Now, solve for the pose using the original 3d points (pts_3d_T_t) and the projections from the new location
+retval, R_ci_tarj_est, T_ci_tarj_est, inliers = cv2.solvePnPRansac(
+ pts_3d_T_t, pts_proj_2d_ci, cam_mat, distortion_coeffs)
+
+# This is the pose from camera to original target spot. We need to invert to get back to the pose we want
+R_tarj_ci_est_mat = cv2.Rodrigues(R_ci_tarj_est)[0].T
+T_tarj_ci_est = -R_tarj_ci_est_mat.dot(T_ci_tarj_est)
+R_tarj_ci_est = cv2.Rodrigues(R_tarj_ci_est_mat)[0]
+
+print("Check:\n", "Rot error:\n", R_tarj_ci_gt - R_tarj_ci_est,
+ "\nTrans error:\n", T_tarj_ci_gt - T_tarj_ci_est,
+ "\nError is < 0.001 in R & T: ",
+ np.linalg.norm(R_tarj_ci_gt - R_tarj_ci_est) < 0.001,
+ np.linalg.norm(T_tarj_ci_gt - T_tarj_ci_est) < 0.001)
+
+# Compute camera location in world coordinates
+R_w_ci, T_w_ci, d1, d2, d3, d4, d5, d6, d7, d8 = cv2.composeRT(
+ R_tarj_ci_est, T_tarj_ci_est, R_w_tarj, T_w_tarj)
+
+print("Estimate in world coordinates")
+print("R, T:\n", R_w_ci, "\n", T_w_ci)
+img_ret = field_display.plot_bot_on_field(img_ret, (0, 255, 255), T_w_ci)
+field_display.show_field(img_ret)
+
+# Compute vector to target
+# TODO: Likely better to do this from the homography, rather than the pose estimate...
+
+T_w_target_pt = np.array([[15.98, -4.10 + 2.36, 2.0 - cam_above_ground]]).T
+vector_to_target = T_w_target_pt - T_w_ci
+d_ci_target = np.linalg.norm(vector_to_target)
+phi_ci_target = math.atan2(vector_to_target[1][0], vector_to_target[0][0])
+print("Vector to target (from cam frame):\n", vector_to_target,
+ "\nDistance to target: ", d_ci_target, "\nAngle to target (deg): ",
+ phi_ci_target * 180. / math.pi)
+
+img_ret = field_display.plot_line_on_field(img_ret, (255, 255, 0), T_w_ci,
+ T_w_target_pt)
+field_display.show_field(img_ret)
diff --git a/y2020/vision/tools/python_code/usb_camera_stream.py b/y2020/vision/tools/python_code/usb_camera_stream.py
new file mode 100644
index 0000000..42cb1f1
--- /dev/null
+++ b/y2020/vision/tools/python_code/usb_camera_stream.py
@@ -0,0 +1,25 @@
+import cv2
+# Open the device at the ID X for /dev/videoX
+cap = cv2.VideoCapture(2)
+
+# Check whether the user-selected camera opened successfully.
+if not (cap.isOpened()):
+ print("Could not open video device")
+ quit()
+
+while (True):
+ # Capture frame-by-frame
+ ret, frame = cap.read()
+
+ exp = cap.get(cv2.CAP_PROP_EXPOSURE)
+ print("Exposure:", exp)
+ # Display the resulting frame
+ cv2.imshow('preview', frame)
+
+    # Wait for a user input ('q') to quit the application
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+# When everything is done, release the capture
+cap.release()
+cv2.destroyAllWindows()