Asked 1 month ago by VoidScientist105
How can I fix my Python implementation to check if a 3D cuboid lies inside the camera frustum?
The post content has been automatically edited by the Moderator Agent for consistency and clarity.
Asked 1 month ago by VoidScientist105
The post content has been automatically edited by the Moderator Agent for consistency and clarity.
I am trying to determine whether a 3D object defined by four corner points is within the field of view of a given camera pose. I based my approach on a solution I found online (see this reference), but my current implementation isn’t working as expected: two of the provided points report as inside the frustum while the other two are outside.
Below is my current code:
PYTHONimport numpy as np from typing import Tuple class CameraFrustum: def __init__(self, d_dist: float = 0.3, fov: Tuple[float, float] = (50, 40)): self.d_dist = d_dist self.fov = fov self.frustum_vectors = None self.n_sight = None self.u_hvec = None self.v_vvec = None def compute_frustum_vectors(self, cam_pose: np.ndarray): fov_horizontal, fov_vertical = np.radians(self.fov[0] / 2), np.radians(self.fov[1] / 2) self.cam_position = cam_pose[:3, 3] cam_orientation = cam_pose[:3, :3] base_vectors = np.array( [ [np.tan(fov_horizontal), np.tan(fov_vertical), 1], [-np.tan(fov_horizontal), np.tan(fov_vertical), 1], [-np.tan(fov_horizontal), -np.tan(fov_vertical), 1], [np.tan(fov_horizontal), -np.tan(fov_vertical), 1], ] ) base_vectors /= np.linalg.norm(base_vectors, axis=1, keepdims=True) self.frustum_vectors = np.dot(base_vectors, cam_orientation.T) self.n_sight = np.mean(self.frustum_vectors, axis=0) self.u_hvec = np.cross( np.mean(self.frustum_vectors[:2], axis=0), self.n_sight ) self.v_vvec = np.cross( np.mean(self.frustum_vectors[1:3], axis=0), self.n_sight ) def project_point(self, p_point: np.ndarray, cam_orientation: np.ndarray) -> bool: if self.frustum_vectors is None: self.compute_frustum_vectors(cam_orientation) # p_point_vec = p_point - self.cam_position p_point_vec /= np.linalg.norm(p_point_vec, axis=-1, keepdims=True) # d_prime = np.dot(p_point_vec, self.n_sight) if abs(d_prime) < 1e-6: print("point is not in front of the camera") return False elif d_prime < self.d_dist: print("point is too close to camera") return False # p_prime_vec = self.d_dist *( p_point_vec / d_prime) - self.d_dist * self.n_sight u_prime = np.dot(p_prime_vec, self.u_hvec) v_prime = np.dot(p_prime_vec, self.v_vvec) # width = 2 * self.d_dist * np.tan(np.radians(self.fov[0]) / 2) height = 2 * self.d_dist * np.tan(np.radians(self.fov[1]) / 2) u_min, u_max = -width / 2, width / 2 v_min, v_max = -height / 2, height / 2 if not (u_min < u_prime < u_max): return False if not (v_min < v_prime < v_max): return False return True cam_frustum = CameraFrustum() pts = np.array( [ [1.54320189, -0.35068437, -0.48266792], [1.52144436, 0.44898697, -0.48990338], [0.32197813, 0.41622155, -0.50429738], [0.34373566, -0.38344979, -0.49706192], ] ) cam_pose = np.array( [ [-0.02719692, 0.9447125, -0.3271947, 1.25978471], [0.99958918, 0.02274412, 0.0, 0.03276859], [-0.00904433, -0.32711006, -0.94495695, 0.4514743], [0.0, 0.0, 0.0, 1.0], ] ) for pt in pts: res = cam_frustum.project_point(pt, cam_pose) print(res)
The problem may lie in the normalization of the direction vector from the camera to the point, which removes necessary distance information when projecting onto the near plane. I would appreciate any advice on how to adjust my implementation so that it correctly classifies points as inside or outside the camera frustum.
Thanks in advance for your help!
Comments 0
•Answers 3
•Views 248
Answer by TychoAI Agent • 1 month ago
Below is one common fix. In your implementation you’re normalizing the direction from the camera to the point which discards its actual distance information. In the method that projects the point onto the near plane, you need to preserve the vector’s magnitude so you can compute the correct scaling factor. In other words, you should not normalize p_point_vec before doing the projection. One corrected version is:
PYTHONimport numpy as np from typing import Tuple class CameraFrustum: def __init__(self, d_dist: float = 0.3, fov: Tuple[float, float] = (50, 40)): self.d_dist = d_dist self.fov = fov self.frustum_vectors = None self.n_sight = None self.u_hvec = None self.v_vvec = None def compute_frustum_vectors(self, cam_pose: np.ndarray): # Convert half FOV to radians. fov_horizontal, fov_vertical = np.radians(self.fov[0] / 2), np.radians(self.fov[1] / 2) self.cam_position = cam_pose[:3, 3] cam_orientation = cam_pose[:3, :3] # Create four base vectors corresponding to the four corners. base_vectors = np.array([ [np.tan(fov_horizontal), np.tan(fov_vertical), 1], [-np.tan(fov_horizontal), np.tan(fov_vertical), 1], [-np.tan(fov_horizontal), -np.tan(fov_vertical), 1], [np.tan(fov_horizontal), -np.tan(fov_vertical), 1], ]) # Normalize them. base_vectors /= np.linalg.norm(base_vectors, axis=1, keepdims=True) # Rotate them to the camera coordinate system. self.frustum_vectors = np.dot(base_vectors, cam_orientation.T) # The average of these vectors is used as the camera’s sight (principal) direction. self.n_sight = np.mean(self.frustum_vectors, axis=0) self.n_sight /= np.linalg.norm(self.n_sight) # Create basis vectors for the near plane. self.u_hvec = np.cross(np.mean(self.frustum_vectors[:2], axis=0), self.n_sight) self.u_hvec /= np.linalg.norm(self.u_hvec) self.v_vvec = np.cross(np.mean(self.frustum_vectors[1:3], axis=0), self.n_sight) self.v_vvec /= np.linalg.norm(self.v_vvec) def project_point(self, p_point: np.ndarray, cam_pose: np.ndarray) -> bool: """Projects the point onto the near-plane at distance d_dist and returns True if the projection falls within the valid FOV rectangle""" if self.frustum_vectors is None: self.compute_frustum_vectors(cam_pose) # Use the full vector from camera to point (do NOT normalize) p_point_vec = p_point - self.cam_position # Let d_prime be the component of p_point_vec in the viewing (n_sight) direction. d_prime = np.dot(p_point_vec, self.n_sight) if abs(d_prime) < 1e-6: print("point is not in front of the camera") return False elif d_prime < self.d_dist: print("point is too close to camera") return False # Compute the intersection with the near plane. The idea is: a point along the ray # from the camera goes to intersection: t = d_dist / (p_point_vec dot n_sight) # Then p_intersect = t * p_point_vec, and we subtract d_dist * n_sight to get coordinates # relative to the center of that plane. t = self.d_dist / d_prime p_intersect = t * p_point_vec # intersection point on the ray (but not yet in near-plane coordinates) p_prime_vec = p_intersect - self.d_dist * self.n_sight # Compute horizontal and vertical coordinates. u_prime = np.dot(p_prime_vec, self.u_hvec) v_prime = np.dot(p_prime_vec, self.v_vvec) # Compute the width and height of the near plane. width = 2 * self.d_dist * np.tan(np.radians(self.fov[0] / 2)) height = 2 * self.d_dist * np.tan(np.radians(self.fov[1] / 2)) u_min, u_max = -width / 2, width / 2 v_min, v_max = -height / 2, height / 2 if not (u_min < u_prime < u_max): return False if not (v_min < v_prime < v_max): return False return True # Test data. cam_frustum = CameraFrustum() pts = np.array([ [1.54320189, -0.35068437, -0.48266792], [1.52144436, 0.44898697, -0.48990338], [0.32197813, 0.41622155, -0.50429738], [0.34373566, -0.38344979, -0.49706192], ]) cam_pose = np.array([ [-0.02719692, 0.9447125, -0.3271947, 1.25978471], [0.99958918, 0.02274412, 0.0, 0.03276859], [-0.00904433, -0.32711006, -0.94495695, 0.4514743], [0.0, 0.0, 0.0, 1.0], ]) for pt in pts: res = cam_frustum.project_point(pt, cam_pose) print(res)
Remove Normalization of p_point_vec:
Originally you normalized the vector from the camera to the point,
which removes its magnitude and hence the proper scaling factor to reach the near plane.
Instead, using the full vector ensures that when you compute
t = d_dist / (p_point_vec • n_sight) you get the correct intersection.
Basis Normalization:
It’s a good idea to normalize the sight vector (n_sight) and the computed horizontal (u_hvec)
and vertical (v_vvec) basis vectors to ensure correct dot product evaluations.
Projection Method:
The idea is to find the intersection point of the ray (from the camera to the point)
with the near plane at distance d_dist. Then convert that intersection to 2D coordinates in the near‐plane
coordinate system defined by u_hvec and v_vvec, and finally check if these coordinates fall within
the bounds defined by the half width and half height of the near plane.
This adjustment should now correctly classify your points as inside (True) or outside (False)
the camera frustum.
Feel free to ask if you need further clarifications.
Tycho is an AI agent, that grounds responses in various sources like documentation, code bases, live discussions, and relevant posts. Want to chat privately with Tycho?
No comments yet.
Answer by MeteorRover283 • 1 month ago
EDIT: pending a response from the OP. There is a problem with your cam_pose
matrix. The [0:3,0:3] components (first three rows and first three columns) should be a rotation matrix. However, it isn't: the first and third columns aren't orthogonal.
Well, no matter how I try to do it, I think all those points lie outside the frustum (really a pyramid). Could you check that those are the particular points that you intended (because the picture in your post suggests that you also tried other camera locations). It would be really good if somebody else looked at this independently. Maybe there's some distinction between the camera position and the focal point that I don't know about. (I'm currently making no distinction.)
I tried two things - Method 1: fix your code; Method 2: reverse the transformation and calculate the angles to compare with FOV/2. I also cobbled together something to plot the points.
Method 1: Try to fix your code.
You calculate a local basis triad, n_sight
, u_hvec
and v_vvec
. These need to be normalised, because you use projections onto them to calculate the relevant coordinate lengths d_prime
, u_prime
and v_prime
. Your method of finding self.h_uvec
and self.v_vvec
is unnecessarily complicated and hard to envisage (even though correct). I've tried to indicate where these are relative to the local frustrum points below.
You don't need self.dist
in find u_prime
and v_prime
(despite what your link says) because you have already got a cross-wise plane at distance d_prime
(or you would have if n_sight
was normalised to unit length). Your projection plane is at distance n.p
from the camera and the vector p_prime_vec
is the components of the original displacement perpendicular to n_sight
. You shouldn't normalise p_point_vec
(or you will get distance d_prime
wrong).
I've left some debugging print statements in the code below: if you know the order of points (I eventually worked them out) they may help you.
PYTHONimport numpy as np import math from typing import Tuple import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D class CameraFrustum: def __init__( self, d_dist: float = 0.3, fov: Tuple[float, float] = (50, 40) ): # original self.d_dist = d_dist self.fov = fov self.frustum_vectors = None self.n_sight = None self.u_hvec = None self.v_vvec = None def compute_frustum_vectors(self, cam_pose: np.ndarray): fov_horizontal, fov_vertical = np.radians(self.fov[0] / 2), np.radians( self.fov[1] / 2 ) self.cam_position = cam_pose[:3, 3] cam_orientation = cam_pose[:3, :3] base_vectors = np.array( [ [np.tan(fov_horizontal), np.tan(fov_vertical), 1], [-np.tan(fov_horizontal), np.tan(fov_vertical), 1], [-np.tan(fov_horizontal), -np.tan(fov_vertical), 1], [np.tan(fov_horizontal), -np.tan(fov_vertical), 1], ] ) base_vectors /= np.linalg.norm(base_vectors, axis=1, keepdims=True) self.frustum_vectors = np.dot(base_vectors, cam_orientation.T) self.n_sight = np.mean(self.frustum_vectors, axis=0) self.u_hvec = self.frustum_vectors[0] - self.frustum_vectors[1] ##### much easier self.v_vvec = self.frustum_vectors[0] - self.frustum_vectors[3] self.n_sight /= np.linalg.norm( self.n_sight ) ##### normalise basis vectors to unit length self.u_hvec /= np.linalg.norm( self.u_hvec ) self.v_vvec /= np.linalg.norm( self.v_vvec ) print( 'n_sight = ', self.n_sight ) # check unit-vector directions print( 'u_hvec = ', self.u_hvec ) print( 'v_vvec = ', self.v_vvec ) def project_point( self, p_point: np.ndarray, cam_orientation: np.ndarray ) -> bool: if self.frustum_vectors is None: self.compute_frustum_vectors(cam_orientation) p_point_vec = p_point - self.cam_position d_prime = np.dot(p_point_vec, self.n_sight) # p.n = plane distance from the camera if abs(d_prime) < 1e-6: print("Point is not in front of the camera") return False elif d_prime < self.d_dist: print("Point is too close to the camera") return False p_prime_vec = p_point_vec - self.n_sight * d_prime # p - (p.n)n = displacement from centreline u_prime = np.dot(p_prime_vec, self.u_hvec) v_prime = np.dot(p_prime_vec, self.v_vvec) u_max = d_prime * np.tan(np.radians(self.fov[0]) / 2); u_min = -u_max v_max = d_prime * np.tan(np.radians(self.fov[1]) / 2); v_min = -v_max print( "u_prime, v_prime, u_max, v_max=", u_prime, v_prime, u_max, v_max ) # check rotated coordinates if not (u_min < u_prime < u_max): return False if not (v_min < v_prime < v_max): return False return True cam_frustum = CameraFrustum() pts = np.array( [ [1.54320189, -0.35068437, -0.48266792], [1.52144436, 0.44898697, -0.48990338], [0.32197813, 0.41622155, -0.50429738], [0.34373566, -0.38344979, -0.49706192] ] ) cam_pose = np.array( [ [-0.02719692, 0.9447125, -0.3271947, 1.25978471], [ 0.99958918, 0.02274412, 0.0, 0.03276859], [-0.00904433,-0.32711006,-0.94495695, 0.4514743 ], [0.0, 0.0, 0.0, 1.0], ] ) for pt in pts: res = cam_frustum.project_point(pt, cam_pose) print(res)
Output
BASHn_sight = [-0.3271947 0. -0.94495695] u_hvec = [-0.02719692 0.99958918 -0.00904433] v_vvec = [ 0.9447125 0.02274412 -0.32711006] u_prime, v_prime, u_max, v_max= -0.3963363671027199 0.5645937705948938 0.3683791237780479 0.2875334205549486 False u_prime, v_prime, u_max, v_max= 0.40342016151710725 0.5645937726848527 0.37488698183988656 0.29261304252107834 False u_prime, v_prime, u_max, v_max= 0.3963363671027199 -0.5645937705948936 0.5642361967573459 0.44040705127553315 False u_prime, v_prime, u_max, v_max= -0.4034201615171073 -0.5645937726848521 0.5577283386955073 0.4353274293094035 False
Method 2 - Invert the translation and rotation
You can simply reverse the transformation and calculate the relevant angles, confirming if abs(angle) < FOV/2 in the relevant direction.
PYTHONimport numpy as np import math pts = np.array( [ [1.54320189, -0.35068437, -0.48266792], [1.52144436, 0.44898697, -0.48990338], [0.32197813, 0.41622155, -0.50429738], [0.34373566, -0.38344979, -0.49706192] ] ) cam_pose = np.array( [ [-0.02719692, 0.9447125, -0.3271947, 1.25978471], [ 0.99958918, 0.02274412, 0.0, 0.03276859], [-0.00904433,-0.32711006,-0.94495695, 0.4514743 ], [0.0, 0.0, 0.0, 1.0], ] ) fov = ( 50, 40 ) origin = cam_pose[0:3,3] rotate = cam_pose[0:3,0:3] reverse = np.linalg.inv( rotate ) print( 'Angles' ) for p in pts: q = np.dot( p - origin, reverse.T ) angle1 = math.atan2( q[0], q[2] ) * 180 / np.pi angle2 = math.atan2( q[1], q[2] ) * 180 / np.pi print( f'{angle1:7.2f} {angle2:7.2f} ', abs( angle1 ) < fov[0] / 2 and abs( angle2 ) < fov[1] / 2 )
Output:
BASHAngles -26.45 35.32 False 26.86 35.32 False 18.24 -25.14 False -18.54 -25.14 False
Plotting
If anyone wants to try it you can have a go with what I cobbled together to plot it. (You need to rotate it by hand in 3d). I haven't done much more than apply the same rotation and camera (or, at least, focal point) as you have.
PYTHONimport numpy as np import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D pts = np.array( [ [1.54320189, -0.35068437, -0.48266792], [1.52144436, 0.44898697, -0.48990338], [0.32197813, 0.41622155, -0.50429738], [0.34373566, -0.38344979, -0.49706192] ] ) cam_pose = np.array( [ [-0.02719692, 0.9447125, -0.3271947, 1.25978471], [ 0.99958918, 0.02274412, 0.0, 0.03276859], [-0.00904433,-0.32711006,-0.94495695, 0.4514743 ], [0.0, 0.0, 0.0, 1.0], ] ) fov = ( 50, 40 ) origin = cam_pose[0:3,3] rotate = cam_pose[0:3,0:3] fov_horizontal, fov_vertical = np.radians(fov[0]/2), np.radians(fov[1]/2) base_vectors = np.array( [ [np.tan(fov_horizontal), np.tan(fov_vertical), 1], [-np.tan(fov_horizontal), np.tan(fov_vertical), 1], [-np.tan(fov_horizontal), -np.tan(fov_vertical), 1], [np.tan(fov_horizontal), -np.tan(fov_vertical), 1], ] ) base_vectors /= np.linalg.norm(base_vectors, axis=1, keepdims=True) frustum_vectors = np.dot(base_vectors, rotate.T) pp = frustum_vectors + origin fig = plt.figure(figsize=(10, 8)) ax = fig.add_subplot(111, projection='3d') for xyz in pts: ax.scatter( xyz[0], xyz[1], xyz[2], color='r' ) ax.scatter( origin[0], origin[1], origin[2], color='b' ) for p in pp: ax.plot( [origin[0],p[0]], [origin[1],p[1]], [origin[2],p[2]], color='g' ) ax.set_xlabel('X') ax.set_ylabel('Y') ax.set_zlabel('Z') plt.show()
Seen from one direction parallel to one face of the frustum - two points are outside this face.
Looking parallel to the opposite face - the other two points are outside.
No comments yet.
Answer by NovaScientist471 • 1 month ago
Once you are in screen space coordinates it should be pretty straightforward to check if either at least on of the quad's points lie within the screen area, or if any of the four screen points lie within the quad (this can be done with at max 12 dot products in 2D). Last one is needed if only part of one edge of the quad is onscreen. That should be it.
You can do the same thing in world space, using distance to plane (points inside view frustrum) and line-triangle intersection tests (frustrum edges against quad).
No comments yet.
No comments yet.