🌟 特征提取魔法指南
🎨 在图像处理的世界里,特征提取就像是寻找图像的"指纹",让我们能够识别和理解图像的独特性。让我们一起来探索这些神奇的特征提取术吧!
📚 目录
- 基础概念 - 特征的"体检"
- Harris角点 - 图像的"关节"
- SIFT特征 - 图像的"全身体检"
- SURF特征 - 图像的"快速体检"
- ORB特征 - 图像的"经济体检"
- 特征匹配 - 图像的"认亲"
- 性能优化 - "体检"的加速器
- 实战应用 - "体检"的实践
1. 什么是特征提取?
特征提取就像是给图像做"体检",主要目的是:
- 🔍 发现图像中的关键信息
- 🎯 提取有意义的特征
- 🛠️ 降低数据维度
- 📊 提高识别效率
常见的特征包括:
- 角点特征(图像的"关节")
- SIFT特征(图像的"指纹")
- SURF特征(图像的"快速指纹")
- ORB特征(图像的"经济指纹")
2. Harris角点检测
2.1 基本原理
角点检测就像是寻找图像中的"关节",这些点通常具有以下特点:
- 在两个方向上都有明显变化
- 对旋转和光照变化不敏感
- 具有局部唯一性
数学表达式:
Harris角点检测的响应函数:
$R = \det(M) - k \cdot \mathrm{trace}(M)^2$
其中:
- $M$ 是自相关矩阵
- $k$ 是经验常数(通常取0.04-0.06)
- $\det(M)$ 是矩阵的行列式
- $\mathrm{trace}(M)$ 是矩阵的迹
2.2 手动实现
C++实现
// Harris corner detector (manual implementation).
//
// For every pixel, sums the gradient auto-correlation matrix
//   M = [a b; b c] (a = sum Ix^2, b = sum IxIy, c = sum Iy^2)
// over a blockSize x blockSize window and writes the response
// R = det(M) - k * trace(M)^2 into dst. Windows overlapping the
// border are truncated at the image edge rather than skipped.
//
// src       single-channel input image
// dst       output CV_32F response map, same size as src
// k         Harris sensitivity constant (typically 0.04-0.06)
// blockSize side length of the (odd) summation window
void harris_corner_detection(const Mat& src, Mat& dst,
                             double k = 0.04, int blockSize = 3) {
    CV_Assert(!src.empty() && src.channels() == 1);
    // Image gradients via 3x3 Sobel
    Mat dx, dy;
    Sobel(src, dx, CV_32F, 1, 0, 3);
    Sobel(src, dy, CV_32F, 0, 1, 3);
    // Per-pixel products forming the auto-correlation matrix entries
    Mat dx2 = dx.mul(dx);
    Mat dy2 = dy.mul(dy);
    Mat dxdy = dx.mul(dy);
    // Response map
    dst.create(src.size(), CV_32F);
    for (int y = 0; y < src.rows; y++) {
        for (int x = 0; x < src.cols; x++) {
            float a = 0, b = 0, c = 0;
            // Sum matrix entries over the local window (clipped at borders)
            for (int i = -blockSize/2; i <= blockSize/2; i++) {
                for (int j = -blockSize/2; j <= blockSize/2; j++) {
                    int ny = y + i;
                    int nx = x + j;
                    if (ny >= 0 && ny < src.rows && nx >= 0 && nx < src.cols) {
                        a += dx2.at<float>(ny, nx);
                        b += dxdy.at<float>(ny, nx);
                        c += dy2.at<float>(ny, nx);
                    }
                }
            }
            // R = det(M) - k * trace(M)^2
            float det = a * c - b * b;
            float trace = a + c;
            dst.at<float>(y, x) = det - k * trace * trace;
        }
    }
}
Python实现
def harris_corner_detection_manual(image, k=0.04, block_size=3):
    """Manual Harris corner detection.

    Computes the response R = det(M) - k * trace(M)^2 per pixel, where M is
    the gradient auto-correlation matrix summed over a block_size x
    block_size window.

    Fix: the original used C++ ``//`` comments inside the function body,
    which is a Python syntax error; they are replaced with ``#`` comments.

    Args:
        image: input image (BGR input is converted to grayscale).
        k: Harris sensitivity constant (typically 0.04-0.06).
        block_size: side length of the summation window.

    Returns:
        float32 response map; pixels within block_size // 2 of the border
        are left at 0.
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()
    # Image gradients via 3x3 Sobel
    dx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
    dy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
    # Per-pixel products forming the auto-correlation matrix entries
    dx2 = dx * dx
    dy2 = dy * dy
    dxdy = dx * dy
    # Response map
    height, width = gray.shape
    response = np.zeros((height, width), dtype=np.float32)
    offset = block_size // 2
    for y in range(offset, height - offset):
        for x in range(offset, width - offset):
            # Sum the matrix entries over the local window
            a = np.sum(dx2[y-offset:y+offset+1, x-offset:x+offset+1])
            b = np.sum(dxdy[y-offset:y+offset+1, x-offset:x+offset+1])
            c = np.sum(dy2[y-offset:y+offset+1, x-offset:x+offset+1])
            # R = det(M) - k * trace(M)^2
            det = a * c - b * b
            trace = a + c
            response[y, x] = det - k * trace * trace
    return response
3. SIFT特征
3.1 基本原理
SIFT(Scale-Invariant Feature Transform)就像是图像的"全身体检",不管图像怎么变化(旋转、缩放),都能找到稳定的特征点。
主要步骤:
-
尺度空间构建(多角度检查):
$L(x,y,\sigma) = G(x,y,\sigma) * I(x,y)$
其中:- $G(x,y,\sigma)$ 是高斯核
- $I(x,y)$ 是输入图像
- $\sigma$ 是尺度参数
-
关键点定位(找到重点):
$D(x,y,\sigma) = L(x,y,k\sigma) - L(x,y,\sigma)$ -
方向分配(确定朝向):
- 计算梯度方向直方图
- 选择主方向
3.2 手动实现
C++实现
// SIFT-style feature extraction pipeline (simplified teaching version):
// Gaussian pyramid -> DoG stack -> scale-space extrema -> descriptors.
//
// src         input image
// keypoints   output detected keypoints
// descriptors output 128-dim float descriptors, one row per keypoint
// nfeatures   kept for API symmetry; currently unused (no limit applied)
void sift_features(const Mat& src, vector<KeyPoint>& keypoints,
                   Mat& descriptors, int nfeatures = 0) {
    CV_Assert(!src.empty());
    // Build the Gaussian pyramid (4 octaves)
    vector<Mat> gaussian_pyramid;
    buildGaussianPyramid(src, gaussian_pyramid, 4);
    // Build the difference-of-Gaussians stack
    vector<Mat> dog_pyramid;
    buildDoGPyramid(gaussian_pyramid, dog_pyramid);
    // Scale-space extrema become keypoints
    detectKeypoints(dog_pyramid, keypoints);
    // Orientation-histogram descriptors
    computeDescriptors(gaussian_pyramid, keypoints, descriptors);
}
void buildGaussianPyramid(const Mat& src, vector<Mat>& pyramid, int nOctaves) {
pyramid.clear();
Mat current = src.clone();
for (int i = 0; i < nOctaves; i++) {
// 存储当前八度的图像
pyramid.push_back(current);
// 下采样
Mat down;
pyrDown(current, down);
current = down;
}
}
// Build a difference-of-Gaussians stack by subtracting adjacent pyramid
// levels: dog[i] = pyramid[i+1] - pyramid[i].
// NOTE(review): buildGaussianPyramid halves the resolution at each level,
// so pyramid[i+1] and pyramid[i] have different sizes and cv::subtract
// will throw at runtime. A true SIFT DoG subtracts images of the SAME
// octave blurred with increasing sigma -- confirm the intended pyramid
// layout before use.
void buildDoGPyramid(const vector<Mat>& gaussian_pyramid,
                     vector<Mat>& dog_pyramid) {
    dog_pyramid.clear();
    for (size_t i = 0; i < gaussian_pyramid.size() - 1; i++) {
        Mat dog;
        subtract(gaussian_pyramid[i + 1], gaussian_pyramid[i], dog);
        dog_pyramid.push_back(dog);
    }
}
// Detect scale-space extrema: a pixel becomes a keypoint when it is strictly
// greater (or strictly smaller) than all 26 neighbours in its 3x3x3
// neighbourhood spanning the previous, current and next DoG levels.
// NOTE(review): the DoG levels built by buildDoGPyramid have different
// resolutions, so indexing prev/next with the current level's (x, y) is
// misaligned and can read out of bounds on the smaller `next` level.
// NOTE(review): keypoint coordinates are in level-i coordinates, not
// rescaled to the base image; size 1.6 * 2^(i/3) assumes 3 scales per
// octave -- verify against the pyramid layout.
void detectKeypoints(const vector<Mat>& dog_pyramid,
                     vector<KeyPoint>& keypoints) {
    keypoints.clear();
    for (size_t i = 1; i < dog_pyramid.size() - 1; i++) {
        const Mat& prev = dog_pyramid[i - 1];
        const Mat& curr = dog_pyramid[i];
        const Mat& next = dog_pyramid[i + 1];
        for (int y = 1; y < curr.rows - 1; y++) {
            for (int x = 1; x < curr.cols - 1; x++) {
                // Is the centre pixel an extremum of its 3x3x3 neighbourhood?
                float val = curr.at<float>(y, x);
                bool is_max = true;
                bool is_min = true;
                // Early exit once the pixel is neither a max nor a min
                for (int dy = -1; dy <= 1 && (is_max || is_min); dy++) {
                    for (int dx = -1; dx <= 1 && (is_max || is_min); dx++) {
                        // Adjacent DoG levels
                        if (val <= prev.at<float>(y + dy, x + dx)) is_max = false;
                        if (val >= prev.at<float>(y + dy, x + dx)) is_min = false;
                        if (val <= next.at<float>(y + dy, x + dx)) is_max = false;
                        if (val >= next.at<float>(y + dy, x + dx)) is_min = false;
                        // Current level (skip the centre pixel itself)
                        if (dx != 0 || dy != 0) {
                            if (val <= curr.at<float>(y + dy, x + dx)) is_max = false;
                            if (val >= curr.at<float>(y + dy, x + dx)) is_min = false;
                        }
                    }
                }
                if (is_max || is_min) {
                    KeyPoint kp(x, y, 1.6 * pow(2, i/3.0));
                    keypoints.push_back(kp);
                }
            }
        }
    }
}
void computeDescriptors(const vector<Mat>& gaussian_pyramid,
const vector<KeyPoint>& keypoints,
Mat& descriptors) {
descriptors.create(keypoints.size(), 128, CV_32F);
for (size_t i = 0; i < keypoints.size(); i++) {
const KeyPoint& kp = keypoints[i];
float* desc = descriptors.ptr<float>(i);
// 计算梯度方向直方图
Mat mag, angle;
const Mat& img = gaussian_pyramid[kp.octave];
Sobel(img, mag, CV_32F, 1, 0);
Sobel(img, angle, CV_32F, 0, 1);
// 计算描述子
for (int y = -4; y < 4; y++) {
for (int x = -4; x < 4; x++) {
int px = kp.pt.x + x;
int py = kp.pt.y + y;
if (px >= 0 && px < img.cols && py >= 0 && py < img.rows) {
float m = mag.at<float>(py, px);
float a = angle.at<float>(py, px);
// 将梯度方向量化为8个方向
int bin = (int)(a * 8 / (2 * CV_PI)) % 8;
desc[bin] += m;
}
}
}
// 归一化描述子
float norm = 0;
for (int j = 0; j < 128; j++) {
norm += desc[j] * desc[j];
}
norm = sqrt(norm);
for (int j = 0; j < 128; j++) {
desc[j] /= norm;
}
}
}
Python实现
def sift_features_manual(image, n_features=0):
    """Simplified SIFT-style feature extraction.

    Pipeline: Gaussian pyramid -> DoG stack -> 3x3x3 extrema detection ->
    orientation-histogram descriptors.

    Args:
        image: input image (BGR input is converted to grayscale).
        n_features: kept for API symmetry; currently unused (no limit).

    Returns:
        (keypoints, descriptors) tuple.
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()
    # Gaussian pyramid (4 octaves)
    gaussian_pyramid = build_gaussian_pyramid(gray, n_octaves=4)
    # Difference-of-Gaussians stack
    dog_pyramid = build_dog_pyramid(gaussian_pyramid)
    # Scale-space extrema
    keypoints = detect_keypoints(dog_pyramid)
    # Orientation-histogram descriptors
    descriptors = compute_descriptors(gaussian_pyramid, keypoints)
    return keypoints, descriptors
def build_gaussian_pyramid(image, n_octaves):
    """Build an n_octaves-level pyramid; each level is the previous one
    downsampled to half resolution with cv2.pyrDown."""
    pyramid = []
    level = image.copy()
    for _ in range(n_octaves):
        pyramid.append(level)
        level = cv2.pyrDown(level)
    return pyramid
def build_dog_pyramid(gaussian_pyramid):
    """Build a difference-of-Gaussians stack from adjacent pyramid levels.

    NOTE(review): build_gaussian_pyramid halves the resolution per level, so
    gaussian_pyramid[i + 1] and gaussian_pyramid[i] have different shapes
    and cv2.subtract will raise. A true SIFT DoG subtracts same-octave
    images blurred with increasing sigma -- confirm the intended layout.
    """
    dog_pyramid = []
    for i in range(len(gaussian_pyramid) - 1):
        dog = cv2.subtract(gaussian_pyramid[i + 1], gaussian_pyramid[i])
        dog_pyramid.append(dog)
    return dog_pyramid
def detect_keypoints(dog_pyramid):
    """Find scale-space extrema in the DoG stack.

    A pixel is kept when it is strictly larger (or strictly smaller) than
    all 26 neighbours in the 3x3x3 neighbourhood spanning the previous,
    current and next DoG levels.

    NOTE(review): the DoG levels built by build_dog_pyramid have different
    resolutions, so indexing prev/next with the current level's (x, y) is
    misaligned and can read out of bounds on the smaller levels.
    NOTE(review): the local `next` shadows the Python builtin.
    """
    keypoints = []
    for i in range(1, len(dog_pyramid) - 1):
        prev = dog_pyramid[i - 1]
        curr = dog_pyramid[i]
        next = dog_pyramid[i + 1]
        for y in range(1, curr.shape[0] - 1):
            for x in range(1, curr.shape[1] - 1):
                val = curr[y, x]
                is_max = True
                is_min = True
                # Compare against the full 3x3x3 neighbourhood
                for dy in range(-1, 2):
                    for dx in range(-1, 2):
                        # Adjacent DoG levels
                        if val <= prev[y + dy, x + dx]: is_max = False
                        if val >= prev[y + dy, x + dx]: is_min = False
                        if val <= next[y + dy, x + dx]: is_max = False
                        if val >= next[y + dy, x + dx]: is_min = False
                        # Current level (skip the centre pixel itself)
                        if dx != 0 or dy != 0:
                            if val <= curr[y + dy, x + dx]: is_max = False
                            if val >= curr[y + dy, x + dx]: is_min = False
                if is_max or is_min:
                    # Size 1.6 * 2^(i/3) assumes 3 scales per octave
                    kp = cv2.KeyPoint(x, y, 1.6 * (2 ** (i/3.0)))
                    keypoints.append(kp)
    return keypoints
def compute_descriptors(gaussian_pyramid, keypoints):
    """Compute 128-dim SIFT-like descriptors.

    For each keypoint an 8-bin gradient-orientation histogram is accumulated
    over an 8x8 patch, then L2-normalised.

    Fixes vs. the original:
      - ``mag.at<float>(py, px)`` was C++ syntax (a SyntaxError in Python);
        replaced with numpy indexing;
      - the local ``bin`` shadowed the builtin; renamed ``bin_idx``.

    NOTE(review): kp.octave defaults to 0 for keypoints built by
    detect_keypoints, so all descriptors sample the base level.
    """
    descriptors = np.zeros((len(keypoints), 128), dtype=np.float32)
    for i, kp in enumerate(keypoints):
        img = gaussian_pyramid[kp.octave]
        desc = descriptors[i]
        # Gradient magnitude / direction (angle in [0, 2*pi))
        mag, angle = cv2.cartToPolar(
            cv2.Sobel(img, cv2.CV_32F, 1, 0),
            cv2.Sobel(img, cv2.CV_32F, 0, 1)
        )
        # Accumulate an 8-bin orientation histogram over an 8x8 patch
        for y in range(-4, 4):
            for x in range(-4, 4):
                px = int(kp.pt[0] + x)
                py = int(kp.pt[1] + y)
                if 0 <= px < img.shape[1] and 0 <= py < img.shape[0]:
                    m = mag[py, px]
                    a = angle[py, px]
                    # Quantise the gradient direction into 8 bins
                    bin_idx = int(a * 8 / (2 * np.pi)) % 8
                    desc[bin_idx] += m
        # L2-normalise (skip flat patches)
        norm = np.linalg.norm(desc)
        if norm > 0:
            desc /= norm
    return descriptors
4. SURF特征
4.1 基本原理
SURF(Speeded-Up Robust Features)就像是SIFT的"快速体检版",用积分图像和盒子滤波器加速计算。
核心思想:
$H(x,y) = D_{xx}(x,y)\,D_{yy}(x,y) - (D_{xy}(x,y))^2$
其中:
- $D_{xx}$ 是x方向二阶导
- $D_{yy}$ 是y方向二阶导
- $D_{xy}$ 是xy方向二阶导
4.2 手动实现
C++实现
void surf_features(const Mat& src, vector<KeyPoint>& keypoints,
Mat& descriptors, int nfeatures = 0) {
CV_Assert(!src.empty());
// 计算积分图
Mat integral;
integral(src, integral, CV_32F);
// 使用Hessian矩阵检测特征点
detectSurfFeatures(integral, keypoints);
// 计算描述子
computeSurfDescriptors(integral, keypoints, descriptors);
}
// Detect blob-like keypoints by computing the determinant of the Hessian at
// several filter scales from the integral image, then thinning each scale's
// response map with a 3x3 non-maximum suppression.
// NOTE(review): the 0.81f factor is 0.9^2, the Dxy weighting from the SURF
// paper.
// NOTE(review): every positive determinant is accepted (no minimum Hessian
// threshold), which can produce a very large number of keypoints.
void detectSurfFeatures(const Mat& integral, vector<KeyPoint>& keypoints) {
    keypoints.clear();
    // Filter sizes derived from a 9-pixel base kernel
    vector<float> scales = {1.2f, 1.6f, 2.0f, 2.4f, 2.8f};
    for (float scale : scales) {
        int size = (int)(scale * 9);
        if (size % 2 == 0) size++;
        // Hessian-determinant response map for this scale
        Mat det = Mat::zeros(integral.rows, integral.cols, CV_32F);
        for (int y = size/2; y < integral.rows - size/2; y++) {
            for (int x = size/2; x < integral.cols - size/2; x++) {
                // Box-filter approximations of the second derivatives
                float dxx = calculateHessian(integral, x, y, size, 0);
                float dyy = calculateHessian(integral, x, y, size, 1);
                float dxy = calculateHessian(integral, x, y, size, 2);
                // det(H) with weighted Dxy term
                float hessian = dxx * dyy - 0.81f * dxy * dxy;
                if (hessian > 0) {
                    det.at<float>(y, x) = hessian;
                }
            }
        }
        // 3x3 non-maximum suppression over the response map
        for (int y = size/2 + 1; y < det.rows - size/2 - 1; y++) {
            for (int x = size/2 + 1; x < det.cols - size/2 - 1; x++) {
                float val = det.at<float>(y, x);
                if (val > 0) {
                    bool is_max = true;
                    for (int dy = -1; dy <= 1 && is_max; dy++) {
                        for (int dx = -1; dx <= 1 && is_max; dx++) {
                            if (dx == 0 && dy == 0) continue;
                            if (val <= det.at<float>(y + dy, x + dx)) {
                                is_max = false;
                            }
                        }
                    }
                    if (is_max) {
                        KeyPoint kp(x, y, size);
                        keypoints.push_back(kp);
                    }
                }
            }
        }
    }
}
// Approximate one second-derivative (Hessian) response at (x, y) using box
// filters on the integral image over a size x size support region.
// type: 0 -> Dxx, 1 -> Dyy, 2 -> Dxy.
// NOTE(review): these box layouts only roughly approximate SURF's 9x9
// second-derivative filters -- the lobe positions/extents differ from the
// paper's kernels (e.g. Dyy's outer boxes span the full height). Verify the
// region geometry before relying on the responses.
float calculateHessian(const Mat& integral, int x, int y, int size, int type) {
    int half = size / 2;
    float response = 0;
    switch (type) {
        case 0: // Dxx: positive outer lobes minus a doubled centre lobe
            response = boxFilter(integral, x - half, y - half, size, half) -
                       2 * boxFilter(integral, x - half/2, y - half, half, half) +
                       boxFilter(integral, x, y - half, size, half);
            break;
        case 1: // Dyy: same structure along the y axis
            response = boxFilter(integral, x - half, y - half, size, size) -
                       2 * boxFilter(integral, x - half, y - half/2, size, half) +
                       boxFilter(integral, x - half, y, size, size);
            break;
        case 2: // Dxy: diagonal quadrants with alternating signs
            response = boxFilter(integral, x - half, y - half, size, size) +
                       boxFilter(integral, x, y, size, size) -
                       boxFilter(integral, x - half, y, size, size) -
                       boxFilter(integral, x, y - half, size, size);
            break;
    }
    return response;
}
// Sum of source pixels in the rectangle [x, x+width) x [y, y+height), read
// from the (rows+1) x (cols+1) summed-area table produced by cv::integral
// (integral(r, c) = sum of all pixels above and left of (r, c), exclusive).
//
// Fix: the original sampled the far corners at x+width-1 / y+height-1,
// which drops the last row and column from every box sum; with the
// integral-image convention the far corners are x+width / y+height.
float boxFilter(const Mat& integral, int x, int y, int width, int height) {
    int x1 = max(0, x);
    int y1 = max(0, y);
    int x2 = min(integral.cols - 1, x + width);
    int y2 = min(integral.rows - 1, y + height);
    return integral.at<float>(y2, x2) -
           integral.at<float>(y2, x1) -
           integral.at<float>(y1, x2) +
           integral.at<float>(y1, x1);
}
// Build 64-dim SURF-style descriptors: for each keypoint a 4x4 grid of
// sub-regions; within each sub-region the Haar wavelet responses are
// rotated into the keypoint's dominant orientation and summed as
// (dx, dy, |dx|, |dy|); the final vector is L2-normalised.
// NOTE(review): the final normalisation divides by `norm` without a zero
// guard -- a completely flat patch produces NaNs.
void computeSurfDescriptors(const Mat& integral,
                            const vector<KeyPoint>& keypoints,
                            Mat& descriptors) {
    descriptors.create(keypoints.size(), 64, CV_32F);
    for (size_t i = 0; i < keypoints.size(); i++) {
        const KeyPoint& kp = keypoints[i];
        float* desc = descriptors.ptr<float>(i);
        // Dominant orientation for rotation invariance
        float angle = computeOrientation(integral, kp);
        // 4x4 grid of sub-regions around the keypoint
        for (int y = -2; y < 2; y++) {
            for (int x = -2; x < 2; x++) {
                float dx = 0, dy = 0, abs_dx = 0, abs_dy = 0;
                // 5x5 Haar wavelet samples within the sub-region
                for (int sy = 0; sy < 5; sy++) {
                    for (int sx = 0; sx < 5; sx++) {
                        int px = kp.pt.x + x * 5 + sx;
                        int py = kp.pt.y + y * 5 + sy;
                        if (px >= 0 && px < integral.cols && py >= 0 && py < integral.rows) {
                            float haar_x = haarWavelet(integral, px, py, 2, 0);
                            float haar_y = haarWavelet(integral, px, py, 2, 1);
                            // Rotate responses into the dominant orientation
                            float rot_x = haar_x * cos(angle) + haar_y * sin(angle);
                            float rot_y = -haar_x * sin(angle) + haar_y * cos(angle);
                            dx += rot_x;
                            dy += rot_y;
                            abs_dx += abs(rot_x);
                            abs_dy += abs(rot_y);
                        }
                    }
                }
                // Four components per sub-region: (dx, dy, |dx|, |dy|)
                int idx = (y + 2) * 16 + (x + 2) * 4;
                desc[idx] = dx;
                desc[idx + 1] = dy;
                desc[idx + 2] = abs_dx;
                desc[idx + 3] = abs_dy;
            }
        }
        // L2-normalise the 64-dim vector
        float norm = 0;
        for (int j = 0; j < 64; j++) {
            norm += desc[j] * desc[j];
        }
        norm = sqrt(norm);
        for (int j = 0; j < 64; j++) {
            desc[j] /= norm;
        }
    }
}
// Estimate the keypoint's dominant orientation from Haar wavelet responses
// sampled along rays of increasing radius (6..18, step 2) around the point.
// Returns the angle (radians) whose ray accumulated the largest response.
// NOTE(review): despite the original comment, this scans single directions
// in 0.1 rad steps over the full circle; SURF's 60-degree sliding-window
// accumulation is not implemented.
float computeOrientation(const Mat& integral, const KeyPoint& kp) {
    float angle = 0;
    float max_response = 0;
    // Scan candidate directions over the full circle
    for (float theta = 0; theta < 2 * CV_PI; theta += 0.1f) {
        float response = 0;
        for (int r = 6; r < 20; r += 2) {
            int x = kp.pt.x + r * cos(theta);
            int y = kp.pt.y + r * sin(theta);
            if (x >= 0 && x < integral.cols && y >= 0 && y < integral.rows) {
                response += haarWavelet(integral, x, y, 4, 0) +
                            haarWavelet(integral, x, y, 4, 1);
            }
        }
        if (response > max_response) {
            max_response = response;
            angle = theta;
        }
    }
    return angle;
}
// Haar wavelet response at (x, y) with support `size`, computed via box
// sums on the integral image.
// type 0: x direction (right half minus left half);
// type 1: y direction (bottom half minus top half).
float haarWavelet(const Mat& integral, int x, int y, int size, int type) {
    float response = 0;
    switch (type) {
        case 0: // x direction
            response = boxFilter(integral, x + size/2, y - size/2, size/2, size) -
                       boxFilter(integral, x - size/2, y - size/2, size/2, size);
            break;
        case 1: // y direction
            response = boxFilter(integral, x - size/2, y + size/2, size, size/2) -
                       boxFilter(integral, x - size/2, y - size/2, size, size/2);
            break;
    }
    return response;
}
Python实现
def surf_features_manual(image, n_features=0):
    """Simplified SURF-style feature extraction.

    Pipeline: integral image -> Hessian-determinant keypoint detection ->
    orientation-aligned Haar-wavelet descriptors.

    Args:
        image: input image (BGR input is converted to grayscale).
        n_features: kept for API symmetry; currently unused.

    Returns:
        (keypoints, descriptors) tuple.
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()
    # Summed-area table; cv2.integral returns a (h+1, w+1) array
    integral = cv2.integral(gray.astype(np.float32))
    # Hessian-determinant keypoint detection
    keypoints = detect_surf_features(integral)
    # Orientation-aligned descriptors
    descriptors = compute_surf_descriptors(integral, keypoints)
    return keypoints, descriptors
def detect_surf_features(integral):
    """Detect blob-like keypoints via the determinant of the Hessian.

    The determinant response is computed at several filter scales from the
    integral image, then thinned with a 3x3 non-maximum suppression.

    NOTE(review): 0.81 = 0.9**2 is the Dxy weighting from the SURF paper.
    NOTE(review): every positive determinant is accepted (no minimum
    threshold), which can yield a very large number of keypoints.
    """
    keypoints = []
    # Filter sizes derived from a 9-pixel base kernel
    scales = [1.2, 1.6, 2.0, 2.4, 2.8]
    for scale in scales:
        size = int(scale * 9)
        if size % 2 == 0:
            size += 1
        # Hessian-determinant response map for this scale
        det = np.zeros((integral.shape[0], integral.shape[1]), dtype=np.float32)
        for y in range(size//2, integral.shape[0] - size//2):
            for x in range(size//2, integral.shape[1] - size//2):
                # Box-filter approximations of the second derivatives
                dxx = calculate_hessian(integral, x, y, size, 0)
                dyy = calculate_hessian(integral, x, y, size, 1)
                dxy = calculate_hessian(integral, x, y, size, 2)
                # det(H) with weighted Dxy term
                hessian = dxx * dyy - 0.81 * dxy * dxy
                if hessian > 0:
                    det[y, x] = hessian
        # 3x3 non-maximum suppression over the response map
        for y in range(size//2 + 1, det.shape[0] - size//2 - 1):
            for x in range(size//2 + 1, det.shape[1] - size//2 - 1):
                val = det[y, x]
                if val > 0:
                    is_max = True
                    for dy in range(-1, 2):
                        for dx in range(-1, 2):
                            if dx == 0 and dy == 0:
                                continue
                            if val <= det[y + dy, x + dx]:
                                is_max = False
                                break
                        if not is_max:
                            break
                    if is_max:
                        kp = cv2.KeyPoint(x, y, size)
                        keypoints.append(kp)
    return keypoints
def calculate_hessian(integral, x, y, size, type):
    """Approximate one second-derivative (Hessian) response at (x, y) with
    box filters on the integral image over a size x size support region.

    type: 0 -> Dxx, 1 -> Dyy, otherwise -> Dxy.
    NOTE(review): these box layouts only roughly approximate SURF's 9x9
    second-derivative filters (lobe geometry differs from the paper).
    NOTE(review): the parameter `type` shadows the Python builtin.
    """
    half = size // 2
    response = 0
    if type == 0:  # Dxx: positive outer lobes minus a doubled centre lobe
        response = box_filter(integral, x - half, y - half, size, half) - \
                   2 * box_filter(integral, x - half//2, y - half, half, half) + \
                   box_filter(integral, x, y - half, size, half)
    elif type == 1:  # Dyy: same structure along the y axis
        response = box_filter(integral, x - half, y - half, size, size) - \
                   2 * box_filter(integral, x - half, y - half//2, size, half) + \
                   box_filter(integral, x - half, y, size, size)
    else:  # Dxy: diagonal quadrants with alternating signs
        response = box_filter(integral, x - half, y - half, size, size) + \
                   box_filter(integral, x, y, size, size) - \
                   box_filter(integral, x - half, y, size, size) - \
                   box_filter(integral, x, y - half, size, size)
    return response
def box_filter(integral, x, y, width, height):
    """Sum of source pixels in the rectangle [x, x+width) x [y, y+height).

    `integral` is a summed-area table with one extra leading row/column (as
    returned by cv2.integral): integral[r, c] is the sum of all pixels above
    and to the left of (r, c), exclusive.

    Fix: the original sampled the far corners at x+width-1 / y+height-1,
    which drops the last row and column from every box sum; with the
    integral-image convention the far corners are x+width / y+height.
    """
    x1 = max(0, x)
    y1 = max(0, y)
    x2 = min(integral.shape[1] - 1, x + width)
    y2 = min(integral.shape[0] - 1, y + height)
    return integral[y2, x2] - integral[y2, x1] - integral[y1, x2] + integral[y1, x1]
def compute_surf_descriptors(integral, keypoints):
    """Build 64-dim SURF-style descriptors.

    For each keypoint: a 4x4 grid of sub-regions; within each sub-region the
    Haar wavelet responses are rotated into the keypoint's dominant
    orientation and summed as (dx, dy, |dx|, |dy|); the final vector is
    L2-normalised.
    """
    descriptors = np.zeros((len(keypoints), 64), dtype=np.float32)
    for i, kp in enumerate(keypoints):
        desc = descriptors[i]
        # Dominant orientation for rotation invariance
        angle = compute_orientation(integral, kp)
        # 4x4 grid of sub-regions around the keypoint
        for y in range(-2, 2):
            for x in range(-2, 2):
                dx = dy = abs_dx = abs_dy = 0
                # 5x5 Haar wavelet samples within the sub-region
                for sy in range(5):
                    for sx in range(5):
                        px = int(kp.pt[0] + x * 5 + sx)
                        py = int(kp.pt[1] + y * 5 + sy)
                        if 0 <= px < integral.shape[1] and 0 <= py < integral.shape[0]:
                            haar_x = haar_wavelet(integral, px, py, 2, 0)
                            haar_y = haar_wavelet(integral, px, py, 2, 1)
                            # Rotate responses into the dominant orientation
                            rot_x = haar_x * np.cos(angle) + haar_y * np.sin(angle)
                            rot_y = -haar_x * np.sin(angle) + haar_y * np.cos(angle)
                            dx += rot_x
                            dy += rot_y
                            abs_dx += abs(rot_x)
                            abs_dy += abs(rot_y)
                # Four components per sub-region: (dx, dy, |dx|, |dy|)
                idx = (y + 2) * 16 + (x + 2) * 4
                desc[idx:idx+4] = [dx, dy, abs_dx, abs_dy]
        # L2-normalise (skip flat patches)
        norm = np.linalg.norm(desc)
        if norm > 0:
            desc /= norm
    return descriptors
def compute_orientation(integral, kp):
    """Estimate the keypoint's dominant orientation (radians).

    Haar wavelet responses are sampled along rays of increasing radius
    (6..18, step 2); the direction with the largest accumulated response
    wins.

    NOTE(review): despite the original comment, this scans single directions
    in 0.1 rad steps over the full circle; SURF's 60-degree sliding-window
    accumulation is not implemented.
    """
    angle = 0
    max_response = 0
    # Scan candidate directions over the full circle
    for theta in np.arange(0, 2 * np.pi, 0.1):
        response = 0
        for r in range(6, 20, 2):
            x = int(kp.pt[0] + r * np.cos(theta))
            y = int(kp.pt[1] + r * np.sin(theta))
            if 0 <= x < integral.shape[1] and 0 <= y < integral.shape[0]:
                response += haar_wavelet(integral, x, y, 4, 0) + \
                            haar_wavelet(integral, x, y, 4, 1)
        if response > max_response:
            max_response = response
            angle = theta
    return angle
def haar_wavelet(integral, x, y, size, type):
    """Haar wavelet response at (x, y) with support `size`.

    type 0: x direction (right half minus left half);
    otherwise: y direction (bottom half minus top half).
    NOTE(review): the parameter `type` shadows the Python builtin.
    """
    if type == 0:  # x direction
        return box_filter(integral, x + size//2, y - size//2, size//2, size) - \
               box_filter(integral, x - size//2, y - size//2, size//2, size)
    else:  # y direction
        return box_filter(integral, x - size//2, y + size//2, size, size//2) - \
               box_filter(integral, x - size//2, y - size//2, size, size//2)
5. ORB特征
5.1 基本原理
ORB(Oriented FAST and Rotated BRIEF)就像是"经济实惠型体检",速度快、效果好、还不要钱!
主要组成:
-
FAST角点检测:
- 检测像素圆周上的强度变化
- 快速筛选候选点
-
BRIEF描述子:
- 二进制描述子
- 汉明距离匹配
5.2 手动实现
C++实现
// ORB-style feature extraction pipeline: FAST corner detection ->
// intensity-centroid orientation assignment -> binary (BRIEF-like)
// descriptors.
//
// src         input grayscale image
// keypoints   output keypoints (angle filled in)
// descriptors output binary descriptors, 32 bytes per keypoint
// nfeatures   keep only the strongest nfeatures corners
void orb_features(const Mat& src, vector<KeyPoint>& keypoints,
                  Mat& descriptors, int nfeatures = 500) {
    CV_Assert(!src.empty());
    // FAST corner detection (with response-based selection)
    detectFASTFeatures(src, keypoints, nfeatures);
    // Intensity-centroid orientation per keypoint
    computeOrientation(src, keypoints);
    // Binary descriptors
    computeORBDescriptors(src, keypoints, descriptors);
}
// FAST-style corner detection: a pixel is a corner when at least min_arc of
// the 16 pixels on the surrounding circle are brighter (or darker) than the
// centre by `threshold`. If more than nfeatures corners are found, only the
// strongest (by calculateFASTResponse) are kept.
// NOTE(review): `fast_circle` is an external 16-entry offset table that is
// not defined in this file -- confirm it matches the Bresenham circle of
// radius 3 used by FAST.
// NOTE(review): this counts bright/dark pixels anywhere on the circle;
// canonical FAST-9 requires a *contiguous* arc of 9.
// NOTE(review): `keypoints.size() > nfeatures` mixes signed and unsigned.
void detectFASTFeatures(const Mat& src, vector<KeyPoint>& keypoints,
                        int nfeatures) {
    keypoints.clear();
    // Detection parameters
    const int threshold = 20;
    const int min_arc = 9;
    // Scan the image (3-pixel margin for the circle offsets)
    for (int y = 3; y < src.rows - 3; y++) {
        for (int x = 3; x < src.cols - 3; x++) {
            uchar center = src.at<uchar>(y, x);
            int brighter = 0, darker = 0;
            // Walk the 16-pixel circle around the centre
            for (int i = 0; i < 16; i++) {
                int dx = fast_circle[i][0];
                int dy = fast_circle[i][1];
                uchar pixel = src.at<uchar>(y + dy, x + dx);
                if (pixel > center + threshold) brighter++;
                else if (pixel < center - threshold) darker++;
            }
            // Corner test
            if (brighter >= min_arc || darker >= min_arc) {
                keypoints.push_back(KeyPoint(x, y, 1));
            }
        }
    }
    // Keep only the strongest nfeatures corners
    if (nfeatures > 0 && keypoints.size() > nfeatures) {
        // Response value per corner
        vector<pair<float, int>> responses;
        for (size_t i = 0; i < keypoints.size(); i++) {
            float response = calculateFASTResponse(src, keypoints[i]);
            responses.push_back(make_pair(response, i));
        }
        // Strongest first
        sort(responses.begin(), responses.end(), greater<pair<float, int>>());
        // Take the top nfeatures
        vector<KeyPoint> selected;
        for (int i = 0; i < nfeatures; i++) {
            selected.push_back(keypoints[responses[i].second]);
        }
        keypoints = selected;
    }
}
// FAST corner response: sum of absolute intensity differences between the
// centre pixel and the 16 pixels on the surrounding circle (offsets from
// the external `fast_circle` table, not defined in this file).
float calculateFASTResponse(const Mat& src, const KeyPoint& kp) {
    float response = 0;
    uchar center = src.at<uchar>(kp.pt.y, kp.pt.x);
    // Accumulate |circle pixel - centre| over the 16 samples
    for (int i = 0; i < 16; i++) {
        int dx = fast_circle[i][0];
        int dy = fast_circle[i][1];
        uchar pixel = src.at<uchar>(kp.pt.y + dy, kp.pt.x + dx);
        response += abs(pixel - center);
    }
    return response;
}
// Assign each keypoint an orientation with the intensity-centroid method:
// image moments m10/m01 over a radius-7 disc give
// angle = atan2(m01, m10), stored in degrees in [0, 360).
void computeOrientation(const Mat& src, vector<KeyPoint>& keypoints) {
    for (size_t i = 0; i < keypoints.size(); i++) {
        KeyPoint& kp = keypoints[i];
        float m01 = 0, m10 = 0;
        // First-order moments over the disc around the keypoint
        for (int y = -7; y <= 7; y++) {
            for (int x = -7; x <= 7; x++) {
                if (x*x + y*y <= 49) { // circular mask, radius 7
                    int px = kp.pt.x + x;
                    int py = kp.pt.y + y;
                    if (px >= 0 && px < src.cols && py >= 0 && py < src.rows) {
                        float intensity = src.at<uchar>(py, px);
                        m10 += x * intensity;
                        m01 += y * intensity;
                    }
                }
            }
        }
        // Centroid direction, mapped to [0, 360) degrees
        kp.angle = atan2(m01, m10) * 180 / CV_PI;
        if (kp.angle < 0) kp.angle += 360;
    }
}
// Compute simplified binary (BRIEF-like) descriptors for each keypoint.
// NOTE(review): the comparison pattern is regenerated with rand() on every
// call, so descriptors are neither reproducible nor comparable between two
// images -- real BRIEF/ORB uses a single fixed, precomputed pattern.
// NOTE(review): each output byte stores a single 0/1 result instead of
// packing 8 tests per byte, so only 32 of the intended 256 bits are used.
// NOTE(review): the orientation computed by computeOrientation is not
// applied here (no "rotated BRIEF").
void computeORBDescriptors(const Mat& src,
                           const vector<KeyPoint>& keypoints,
                           Mat& descriptors) {
    descriptors.create(keypoints.size(), 32, CV_8U);
    for (size_t i = 0; i < keypoints.size(); i++) {
        const KeyPoint& kp = keypoints[i];
        uchar* desc = descriptors.ptr<uchar>(i);
        // One weighted-sum test per descriptor byte
        for (int j = 0; j < 32; j++) {
            int pattern[4][4] = {
                {0, 0, 0, 0},
                {0, 0, 0, 0},
                {0, 0, 0, 0},
                {0, 0, 0, 0}
            };
            // Random +/-1 weights for the 4x4 patch
            for (int k = 0; k < 4; k++) {
                for (int l = 0; l < 4; l++) {
                    pattern[k][l] = (rand() % 2) ? 1 : -1;
                }
            }
            // Weighted sum over the 4x4 patch around the keypoint
            float sum = 0;
            for (int k = 0; k < 4; k++) {
                for (int l = 0; l < 4; l++) {
                    int px = kp.pt.x + k - 2;
                    int py = kp.pt.y + l - 2;
                    if (px >= 0 && px < src.cols && py >= 0 && py < src.rows) {
                        sum += pattern[k][l] * src.at<uchar>(py, px);
                    }
                }
            }
            // Sign of the sum becomes the descriptor bit
            desc[j] = (sum > 0) ? 1 : 0;
        }
    }
}
Python实现
def orb_features_manual(image, n_features=500):
    """Simplified ORB-style feature extraction.

    Pipeline: FAST corner detection -> intensity-centroid orientation ->
    binary (BRIEF-like) descriptors.

    Args:
        image: input image (BGR input is converted to grayscale).
        n_features: keep only the strongest n_features corners.

    Returns:
        (keypoints, descriptors) tuple.
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()
    # FAST corner detection (with response-based selection)
    keypoints = detect_fast_features(gray, n_features)
    # Intensity-centroid orientation per keypoint
    keypoints = compute_orientation(gray, keypoints)
    # Binary descriptors
    descriptors = compute_orb_descriptors(gray, keypoints)
    return keypoints, descriptors
def detect_fast_features(image, n_features):
    """FAST-style corner detection.

    A pixel is a corner when at least `min_arc` of the 16 pixels on the
    surrounding circle (offsets from the module-level `fast_circle` table)
    are brighter (or darker) than the centre by `threshold`. If more than
    n_features corners are found, only the strongest are kept.

    Fix: the centre/circle values are cast to int before comparison --
    uint8 arithmetic in ``center + threshold`` / ``center - threshold``
    would otherwise wrap around for bright/dark pixels.

    Args:
        image: grayscale uint8 image.
        n_features: keep the strongest n_features corners (0 = no limit).

    Returns:
        list of cv2.KeyPoint.
    """
    keypoints = []
    # Detection parameters
    threshold = 20
    min_arc = 9
    # Scan the image (3-pixel margin for the circle offsets)
    for y in range(3, image.shape[0] - 3):
        for x in range(3, image.shape[1] - 3):
            center = int(image[y, x])
            brighter = darker = 0
            # Walk the 16-pixel circle around the centre
            for i in range(16):
                dx = fast_circle[i][0]
                dy = fast_circle[i][1]
                pixel = int(image[y + dy, x + dx])
                if pixel > center + threshold:
                    brighter += 1
                elif pixel < center - threshold:
                    darker += 1
            # NOTE(review): counts bright/dark pixels anywhere on the circle;
            # canonical FAST-9 requires a contiguous arc of 9.
            if brighter >= min_arc or darker >= min_arc:
                keypoints.append(cv2.KeyPoint(x, y, 1))
    # Keep only the strongest n_features corners
    if n_features > 0 and len(keypoints) > n_features:
        responses = []
        for i, kp in enumerate(keypoints):
            responses.append((calculate_fast_response(image, kp), i))
        # Strongest first, then take the top n_features
        responses.sort(reverse=True)
        keypoints = [keypoints[idx] for _, idx in responses[:n_features]]
    return keypoints
def calculate_fast_response(image, kp):
    """FAST corner response.

    Sum of absolute intensity differences between the centre pixel and the
    16 pixels on the surrounding circle (offsets from the module-level
    `fast_circle` table).

    Fix: values are cast to int before subtraction -- uint8 arithmetic in
    ``pixel - center`` would wrap around before abs(), corrupting the
    response ranking.
    """
    response = 0
    center = int(image[int(kp.pt[1]), int(kp.pt[0])])
    # Accumulate |circle pixel - centre| over the 16 samples
    for i in range(16):
        dx = fast_circle[i][0]
        dy = fast_circle[i][1]
        pixel = int(image[int(kp.pt[1] + dy), int(kp.pt[0] + dx)])
        response += abs(pixel - center)
    return response
def compute_orientation(image, keypoints):
    """Assign each keypoint an orientation via the intensity-centroid
    method: image moments m10/m01 over a radius-7 disc give
    angle = atan2(m01, m10), stored in degrees in [0, 360).

    NOTE(review): cv2.KeyPoint attributes are read-only in some OpenCV
    Python builds -- verify that assigning kp.angle works in the target
    environment.
    """
    for kp in keypoints:
        m01 = m10 = 0
        # First-order moments over the disc around the keypoint
        for y in range(-7, 8):
            for x in range(-7, 8):
                if x*x + y*y <= 49:  # circular mask, radius 7
                    px = int(kp.pt[0] + x)
                    py = int(kp.pt[1] + y)
                    if 0 <= px < image.shape[1] and 0 <= py < image.shape[0]:
                        intensity = image[py, px]
                        m10 += x * intensity
                        m01 += y * intensity
        # Centroid direction, mapped to [0, 360) degrees
        kp.angle = np.arctan2(m01, m10) * 180 / np.pi
        if kp.angle < 0:
            kp.angle += 360
    return keypoints
def compute_orb_descriptors(image, keypoints):
    """Compute simplified binary (BRIEF-like) descriptors.

    NOTE(review): the comparison pattern is regenerated randomly on every
    call, so descriptors are neither reproducible nor comparable between two
    images -- real BRIEF/ORB uses a single fixed, precomputed pattern.
    NOTE(review): each output byte stores a single 0/1 result rather than
    packing 8 tests per byte, so only 32 of the intended 256 bits are used.
    NOTE(review): the keypoint orientation is not applied here.
    """
    descriptors = np.zeros((len(keypoints), 32), dtype=np.uint8)
    for i, kp in enumerate(keypoints):
        desc = descriptors[i]
        # One weighted-sum test per descriptor byte
        for j in range(32):
            # Random +/-1 weights for the 4x4 patch
            pattern = np.random.choice([-1, 1], size=(4, 4))
            # Weighted sum over the 4x4 patch around the keypoint
            sum_val = 0
            for k in range(4):
                for l in range(4):
                    px = int(kp.pt[0] + k - 2)
                    py = int(kp.pt[1] + l - 2)
                    if 0 <= px < image.shape[1] and 0 <= py < image.shape[0]:
                        sum_val += pattern[k, l] * image[py, px]
            # Sign of the sum becomes the descriptor bit
            desc[j] = 1 if sum_val > 0 else 0
    return descriptors
6. 特征匹配
6.1 基本原理
特征匹配就像是"认亲",通过比较特征描述子来找到对应的特征点。
匹配策略:
-
暴力匹配:
- 遍历所有可能
- 计算距离最小值
-
快速近似匹配:
- 构建搜索树
- 快速查找最近邻
6.2 手动实现
C++实现
void feature_matching(const Mat& src1, const Mat& src2,
vector<DMatch>& matches,
const vector<KeyPoint>& keypoints1,
const vector<KeyPoint>& keypoints2,
const Mat& descriptors1,
const Mat& descriptors2) {
matches.clear();
// 暴力匹配
for (int i = 0; i < descriptors1.rows; i++) {
double minDist = DBL_MAX;
int minIdx = -1;
for (int j = 0; j < descriptors2.rows; j++) {
double dist = 0;
// 计算欧氏距离
for (int k = 0; k < descriptors1.cols; k++) {
double diff = descriptors1.at<float>(i,k) -
descriptors2.at<float>(j,k);
dist += diff * diff;
}
dist = sqrt(dist);
if (dist < minDist) {
minDist = dist;
minIdx = j;
}
}
if (minIdx >= 0) {
DMatch match;
match.queryIdx = i;
match.trainIdx = minIdx;
match.distance = minDist;
matches.push_back(match);
}
}
}
Python实现
def feature_matching_manual(descriptors1, descriptors2, threshold=0.7):
    """Brute-force matching with Lowe's ratio test.

    For each query descriptor, the two nearest train descriptors (Euclidean
    distance) are found; the match is kept when the best distance is below
    `threshold` times the second-best distance.

    Fixes vs. the original:
      - guards against fewer than two train descriptors (the two-way
        unpacking of argsort would raise);
      - indices/distances are cast to plain Python int/float before building
        cv2.DMatch (some OpenCV builds reject numpy scalars).

    Args:
        descriptors1: query descriptors, one row per feature.
        descriptors2: train descriptors, one row per feature.
        threshold: Lowe ratio threshold.

    Returns:
        list of cv2.DMatch.
    """
    matches = []
    # Ratio test needs at least two candidate neighbours
    if descriptors2 is None or len(descriptors2) < 2:
        return matches
    for i in range(len(descriptors1)):
        dist = np.linalg.norm(descriptors2 - descriptors1[i], axis=1)
        idx1, idx2 = np.argsort(dist)[:2]
        # Lowe's ratio test
        if dist[idx1] < threshold * dist[idx2]:
            matches.append(cv2.DMatch(i, int(idx1), float(dist[idx1])))
    return matches
7. 代码实现与优化
7.1 性能优化技巧
- SIMD加速:
// 使用AVX2指令集加速特征计算
// Horizontal sum of src[0..width) accelerated with AVX2.
//
// Fix: the original loaded full 8-float vectors for every x < width, so any
// width that is not a multiple of 8 read past the end of src (out-of-bounds
// UB). The remainder is now handled with a scalar tail loop.
//
// src   pointer to at least `width` floats
// dst   receives the sum of the `width` elements
// width number of elements (any non-negative value)
inline void compute_features_simd(const float* src, float* dst, int width) {
    __m256 acc = _mm256_setzero_ps();
    int x = 0;
    // Vectorised body: whole 8-lane chunks only
    for (; x + 8 <= width; x += 8) {
        acc = _mm256_add_ps(acc, _mm256_loadu_ps(src + x));
    }
    // Horizontal reduction of the 8 lanes
    alignas(32) float lanes[8];
    _mm256_store_ps(lanes, acc);
    float total = lanes[0] + lanes[1] + lanes[2] + lanes[3] +
                  lanes[4] + lanes[5] + lanes[6] + lanes[7];
    // Scalar tail for the remaining (width % 8) elements
    for (; x < width; ++x) total += src[x];
    *dst = total;
}
- OpenMP并行化:
#pragma omp parallel for collapse(2)
for (int y = 0; y < src.rows; y++) {
for (int x = 0; x < src.cols; x++) {
// 处理每个像素
}
}
- 内存优化:
// 使用连续内存访问
Mat temp = src.clone();
temp = temp.reshape(1, src.total());
8. 实验效果与应用
8.1 应用场景
-
图像配准:
- 医学图像对齐
- 遥感图像拼接
- 全景图像合成
-
目标识别:
- 人脸识别
- 物体检测
- 场景匹配
-
运动跟踪:
- 视频监控
- 手势识别
- 增强现实
8.2 注意事项
-
特征提取过程中的注意点:
- 选择合适的特征类型
- 考虑计算效率
- 注意特征的可区分性
-
算法选择建议:
- 根据应用场景选择
- 考虑实时性要求
- 权衡准确性和效率
总结
特征提取就像是给图像做"体检"!通过Harris角点检测、SIFT、SURF、ORB等"检查项目",我们可以发现图像中隐藏的"特征"。在实际应用中,需要根据具体场景选择合适的"检查方案",就像医生为每个病人制定个性化的体检计划一样。
记住:好的特征提取就像是一个经验丰富的"医生",既要发现关键特征,又要保持效率!🏥
参考资料
- Harris C, Stephens M. A combined corner and edge detector[C]. Alvey vision conference, 1988
- Lowe D G. Distinctive image features from scale-invariant keypoints[J]. IJCV, 2004
- Bay H, et al. SURF: Speeded Up Robust Features[C]. ECCV, 2006
- Rublee E, et al. ORB: An efficient alternative to SIFT or SURF[C]. ICCV, 2011
- OpenCV官方文档: https://docs.opencv.org/
- 更多资源: IP101项目主页