OpenCV——特征匹配、标定、光流和 DNN




2021-10-17

blog_main_img

本文进入 OpenCV-Python 的“工程深水区”:特征点匹配、单应性矩阵、相机标定、镜头去畸变、光流跟踪、DNN 推理和性能排查。

这些能力更适合解决下面这类问题:

  • 在画面里找到某张平面海报、票据或卡片
  • 把倾斜拍摄的平面目标矫正回来
  • 标定摄像头,修正镜头畸变
  • 跟踪相邻帧里的关键点运动
  • 用 ONNX 模型做轻量推理
  • 排查视觉流水线为什么慢

OpenCV Python 高阶技术封面

特征点匹配:让图片自己对上号

如果你要在大图里找一张小图,或者判断两张图片是不是拍到了同一个平面物体,特征点匹配很有用。

基本流程:

提取关键点和描述子
匹配描述子
过滤差匹配
估计单应性矩阵
把目标区域映射到大图

OpenCV 特征匹配和单应性矩阵

用 ORB 做一个完整例子:

from pathlib import Path

import cv2
import numpy as np


def locate_planar_object(template_path: str, scene_path: str, output_path: str) -> None:
    """Find a planar template inside a scene image and save the outlined scene.

    ORB descriptors of both images are matched with a brute-force Hamming
    matcher, filtered with a 0.75 ratio test and passed to a RANSAC
    homography. The template corners are projected through that homography
    and drawn as a closed green polygon on the colour scene.

    Args:
        template_path: Image of the planar object to look for.
        scene_path: Image that may contain the object.
        output_path: Where the annotated scene is written.

    Raises:
        FileNotFoundError: If either input image cannot be read.
        RuntimeError: If there are no descriptors, fewer than 8 good matches,
            no homography, or the output image cannot be written.
    """
    template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
    if template is None:
        raise FileNotFoundError(template_path)

    # Decode the scene once: the colour image is needed for drawing and the
    # grayscale copy used for feature detection is derived from it.
    scene_color = cv2.imread(scene_path)
    if scene_color is None:
        raise FileNotFoundError(scene_path)
    scene = cv2.cvtColor(scene_color, cv2.COLOR_BGR2GRAY)

    orb = cv2.ORB_create(nfeatures=2000)
    kp1, des1 = orb.detectAndCompute(template, None)
    kp2, des2 = orb.detectAndCompute(scene, None)

    if des1 is None or des2 is None:
        raise RuntimeError("not enough descriptors")

    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
    pairs = matcher.knnMatch(des1, des2, k=2)

    # Ratio test: keep a match only when it is clearly better than the
    # runner-up. knnMatch may return fewer than two neighbours for a query.
    good = [
        pair[0]
        for pair in pairs
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]

    if len(good) < 8:
        raise RuntimeError("not enough good matches")

    src_points = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst_points = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

    matrix, _inlier_mask = cv2.findHomography(src_points, dst_points, cv2.RANSAC, 5.0)
    if matrix is None:
        raise RuntimeError("homography failed")

    h, w = template.shape[:2]
    corners = np.float32([
        [0, 0],
        [w, 0],
        [w, h],
        [0, h],
    ]).reshape(-1, 1, 2)

    projected = cv2.perspectiveTransform(corners, matrix)

    output = cv2.polylines(
        scene_color,
        [np.int32(projected)],
        isClosed=True,
        color=(0, 255, 0),
        thickness=3,
    )

    # imwrite reports failure through its return value; do not lose it.
    if not cv2.imwrite(output_path, output):
        raise RuntimeError(f"cannot write {output_path}")


# Example: look for template.jpg inside scene.jpg and save the annotated scene as matched.jpg.
locate_planar_object("template.jpg", "scene.jpg", "matched.jpg")

这段代码里最关键的是 findHomography(..., cv2.RANSAC, ...)。真实图片里总会有错配点,RANSAC 能把明显不靠谱的点过滤掉,留下更可信的几何关系。

ORB、SIFT、AKAZE 怎么选

OpenCV 里常见特征有 ORB、SIFT、AKAZE。

ORB:

# ORB yields binary descriptors, so matches are scored with Hamming distance.
detector = cv2.ORB_create(nfeatures=2000)
norm = cv2.NORM_HAMMING

特点是快,描述子是二进制,适合对速度敏感的场景。

SIFT:

# SIFT is paired with the L2 norm when matching descriptors.
detector = cv2.SIFT_create()
norm = cv2.NORM_L2

对尺度、旋转更稳,代价通常更高。

AKAZE:

# AKAZE is paired with Hamming distance here, the same norm used for ORB.
detector = cv2.AKAZE_create()
norm = cv2.NORM_HAMMING

在一些纹理不算丰富的场景里表现不错。

选择建议:

先用 ORB 做 baseline
匹配不稳再试 SIFT 或 AKAZE
纹理太少时别硬上特征匹配
平面目标更适合 homography
非刚体目标要换思路

单应性矩阵:把倾斜平面拉正

findHomography 不只是能定位平面目标,也能做透视矫正。

假设你已经有四个角点,可以把票据、屏幕、卡片拉成正视图:

import cv2
import numpy as np


def rectify_quad(
    image_path: str,
    points: list[tuple[float, float]],
    output_path: str,
    size: tuple[int, int] = (800, 500),
) -> None:
    """Warp a quadrilateral region of an image into an upright rectangle.

    Args:
        image_path: Image containing the tilted planar target.
        points: Exactly four (x, y) corners, ordered top-left, top-right,
            bottom-right, bottom-left to line up with the destination corners.
        output_path: Where the rectified image is written.
        size: (width, height) of the rectified output; defaults to 800x500.

    Raises:
        ValueError: If *points* is not four (x, y) pairs or *size* is not
            strictly positive.
        FileNotFoundError: If the image cannot be read.
        RuntimeError: If the rectified image cannot be written.
    """
    # Validate the cheap arguments before touching the disk.
    src = np.asarray(points, dtype=np.float32)
    if src.shape != (4, 2):
        raise ValueError(f"expected 4 (x, y) points, got shape {src.shape}")

    width, height = size
    if width <= 0 or height <= 0:
        raise ValueError(f"size must be positive, got {size}")

    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(image_path)

    dst = np.float32([
        [0, 0],
        [width, 0],
        [width, height],
        [0, height],
    ])

    matrix = cv2.getPerspectiveTransform(src, dst)
    warped = cv2.warpPerspective(image, matrix, (width, height))

    # imwrite reports failure through its return value; do not lose it.
    if not cv2.imwrite(output_path, warped):
        raise RuntimeError(f"cannot write {output_path}")


# Example call. The four corners are listed top-left, top-right, bottom-right,
# bottom-left so that they line up with the destination corners in rectify_quad.
rectify_quad(
    "receipt.jpg",
    [(120, 80), (720, 110), (760, 560), (90, 530)],
    "receipt_rectified.jpg",
)

这类操作常见于文档扫描、票据识别、屏幕内容提取。难点通常不是变换本身,而是如何稳定拿到四个角点。

相机标定:先认识镜头畸变

很多摄像头拍出来的图像边缘会弯,直线看起来像弧线。相机标定要估计两类东西:

camera matrix:焦距和主点
distortion coefficients:镜头畸变参数

标定常用棋盘格。OpenCV 提供 findChessboardCorners、cornerSubPix、calibrateCamera、undistort 这一整套工具。

OpenCV 相机标定和光流

from pathlib import Path

import cv2
import numpy as np


def calibrate_camera(image_dir: str, pattern_size: tuple[int, int]) -> dict:
    """Calibrate a camera from the chessboard ``*.jpg`` images in a directory.

    Args:
        image_dir: Directory scanned (non-recursively) for ``*.jpg`` files.
        pattern_size: (cols, rows) of the chessboard pattern as passed to
            ``cv2.findChessboardCorners``.

    Returns:
        A dict with ``"rms"`` (value returned by ``cv2.calibrateCamera``),
        ``"camera_matrix"`` and ``"distortion"``.

    Raises:
        RuntimeError: If no image yields a chessboard, or if the usable
            images do not all share the same resolution.
    """
    cols, rows = pattern_size

    # Board corners on the z = 0 plane, one unit per square.
    board = np.zeros((rows * cols, 3), np.float32)
    board[:, :2] = np.mgrid[0:cols, 0:rows].T.reshape(-1, 2)

    object_points = []
    image_points = []
    image_size = None

    # Sorted so the set of views is processed in a reproducible order.
    for path in sorted(Path(image_dir).glob("*.jpg")):
        image = cv2.imread(str(path))
        if image is None:
            continue

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        ok, corners = cv2.findChessboardCorners(gray, pattern_size)
        if not ok:
            continue

        # calibrateCamera takes a single image size, so every contributing
        # view must share it; mixing resolutions would silently skew the fit.
        size = gray.shape[::-1]
        if image_size is None:
            image_size = size
        elif size != image_size:
            raise RuntimeError(
                f"{path}: image size {size} differs from {image_size}"
            )

        refined = cv2.cornerSubPix(
            gray,
            corners,
            winSize=(11, 11),
            zeroZone=(-1, -1),
            criteria=(
                cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER,
                30,
                0.001,
            ),
        )

        object_points.append(board)
        image_points.append(refined)

    if not object_points or image_size is None:
        raise RuntimeError("no valid chessboard images")

    rms, camera_matrix, distortion, _rvecs, _tvecs = cv2.calibrateCamera(
        object_points,
        image_points,
        image_size,
        None,
        None,
    )

    return {
        "rms": rms,
        "camera_matrix": camera_matrix,
        "distortion": distortion,
    }

使用标定结果去畸变:

def undistort_image(image_path: str, output_path: str, camera_matrix, distortion) -> None:
    """Undistort an image with calibration results and save the valid region.

    Args:
        image_path: Distorted input image.
        output_path: Where the undistorted image is written.
        camera_matrix: Camera matrix passed to the OpenCV undistortion calls.
        distortion: Distortion coefficients passed alongside it.

    Raises:
        FileNotFoundError: If the image cannot be read.
        RuntimeError: If the result cannot be written.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(image_path)

    h, w = image.shape[:2]
    new_matrix, roi = cv2.getOptimalNewCameraMatrix(
        camera_matrix,
        distortion,
        (w, h),
        1,
        (w, h),
    )

    undistorted = cv2.undistort(image, camera_matrix, distortion, None, new_matrix)

    # Crop to the reported ROI only when it is non-empty: slicing with a
    # zero width or height would leave nothing to write.
    x, y, rw, rh = roi
    if rw > 0 and rh > 0:
        undistorted = undistorted[y:y + rh, x:x + rw]

    # imwrite reports failure through its return value; do not lose it.
    if not cv2.imwrite(output_path, undistorted):
        raise RuntimeError(f"cannot write {output_path}")

标定质量很依赖输入图片。棋盘格要覆盖不同位置和角度,角点检测失败的图片不要硬塞进去。

光流:跟踪相邻帧里的运动

光流描述相邻帧中像素或特征点的运动。常见两类:

稀疏光流:跟踪少量关键点
稠密光流:估计整张图的运动场

稀疏 Lucas-Kanade 光流:

import cv2
import numpy as np


# Sparse Lucas-Kanade demo: track corner points from frame to frame and draw
# their trails. Press "q" to stop.

# Corner detector settings, shared by the initial detection and re-detection.
feature_params = dict(maxCorners=200, qualityLevel=0.01, minDistance=8)

cap = cv2.VideoCapture("input.mp4")
try:
    ok, first = cap.read()
    if not ok:
        raise RuntimeError("cannot read first frame")

    prev_gray = cv2.cvtColor(first, cv2.COLOR_BGR2GRAY)
    points = cv2.goodFeaturesToTrack(prev_gray, **feature_params)

    # Trails accumulate on a separate layer that is blended onto every frame.
    track_layer = np.zeros_like(first)

    while True:
        ok, frame = cap.read()
        if not ok:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # No corners were found, or every point was lost: detect again on the
        # previous frame instead of handing LK an empty point set.
        if points is None or len(points) == 0:
            points = cv2.goodFeaturesToTrack(prev_gray, **feature_params)

        if points is not None and len(points) > 0:
            next_points, status, _error = cv2.calcOpticalFlowPyrLK(
                prev_gray,
                gray,
                points,
                None,
            )

            if next_points is None:
                break

            # status == 1 marks the points whose flow was found.
            good_new = next_points[status == 1]
            good_old = points[status == 1]

            for new, old in zip(good_new, good_old):
                x1, y1 = new.ravel()
                x0, y0 = old.ravel()
                cv2.line(track_layer, (int(x0), int(y0)), (int(x1), int(y1)), (0, 255, 0), 2)
                cv2.circle(frame, (int(x1), int(y1)), 3, (0, 255, 0), -1)

            points = good_new.reshape(-1, 1, 2)

        output = cv2.add(frame, track_layer)
        cv2.imshow("sparse flow", output)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

        prev_gray = gray
finally:
    # Release the capture and close windows even when an error interrupts the loop.
    cap.release()
    cv2.destroyAllWindows()

稀疏光流适合跟踪角点、纹理点、运动轨迹。点丢失后可以周期性重新检测关键点。

稠密光流:整张图都动起来

Farneback 光流会给每个像素估计一个运动向量。

import cv2
import numpy as np


def flow_to_bgr(flow):
    """Visualize a two-channel flow field as a BGR image.

    The flow is converted to polar form; the angle drives the hue channel,
    saturation is fixed at 255 and the min-max normalized magnitude drives
    the value channel. The HSV image is then converted to BGR.
    """
    rows, cols = flow.shape[0], flow.shape[1]
    dx = flow[..., 0]
    dy = flow[..., 1]
    magnitude, angle = cv2.cartToPolar(dx, dy)

    hue = angle * 180 / np.pi / 2
    value = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

    hsv = np.zeros((rows, cols, 3), dtype=np.uint8)
    hsv[..., 0] = hue
    hsv[..., 1] = 255
    hsv[..., 2] = value

    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    return bgr


# Dense Farneback demo: estimate per-pixel flow between consecutive frames and
# show it as a colour image. Press "q" to stop.
cap = cv2.VideoCapture("input.mp4")
try:
    ok, first = cap.read()
    if not ok:
        raise RuntimeError("cannot read first frame")

    prev_gray = cv2.cvtColor(first, cv2.COLOR_BGR2GRAY)

    while True:
        ok, frame = cap.read()
        if not ok:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        flow = cv2.calcOpticalFlowFarneback(
            prev_gray,
            gray,
            None,
            pyr_scale=0.5,
            levels=3,
            winsize=15,
            iterations=3,
            poly_n=5,
            poly_sigma=1.2,
            flags=0,
        )

        flow_view = flow_to_bgr(flow)
        cv2.imshow("dense flow", flow_view)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

        prev_gray = gray
finally:
    # Release the capture and close windows even when an error interrupts the loop.
    cap.release()
    cv2.destroyAllWindows()

稠密光流更重,但能看到整体运动趋势,适合运动分析、背景变化观察、画面稳定前的运动估计。

跟踪器:检测一次,后面跟着走

如果每一帧都跑重模型太贵,可以先检测一次目标,再用 tracker 跟踪。

import cv2


# Tracker demo: select a target on the first frame, then follow it with CSRT.
# Press "q" to stop.
cap = cv2.VideoCapture("input.mp4")
try:
    ok, frame = cap.read()
    if not ok:
        raise RuntimeError("cannot read first frame")

    roi = cv2.selectROI("select target", frame, fromCenter=False, showCrosshair=True)
    cv2.destroyWindow("select target")

    # A box with zero width or height means nothing was selected; do not
    # initialize the tracker with it.
    if roi[2] <= 0 or roi[3] <= 0:
        raise RuntimeError("no target selected")

    tracker = cv2.TrackerCSRT_create()
    tracker.init(frame, roi)

    while True:
        ok, frame = cap.read()
        if not ok:
            break

        tracked, box = tracker.update(frame)

        if tracked:
            x, y, w, h = [int(v) for v in box]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        else:
            cv2.putText(
                frame,
                "lost",
                (40, 60),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.0,
                (0, 0, 255),
                2,
            )

        cv2.imshow("tracker", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the capture and close windows even when an error interrupts the loop.
    cap.release()
    cv2.destroyAllWindows()

TrackerCSRT_create 通常比较稳,但计算开销偏大,速度不是最快的。不同 OpenCV 安装包和版本里 tracker API 的位置可能略有差异,如果报属性不存在,可以检查是否安装了 contrib 包(opencv-contrib-python)。

OpenCV DNN:跑一个 ONNX 模型

OpenCV 的 dnn 模块可以加载 ONNX、Caffe、TensorFlow 等格式。它适合做轻量推理或模型部署前验证。

OpenCV DNN 和性能优化

一个通用 ONNX 分类模型推理骨架:

import cv2
import numpy as np


def classify_onnx(image_path: str, model_path: str) -> np.ndarray:
    """Run an ONNX classification model on one image and return its raw output.

    The image is resized to 224x224, converted from BGR to RGB, scaled to the
    0-1 range and mean-centred per channel before inference.

    Args:
        image_path: Image to classify.
        model_path: ONNX model file.

    Returns:
        Whatever ``net.forward()`` produces for the model's output layer.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(image_path)

    net = cv2.dnn.readNetFromONNX(model_path)

    # blobFromImage subtracts `mean` from the raw pixels first and only then
    # multiplies by `scalefactor`, so the 0-1 channel means must be expressed
    # on the 0-255 pixel scale to give pixel / 255 - mean.
    # NOTE: no per-channel std division is applied; add it if the model
    # was trained with one.
    mean_rgb = tuple(255.0 * channel_mean for channel_mean in (0.485, 0.456, 0.406))

    blob = cv2.dnn.blobFromImage(
        image,
        scalefactor=1.0 / 255.0,
        size=(224, 224),
        mean=mean_rgb,
        swapRB=True,
        crop=False,
    )

    net.setInput(blob)
    output = net.forward()
    return output

注意 swapRB=True。OpenCV 读图是 BGR,而很多模型训练时使用 RGB。这个参数很容易决定结果是否正常。

DNN 检测后处理:NMS 很关键

目标检测模型通常会输出很多候选框,需要非极大值抑制。

import cv2


# Candidate boxes in [x, y, w, h] form, with one confidence score per box.
boxes = [
    [100, 80, 120, 160],
    [108, 86, 118, 152],
    [360, 200, 90, 120],
]
scores = [0.92, 0.85, 0.78]

# Arguments are, in order: boxes, scores, score threshold, NMS threshold.
indices = cv2.dnn.NMSBoxes(boxes, scores, 0.5, 0.4)

# Print every surviving box followed by its score.
for kept in map(int, indices):
    print(*boxes[kept], scores[kept])

不同模型输出格式差异很大。后处理一定要先看清楚模型输出维度、坐标格式、置信度位置和类别位置。

性能排查:先少做,再做快

OpenCV 性能优化的顺序别搞反。

先问这些问题:

  • 输入图能不能提前缩小
  • 是否每帧都必须跑完整流程
  • 中间结果有没有重复计算
  • 是否发生了过多复制
  • Python 循环能不能换成 OpenCV 或 NumPy 操作
  • 模型推理能否批处理或降频

一个常见坏例子:逐像素 Python 循环。

# Anti-pattern kept for illustration: every pixel is visited from Python.
# Sets mask to 255 wherever channel 1 of the image is greater than 120.
for y in range(image.shape[0]):
    for x in range(image.shape[1]):
        if image[y, x, 1] > 120:
            mask[y, x] = 255

改成 NumPy:

# Compare the whole channel at once, then scale the boolean result to 0 / 255.
mask = (image[:, :, 1] > 120).astype("uint8") * 255

能交给 OpenCV 的就交给 OpenCV:

# inRange bounds are inclusive, so the lower bound on channel 1 is 121 to
# match a strict "> 120" comparison (assumes an integer-valued uint8 image).
mask = cv2.inRange(image, (0, 121, 0), (255, 255, 255))

这类改法比微调几个参数更有效。

UMat 和硬件后端:别盲目开

OpenCV 有 UMat、OpenCL、DNN backend/target 等能力,但不是打开就一定更快。

DNN 可以设置后端:

# Load the model, then request OpenCV's own DNN backend with the CPU as target.
net = cv2.dnn.readNetFromONNX("model.onnx")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

如果环境支持其他后端,也可以按部署环境尝试。但要用真实输入测端到端耗时,不要只看单算子。

有些小图、小操作上,数据搬运成本可能比计算更贵。

一个更像工程的 pipeline 写法

把处理步骤封装成类,后面更容易插拔。

from dataclasses import dataclass

import cv2
import numpy as np


@dataclass
class MatchResult:
    """Outcome of a single PlanarMatcher.match call."""

    # True only when a homography was found and `polygon` is populated.
    ok: bool
    # Template corners projected into the frame, shaped (4, 1, 2); None on failure.
    polygon: np.ndarray | None
    # Number of matches that survived the ratio test (0 when descriptors are missing).
    matches: int


class PlanarMatcher:
    """Locate one planar template in successive frames via ORB and a RANSAC homography.

    The template's keypoints, descriptors and corner coordinates are computed
    once in the constructor and reused by every ``match`` call.
    """

    def __init__(
        self,
        template_path: str,
        *,
        nfeatures: int = 2000,
        ratio: float = 0.75,
        min_matches: int = 8,
        ransac_threshold: float = 5.0,
    ):
        """Load the template and precompute its features.

        Args:
            template_path: Grayscale-readable image of the planar target.
            nfeatures: Feature budget passed to ``cv2.ORB_create``.
            ratio: Ratio-test factor; a match is kept when its distance is
                below ``ratio`` times the runner-up's distance.
            min_matches: Minimum good matches required before a homography
                is attempted; must be at least 4.
            ransac_threshold: Reprojection threshold passed to
                ``cv2.findHomography``.

        Raises:
            ValueError: If ``min_matches`` is below 4.
            FileNotFoundError: If the template cannot be read.
        """
        # A homography needs at least four point correspondences.
        if min_matches < 4:
            raise ValueError("min_matches must be at least 4")

        self.template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
        if self.template is None:
            raise FileNotFoundError(template_path)

        self.ratio = ratio
        self.min_matches = min_matches
        self.ransac_threshold = ransac_threshold

        self.detector = cv2.ORB_create(nfeatures=nfeatures)
        self.kp_template, self.des_template = self.detector.detectAndCompute(
            self.template,
            None,
        )
        self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)

        # Template corners never change, so build them once.
        height, width = self.template.shape[:2]
        self._corners = np.float32(
            [[0, 0], [width, 0], [width, height], [0, height]]
        ).reshape(-1, 1, 2)

    def _good_matches(self, des_scene) -> list:
        """Return the template-to-scene matches that pass the ratio test."""
        pairs = self.matcher.knnMatch(self.des_template, des_scene, k=2)
        # knnMatch may return fewer than two neighbours for a query; skip those.
        return [
            pair[0]
            for pair in pairs
            if len(pair) == 2 and pair[0].distance < self.ratio * pair[1].distance
        ]

    def match(self, frame_bgr) -> MatchResult:
        """Look for the template in a frame.

        Args:
            frame_bgr: BGR frame; a single-channel (2-D) frame is also
                accepted and used as-is.

        Returns:
            A MatchResult whose ``polygon`` holds the projected template
            corners on success, and whose ``matches`` is the number of
            ratio-test survivors.
        """
        if frame_bgr.ndim == 2:
            gray = frame_bgr
        else:
            gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)

        kp_scene, des_scene = self.detector.detectAndCompute(gray, None)

        if self.des_template is None or des_scene is None:
            return MatchResult(False, None, 0)

        good = self._good_matches(des_scene)
        if len(good) < self.min_matches:
            return MatchResult(False, None, len(good))

        src = np.float32([self.kp_template[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        dst = np.float32([kp_scene[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

        matrix, _ = cv2.findHomography(src, dst, cv2.RANSAC, self.ransac_threshold)
        if matrix is None:
            return MatchResult(False, None, len(good))

        polygon = cv2.perspectiveTransform(self._corners, matrix)

        return MatchResult(True, polygon, len(good))

使用:

# Example: match the template against one scene image and save the annotated frame.
matcher = PlanarMatcher("template.jpg")
frame = cv2.imread("scene.jpg")
# imread returns None instead of raising when the file cannot be read.
if frame is None:
    raise FileNotFoundError("scene.jpg")

result = matcher.match(frame)

if result.ok and result.polygon is not None:
    cv2.polylines(frame, [np.int32(result.polygon)], True, (0, 255, 0), 3)

cv2.imwrite("pipeline_result.jpg", frame)

这种写法比一大段脚本更适合放到服务或批处理里。

调试高阶视觉任务的方式

别只看最终输出。把关键中间结果保存出来:

# Write each intermediate visualization to its own file so every stage can be inspected.
cv2.imwrite("debug_keypoints.jpg", keypoint_view)
cv2.imwrite("debug_matches.jpg", match_view)
cv2.imwrite("debug_mask.jpg", mask)
cv2.imwrite("debug_flow.jpg", flow_view)

特征匹配要看匹配线。

相机标定要看角点是否找准。

光流要看点是否漂移。

DNN 要看预处理后的输入尺寸、通道顺序和归一化方式。

视觉问题很多时候不是算法错,而是输入没处理成算法期待的样子。