You need to enable JavaScript to run this app.
最新活动
大模型
产品
解决方案
定价
生态与合作
支持与服务
开发者
了解我们

Python实现OMR答题卡数据提取的技术求助

Python实现OMR答题卡数据提取的技术求助

我目前正在做一个OMR数据提取项目,需要识别学生的答题卡,下面是我的代码片段:

import cv2
import numpy as np

# Read the input answer-sheet image.
path = './data/images/5.jpg'
img = cv2.imread(path)
h, w = img.shape[:2]
# Downscale to half size to speed up the later processing.
img = cv2.resize(img, (w//2, h//2))

# BUG FIX: h and w previously referred to the ORIGINAL image, so the
# intended border crop was a no-op after resizing (numpy clamps slice
# indices past the end of the array).  Re-read the dimensions of the
# resized image before trimming the 15px bottom / 5px right border.
h, w = img.shape[:2]
img = img[0:h-15, 0:w-5]

# Threshold on (near-)white pixels, then invert so printed marks and
# boxes become white foreground on a black background.
lower = (225, 225, 225)
upper = (255, 255, 255)
thresh = cv2.inRange(img, lower, upper)
thresh = 255 - thresh

# Edge map (only used for the debug image written at the end).
imgCanny = cv2.Canny(thresh, 10, 50)

# Find external contours on the thresholded sheet and draw them for
# inspection.  cv2.findContours returns (contours, hierarchy) in
# OpenCV 4.x, so contours[0] below is the list of contours.
contoursImage = img.copy()
firstOMRBoxImage = img.copy()
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cv2.drawContours(contoursImage, contours[0], -1, (0,255,0), 2)

def rectContour(contours):
    """Locate the rectangular regions of the answer sheet, warp the first
    OMR answer block to a fronto-parallel view, and hand the result to
    splitBoxes for per-bubble segmentation.

    NOTE(review): this function depends on module-level globals
    (firstOMRBoxImage, img) and on sibling helpers (getCornerPoints,
    reorder, splitBoxes); it also writes several debug images to the
    current directory as a side effect.

    Returns:
        The 4-point corner array of the largest rectangular contour.
    """
    # Keep only contours that are big enough to matter and whose
    # polygonal approximation has exactly 4 corners (rectangles).
    rectContours = []
    for i in contours:
        area = cv2.contourArea(i)
        if area > 50:
            peri = cv2.arcLength(i, True)
            approx = cv2.approxPolyDP(i, 0.02*peri, True)
            if (len(approx) == 4):
                rectContours.append(i)
    # Largest rectangles first.  NOTE(review): the hard-coded indices
    # below (0..12, with 6 and 11 skipped) assume one specific sheet
    # layout; a different sheet, or extra noise contours, will make the
    # picks wrong or raise IndexError — confirm against the template.
    rectContours = sorted(rectContours, key=cv2.contourArea, reverse=True)
    firstOMRBox = getCornerPoints(rectContours[0])
    secondOMRBox = getCornerPoints(rectContours[1])
    thirdOMRBox = getCornerPoints(rectContours[3])
    fourthOMRBox = getCornerPoints(rectContours[2])
    rollNoPoints = getCornerPoints(rectContours[4])
    districtPoints = getCornerPoints(rectContours[5])
    nameAndDatePoints = getCornerPoints(rectContours[7])
    candidateSign = getCornerPoints(rectContours[8])
    invigilatorSign = getCornerPoints(rectContours[9])
    groupPoints = getCornerPoints(rectContours[10])
    classPoints = getCornerPoints(rectContours[12])

    if firstOMRBox.size != 0 and secondOMRBox.size != 0:
        # Debug overlay: draw every detected region in a distinct colour
        # on the global firstOMRBoxImage (saved by the caller script).
        cv2.drawContours(firstOMRBoxImage, firstOMRBox, -1, (0,255,0), 30)
        cv2.drawContours(firstOMRBoxImage, secondOMRBox, -1, (255,0,0), 30)
        cv2.drawContours(firstOMRBoxImage, thirdOMRBox, -1, (0,0,255), 30)
        cv2.drawContours(firstOMRBoxImage, fourthOMRBox, -1, (255,255,0), 30)
        cv2.drawContours(firstOMRBoxImage, rollNoPoints, -1, (0,255,255), 30)
        cv2.drawContours(firstOMRBoxImage, districtPoints, -1, (255,0,255), 30)
        cv2.drawContours(firstOMRBoxImage, nameAndDatePoints, -1, (255,255,255), 30)
        cv2.drawContours(firstOMRBoxImage, candidateSign, -1, (0,0,0), 30)
        cv2.drawContours(firstOMRBoxImage, invigilatorSign, -1, (255,255,255), 30)
        cv2.drawContours(firstOMRBoxImage, groupPoints, -1, (0,0,255), 30)
        cv2.drawContours(firstOMRBoxImage, classPoints, -1, (255,0,0), 30)
        # Put corners in a fixed order (tl, tr, bl, br) so the
        # perspective transform below maps them consistently.
        firstOMRBox = reorder(firstOMRBox)
        secondOMRBox = reorder(secondOMRBox)


        # Get the width and height of the first OMR box
        # Calculate the width and height of the first OMR box
        # (edge lengths from the reordered corners, so the warp keeps
        # the box's original aspect ratio).
        width_omr = np.linalg.norm(firstOMRBox[0][0] - firstOMRBox[1][0])
        height_omr = np.linalg.norm(firstOMRBox[0][0] - firstOMRBox[2][0])

        # Use the original aspect ratio for the destination points
        pt1 = np.float32(firstOMRBox)
        pt2 = np.float32([[0,0],[width_omr,0],[0,height_omr],[width_omr,height_omr]])
        matrix = cv2.getPerspectiveTransform(pt1, pt2)
        imgWarpColoured = cv2.warpPerspective(img, matrix, (int(width_omr), int(height_omr)))


        # max_side = max(w, h)
        # pt1 = np.float32(firstOMRBox)
        # pt2 = np.float32([[0,0],[max_side,0],[0,max_side],[max_side,max_side]])
        # matrix = cv2.getPerspectiveTransform(pt1, pt2)
        # imgWarpColoured = cv2.warpPerspective(img, matrix, (max_side,max_side))

        cv2.imwrite('5Wrap_contour.png', imgWarpColoured)

        # Apply Threshhold
        # imgWarpGray = cv2.cvtColor(imgWarpColoured, cv2.COLOR_BGR2GRAY)
        # imgThresh = cv2.threshold(imgWarpGray, 200, 255, cv2.THRESH_BINARY_INV)[1]
        # cv2.imwrite('6biggest_thresh.png', imgThresh)
        # print(imgThresh.shape)
        # x1 = int(w * 0.2)  # Start cropping from 70% width
        # y1 = 0             # Start from the top
        # x2 = w             # End at full width (rightmost)
        # y2 = h             # Full height
        # imgThresh = imgThresh[y1:y2, x1:x2]
        # cv2.imwrite('7after_crop.png', imgThresh)


        # Apply Threshhold
        # Fixed threshold at 200: bubbles darker than 200 become white
        # foreground (binary-inverted).
        imgWarpGray = cv2.cvtColor(imgWarpColoured, cv2.COLOR_BGR2GRAY)
        imgThresh = cv2.threshold(imgWarpGray, 200, 255, cv2.THRESH_BINARY_INV)[1]
        cv2.imwrite('6biggest_thresh.png', imgThresh)
        print(imgThresh.shape)

        # NOTE(review): afterContourIMage is single-channel (copy of
        # imgThresh), so the (0,255,0) colour below degrades to its
        # first channel value (0) when drawn — debug overlay only.
        afterContourIMage = imgThresh.copy()
        grey = cv2.cvtColor(imgWarpColoured, cv2.COLOR_BGR2GRAY)
        # Find contours
        contours, hierarchy = cv2.findContours(grey, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        print(len(contours))
        cv2.drawContours(afterContourIMage, contours, -1, (0,255,0), 10)
        cv2.imwrite('7after_contour.png', afterContourIMage)

        # grey_inverted = cv2.bitwise_not(grey)
        # cv2.imwrite('7grey_inverted.png', grey_inverted)

        # Otsu auto-threshold overwrites imgThresh in place (dst arg).
        cv2.threshold(grey, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU, imgThresh)
        cv2.imwrite('7after_thresh.png', imgThresh)


        aginAfterContourIMage = imgWarpColoured.copy()
        # Find contours
        contours, hierarchy = cv2.findContours(imgThresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        print(len(contours))
        cv2.drawContours(aginAfterContourIMage, contours, -1, (0,255,0), 2)
        cv2.imwrite('7after_contour2.png', aginAfterContourIMage)

        # Get the current dimensions of imgThresh
        thresh_h, thresh_w = imgThresh.shape

        # Now use the dimensions of imgThresh for cropping
        # (drops the left 20% — presumably the question-number column;
        # verify against the sheet template).
        x1 = int(thresh_w * 0.2)  # Start cropping from 20% width
        y1 = 0                    # Start from the top
        x2 = thresh_w             # End at full width
        y2 = thresh_h             # Full height

        # Make sure our cropping coordinates are valid
        # NOTE(review): for any positive width, x1 < thresh_w always
        # holds, so the else branch below is effectively dead code.
        if x1 < thresh_w and y2 <= thresh_h:
            imgThresh = imgThresh[y1:y2, x1:x2]
            cv2.imwrite('7after_crop.png', imgThresh)
        else:
            print("Cropping coordinates are out of bounds!")
            # Use the uncropped version instead
            cv2.imwrite('7after_crop.png', imgThresh)






        # biggestThreshContoursImage = imgThresh.copy()
        # threshContours = cv2.findContours(biggestThreshContoursImage, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # cv2.drawContours(biggestThreshContoursImage, threshContours[0], -1, (0,255,0), 1)
        # cv2.imwrite('biggest_thresh_contours.png', biggestThreshContoursImage)

        # Segment the cropped grid into individual bubble cells.
        splitBoxes(imgThresh)

    return firstOMRBox

def getCornerPoints(cont):
    """Approximate a contour to a polygon and return its vertices.

    Uses a tolerance of 2% of the contour perimeter, so a clean
    rectangular contour yields exactly its four corner points.
    """
    perimeter = cv2.arcLength(cont, True)
    epsilon = 0.02 * perimeter
    return cv2.approxPolyDP(cont, epsilon, True)

def reorder(points):
    """Order four corner points as [top-left, top-right, bottom-left,
    bottom-right] and return them with shape (4, 1, 2), dtype int32.

    The coordinate sum x+y is smallest at the top-left corner and
    largest at the bottom-right; the difference y-x is smallest at the
    top-right and largest at the bottom-left.
    """
    pts = points.reshape((4, 2))
    sums = pts.sum(axis=1)
    diffs = np.diff(pts, axis=1).ravel()
    ordered = np.array(
        [pts[sums.argmin()],    # top-left
         pts[diffs.argmin()],   # top-right
         pts[diffs.argmax()],   # bottom-left
         pts[sums.argmax()]],   # bottom-right
        dtype=np.int32,
    )
    return ordered.reshape((4, 1, 2))


def splitBoxes(img):
    """Cut the warped/thresholded answer grid into 25 x 5 cells.

    The image is first trimmed so its height is a multiple of 25 and
    its width a multiple of 5, then split row-major into individual
    bubble cells.  Returns the 125 cell images in reading order.
    """
    h, w = img.shape[:2]
    # Trim to the nearest multiples so vsplit/hsplit divide evenly.
    trimmed = img[: (h // 25) * 25, : (w // 5) * 5]

    rows = np.vsplit(trimmed, 25)
    # Dump the first row so the grid alignment can be eyeballed.
    cv2.imwrite('8Split_image.png', rows[0])

    boxes = []
    for row in rows:
        boxes.extend(np.hsplit(row, 5))
    return boxes

# Load image
# img = cv2.imread("image.jpg")  # Replace with your image path
# splitBoxes(img)


# Example usage
rectContour(contours[0])

# save results
cv2.imwrite('1omr_sheet_thresh2.png',thresh)
cv2.imwrite('2omr_sheet_canny2.png',imgCanny)
cv2.imwrite('3contours2.png',contoursImage)
cv2.imwrite('4biggest_contour2.png',firstOMRBoxImage)
cv2.waitKey(0)
cv2.destroyAllWindows()

当前遇到的瓶颈

我想把答题卡转换成网格,把每个区块定义成二进制值(0代表白色,1代表黑色),但我刚接触这方面的技术,不知道具体该怎么实现。

我试过通过识别轮廓来解决,但没成功——我没法区分出那些圆形且内部完全涂黑的气泡,如果能解决这个问题,后面的步骤我应该能搞定。

处理后的轮廓效果如下:
![处理后的轮廓图1]
![处理后的轮廓图2]


给你的解决方案建议

看了你的代码和问题,其实你已经完成了最关键的步骤:定位答题卡区域、分割出单个气泡。接下来只需要针对每个气泡做判断即可,给你几个实用的思路:

1. 基于像素值的简单判断(最易实现)

你已经用splitBoxes把每个气泡分成了单独的小图,直接对每个小图计算平均灰度值或者黑色像素占比:

def isBubbleFilled(box_img, threshold=100, fill_ratio=0.6):
    """Classify one bubble cell as filled (1) or empty (0).

    A cell counts as filled when its mean intensity falls below
    ``threshold`` (a blackened bubble is dark overall), or when the
    fraction of pixels darker than ``threshold`` exceeds ``fill_ratio``.
    """
    # Fast path: an overall-dark cell is certainly a marked bubble.
    if np.mean(box_img) < threshold:
        return 1
    # Otherwise decide by the proportion of dark pixels.
    dark = np.count_nonzero(box_img < threshold)
    total = box_img.shape[0] * box_img.shape[1]
    return 1 if dark / total > fill_ratio else 0

在splitBoxes函数末尾对每个分割出的小图调用这个方法,就能得到每个气泡的二进制标记(0/1)了。

2. 优化轮廓识别,筛选圆形气泡

如果想通过轮廓来判断,可以先过滤掉非圆形的轮廓,再看内部填充情况:

def isCircleContour(contour):
    """Return True when a contour is approximately circular.

    Circularity = 4*pi*area / perimeter^2 is 1 for a perfect circle
    and drops toward 0 for elongated or jagged shapes.
    """
    perimeter = cv2.arcLength(contour, True)
    area = cv2.contourArea(contour)
    if not area:
        return False
    circularity = 4 * np.pi * area / (perimeter ** 2)
    return circularity > 0.7  # tune this cutoff per sheet if needed

def checkFilledBubble(box_img):
    """Return 1 if the cell contains a filled, roughly circular bubble.

    Finds external contours, keeps only near-circular ones (via
    isCircleContour), and checks the fraction of dark pixels *inside*
    each circular contour.

    Args:
        box_img: single-channel (grayscale or binary) cell image.

    Returns:
        1 when any circular contour is more than half dark, else 0.
    """
    contours, _ = cv2.findContours(box_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for cnt in contours:
        if not isCircleContour(cnt):
            continue
        # Rasterize the contour interior into a mask.
        mask = np.zeros_like(box_img)
        cv2.drawContours(mask, [cnt], -1, 255, -1)
        # BUG FIX: the original counted `bitwise_and(...) < 100` over
        # the WHOLE image, so every background pixel (zeroed by the
        # mask, hence < 100) was also counted as "dark", inflating the
        # ratio.  Count dark pixels only where the mask is set, and use
        # the actual pixel count inside the mask as the denominator.
        inside = box_img[mask == 255]
        if inside.size == 0:
            continue
        dark = np.count_nonzero(inside < 100)
        if dark / inside.size > 0.5:
            return 1
    return 0

3. 改进阈值处理,适配不同光照

你当前用了固定阈值200,建议换成OTSU自动阈值,它能根据图像自动计算最合适的分割阈值,避免光照影响:

# 替换原来的阈值处理代码
_, imgThresh = cv2.threshold(imgWarpGray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

这些方法都可以快速集成到你的现有代码里,先从第一种方法开始试,最简单高效,后续再根据实际效果调整优化~

备注:内容来源于stack exchange,提问作者Dipan Nama

火山引擎 最新活动