Python实现OMR答题卡数据提取的技术求助
我目前正在做一个OMR数据提取项目,需要识别学生的答题卡,下面是我的代码片段:
import cv2
import numpy as np

# ---------------------------------------------------------------------------
# Load and pre-process the scanned OMR sheet.
# ---------------------------------------------------------------------------
path = './data/images/5.jpg'
img = cv2.imread(path)
h, w = img.shape[:2]
img = cv2.resize(img, (w // 2, h // 2))
# BUG FIX: the original sliced with the PRE-resize h/w; numpy clamps
# out-of-range slices, so the intended 15 px / 5 px border trim silently
# never happened.  Refresh the dimensions after resizing.
h, w = img.shape[:2]
img = img[0:h - 15, 0:w - 5]

# Keep only near-white pixels, then invert so printed marks become white.
lower = (225, 225, 225)
upper = (255, 255, 255)
thresh = cv2.inRange(img, lower, upper)
thresh = 255 - thresh
imgCanny = cv2.Canny(thresh, 10, 50)

contoursImage = img.copy()
firstOMRBoxImage = img.copy()
# OpenCV 4.x: findContours returns (contours, hierarchy).
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cv2.drawContours(contoursImage, contours[0], -1, (0, 255, 0), 2)


def getCornerPoints(cont):
    """Return the polygon approximation (corner points) of a contour."""
    peri = cv2.arcLength(cont, True)
    return cv2.approxPolyDP(cont, 0.02 * peri, True)


def reorder(points):
    """Reorder 4 corners as [top-left, top-right, bottom-left, bottom-right]."""
    points = points.reshape((4, 2))
    pointsNew = np.zeros((4, 1, 2), np.int32)
    sums = points.sum(1)
    pointsNew[0] = points[np.argmin(sums)]   # min(x+y) -> top-left
    pointsNew[3] = points[np.argmax(sums)]   # max(x+y) -> bottom-right
    diffs = np.diff(points, axis=1)
    pointsNew[1] = points[np.argmin(diffs)]  # min(y-x) -> top-right
    pointsNew[2] = points[np.argmax(diffs)]  # max(y-x) -> bottom-left
    return pointsNew


def splitBoxes(img):
    """Split the thresholded answer block into a 25-row x 5-column grid.

    The image is first trimmed so height/width are exact multiples of
    25/5 (otherwise np.vsplit / np.hsplit would raise).  Returns the
    list of bubble cells in row-major order.
    """
    h, w = img.shape[:2]
    img = img[:(h // 25) * 25, :(w // 5) * 5]
    rows = np.vsplit(img, 25)
    cv2.imwrite('8Split_image.png', rows[0])  # first row saved for debugging
    boxes = []
    for r in rows:
        boxes.extend(np.hsplit(r, 5))
    return boxes


def rectContour(contours):
    """Locate the rectangular regions of the sheet, warp the first OMR
    block to a fronto-parallel view, threshold it and split it into
    bubble cells.

    Returns the corner points of the first OMR block.
    Raises ValueError when fewer than 13 rectangles are found (the
    size-rank-to-region mapping below is specific to this sheet layout).
    """
    rectContours = []
    for c in contours:
        if cv2.contourArea(c) > 50:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.02 * peri, True)
            if len(approx) == 4:
                rectContours.append(c)
    rectContours = sorted(rectContours, key=cv2.contourArea, reverse=True)

    # BUG FIX: the hard-coded indices below assumed at least 13 rectangles;
    # the original crashed with IndexError on sheets where fewer were found.
    if len(rectContours) < 13:
        raise ValueError(
            'expected at least 13 rectangular regions, found %d' % len(rectContours))

    # Layout-specific mapping from contour-size rank to sheet region
    # (NOTE(review): verify this mapping against the actual template).
    firstOMRBox = getCornerPoints(rectContours[0])
    secondOMRBox = getCornerPoints(rectContours[1])
    thirdOMRBox = getCornerPoints(rectContours[3])
    fourthOMRBox = getCornerPoints(rectContours[2])
    rollNoPoints = getCornerPoints(rectContours[4])
    districtPoints = getCornerPoints(rectContours[5])
    nameAndDatePoints = getCornerPoints(rectContours[7])
    candidateSign = getCornerPoints(rectContours[8])
    invigilatorSign = getCornerPoints(rectContours[9])
    groupPoints = getCornerPoints(rectContours[10])
    classPoints = getCornerPoints(rectContours[12])

    if firstOMRBox.size != 0 and secondOMRBox.size != 0:
        # Debug overlay: draw every detected region in a distinct colour.
        cv2.drawContours(firstOMRBoxImage, firstOMRBox, -1, (0, 255, 0), 30)
        cv2.drawContours(firstOMRBoxImage, secondOMRBox, -1, (255, 0, 0), 30)
        cv2.drawContours(firstOMRBoxImage, thirdOMRBox, -1, (0, 0, 255), 30)
        cv2.drawContours(firstOMRBoxImage, fourthOMRBox, -1, (255, 255, 0), 30)
        cv2.drawContours(firstOMRBoxImage, rollNoPoints, -1, (0, 255, 255), 30)
        cv2.drawContours(firstOMRBoxImage, districtPoints, -1, (255, 0, 255), 30)
        cv2.drawContours(firstOMRBoxImage, nameAndDatePoints, -1, (255, 255, 255), 30)
        cv2.drawContours(firstOMRBoxImage, candidateSign, -1, (0, 0, 0), 30)
        cv2.drawContours(firstOMRBoxImage, invigilatorSign, -1, (255, 255, 255), 30)
        cv2.drawContours(firstOMRBoxImage, groupPoints, -1, (0, 0, 255), 30)
        cv2.drawContours(firstOMRBoxImage, classPoints, -1, (255, 0, 0), 30)

        firstOMRBox = reorder(firstOMRBox)
        secondOMRBox = reorder(secondOMRBox)

        # Warp the first OMR box using its own aspect ratio so the grid
        # is not distorted.
        width_omr = np.linalg.norm(firstOMRBox[0][0] - firstOMRBox[1][0])
        height_omr = np.linalg.norm(firstOMRBox[0][0] - firstOMRBox[2][0])
        pt1 = np.float32(firstOMRBox)
        pt2 = np.float32([[0, 0], [width_omr, 0],
                          [0, height_omr], [width_omr, height_omr]])
        matrix = cv2.getPerspectiveTransform(pt1, pt2)
        imgWarpColoured = cv2.warpPerspective(
            img, matrix, (int(width_omr), int(height_omr)))
        cv2.imwrite('5Wrap_contour.png', imgWarpColoured)

        # Fixed-threshold pass (marks -> white on black).
        imgWarpGray = cv2.cvtColor(imgWarpColoured, cv2.COLOR_BGR2GRAY)
        imgThresh = cv2.threshold(imgWarpGray, 200, 255, cv2.THRESH_BINARY_INV)[1]
        cv2.imwrite('6biggest_thresh.png', imgThresh)
        print(imgThresh.shape)

        # Debug: contours found directly on the grayscale warp.
        afterContourIMage = imgThresh.copy()
        grey = cv2.cvtColor(imgWarpColoured, cv2.COLOR_BGR2GRAY)
        contours, hierarchy = cv2.findContours(
            grey, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        print(len(contours))
        cv2.drawContours(afterContourIMage, contours, -1, (0, 255, 0), 10)
        cv2.imwrite('7after_contour.png', afterContourIMage)

        # Otsu picks the threshold automatically — more robust to lighting
        # than the fixed 200 used above.  (Replaces the original dst-argument
        # call form with a plain assignment.)
        imgThresh = cv2.threshold(
            grey, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
        cv2.imwrite('7after_thresh.png', imgThresh)

        aginAfterContourIMage = imgWarpColoured.copy()
        contours, hierarchy = cv2.findContours(
            imgThresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        print(len(contours))
        cv2.drawContours(aginAfterContourIMage, contours, -1, (0, 255, 0), 2)
        cv2.imwrite('7after_contour2.png', aginAfterContourIMage)

        # Crop off the left 20% of the thresholded block (presumably row
        # labels), keeping only the bubble grid.
        thresh_h, thresh_w = imgThresh.shape
        x1 = int(thresh_w * 0.2)
        if x1 < thresh_w:
            imgThresh = imgThresh[0:thresh_h, x1:thresh_w]
            cv2.imwrite('7after_crop.png', imgThresh)
        else:
            print("Cropping coordinates are out of bounds!")
            # Fall back to saving the uncropped version.
            cv2.imwrite('7after_crop.png', imgThresh)

        splitBoxes(imgThresh)

    return firstOMRBox


rectContour(contours[0])

# Persist the intermediate debug images.
cv2.imwrite('1omr_sheet_thresh2.png', thresh)
cv2.imwrite('2omr_sheet_canny2.png', imgCanny)
cv2.imwrite('3contours2.png', contoursImage)
cv2.imwrite('4biggest_contour2.png', firstOMRBoxImage)
cv2.waitKey(0)
cv2.destroyAllWindows()
当前遇到的瓶颈
我想把答题卡转换成网格,把每个区块定义成二进制值(0代表白色,1代表黑色),但我刚接触这方面的技术,不知道具体该怎么实现。
我试过通过识别轮廓来解决,但没成功——我没法区分出那些圆形且内部完全涂黑的气泡,如果能解决这个问题,后面的步骤我应该能搞定。
处理后的轮廓效果如下:
![处理后的轮廓图1]
![处理后的轮廓图2]
给你的解决方案建议
看了你的代码和问题,其实你已经完成了最关键的两步:定位答题卡区域、分割出单个气泡。接下来只需要针对每个气泡做"是否涂黑"的判断即可,给你几个实用的思路:
1. 基于像素值的简单判断(最易实现)
你已经用splitBoxes把每个气泡分成了单独的小图,直接对每个小图计算平均灰度值或者黑色像素占比:
def isBubbleFilled(box_img, threshold=100, fill_ratio=0.6):
    """Return 1 if a bubble cell looks filled-in, else 0.

    A cell counts as filled when either its mean intensity drops below
    `threshold` (a heavily shaded bubble drags the average down), or the
    fraction of dark pixels (below `threshold`) exceeds `fill_ratio`.
    """
    # First criterion: overall darkness of the cell.
    if np.mean(box_img) < threshold:
        return 1  # marked as selected
    # Second criterion: proportion of dark pixels across the cell area.
    dark = np.sum(box_img < threshold)
    cell_area = box_img.shape[0] * box_img.shape[1]
    return 1 if dark / cell_area > fill_ratio else 0
在splitBoxes函数末尾调用这个方法,就能得到每个气泡的二进制标记了。
2. 优化轮廓识别,筛选圆形气泡
如果想通过轮廓来判断,可以先过滤掉非圆形的轮廓,再看内部填充情况:
def isCircleContour(contour):
    """Return True if the contour is approximately circular.

    Circularity = 4*pi*area / perimeter^2 equals 1 for a perfect circle
    and decreases for elongated or irregular shapes.
    """
    perimeter = cv2.arcLength(contour, True)
    area = cv2.contourArea(contour)
    if area == 0:
        return False  # degenerate contour; also guards the division below
    circularity = 4 * np.pi * (area / (perimeter ** 2))
    return circularity > 0.7  # tune this cutoff for your sheets


def checkFilledBubble(box_img):
    """Return 1 if the cell contains a circular bubble that is mostly dark.

    `box_img` is assumed to be a single-channel (grayscale/binary) cell
    image where marks are dark — TODO confirm against the thresholding
    stage feeding this function.
    """
    contours, _ = cv2.findContours(box_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for cnt in contours:
        if isCircleContour(cnt):
            # Rasterize the contour interior into a mask.
            mask = np.zeros_like(box_img)
            cv2.drawContours(mask, [cnt], -1, 255, -1)
            # BUG FIX: the original counted `bitwise_and(box_img, mask) < 100`,
            # which is True for EVERY pixel outside the mask (AND zeroes them),
            # so the background inflated the count and the 0.5 ratio test was
            # meaningless.  Count dark pixels strictly inside the bubble.
            filled_pixels = np.sum((box_img < 100) & (mask == 255))
            contour_area = cv2.contourArea(cnt)  # > 0, guaranteed by isCircleContour
            if filled_pixels / contour_area > 0.5:
                return 1
    return 0
3. 改进阈值处理,适配不同光照
你当前用了固定阈值200,建议换成OTSU自动阈值,它能根据图像自动计算最合适的分割阈值,避免光照影响:
# 替换原来的阈值处理代码 _, imgThresh = cv2.threshold(imgWarpGray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
这些方法都可以快速集成到你的现有代码里,先从第一种方法开始试,最简单高效,后续再根据实际效果调整优化~
备注:内容来源于 Stack Exchange,提问作者为 Dipan Nama




