You need to enable JavaScript to run this app.
最新活动
大模型
产品
解决方案
定价
生态与合作
支持与服务
开发者
了解我们

Python自动化小游戏:Tesseract无法识别单个字符的问题求助

Python自动化小游戏:Tesseract无法识别单个字符的问题求助

我刚接触Python不久,一直用ChatGPT当导师,现在想自动化一个小游戏——需要识别屏幕上的单个字符(字母或数字),时机合适的时候按下对应按键。我已经改了三个版本的代码,可就是没法让程序识别出字符,真心希望有人能帮我推进一下!

相关图片说明

  • 游戏界面:显示了带有目标字符的小游戏场景
  • 裁剪区域:单独截取了包含目标字符的核心区域
  • 预处理后图像:经过灰度转换、对比度增强等处理后的字符图像

我的代码实现

import pyautogui
import pytesseract
from PIL import Image, ImageEnhance
import time
import re

def preprocess_image(image, *, contrast_factor=5.0, threshold=200, invert=False):
    """
    Preprocess the image to enhance OCR detection.

    The image is converted to grayscale, its contrast is boosted, and the
    result is binarized: pixels brighter than ``threshold`` become one
    color and every other pixel becomes the opposite color.

    Args:
        image (PIL.Image.Image): The input image.
        contrast_factor (float): Factor handed to the contrast enhancer.
            Defaults to 5.0.
        threshold (int): Grayscale level (0-255) a pixel must exceed to be
            treated as text. Defaults to 200.
        invert (bool): When False (default) text pixels are white (255) on a
            black (0) background. When True the colors are swapped, giving
            dark text on a white background.
            NOTE(review): Tesseract's documentation recommends dark text on
            a light background - consider enabling this if OCR returns
            nothing.

    Returns:
        PIL.Image.Image: The preprocessed grayscale image whose pixels are
        either 0 or 255.
    """
    # Convert to grayscale
    gray_image = image.convert("L")

    # Enhance contrast significantly
    enhanced_image = ImageEnhance.Contrast(gray_image).enhance(contrast_factor)

    # Binarize with explicit integer levels rather than relying on
    # ``cond and 255`` evaluating to the bool False for dark pixels.
    text_level, background_level = (0, 255) if invert else (255, 0)
    return enhanced_image.point(
        lambda p: text_level if p > threshold else background_level
    )

def analyze_region(region, region_name):
    """
    Capture one screen region, OCR it, and return the recognized characters.

    Both the raw capture and the preprocessed image are written to the
    current working directory as PNG files so the OCR input can be
    inspected afterwards.

    Args:
        region (tuple): The region to capture (x, y, width, height).
        region_name (str): Label used in the debug file names and log line.

    Returns:
        str: Every uppercase letter or digit found in the OCR output, joined
        in order; an empty string when none were found.
    """
    capture = pyautogui.screenshot(region=region)
    capture.save(f"scanned_region_{region_name}_original.png")

    cleaned = preprocess_image(capture)
    cleaned.save(f"scanned_region_{region_name}_processed.png")

    # Restrict Tesseract to uppercase letters and digits, using page
    # segmentation mode 10.
    tesseract_config = '--psm 10 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    raw_text = pytesseract.image_to_string(cleaned, config=tesseract_config)

    # Filter once; the same list feeds both the debug line and the result.
    matches = re.findall(r'[A-Z0-9]', raw_text)
    print(f"Raw OCR output from {region_name}: {raw_text.strip()}\nFiltered characters: {matches}")  # Debugging output

    return ''.join(matches)

# Screen regions of the two progress bars, as (x, y, width, height).
_TOP_BAR_REGION = (1010, 103, 530, 77)
_RIGHT_BAR_REGION = (1840, 120, 75, 695)


def _find_color_pixel(image, needle):
    """Return (x, y) of the first pixel whose RGB bytes equal *needle*, else None.

    Pixels are examined row by row from the top-left corner, i.e. in the
    same order as a ``for y ... for x ...`` scan.
    """
    width, _ = image.size
    # Converting to RGB lets a capture that carries an alpha channel still
    # be compared against a 3-value color.
    data = image.convert("RGB").tobytes()

    offset = data.find(needle)
    while offset != -1:
        # Only offsets on a 3-byte boundary are real pixels; other hits
        # straddle two neighbouring pixels and must be skipped.
        if offset % 3 == 0:
            y, x = divmod(offset // 3, width)
            return x, y
        offset = data.find(needle, offset + 1)
    return None


def _poll_bars_for_characters(attempts=10, delay=0.2):
    """Repeatedly OCR both bar regions; return (bar label, characters) or None."""
    for _ in range(attempts):
        top_bar_result = analyze_region(_TOP_BAR_REGION, "top_bar")
        right_bar_result = analyze_region(_RIGHT_BAR_REGION, "right_bar")

        # The top bar takes precedence when both regions yield characters.
        if top_bar_result:
            return "top bar", top_bar_result
        if right_bar_result:
            return "right bar", right_bar_result

        time.sleep(delay)  # Brief delay between checks
    return None


def find_color_on_screen(target_color):
    """
    Continuously scans the screen for a specific color.

    Full-screen screenshots are taken in a loop until one contains a pixel
    of exactly ``target_color``. The two progress-bar regions are then
    OCR-ed up to 10 times, the outcome is printed, and the function waits
    for Enter before returning.

    Args:
        target_color (tuple): RGB values of the target color (e.g., (255, 0, 0) for red).

    Raises:
        ValueError: If ``target_color`` does not hold exactly three values
            in the range 0-255; such a color could never be matched.

    Outputs:
        Prints a message when the color is found and analyzes specific regions to identify a character.
    """
    rgb = tuple(target_color)
    if len(rgb) != 3:
        raise ValueError(
            f"target_color must have exactly 3 components (R, G, B), got {len(rgb)}"
        )
    needle = bytes(rgb)  # raises ValueError for components outside 0-255

    print(f"Scanning for color: {target_color} (RGB)...")
    while True:
        location = _find_color_pixel(pyautogui.screenshot(), needle)
        if location is None:
            continue  # color not on screen yet; take another screenshot

        x, y = location
        print(f"Color {target_color} found at pixel ({x}, {y})!")

        detection = _poll_bars_for_characters()
        if detection is None:
            print("No valid capital letters or numbers detected within the time frame.")
            input("Press Enter to exit...")
            return

        bar_label, characters = detection
        print(f"Detected character(s) in {bar_label}: {characters}")
        input("Press Enter to acknowledge and exit...")
        return

# Script entry point: start scanning when the file is run directly.
if __name__ == "__main__":
    # Swap in the RGB value of the color you want to detect.
    # (0, 141, 146) corresponds to 008D92 in HEX.
    find_color_on_screen(target_color=(0, 141, 146))

命令行输出结果

Raw OCR output from top_bar:
Filtered characters: []
Raw OCR output from right_bar:
Filtered characters: []
No valid capital letters or numbers detected within the time frame.
Press Enter to exit...

备注:内容来源于 Stack Exchange,提问作者:Christian Findninge

火山引擎 最新活动