如何用mbedtls实现大文件分块及递归哈希？

阿华AIGC实验室

2026-5-14

用mbedtls实现大文件分块哈希与递归哈希

嘿，我来帮你搞定这两个问题！先看你给出的代码，里面有几个关键的错误，我先帮你梳理清楚，再给出正确的实现方案。

一、先纠正你代码里的问题

你当前的代码逻辑存在明显偏差：

错误地先对每个读取的块单独计算哈希，再把块哈希重复两次更新到上下文，这相当于计算的是「所有块哈希的哈希」，而非整个文件的哈希。正确做法是直接把读取到的原始块数据更新到SHA256上下文，不需要对每个块单独计算哈希。
缺少SHA256上下文的初始化与销毁步骤，会导致未定义行为。
没有处理文件读取的实际字节数，默认每次读满缓冲区，但文件末尾的块往往是不满的。

二、大文件分块哈希的正确实现

分块哈希的核心是利用mbedtls的增量哈希接口：先用mbedtls_sha256_init初始化上下文，再用mbedtls_sha256_starts_ret启动一次SHA-256计算，然后循环用mbedtls_sha256_update_ret喂入读取的原始数据，接着用mbedtls_sha256_finish_ret得到最终哈希，最后用mbedtls_sha256_free释放上下文。

示例代码

#include <stdio.h>
#include <mbedtls/sha256.h>

#define BUFFER_SIZE 4096  // 选用4096字节缓冲区，适配多数系统页大小

// Computes the SHA-256 digest of a file by streaming it through mbedtls in
// BUFFER_SIZE-byte chunks, so memory use does not grow with the file size.
//
// Parameters:
//   file_path  Path of the file to hash; it is opened in binary mode.
//   hash       Output buffer that receives the 32-byte digest. Its contents
//              should not be relied upon when the function fails.
//
// Returns 0 on success, -1 on failure (a message is written to stderr).
int calculate_file_sha256(const char *file_path, unsigned char hash[32]) {
    // Passing NULL to fopen() or to the mbedtls output buffer is undefined
    // behavior, so reject it up front.
    if (!file_path || !hash) {
        fprintf(stderr, "Invalid argument\n");
        return -1;
    }

    FILE *hFile = fopen(file_path, "rb");
    if (!hFile) {
        fprintf(stderr, "Failed to open file: %s\n", file_path);
        return -1;
    }

    int rc = -1;
    unsigned char binBuffer[BUFFER_SIZE];
    size_t bytes_read;
    mbedtls_sha256_context ctx;

    mbedtls_sha256_init(&ctx);

    // The second argument is is224: 0 selects SHA-256, 1 would select SHA-224.
    if (mbedtls_sha256_starts_ret(&ctx, 0) != 0) {
        fprintf(stderr, "Failed to start SHA256\n");
        goto cleanup;
    }

    // Feed exactly the number of bytes fread() returned; the last chunk of a
    // file is usually shorter than the buffer.
    while ((bytes_read = fread(binBuffer, 1, BUFFER_SIZE, hFile)) > 0) {
        if (mbedtls_sha256_update_ret(&ctx, binBuffer, bytes_read) != 0) {
            fprintf(stderr, "Failed to update SHA256\n");
            goto cleanup;
        }
    }

    // fread() returns 0 both at end-of-file and on error; tell them apart.
    if (ferror(hFile)) {
        fprintf(stderr, "Error reading file\n");
        goto cleanup;
    }

    if (mbedtls_sha256_finish_ret(&ctx, hash) != 0) {
        fprintf(stderr, "Failed to finish SHA256\n");
        goto cleanup;
    }

    rc = 0;

cleanup:
    // Single exit path: every route past fopen() releases both resources.
    mbedtls_sha256_free(&ctx);
    fclose(hFile);
    return rc;
}

三、递归哈希的实现

递归哈希通常指对**目录下所有文件（含子目录）**递归计算哈希，常见需求分两种：一是为每个文件单独计算哈希并记录；二是将所有文件的哈希结合路径信息，合并计算一个目录的总哈希。下面给出第二种需求的实现方案：

实现思路

遍历目标目录，区分文件与子目录
对于普通文件：计算其哈希，将**文件相对路径（避免绝对路径变化影响哈希）**与哈希值按固定格式拼接，更新到目录总哈希的上下文
对于子目录：递归调用遍历函数，覆盖子目录下所有内容
遍历完成后，生成目录的总哈希

示例代码（适配POSIX系统，Windows可替换为`FindFirstFile/FindNextFile`）

#include <dirent.h>
#include <string.h>
#include <sys/stat.h>
#include <stdlib.h>

// qsort() comparator over an array of C strings. Uses strcmp() so the order
// is byte-wise and does not depend on the current locale.
static int compare_entry_names(const void *lhs, const void *rhs) {
    const char *const *a = lhs;
    const char *const *b = rhs;
    return strcmp(*a, *b);
}

// Releases a name list produced by list_dir_sorted().
static void free_entry_names(char **names, size_t count) {
    for (size_t i = 0; i < count; i++) {
        free(names[i]);
    }
    free(names);
}

// Collects the names of all entries of dir_path except "." and "..", sorted
// byte-wise. On success stores the list in *names_out / *count_out (to be
// released with free_entry_names()) and returns 0; returns -1 on failure.
static int list_dir_sorted(const char *dir_path, char ***names_out, size_t *count_out) {
    DIR *dir = opendir(dir_path);
    if (!dir) {
        fprintf(stderr, "Failed to open directory: %s\n", dir_path);
        return -1;
    }

    char **names = NULL;
    size_t count = 0;
    size_t capacity = 0;
    int rc = 0;
    struct dirent *entry;

    while ((entry = readdir(dir)) != NULL) {
        if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
            continue;
        }

        if (count == capacity) {
            size_t new_capacity = capacity ? capacity * 2 : 16;
            char **grown = realloc(names, new_capacity * sizeof *grown);
            if (!grown) {
                rc = -1;
                break;
            }
            names = grown;
            capacity = new_capacity;
        }

        // The dirent storage may be reused by the next readdir() call, so
        // keep a private copy of the name.
        size_t name_size = strlen(entry->d_name) + 1;
        char *copy = malloc(name_size);
        if (!copy) {
            rc = -1;
            break;
        }
        memcpy(copy, entry->d_name, name_size);
        names[count++] = copy;
    }
    closedir(dir);

    if (rc != 0) {
        fprintf(stderr, "Malloc failed\n");
        free_entry_names(names, count);
        return -1;
    }

    if (count > 1) {
        qsort(names, count, sizeof *names, compare_entry_names);
    }
    *names_out = names;
    *count_out = count;
    return 0;
}

// Recursive worker for calculate_dir_sha256(). root_len is the length of the
// top-level directory string; every path built below it has the form
// "<root>/<relative path>", so the relative part starts at root_len + 1.
static int hash_dir_entries(const char *dir_path, size_t root_len,
                            mbedtls_sha256_context *global_ctx) {
    char **names = NULL;
    size_t count = 0;
    if (list_dir_sorted(dir_path, &names, &count) != 0) {
        return -1;
    }

    int rc = 0;
    for (size_t i = 0; i < count && rc == 0; i++) {
        char full_path[1024];
        int written = snprintf(full_path, sizeof full_path, "%s/%s", dir_path, names[i]);
        // A truncated path could name a different file; refuse to continue.
        if (written < 0 || (size_t)written >= sizeof full_path) {
            fprintf(stderr, "Path too long: %s/%s\n", dir_path, names[i]);
            rc = -1;
            break;
        }

        struct stat stat_buf;
        if (stat(full_path, &stat_buf) == -1) {
            // Entries that cannot be stat'ed (e.g. dangling symlinks) are
            // reported and skipped, as before.
            fprintf(stderr, "Failed to stat file: %s\n", full_path);
            continue;
        }

        if (S_ISDIR(stat_buf.st_mode)) {
            // NOTE(review): stat() follows symbolic links, so a symlink cycle
            // would recurse until the path buffer overflows and errors out.
            rc = hash_dir_entries(full_path, root_len, global_ctx);
        } else if (S_ISREG(stat_buf.st_mode)) {
            unsigned char file_hash[32];
            // A file whose content cannot be hashed must fail the whole run;
            // silently omitting it would yield a plausible but wrong digest.
            if (calculate_file_sha256(full_path, file_hash) != 0) {
                rc = -1;
                break;
            }

            // Feed "<relative path>\0<32-byte digest>". The NUL terminator
            // delimits the variable-length path from the fixed-length digest.
            const char *rel_path = full_path + root_len + 1;
            if (mbedtls_sha256_update_ret(global_ctx, (const unsigned char *)rel_path,
                                          strlen(rel_path) + 1) != 0 ||
                mbedtls_sha256_update_ret(global_ctx, file_hash, sizeof file_hash) != 0) {
                fprintf(stderr, "Failed to update dir SHA256\n");
                rc = -1;
            }
        }
        // Other file types (sockets, devices, ...) do not contribute.
    }

    free_entry_names(names, count);
    return rc;
}

// Folds every regular file under dir_path (recursively) into global_ctx.
//
// For each file, its path relative to dir_path (NUL-terminated) followed by
// the file's SHA-256 digest is fed into the context. Entries are visited in
// byte-wise sorted order, because readdir() order is unspecified and would
// otherwise make the result vary between runs or file systems.
//
// Parameters:
//   dir_path    Directory to traverse.
//   global_ctx  SHA-256 context that the caller has already initialized and
//               started; the caller also finishes and frees it.
//
// Returns 0 on success, -1 on failure (a message is written to stderr). On
// failure the context may have been partially updated and should be discarded.
int calculate_dir_sha256(const char *dir_path, mbedtls_sha256_context *global_ctx) {
    if (!dir_path || !global_ctx) {
        fprintf(stderr, "Invalid argument\n");
        return -1;
    }
    return hash_dir_entries(dir_path, strlen(dir_path), global_ctx);
}

// Usage example: computes the combined SHA-256 of ./test_dir and prints it as
// lowercase hexadecimal. Returns 0 on success and -1 on any failure.
int main(void) {
    const char *target_dir = "./test_dir";
    unsigned char dir_hash[32];
    mbedtls_sha256_context ctx;
    int rc = -1;

    mbedtls_sha256_init(&ctx);

    // The second argument is is224: 0 selects SHA-256.
    if (mbedtls_sha256_starts_ret(&ctx, 0) != 0) {
        fprintf(stderr, "Failed to start SHA256\n");
        goto cleanup;
    }

    if (calculate_dir_sha256(target_dir, &ctx) != 0) {
        goto cleanup;
    }

    // Only print the digest if finishing succeeded; otherwise dir_hash may
    // be uninitialized.
    if (mbedtls_sha256_finish_ret(&ctx, dir_hash) != 0) {
        fprintf(stderr, "Failed to finish SHA256\n");
        goto cleanup;
    }

    printf("Directory SHA256: ");
    for (int i = 0; i < 32; i++) {
        printf("%02x", dir_hash[i]);
    }
    printf("\n");
    rc = 0;

cleanup:
    mbedtls_sha256_free(&ctx);
    return rc;
}