You need to enable JavaScript to run this app.
最新活动
大模型
产品
解决方案
定价
生态与合作
支持与服务
开发者
了解我们

寻求适配Cloo的SHA256 OpenCL内核及大数组哈希异常问题解决

Hey there! Let's tackle your SHA256 OpenCL kernel issues step by step—sounds like you're close but hitting two specific roadblocks with byte arrays and larger input sizes. Here's how to fix them:

1. Fixing Byte Array Header Hash Mismatches

The most likely culprits here are data type mismatches or incorrect handling of raw byte data (vs. null-terminated strings):

  • Check data type alignment: Ensure your C# byte array is being passed to the kernel as uchar (unsigned 8-bit integers) rather than implicitly converted to a larger type like int. In Cloo, use ComputeBuffer<byte> to create your input buffer to enforce this match.
  • Avoid string-specific logic: If your original kernel was built for strings, it might be stopping at a null byte (0x00) in your byte array. Remove any logic that checks for termination characters—SHA256 needs to process every byte in the input, regardless of content.
  • Validate initial vectors: Double-check that your kernel uses the standard SHA256 initial hash values:
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    
  • Test with a known input: Use a short byte array with a precomputed SHA256 hash (e.g., byte[] test = { 0x68, 0x65, 0x6c, 0x6c, 0x6f }, the ASCII bytes of "hello", which hashes to 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824). Compare the kernel's output to C#'s SHA256Managed result to pinpoint where the calculation diverges.

2. Handling Inputs Longer Than 32 Bytes

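SHA256 processes data in 512-bit (64-byte) blocks. Padding is always appended, even when the input length is already a multiple of 64 bytes: a single 0x80 byte, then zero bytes, then the message length in bits as a 64-bit big-endian integer, so that the padded message is a whole number of blocks. Your original kernel likely only handles a single block and skips the padding/block iteration logic. Here's a corrected, full-featured kernel that addresses this: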

/* Rotate the 32-bit word x right by n bits. Every caller below passes a
 * constant n in 1..31, so neither shift reaches the full word width. */
uint sha256_rotr(uint x, uint n) {
    return (x >> n) | (x << (32u - n));
}

/*
 * Return byte `pos` of the SHA-256 padded message built from `input`.
 *
 * Layout of the padded message (paddedLength bytes, a multiple of 64):
 *   [0, inputLength)                  the message bytes
 *   inputLength                       the single 0x80 marker byte
 *   (inputLength, paddedLength - 8)   zero fill
 *   [paddedLength - 8, paddedLength)  message length in bits, 64-bit big-endian
 */
uchar sha256_padded_byte(__global const uchar* input, uint inputLength,
                         ulong paddedLength, ulong pos) {
    if (pos < inputLength) {
        return input[pos];
    }
    if (pos == inputLength) {
        return 0x80;
    }
    if (pos >= paddedLength - 8) {
        // The length field lives only in the last 8 bytes of the LAST block;
        // the byte at paddedLength - 1 is the least significant one.
        ulong bitLength = (ulong)inputLength * 8;
        return (uchar)((bitLength >> ((paddedLength - 1 - pos) * 8)) & 0xff);
    }
    return 0x00;
}

/*
 * Compute the SHA-256 digest of input[0 .. inputLength-1].
 *
 * input       - message bytes in global memory
 * inputLength - number of message bytes (may be 0)
 * output      - receives the eight 32-bit digest words h0..h7; output[0]
 *               holds the most significant 32 bits of the digest
 *
 * The kernel does not use any work-item ID, so every work-item that runs it
 * computes the same digest and writes the same eight words. All scratch state
 * is private to the work-item.
 */
__kernel void sha256(__global const uchar* input, uint inputLength, __global uint* output) {
    // Standard SHA256 initial hash values
    uint hash[8] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    };

    // SHA256 round constants
    __constant uint k[64] = {
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };

    // Padded size = message + 1 marker byte + 8 length bytes, rounded up to a
    // whole number of 64-byte blocks. Computed in 64 bits so that lengths
    // close to the uint maximum cannot wrap.
    const ulong numBlocks = ((ulong)inputLength + 1 + 8 + 63) / 64;
    const ulong paddedLength = numBlocks * 64;

    for (ulong blockIdx = 0; blockIdx < numBlocks; blockIdx++) {
        const ulong offset = blockIdx * 64;

        // Message schedule: the first 16 words are the block read as
        // big-endian 32-bit words.
        uint w[64];
        for (uint i = 0; i < 16; i++) {
            const ulong p = offset + (ulong)i * 4;
            w[i] = ((uint)sha256_padded_byte(input, inputLength, paddedLength, p)     << 24)
                 | ((uint)sha256_padded_byte(input, inputLength, paddedLength, p + 1) << 16)
                 | ((uint)sha256_padded_byte(input, inputLength, paddedLength, p + 2) << 8)
                 |  (uint)sha256_padded_byte(input, inputLength, paddedLength, p + 3);
        }

        // Extend words to 64 total
        for (uint i = 16; i < 64; i++) {
            uint s0 = sha256_rotr(w[i-15], 7) ^ sha256_rotr(w[i-15], 18) ^ (w[i-15] >> 3);
            uint s1 = sha256_rotr(w[i-2], 17) ^ sha256_rotr(w[i-2], 19) ^ (w[i-2] >> 10);
            w[i] = w[i-16] + s0 + w[i-7] + s1;
        }

        // Initialize working variables from the running hash
        uint a = hash[0];
        uint b = hash[1];
        uint c = hash[2];
        uint d = hash[3];
        uint e = hash[4];
        uint f = hash[5];
        uint g = hash[6];
        uint h = hash[7];

        // Compression loop
        for (uint i = 0; i < 64; i++) {
            uint S1 = sha256_rotr(e, 6) ^ sha256_rotr(e, 11) ^ sha256_rotr(e, 25);
            uint ch = (e & f) ^ (~e & g);
            uint temp1 = h + S1 + ch + k[i] + w[i];
            uint S0 = sha256_rotr(a, 2) ^ sha256_rotr(a, 13) ^ sha256_rotr(a, 22);
            uint maj = (a & b) ^ (a & c) ^ (b & c);
            uint temp2 = S0 + maj;

            h = g;
            g = f;
            f = e;
            e = d + temp1;
            d = c;
            c = b;
            b = a;
            a = temp1 + temp2;
        }

        // Fold this block's result into the running hash
        hash[0] += a;
        hash[1] += b;
        hash[2] += c;
        hash[3] += d;
        hash[4] += e;
        hash[5] += f;
        hash[6] += g;
        hash[7] += h;
    }

    // Write the digest words; the host serializes each word
    // most-significant byte first to obtain the 32-byte digest.
    for (uint i = 0; i < 8; i++) {
        output[i] = hash[i];
    }
}

// Helper rotation function: rotates the 32-bit word x RIGHT by n bits.
// SHA-256's sigma functions are defined with right rotations, which is how
// the rotation amounts passed by the kernel in this file are meant.
// n is reduced modulo 32 so that n == 0 (or n >= 32) never produces a shift
// by the full word width, which is undefined.
// NOTE(review): OpenCL C also has a built-in named rotate that rotates LEFT;
// consider renaming this helper so the two cannot be confused or clash.
uint rotate(uint x, uint n) {
    n &= 31u;
    return (x >> n) | (x << ((32u - n) & 31u));
}

Key Usage Notes for Cloo/WPF:

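  • Buffer Setup: Create your input buffer as new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, inputBytes) so the contents of the host array are actually copied to the device, and your output buffer as new ComputeBuffer<uint>(context, ComputeMemoryFlags.WriteOnly, 8).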
  • Kernel Arguments: Pass the input buffer, input length (as a uint), and output buffer to the kernel.
  • Result Conversion: The kernel outputs 8 uint words (h0 through h7), each holding 32 bits of the digest. Convert them to a byte array by writing each word most-significant byte first (e.g., on a little-endian host, BitConverter.GetBytes(h0).Reverse() yields big-endian order), then concatenate the resulting 32 bytes and hex-encode them to get the final SHA256 hash string.

内容的提问来源于stack exchange,提问作者Sameh Tohamy

火山引擎 最新活动