寻求适配Cloo的SHA256 OpenCL内核及大数组哈希异常问题解决
Hey there! Let's tackle your SHA256 OpenCL kernel issues step by step—sounds like you're close but hitting two specific roadblocks with byte arrays and larger input sizes. Here's how to fix them:
1. Fixing Byte Array Header Hash Mismatches
The most likely culprits here are data type mismatches or incorrect handling of raw byte data (vs. null-terminated strings):
- Check data type alignment: Ensure your C# byte array is being passed to the kernel as `uchar` (unsigned 8-bit integers) rather than implicitly converted to a larger type like `int`. In Cloo, use `ComputeBuffer<byte>` to create your input buffer to enforce this match.
- Avoid string-specific logic: If your original kernel was built for strings, it might be stopping at a null byte (`0x00`) in your byte array. Remove any logic that checks for termination characters—SHA256 needs to process every byte in the input, regardless of content.
- Validate initial vectors: Double-check that your kernel uses the standard SHA256 initial hash values: `0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19`
- Test with a known input: Use a short byte array with a precomputed SHA256 hash (e.g., `byte[] test = { 0x68, 0x65, 0x6c, 0x6c, 0x6f }`, the ASCII bytes of "hello", which hashes to `2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824`). Compare the kernel's output to C#'s `SHA256Managed` result to pinpoint where the calculation diverges.
2. Handling Inputs Longer Than 32 Bytes
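SHA256 processes data in 512-bit (64-byte) blocks. Padding is mandatory for every input, even one whose length is already a multiple of 64 bytes: a single `0x80` byte, then zero bytes, then the message length in bits as a 64-bit big-endian integer in the last 8 bytes of the final block. Your original kernel likely only handles a single block and skips the padding/block iteration logic. Here's a corrected, full-featured kernel that addresses this: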
/*
 * SHA-256 (FIPS 180-4) as an OpenCL C kernel.
 *
 * The shim below only takes effect when this source is compiled by a plain
 * C compiler (for example to unit-test the kernel on the CPU). OpenCL
 * compilers predefine __OPENCL_VERSION__ and skip it. The host build
 * assumes `unsigned int` is 32 bits wide.
 */
#ifndef __OPENCL_VERSION__
typedef unsigned char uchar;
typedef unsigned int uint;
#define __kernel
#define __global
#define __constant const
#endif

#define SHA256_BLOCK_BYTES 64u /* one 512-bit message block */
#define SHA256_LENGTH_BYTES 8u /* trailing 64-bit message length field */

/*
 * SHA-256 round constants. Kept at program scope, which is the portable
 * location for __constant data and lets the helper below read it.
 */
__constant uint sha256_k[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

/*
 * Rotate a 32-bit word right by n bits (1 <= n <= 31).
 *
 * SHA-256 is defined in terms of right rotations. OpenCL's built-in
 * rotate() rotates LEFT, so it must not be used here, and the helper gets
 * its own name to avoid clashing with that built-in.
 */
uint sha256_rotr(uint x, uint n)
{
    return (x >> n) | (x << (32u - n));
}

/*
 * Fold one 512-bit block into the running hash.
 *
 * state: the eight chaining words h0..h7, updated in place.
 * w:     64-word scratch schedule; w[0..15] must hold the block as
 *        big-endian words on entry, w[16..63] are overwritten.
 */
void sha256_compress(uint *state, uint *w)
{
    /* Message schedule expansion. */
    for (uint t = 16u; t < 64u; t++) {
        uint s0 = sha256_rotr(w[t - 15u], 7u) ^ sha256_rotr(w[t - 15u], 18u) ^ (w[t - 15u] >> 3);
        uint s1 = sha256_rotr(w[t - 2u], 17u) ^ sha256_rotr(w[t - 2u], 19u) ^ (w[t - 2u] >> 10);
        w[t] = w[t - 16u] + s0 + w[t - 7u] + s1;
    }

    uint a = state[0];
    uint b = state[1];
    uint c = state[2];
    uint d = state[3];
    uint e = state[4];
    uint f = state[5];
    uint g = state[6];
    uint h = state[7];

    /* 64 compression rounds. */
    for (uint t = 0u; t < 64u; t++) {
        uint S1 = sha256_rotr(e, 6u) ^ sha256_rotr(e, 11u) ^ sha256_rotr(e, 25u);
        uint ch = (e & f) ^ (~e & g);
        uint temp1 = h + S1 + ch + sha256_k[t] + w[t];
        uint S0 = sha256_rotr(a, 2u) ^ sha256_rotr(a, 13u) ^ sha256_rotr(a, 22u);
        uint maj = (a & b) ^ (a & c) ^ (b & c);
        uint temp2 = S0 + maj;

        h = g;
        g = f;
        f = e;
        e = d + temp1;
        d = c;
        c = b;
        b = a;
        a = temp1 + temp2;
    }

    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
    state[5] += f;
    state[6] += g;
    state[7] += h;
}

/*
 * Compute the SHA-256 digest of a byte buffer.
 *
 * input:       message bytes; every byte is hashed, including 0x00.
 * inputLength: number of message bytes (0 is valid).
 * output:      receives the eight 32-bit digest words h0..h7. Emitting
 *              each word most-significant byte first yields the
 *              conventional 32-byte digest.
 *
 * The kernel does not use the work-item id: every work-item hashes the
 * whole message and writes the same eight words, so enqueue it with a
 * global size of 1. All scratch data is private, so extra work-items are
 * redundant but do not interfere with each other.
 */
__kernel void sha256(__global const uchar* input, uint inputLength, __global uint* output)
{
    uint state[8] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    };
    uint w[64];

    uint fullBlocks = inputLength / SHA256_BLOCK_BYTES;
    uint tail = inputLength % SHA256_BLOCK_BYTES;

    /*
     * After the tail bytes come one 0x80 byte and the 8-byte length. They
     * fit in the same block only if tail + 1 + 8 <= 64; otherwise an extra
     * padding-only block is required.
     */
    uint tailBlocks = (tail + 1u + SHA256_LENGTH_BYTES <= SHA256_BLOCK_BYTES) ? 1u : 2u;
    uint numBlocks = fullBlocks + tailBlocks;

    for (uint blk = 0u; blk < numBlocks; blk++) {
        /* Assemble the block as sixteen big-endian words. */
        for (uint t = 0u; t < 16u; t++) {
            uint word = 0u;
            for (uint j = 0u; j < 4u; j++) {
                uint i = t * 4u + j; /* byte index inside this block */
                uint byte;
                if (blk < fullBlocks) {
                    byte = input[blk * SHA256_BLOCK_BYTES + i];
                } else {
                    /* Position inside the padded tail (0..127). */
                    uint p = (blk - fullBlocks) * SHA256_BLOCK_BYTES + i;
                    if (p < tail) {
                        byte = input[fullBlocks * SHA256_BLOCK_BYTES + p];
                    } else if (p == tail) {
                        byte = 0x80u; /* mandatory single '1' padding bit */
                    } else {
                        byte = 0u;
                    }
                }
                word = (word << 8) | byte;
            }
            w[t] = word;
        }

        /*
         * Only the final block carries the message length in bits, as a
         * 64-bit big-endian value. It is split into two 32-bit words so no
         * 64-bit type is needed: high = len >> 29, low = len << 3.
         */
        if (blk + 1u == numBlocks) {
            w[14] = inputLength >> 29;
            w[15] = inputLength << 3;
        }

        sha256_compress(state, w);
    }

    for (uint t = 0u; t < 8u; t++) {
        output[t] = state[t];
    }
}
Key Usage Notes for Cloo/WPF:
- Buffer Setup: Create your input buffer as `new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, inputBytes)` (OpenCL requires the `CopyHostPointer` or `UseHostPointer` flag whenever host data is supplied at buffer creation) and your output buffer as `new ComputeBuffer<uint>(context, ComputeMemoryFlags.WriteOnly, 8)`.
- Kernel Arguments: Pass the input buffer, input length (as a `uint`), and output buffer to the kernel.
- Result Conversion: The kernel outputs 8 `uint` hash words. Convert these to a byte array by splitting each `uint` into 4 bytes, most significant byte first (e.g., `BitConverter.GetBytes(h0).Reverse()` on a little-endian host, to match big-endian order), and concatenate them to get the final SHA256 hash string.
内容的提问来源于stack exchange,提问作者Sameh Tohamy




