From 9ddb210f5bd9486853df160e6742b9d2a34a3fcd Mon Sep 17 00:00:00 2001 From: aviram fireberger Date: Thu, 5 Dec 2019 18:09:58 +0200 Subject: [PATCH 1/3] Upgrade to CUDA 10.2 --- BitCrack.props | 8 ++++---- CudaKeySearchDevice/CudaKeySearchDevice.vcxproj | 4 ++-- cudaInfo/cudaInfo.vcxproj | 4 ++-- cudaMath/cudaMath.vcxproj | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/BitCrack.props b/BitCrack.props index 0f980a5..8656a07 100644 --- a/BitCrack.props +++ b/BitCrack.props @@ -2,10 +2,10 @@ - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64 - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64 + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\lib\x64 + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\lib\x64 diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj b/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj index 490c53e..054ac1b 100644 --- a/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj +++ b/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj @@ -56,7 +56,7 @@ - + @@ -114,6 +114,6 @@ - + \ No newline at end of file diff --git a/cudaInfo/cudaInfo.vcxproj b/cudaInfo/cudaInfo.vcxproj index 6636506..5283261 100644 --- a/cudaInfo/cudaInfo.vcxproj +++ b/cudaInfo/cudaInfo.vcxproj @@ -39,7 +39,7 @@ - + @@ -89,6 +89,6 @@ - + \ No newline at end of file diff --git a/cudaMath/cudaMath.vcxproj b/cudaMath/cudaMath.vcxproj index fcf4097..a62f356 100644 --- a/cudaMath/cudaMath.vcxproj +++ b/cudaMath/cudaMath.vcxproj @@ -31,7 +31,7 @@ - + @@ -85,6 +85,6 @@ - + \ No newline at end of file From 2b728bf44c0f86855aaf2c103e7b69c113994557 Mon Sep 17 00:00:00 2001 From: aviram fireberger Date: Tue, 22 Dec 2020 15:40:37 +0200 Subject: [PATCH 2/3] Working SHA 256 strings from file on CUDA --- BitCrack.sln | 20 +- CLKeySearchDevice/bitcrack.cl | 1702 ------------------- CryptoUtil/CryptoUtil.vcxproj | 1 + CryptoUtil/picosha2.h | 379 +++++ FileStringsToHash/FileStringsToHash.cpp | 47 + FileStringsToHash/FileStringsToHash.vcxproj | 171 ++ StringsHasher/CKSD.cpp | 41 + StringsHasher/CKSD.cu | 20 + StringsHasher/CKSD.h | 51 + StringsHasher/StringsHasher.vcxproj | 115 ++ StringsHasher/getopt.c | 117 ++ StringsHasher/getopt.h | 6 + StringsHasher/kernal.cu | 201 +++ StringsHasher/kernelb.cu | 140 ++ StringsHasher/main.cu | 225 +++ StringsHasher/sha256.cuh | 250 +++ StringsHasher/unistd.h | 56 + TestLib/TestLib.vcxproj | 86 + TestLib/kernel.cu | 121 ++ util/util.cpp | 7 + util/util.h | 1 + 21 files changed, 2053 insertions(+), 1704 deletions(-) create mode 100644 CryptoUtil/picosha2.h create mode 100644 FileStringsToHash/FileStringsToHash.cpp create mode 100644 FileStringsToHash/FileStringsToHash.vcxproj create mode 100644 StringsHasher/CKSD.cpp create mode 100644 StringsHasher/CKSD.cu create mode 100644 StringsHasher/CKSD.h create mode 100644 StringsHasher/StringsHasher.vcxproj create mode 100644 StringsHasher/getopt.c create mode 100644 StringsHasher/getopt.h create mode 100644 StringsHasher/kernal.cu create mode 100644 StringsHasher/kernelb.cu create mode 100644 StringsHasher/main.cu create mode 100644 StringsHasher/sha256.cuh create mode 100644 StringsHasher/unistd.h create mode 100644 TestLib/TestLib.vcxproj create mode 100644 TestLib/kernel.cu diff --git a/BitCrack.sln b/BitCrack.sln index 9913b17..57428ca 100644 --- a/BitCrack.sln +++ b/BitCrack.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27703.2018 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29519.87 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "secp256k1lib", "secp256k1lib\secp256k1lib.vcxproj", "{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}" EndProject @@ -79,6 +79,10 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLUnitTests", "CLUnitTests\ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clMath", "clMath\clMath.vcxproj", "{83327841-C283-4D46-A873-97AC674C68AC}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StringsHasher", "StringsHasher\StringsHasher.vcxproj", "{691AB22B-0E94-4C8A-8C63-E33CF8768726}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestLib", "TestLib\TestLib.vcxproj", "{FEBBC324-1FF3-4393-9249-535549A85B80}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -231,6 +235,18 @@ Global {83327841-C283-4D46-A873-97AC674C68AC}.Release|x64.Build.0 = Release|x64 {83327841-C283-4D46-A873-97AC674C68AC}.Release|x86.ActiveCfg = Release|Win32 {83327841-C283-4D46-A873-97AC674C68AC}.Release|x86.Build.0 = Release|Win32 + {691AB22B-0E94-4C8A-8C63-E33CF8768726}.Debug|x64.ActiveCfg = Debug|x64 + {691AB22B-0E94-4C8A-8C63-E33CF8768726}.Debug|x64.Build.0 = Debug|x64 + {691AB22B-0E94-4C8A-8C63-E33CF8768726}.Debug|x86.ActiveCfg = Debug|x64 + {691AB22B-0E94-4C8A-8C63-E33CF8768726}.Release|x64.ActiveCfg = Release|x64 + {691AB22B-0E94-4C8A-8C63-E33CF8768726}.Release|x64.Build.0 = Release|x64 + {691AB22B-0E94-4C8A-8C63-E33CF8768726}.Release|x86.ActiveCfg = Release|x64 + {FEBBC324-1FF3-4393-9249-535549A85B80}.Debug|x64.ActiveCfg = Debug|x64 + {FEBBC324-1FF3-4393-9249-535549A85B80}.Debug|x64.Build.0 = Debug|x64 + {FEBBC324-1FF3-4393-9249-535549A85B80}.Debug|x86.ActiveCfg = Debug|x64 + {FEBBC324-1FF3-4393-9249-535549A85B80}.Release|x64.ActiveCfg = Release|x64 + {FEBBC324-1FF3-4393-9249-535549A85B80}.Release|x64.Build.0 = Release|x64 + {FEBBC324-1FF3-4393-9249-535549A85B80}.Release|x86.ActiveCfg = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl index 55f0907..aa907b8 100644 --- a/CLKeySearchDevice/bitcrack.cl +++ b/CLKeySearchDevice/bitcrack.cl @@ -1,1705 +1,3 @@ -#ifndef _RIPEMD160_CL -#define _RIPEMD160_CL - - -__constant unsigned int _RIPEMD160_IV[5] = { - 0x67452301, - 0xefcdab89, - 0x98badcfe, - 0x10325476, - 0xc3d2e1f0 -}; - -__constant unsigned int _K0 = 0x5a827999; -__constant unsigned int _K1 = 0x6ed9eba1; -__constant unsigned int _K2 = 0x8f1bbcdc; -__constant unsigned int _K3 = 0xa953fd4e; - -__constant unsigned int _K4 = 0x7a6d76e9; -__constant unsigned int _K5 = 0x6d703ef3; -__constant unsigned int _K6 = 0x5c4dd124; -__constant unsigned int _K7 = 0x50a28be6; - -#define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) - -#define F(x, y, z) ((x) ^ (y) ^ (z)) - -#define G(x, y, z) (((x) & (y)) | (~(x) & (z))) - -#define H(x, y, z) (((x) | ~(y)) ^ (z)) - -#define I(x, y, z) (((x) & (z)) | ((y) & ~(z))) - -#define J(x, y, z) ((x) ^ ((y) | ~(z))) - -#define FF(a, b, c, d, e, m, s)\ - a += (F((b), (c), (d)) + (m));\ - a = (rotl((a), (s)) + (e));\ - c = rotl((c), 10) - -#define GG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + (x) + _K0;\ - a = rotl((a), (s)) + (e);\ - c = rotl((c), 10) - -#define HH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + _K1;\ - a = rotl((a), (s)) + (e);\ - c = rotl((c), 10) - -#define II(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + _K2;\ - a = rotl((a), (s)) + e;\ - c = rotl((c), 10) - -#define JJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + _K3;\ - a = rotl((a), (s)) + (e);\ - c = rotl((c), 10) - -#define FFF(a, b, c, d, e, x, s)\ - a += F((b), (c), (d)) + (x);\ - a = rotl((a), (s)) + (e);\ - c = rotl((c), 10) - -#define GGG(a, b, c, d, e, x, s)\ - a += G((b), (c), (d)) + x + _K4;\ - a = rotl((a), (s)) + (e);\ - c = rotl((c), 10) - -#define HHH(a, b, c, d, e, x, s)\ - a += H((b), (c), (d)) + (x) + _K5;\ - a = rotl((a), (s)) + (e);\ - c = rotl((c), 10) - -#define III(a, b, c, d, e, x, s)\ - a += I((b), (c), (d)) + (x) + _K6;\ - a = rotl((a), (s)) + (e);\ - c = rotl((c), 10) - -#define JJJ(a, b, c, d, e, x, s)\ - a += J((b), (c), (d)) + (x) + _K7;\ - a = rotl((a), (s)) + (e);\ - c = rotl((c), 10) - - -void ripemd160sha256(const unsigned int x[8], unsigned int digest[5]) -{ - unsigned int a1 = _RIPEMD160_IV[0]; - unsigned int b1 = _RIPEMD160_IV[1]; - unsigned int c1 = _RIPEMD160_IV[2]; - unsigned int d1 = _RIPEMD160_IV[3]; - unsigned int e1 = _RIPEMD160_IV[4]; - - const unsigned int x8 = 0x00000080; - const unsigned int x14 = 256; - - /* round 1 */ - FF(a1, b1, c1, d1, e1, x[0], 11); - FF(e1, a1, b1, c1, d1, x[1], 14); - FF(d1, e1, a1, b1, c1, x[2], 15); - FF(c1, d1, e1, a1, b1, x[3], 12); - FF(b1, c1, d1, e1, a1, x[4], 5); - FF(a1, b1, c1, d1, e1, x[5], 8); - FF(e1, a1, b1, c1, d1, x[6], 7); - FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, x8, 11); - FF(b1, c1, d1, e1, a1, 0, 13); - FF(a1, b1, c1, d1, e1, 0, 14); - FF(e1, a1, b1, c1, d1, 0, 15); - FF(d1, e1, a1, b1, c1, 0, 6); - FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, x14, 9); - FF(a1, b1, c1, d1, e1, 0, 8); - - /* round 2 */ - GG(e1, a1, b1, c1, d1, x[7], 7); - GG(d1, e1, a1, b1, c1, x[4], 6); - GG(c1, d1, e1, a1, b1, 0, 8); - GG(b1, c1, d1, e1, a1, x[1], 13); - GG(a1, b1, c1, d1, e1, 0, 11); - GG(e1, a1, b1, c1, d1, x[6], 9); - GG(d1, e1, a1, b1, c1, 0, 7); - GG(c1, d1, e1, a1, b1, x[3], 15); - GG(b1, c1, d1, e1, a1, 0, 7); - GG(a1, b1, c1, d1, e1, x[0], 12); - GG(e1, a1, b1, c1, d1, 0, 15); - GG(d1, e1, a1, b1, c1, x[5], 9); - GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, x14, 7); - GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, c1, d1, x8, 12); - - /* round 3 */ - HH(d1, e1, a1, b1, c1, x[3], 11); - HH(c1, d1, e1, a1, b1, 0, 13); - HH(b1, c1, d1, e1, a1, x14, 6); - HH(a1, b1, c1, d1, e1, x[4], 7); - HH(e1, a1, b1, c1, d1, 0, 14); - HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, x8, 13); - HH(b1, c1, d1, e1, a1, x[1], 15); - HH(a1, b1, c1, d1, e1, x[2], 14); - HH(e1, a1, b1, c1, d1, x[7], 8); - HH(d1, e1, a1, b1, c1, x[0], 13); - HH(c1, d1, e1, a1, b1, x[6], 6); - HH(b1, c1, d1, e1, a1, 0, 5); - HH(a1, b1, c1, d1, e1, 0, 12); - HH(e1, a1, b1, c1, d1, x[5], 7); - HH(d1, e1, a1, b1, c1, 0, 5); - - /* round 4 */ - II(c1, d1, e1, a1, b1, x[1], 11); - II(b1, c1, d1, e1, a1, 0, 12); - II(a1, b1, c1, d1, e1, 0, 14); - II(e1, a1, b1, c1, d1, 0, 15); - II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, d1, e1, a1, b1, x8, 15); - II(b1, c1, d1, e1, a1, 0, 9); - II(a1, b1, c1, d1, e1, x[4], 8); - II(e1, a1, b1, c1, d1, 0, 9); - II(d1, e1, a1, b1, c1, x[3], 14); - II(c1, d1, e1, a1, b1, x[7], 5); - II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, x14, 8); - II(e1, a1, b1, c1, d1, x[5], 6); - II(d1, e1, a1, b1, c1, x[6], 5); - II(c1, d1, e1, a1, b1, x[2], 12); - - /* round 5 */ - JJ(b1, c1, d1, e1, a1, x[4], 9); - JJ(a1, b1, c1, d1, e1, x[0], 15); - JJ(e1, a1, b1, c1, d1, x[5], 5); - JJ(d1, e1, a1, b1, c1, 0, 11); - JJ(c1, d1, e1, a1, b1, x[7], 6); - JJ(b1, c1, d1, e1, a1, 0, 8); - JJ(a1, b1, c1, d1, e1, x[2], 13); - JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, x14, 5); - JJ(c1, d1, e1, a1, b1, x[1], 12); - JJ(b1, c1, d1, e1, a1, x[3], 13); - JJ(a1, b1, c1, d1, e1, x8, 14); - JJ(e1, a1, b1, c1, d1, 0, 11); - JJ(d1, e1, a1, b1, c1, x[6], 8); - JJ(c1, d1, e1, a1, b1, 0, 5); - JJ(b1, c1, d1, e1, a1, 0, 6); - - unsigned int a2 = _RIPEMD160_IV[0]; - unsigned int b2 = _RIPEMD160_IV[1]; - unsigned int c2 = _RIPEMD160_IV[2]; - unsigned int d2 = _RIPEMD160_IV[3]; - unsigned int e2 = _RIPEMD160_IV[4]; - - /* parallel round 1 */ - JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, x14, 9); - JJJ(d2, e2, a2, b2, c2, x[7], 9); - JJJ(c2, d2, e2, a2, b2, x[0], 11); - JJJ(b2, c2, d2, e2, a2, 0, 13); - JJJ(a2, b2, c2, d2, e2, x[2], 15); - JJJ(e2, a2, b2, c2, d2, 0, 15); - JJJ(d2, e2, a2, b2, c2, x[4], 5); - JJJ(c2, d2, e2, a2, b2, 0, 7); - JJJ(b2, c2, d2, e2, a2, x[6], 7); - JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, x8, 11); - JJJ(d2, e2, a2, b2, c2, x[1], 14); - JJJ(c2, d2, e2, a2, b2, 0, 14); - JJJ(b2, c2, d2, e2, a2, x[3], 12); - JJJ(a2, b2, c2, d2, e2, 0, 6); - - /* parallel round 2 */ - III(e2, a2, b2, c2, d2, x[6], 9); - III(d2, e2, a2, b2, c2, 0, 13); - III(c2, d2, e2, a2, b2, x[3], 15); - III(b2, c2, d2, e2, a2, x[7], 7); - III(a2, b2, c2, d2, e2, x[0], 12); - III(e2, a2, b2, c2, d2, 0, 8); - III(d2, e2, a2, b2, c2, x[5], 9); - III(c2, d2, e2, a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, x14, 7); - III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, x8, 12); - III(d2, e2, a2, b2, c2, 0, 7); - III(c2, d2, e2, a2, b2, x[4], 6); - III(b2, c2, d2, e2, a2, 0, 15); - III(a2, b2, c2, d2, e2, x[1], 13); - III(e2, a2, b2, c2, d2, x[2], 11); - - /* parallel round 3 */ - HHH(d2, e2, a2, b2, c2, 0, 9); - HHH(c2, d2, e2, a2, b2, x[5], 7); - HHH(b2, c2, d2, e2, a2, x[1], 15); - HHH(a2, b2, c2, d2, e2, x[3], 11); - HHH(e2, a2, b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, x14, 6); - HHH(c2, d2, e2, a2, b2, x[6], 6); - HHH(b2, c2, d2, e2, a2, 0, 14); - HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, x8, 13); - HHH(d2, e2, a2, b2, c2, 0, 5); - HHH(c2, d2, e2, a2, b2, x[2], 14); - HHH(b2, c2, d2, e2, a2, 0, 13); - HHH(a2, b2, c2, d2, e2, x[0], 13); - HHH(e2, a2, b2, c2, d2, x[4], 7); - HHH(d2, e2, a2, b2, c2, 0, 5); - - /* parallel round 4 */ - GGG(c2, d2, e2, a2, b2, x8, 15); - GGG(b2, c2, d2, e2, a2, x[6], 5); - GGG(a2, b2, c2, d2, e2, x[4], 8); - GGG(e2, a2, b2, c2, d2, x[1], 11); - GGG(d2, e2, a2, b2, c2, x[3], 14); - GGG(c2, d2, e2, a2, b2, 0, 14); - GGG(b2, c2, d2, e2, a2, 0, 6); - GGG(a2, b2, c2, d2, e2, x[0], 14); - GGG(e2, a2, b2, c2, d2, x[5], 6); - GGG(d2, e2, a2, b2, c2, 0, 9); - GGG(c2, d2, e2, a2, b2, x[2], 12); - GGG(b2, c2, d2, e2, a2, 0, 9); - GGG(a2, b2, c2, d2, e2, 0, 12); - GGG(e2, a2, b2, c2, d2, x[7], 5); - GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, x14, 8); - - /* parallel round 5 */ - FFF(b2, c2, d2, e2, a2, 0, 8); - FFF(a2, b2, c2, d2, e2, 0, 5); - FFF(e2, a2, b2, c2, d2, 0, 12); - FFF(d2, e2, a2, b2, c2, x[4], 9); - FFF(c2, d2, e2, a2, b2, x[1], 12); - FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, x8, 14); - FFF(e2, a2, b2, c2, d2, x[7], 6); - FFF(d2, e2, a2, b2, c2, x[6], 8); - FFF(c2, d2, e2, a2, b2, x[2], 13); - FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, x14, 5); - FFF(e2, a2, b2, c2, d2, x[0], 15); - FFF(d2, e2, a2, b2, c2, x[3], 13); - FFF(c2, d2, e2, a2, b2, 0, 11); - FFF(b2, c2, d2, e2, a2, 0, 11); - - digest[0] = _RIPEMD160_IV[1] + c1 + d2; - digest[1] = _RIPEMD160_IV[2] + d1 + e2; - digest[2] = _RIPEMD160_IV[3] + e1 + a2; - digest[3] = _RIPEMD160_IV[4] + a1 + b2; - digest[4] = _RIPEMD160_IV[0] + b1 + c2; -} - - -void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5]) -{ - unsigned int a1 = _RIPEMD160_IV[0]; - unsigned int b1 = _RIPEMD160_IV[1]; - unsigned int c1 = _RIPEMD160_IV[2]; - unsigned int d1 = _RIPEMD160_IV[3]; - unsigned int e1 = _RIPEMD160_IV[4]; - - const unsigned int x8 = 0x00000080; - const unsigned int x14 = 256; - - /* round 1 */ - FF(a1, b1, c1, d1, e1, x[0], 11); - FF(e1, a1, b1, c1, d1, x[1], 14); - FF(d1, e1, a1, b1, c1, x[2], 15); - FF(c1, d1, e1, a1, b1, x[3], 12); - FF(b1, c1, d1, e1, a1, x[4], 5); - FF(a1, b1, c1, d1, e1, x[5], 8); - FF(e1, a1, b1, c1, d1, x[6], 7); - FF(d1, e1, a1, b1, c1, x[7], 9); - FF(c1, d1, e1, a1, b1, x8, 11); - FF(b1, c1, d1, e1, a1, 0, 13); - FF(a1, b1, c1, d1, e1, 0, 14); - FF(e1, a1, b1, c1, d1, 0, 15); - FF(d1, e1, a1, b1, c1, 0, 6); - FF(c1, d1, e1, a1, b1, 0, 7); - FF(b1, c1, d1, e1, a1, x14, 9); - FF(a1, b1, c1, d1, e1, 0, 8); - - /* round 2 */ - GG(e1, a1, b1, c1, d1, x[7], 7); - GG(d1, e1, a1, b1, c1, x[4], 6); - GG(c1, d1, e1, a1, b1, 0, 8); - GG(b1, c1, d1, e1, a1, x[1], 13); - GG(a1, b1, c1, d1, e1, 0, 11); - GG(e1, a1, b1, c1, d1, x[6], 9); - GG(d1, e1, a1, b1, c1, 0, 7); - GG(c1, d1, e1, a1, b1, x[3], 15); - GG(b1, c1, d1, e1, a1, 0, 7); - GG(a1, b1, c1, d1, e1, x[0], 12); - GG(e1, a1, b1, c1, d1, 0, 15); - GG(d1, e1, a1, b1, c1, x[5], 9); - GG(c1, d1, e1, a1, b1, x[2], 11); - GG(b1, c1, d1, e1, a1, x14, 7); - GG(a1, b1, c1, d1, e1, 0, 13); - GG(e1, a1, b1, c1, d1, x8, 12); - - /* round 3 */ - HH(d1, e1, a1, b1, c1, x[3], 11); - HH(c1, d1, e1, a1, b1, 0, 13); - HH(b1, c1, d1, e1, a1, x14, 6); - HH(a1, b1, c1, d1, e1, x[4], 7); - HH(e1, a1, b1, c1, d1, 0, 14); - HH(d1, e1, a1, b1, c1, 0, 9); - HH(c1, d1, e1, a1, b1, x8, 13); - HH(b1, c1, d1, e1, a1, x[1], 15); - HH(a1, b1, c1, d1, e1, x[2], 14); - HH(e1, a1, b1, c1, d1, x[7], 8); - HH(d1, e1, a1, b1, c1, x[0], 13); - HH(c1, d1, e1, a1, b1, x[6], 6); - HH(b1, c1, d1, e1, a1, 0, 5); - HH(a1, b1, c1, d1, e1, 0, 12); - HH(e1, a1, b1, c1, d1, x[5], 7); - HH(d1, e1, a1, b1, c1, 0, 5); - - /* round 4 */ - II(c1, d1, e1, a1, b1, x[1], 11); - II(b1, c1, d1, e1, a1, 0, 12); - II(a1, b1, c1, d1, e1, 0, 14); - II(e1, a1, b1, c1, d1, 0, 15); - II(d1, e1, a1, b1, c1, x[0], 14); - II(c1, d1, e1, a1, b1, x8, 15); - II(b1, c1, d1, e1, a1, 0, 9); - II(a1, b1, c1, d1, e1, x[4], 8); - II(e1, a1, b1, c1, d1, 0, 9); - II(d1, e1, a1, b1, c1, x[3], 14); - II(c1, d1, e1, a1, b1, x[7], 5); - II(b1, c1, d1, e1, a1, 0, 6); - II(a1, b1, c1, d1, e1, x14, 8); - II(e1, a1, b1, c1, d1, x[5], 6); - II(d1, e1, a1, b1, c1, x[6], 5); - II(c1, d1, e1, a1, b1, x[2], 12); - - /* round 5 */ - JJ(b1, c1, d1, e1, a1, x[4], 9); - JJ(a1, b1, c1, d1, e1, x[0], 15); - JJ(e1, a1, b1, c1, d1, x[5], 5); - JJ(d1, e1, a1, b1, c1, 0, 11); - JJ(c1, d1, e1, a1, b1, x[7], 6); - JJ(b1, c1, d1, e1, a1, 0, 8); - JJ(a1, b1, c1, d1, e1, x[2], 13); - JJ(e1, a1, b1, c1, d1, 0, 12); - JJ(d1, e1, a1, b1, c1, x14, 5); - JJ(c1, d1, e1, a1, b1, x[1], 12); - JJ(b1, c1, d1, e1, a1, x[3], 13); - JJ(a1, b1, c1, d1, e1, x8, 14); - JJ(e1, a1, b1, c1, d1, 0, 11); - JJ(d1, e1, a1, b1, c1, x[6], 8); - JJ(c1, d1, e1, a1, b1, 0, 5); - JJ(b1, c1, d1, e1, a1, 0, 6); - - unsigned int a2 = _RIPEMD160_IV[0]; - unsigned int b2 = _RIPEMD160_IV[1]; - unsigned int c2 = _RIPEMD160_IV[2]; - unsigned int d2 = _RIPEMD160_IV[3]; - unsigned int e2 = _RIPEMD160_IV[4]; - - /* parallel round 1 */ - JJJ(a2, b2, c2, d2, e2, x[5], 8); - JJJ(e2, a2, b2, c2, d2, x14, 9); - JJJ(d2, e2, a2, b2, c2, x[7], 9); - JJJ(c2, d2, e2, a2, b2, x[0], 11); - JJJ(b2, c2, d2, e2, a2, 0, 13); - JJJ(a2, b2, c2, d2, e2, x[2], 15); - JJJ(e2, a2, b2, c2, d2, 0, 15); - JJJ(d2, e2, a2, b2, c2, x[4], 5); - JJJ(c2, d2, e2, a2, b2, 0, 7); - JJJ(b2, c2, d2, e2, a2, x[6], 7); - JJJ(a2, b2, c2, d2, e2, 0, 8); - JJJ(e2, a2, b2, c2, d2, x8, 11); - JJJ(d2, e2, a2, b2, c2, x[1], 14); - JJJ(c2, d2, e2, a2, b2, 0, 14); - JJJ(b2, c2, d2, e2, a2, x[3], 12); - JJJ(a2, b2, c2, d2, e2, 0, 6); - - /* parallel round 2 */ - III(e2, a2, b2, c2, d2, x[6], 9); - III(d2, e2, a2, b2, c2, 0, 13); - III(c2, d2, e2, a2, b2, x[3], 15); - III(b2, c2, d2, e2, a2, x[7], 7); - III(a2, b2, c2, d2, e2, x[0], 12); - III(e2, a2, b2, c2, d2, 0, 8); - III(d2, e2, a2, b2, c2, x[5], 9); - III(c2, d2, e2, a2, b2, 0, 11); - III(b2, c2, d2, e2, a2, x14, 7); - III(a2, b2, c2, d2, e2, 0, 7); - III(e2, a2, b2, c2, d2, x8, 12); - III(d2, e2, a2, b2, c2, 0, 7); - III(c2, d2, e2, a2, b2, x[4], 6); - III(b2, c2, d2, e2, a2, 0, 15); - III(a2, b2, c2, d2, e2, x[1], 13); - III(e2, a2, b2, c2, d2, x[2], 11); - - /* parallel round 3 */ - HHH(d2, e2, a2, b2, c2, 0, 9); - HHH(c2, d2, e2, a2, b2, x[5], 7); - HHH(b2, c2, d2, e2, a2, x[1], 15); - HHH(a2, b2, c2, d2, e2, x[3], 11); - HHH(e2, a2, b2, c2, d2, x[7], 8); - HHH(d2, e2, a2, b2, c2, x14, 6); - HHH(c2, d2, e2, a2, b2, x[6], 6); - HHH(b2, c2, d2, e2, a2, 0, 14); - HHH(a2, b2, c2, d2, e2, 0, 12); - HHH(e2, a2, b2, c2, d2, x8, 13); - HHH(d2, e2, a2, b2, c2, 0, 5); - HHH(c2, d2, e2, a2, b2, x[2], 14); - HHH(b2, c2, d2, e2, a2, 0, 13); - HHH(a2, b2, c2, d2, e2, x[0], 13); - HHH(e2, a2, b2, c2, d2, x[4], 7); - HHH(d2, e2, a2, b2, c2, 0, 5); - - /* parallel round 4 */ - GGG(c2, d2, e2, a2, b2, x8, 15); - GGG(b2, c2, d2, e2, a2, x[6], 5); - GGG(a2, b2, c2, d2, e2, x[4], 8); - GGG(e2, a2, b2, c2, d2, x[1], 11); - GGG(d2, e2, a2, b2, c2, x[3], 14); - GGG(c2, d2, e2, a2, b2, 0, 14); - GGG(b2, c2, d2, e2, a2, 0, 6); - GGG(a2, b2, c2, d2, e2, x[0], 14); - GGG(e2, a2, b2, c2, d2, x[5], 6); - GGG(d2, e2, a2, b2, c2, 0, 9); - GGG(c2, d2, e2, a2, b2, x[2], 12); - GGG(b2, c2, d2, e2, a2, 0, 9); - GGG(a2, b2, c2, d2, e2, 0, 12); - GGG(e2, a2, b2, c2, d2, x[7], 5); - GGG(d2, e2, a2, b2, c2, 0, 15); - GGG(c2, d2, e2, a2, b2, x14, 8); - - /* parallel round 5 */ - FFF(b2, c2, d2, e2, a2, 0, 8); - FFF(a2, b2, c2, d2, e2, 0, 5); - FFF(e2, a2, b2, c2, d2, 0, 12); - FFF(d2, e2, a2, b2, c2, x[4], 9); - FFF(c2, d2, e2, a2, b2, x[1], 12); - FFF(b2, c2, d2, e2, a2, x[5], 5); - FFF(a2, b2, c2, d2, e2, x8, 14); - FFF(e2, a2, b2, c2, d2, x[7], 6); - FFF(d2, e2, a2, b2, c2, x[6], 8); - FFF(c2, d2, e2, a2, b2, x[2], 13); - FFF(b2, c2, d2, e2, a2, 0, 6); - FFF(a2, b2, c2, d2, e2, x14, 5); - FFF(e2, a2, b2, c2, d2, x[0], 15); - FFF(d2, e2, a2, b2, c2, x[3], 13); - FFF(c2, d2, e2, a2, b2, 0, 11); - FFF(b2, c2, d2, e2, a2, 0, 11); - - digest[0] = c1 + d2; - digest[1] = d1 + e2; - digest[2] = e1 + a2; - digest[3] = a1 + b2; - digest[4] = b1 + c2; -} -#endif -#ifndef _SECP256K1_CL -#define _SECP256K1_CL - -typedef ulong uint64_t; - -/** - Prime modulus 2^256 - 2^32 - 977 - */ -__constant unsigned int _P[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F -}; - -__constant unsigned int _P_MINUS1[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F -}; - -/** - Base point X - */ -__constant unsigned int _GX[8] = { - 0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798 -}; - -/** - Base point Y - */ -__constant unsigned int _GY[8] = { - 0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8 -}; - - -/** - * Group order - */ -__constant unsigned int _N[8] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141 -}; - - - -// Add with carry -unsigned int addc(unsigned int a, unsigned int b, unsigned int *carry) -{ - unsigned int sum = a + *carry; - - unsigned int c1 = (sum < a) ? 1 : 0; - - sum = sum + b; - - unsigned int c2 = (sum < b) ? 1 : 0; - - *carry = c1 | c2; - - return sum; -} - -// Subtract with borrow -unsigned int subc(unsigned int a, unsigned int b, unsigned int *borrow) -{ - unsigned int diff = a - *borrow; - - *borrow = (diff > a) ? 1 : 0; - - unsigned int diff2 = diff - b; - - *borrow |= (diff2 > diff) ? 1 : 0; - - return diff2; -} - -#ifdef DEVICE_VENDOR_INTEL -unsigned int mul_hi977(unsigned int x) -{ - unsigned int high = x >> 16; - unsigned int low = x & 0xffff; - - return (((low * 977) >> 16) + (high * 977)) >> 16; -} - -// 32 x 32 multiply-add -void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c) -{ - *low = a * 977; - unsigned int tmp = *low + c; - unsigned int carry = tmp < *low ? 1 : 0; - *low = tmp; - *high = mul_hi977(a) + carry; -} - -#else -// 32 x 32 multiply-add -void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c) -{ - *low = a * 977; - unsigned int tmp = *low + c; - unsigned int carry = tmp < *low ? 1 : 0; - *low = tmp; - *high = mad_hi(a, (unsigned int)977, carry); -} -#endif - -// 32 x 32 multiply-add -void madd(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b, unsigned int c) -{ - *low = a * b; - unsigned int tmp = *low + c; - unsigned int carry = tmp < *low ? 1 : 0; - *low = tmp; - *high = mad_hi(a, b, carry); -} - -void mull(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b) -{ - *low = a * b; - *high = mul_hi(a, b); -} - -unsigned int sub256(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - unsigned int borrow = 0; - for(int i = 7; i >= 0; i--) { - c[i] = subc(a[i], b[i], &borrow); - } - - return borrow; -} - -bool greaterThanEqualToP(const unsigned int a[8]) -{ - for(int i = 0; i < 8; i++) { - if(a[i] > _P_MINUS1[i]) { - return true; - } else if(a[i] < _P_MINUS1[i]) { - return false; - } - } - - return false; -} - -void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int z[16]) -{ - unsigned int high = 0; - - // First round, overwrite z - for(int j = 7; j >= 0; j--) { - - uint64_t product = (uint64_t)x[7] * y[j]; - - product = product + high; - - z[7 + j + 1] = (unsigned int)product; - high = (unsigned int)(product >> 32); - } - z[7] = high; - - for(int i = 6; i >= 0; i--) { - - high = 0; - - for(int j = 7; j >= 0; j--) { - - uint64_t product = (uint64_t)x[i] * y[j]; - - product = product + z[i + j + 1] + high; - - z[i + j + 1] = (unsigned int)product; - - high = product >> 32; - } - - z[i] = high; - } -} - -unsigned int add256(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - unsigned int carry = 0; - - for(int i = 7; i >= 0; i--) { - c[i] = addc(a[i], b[i], &carry); - } - - return carry; -} - - - -bool isInfinity(const unsigned int x[8]) -{ - bool isf = true; - - for(int i = 0; i < 8; i++) { - if(x[i] != 0xffffffff) { - isf = false; - } - } - - return isf; -} - -void copyBigInt(const unsigned int src[8], unsigned int dest[8]) -{ - for(int i = 0; i < 8; i++) { - dest[i] = src[i]; - } -} - -bool equal(const unsigned int a[8], const unsigned int b[8]) -{ - for(int i = 0; i < 8; i++) { - if(a[i] != b[i]) { - return false; - } - } - - return true; -} - -/** - * Reads an 8-word big integer from device memory - */ -void readInt(__global const unsigned int *ara, int idx, unsigned int x[8]) -{ - size_t totalThreads = get_global_size(0); - - size_t base = idx * totalThreads * 8; - - size_t threadId = get_local_size(0) * get_group_id(0) + get_local_id(0); - - for(int i = 0; i < 8; i++) { - x[i] = ara[base + threadId * 8 + i]; - } -} - -/* - * Read least-significant word - */ -unsigned int readLSW(__global const unsigned int *ara, int idx) -{ - size_t totalThreads = get_global_size(0); - - size_t base = idx * totalThreads * 8; - - size_t threadId = get_local_size(0) * get_group_id(0) + get_local_id(0); - - return ara[base + threadId * 8 + 7]; -} - -/** - * Writes an 8-word big integer to device memory - */ -void writeInt(__global unsigned int *ara, int idx, const unsigned int x[8]) -{ - size_t totalThreads = get_global_size(0); - - size_t base = idx * totalThreads * 8; - - size_t threadId = get_local_size(0) * get_group_id(0) + get_local_id(0); - - for(int i = 0; i < 8; i++) { - ara[base + threadId * 8 + i] = x[i]; - } -} - -unsigned int addP(const unsigned int a[8], unsigned int c[8]) -{ - unsigned int carry = 0; - - for(int i = 7; i >= 0; i--) { - c[i] = addc(a[i], _P[i], &carry); - } - - return carry; -} - -unsigned int subP(const unsigned int a[8], unsigned int c[8]) -{ - unsigned int borrow = 0; - for(int i = 7; i >= 0; i--) { - c[i] = subc(a[i], _P[i], &borrow); - } - - return borrow; -} - -/** - * Subtraction mod p - */ -void subModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - if(sub256(a, b, c)) { - addP(c, c); - } -} - - -void addModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - unsigned int carry = 0; - - carry = add256(a, b, c); - - bool gt = false; - for(int i = 0; i < 8; i++) { - if(c[i] > _P[i]) { - gt = true; - break; - } else if(c[i] < _P[i]) { - break; - } - } - - if(carry || gt) { - subP(c, c); - } -} - - - -void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8]) -{ - unsigned int product[16]; - unsigned int hWord = 0; - unsigned int carry = 0; - - // 256 x 256 multiply - multiply256(a, b, product); - - // Copy the high 256 bits - unsigned int high[8]; - - for(int i = 0; i < 8; i++) { - high[i] = product[i]; - } - - for(int i = 0; i < 8; i++) { - product[i] = 0; - } - - // Add 2^32 * high to the low 256 bits (shift left 1 word and add) - // Affects product[14] to product[6] - for(int i = 7; i >= 0; i--) { - product[i + 7] = addc(product[i + 7], high[i], &carry); - } - product[6] = addc(product[6], 0, &carry); - - carry = 0; - - // Multiply high by 977 and add to low - // Affects product[15] to product[5] - for(int i = 7; i >= 0; i--) { - unsigned int t = 0; - madd977(&hWord, &t, high[i], hWord); - product[8 + i] = addc(product[8 + i], t, &carry); - } - product[7] = addc(product[7], hWord, &carry); - product[6] = addc(0, 0, &carry); - - // Multiply high 2 words by 2^32 and add to low - // Affects product[14] to product[7] - carry = 0; - high[7] = product[7]; - high[6] = product[6]; - - product[7] = 0; - product[6] = 0; - - product[14] = addc(product[14], high[7], &carry); - product[13] = addc(product[13], high[6], &carry); - - // Propagate the carry - for(int i = 12; i >= 7; i--) { - product[i] = addc(product[i], 0, &carry); - } - - // Multiply top 2 words by 977 and add to low - // Affects product[15] to product[7] - carry = 0; - hWord = 0; - unsigned int t = 0; - madd977(&hWord, &t, high[7], hWord); - product[15] = addc(product[15], t, &carry); - madd977(&hWord, &t, high[6], hWord); - product[14] = addc(product[14], t, &carry); - product[13] = addc(product[13], hWord, &carry); - - // Propagate carry - for(int i = 12; i >= 7; i--) { - product[i] = addc(product[i], 0, &carry); - } - - // Reduce if >= P - if(product[7] || greaterThanEqualToP(&product[8])) { - subP(&product[8], &product[8]); - } - - for(int i = 0; i < 8; i++) { - c[i] = product[8 + i]; - } -} - -/** - * Multiply mod P - * c = a * c - */ -void mulModP_d(const unsigned int a[8], unsigned int c[8]) -{ - unsigned int tmp[8]; - mulModP(a, c, tmp); - - copyBigInt(tmp, c); -} - -/** - * Square mod P - * b = a * a - */ -void squareModP(const unsigned int a[8], unsigned int b[8]) -{ - mulModP(a, a, b); -} - -/** - * Square mod P - * x = x * x - */ -void squareModP_d(unsigned int x[8]) -{ - unsigned int tmp[8]; - squareModP(x, tmp); - copyBigInt(tmp, x); -} - - - -/** - * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains - */ -void invModP(unsigned int value[8]) -{ - unsigned int x[8]; - - copyBigInt(value, x); - - unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1}; - - // 0xd - 1101 - mulModP_d(x, y); - squareModP_d(x); - //mulModP_d(x, y); - squareModP_d(x); - mulModP_d(x, y); - squareModP_d(x); - mulModP_d(x, y); - squareModP_d(x); - - // 0x2 - 0010 - //mulModP_d(x, y); - squareModP_d(x); - mulModP_d(x, y); - squareModP_d(x); - //mulModP_d(x, y); - squareModP_d(x); - //mulModP_d(x, y); - squareModP_d(x); - - // 0xc = 0x1100 - //mulModP_d(x, y); - squareModP_d(x); - //mulModP_d(x, y); - squareModP_d(x); - mulModP_d(x, y); - squareModP_d(x); - mulModP_d(x, y); - squareModP_d(x); - - - // 0xfffff - // Strange behavior here: Incorrect results if in a single loop - for(int i = 0; i < 19; i++) { - mulModP_d(x, y); - squareModP_d(x); - } - for(int i = 0; i < 1; i++) { - mulModP_d(x, y); - squareModP_d(x); - } - - - // 0xe - 1110 - //mulModP_d(x, y); - squareModP_d(x); - mulModP_d(x, y); - squareModP_d(x); - mulModP_d(x, y); - squareModP_d(x); - mulModP_d(x, y); - squareModP_d(x); - - // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff - for(int i = 0; i < 219; i++) { - mulModP_d(x, y); - squareModP_d(x); - } - - - mulModP_d(x, y); - - copyBigInt(y, value); -} - - -void beginBatchAdd(const unsigned int *px, const unsigned int *x, __global unsigned int *chain, int i, int batchIdx, unsigned int inverse[8]) -{ - // x = Gx - x - unsigned int t[8]; - subModP(px, x, t); - - // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, - // c[2] = diff2 * diff1 * diff0, etc - mulModP_d(t, inverse); - - writeInt(chain, batchIdx, inverse); -} - - -void beginBatchAddWithDouble(const unsigned int *px, const unsigned int *py, __global unsigned int *xPtr, __global unsigned int *chain, int i, int batchIdx, unsigned int inverse[8]) -{ - unsigned int x[8]; - readInt(xPtr, i, x); - - if(equal(px, x)) { - addModP(py, py, x); - } else { - // x = Gx - x - subModP(px, x, x); - } - - // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1, - // c[2] = diff2 * diff1 * diff0, etc - mulModP_d(x, inverse); - - writeInt(chain, batchIdx, inverse); -} - -void completeBatchAddWithDouble( - const unsigned int *px, - const unsigned int *py, - __global const unsigned int *xPtr, - __global const unsigned int *yPtr, - int i, - int batchIdx, - __global unsigned int *chain, - unsigned int *inverse, - unsigned int newX[8], - unsigned int newY[8]) -{ - unsigned int s[8]; - unsigned int x[8]; - unsigned int y[8]; - - readInt(xPtr, i, x); - readInt(yPtr, i, y); - - if(batchIdx >= 1) { - unsigned int c[8]; - - readInt(chain, batchIdx - 1, c); - - mulModP(inverse, c, s); - - unsigned int diff[8]; - if(equal(px, x)) { - addModP(py, py, diff); - } else { - subModP(px, x, diff); - } - - mulModP_d(diff, inverse); - } else { - copyBigInt(inverse, s); - } - - - if(equal(px, x)) { - // currently s = 1 / 2y - - unsigned int x2[8]; - unsigned int tx2[8]; - - // 3x^2 - mulModP(x, x, x2); - addModP(x2, x2, tx2); - addModP(x2, tx2, tx2); - - - // s = 3x^2 * 1/2y - mulModP_d(tx2, s); - - // s^2 - unsigned int s2[8]; - mulModP(s, s, s2); - - // Rx = s^2 - 2px - subModP(s2, x, newX); - subModP(newX, x, newX); - - // Ry = s(px - rx) - py - unsigned int k[8]; - subModP(px, newX, k); - mulModP(s, k, newY); - subModP(newY, py, newY); - - } else { - - unsigned int rise[8]; - subModP(py, y, rise); - - mulModP_d(rise, s); - - // Rx = s^2 - Gx - Qx - unsigned int s2[8]; - mulModP(s, s, s2); - - subModP(s2, px, newX); - subModP(newX, x, newX); - - // Ry = s(px - rx) - py - unsigned int k[8]; - subModP(px, newX, k); - mulModP(s, k, newY); - subModP(newY, py, newY); - } -} - -void completeBatchAdd( - const unsigned int *px, - const unsigned int *py, - __global unsigned int *xPtr, - __global unsigned int *yPtr, - int i, - int batchIdx, - __global unsigned int *chain, - unsigned int *inverse, - unsigned int newX[8], - unsigned int newY[8]) -{ - unsigned int s[8]; - unsigned int x[8]; - - readInt(xPtr, i, x); - - if(batchIdx >= 1) { - unsigned int c[8]; - - readInt(chain, batchIdx - 1, c); - mulModP(inverse, c, s); - - unsigned int diff[8]; - subModP(px, x, diff); - mulModP_d(diff, inverse); - } else { - copyBigInt(inverse, s); - } - - unsigned int y[8]; - readInt(yPtr, i, y); - - unsigned int rise[8]; - subModP(py, y, rise); - - mulModP_d(rise, s); - - // Rx = s^2 - Gx - Qx - unsigned int s2[8]; - mulModP(s, s, s2); - subModP(s2, px, newX); - subModP(newX, x, newX); - - // Ry = s(px - rx) - py - unsigned int k[8]; - subModP(px, newX, k); - mulModP(s, k, newY); - subModP(newY, py, newY); -} - - -void doBatchInverse(unsigned int inverse[8]) -{ - invModP(inverse); -} - -#endif -#ifndef _SHA256_CL -#define _SHA256_CL - - -__constant unsigned int _K[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -__constant unsigned int _IV[8] = { - 0x6a09e667, - 0xbb67ae85, - 0x3c6ef372, - 0xa54ff53a, - 0x510e527f, - 0x9b05688c, - 0x1f83d9ab, - 0x5be0cd19 -}; - -#define rotr(x, n) ((x) >> (n)) ^ ((x) << (32 - (n))) - - -#define MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c))) - -#define CH(e, f, g) (((e) & (f)) ^ (~(e) & (g))) - -#define s0(x) (rotr((x), 7) ^ rotr((x), 18) ^ ((x) >> 3)) - -#define s1(x) (rotr((x), 17) ^ rotr((x), 19) ^ ((x) >> 10)) - -#define round(a, b, c, d, e, f, g, h, m, k)\ - t = CH((e), (f), (g)) + (rotr((e), 6) ^ rotr((e), 11) ^ rotr((e), 25)) + (k) + (m);\ - (d) += (t) + (h);\ - (h) += (t) + MAJ((a), (b), (c)) + (rotr((a), 2) ^ rotr((a), 13) ^ rotr((a), 22)) - - -void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8]) -{ - unsigned int a, b, c, d, e, f, g, h; - unsigned int w[16]; - unsigned int t; - - // 0x04 || x || y - w[0] = (x[0] >> 8) | 0x04000000; - w[1] = (x[1] >> 8) | (x[0] << 24); - w[2] = (x[2] >> 8) | (x[1] << 24); - w[3] = (x[3] >> 8) | (x[2] << 24); - w[4] = (x[4] >> 8) | (x[3] << 24); - w[5] = (x[5] >> 8) | (x[4] << 24); - w[6] = (x[6] >> 8) | (x[5] << 24); - w[7] = (x[7] >> 8) | (x[6] << 24); - w[8] = (y[0] >> 8) | (x[7] << 24); - w[9] = (y[1] >> 8) | (y[0] << 24); - w[10] = (y[2] >> 8) | (y[1] << 24); - w[11] = (y[3] >> 8) | (y[2] << 24); - w[12] = (y[4] >> 8) | (y[3] << 24); - w[13] = (y[5] >> 8) | (y[4] << 24); - w[14] = (y[6] >> 8) | (y[5] << 24); - w[15] = (y[7] >> 8) | (y[6] << 24); - - a = _IV[0]; - b = _IV[1]; - c = _IV[2]; - d = _IV[3]; - e = _IV[4]; - f = _IV[5]; - g = _IV[6]; - h = _IV[7]; - - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, w[1], _K[1]); - round(g, h, a, b, c, d, e, f, w[2], _K[2]); - round(f, g, h, a, b, c, d, e, w[3], _K[3]); - round(e, f, g, h, a, b, c, d, w[4], _K[4]); - round(d, e, f, g, h, a, b, c, w[5], _K[5]); - round(c, d, e, f, g, h, a, b, w[6], _K[6]); - round(b, c, d, e, f, g, h, a, w[7], _K[7]); - round(a, b, c, d, e, f, g, h, w[8], _K[8]); - round(h, a, b, c, d, e, f, g, w[9], _K[9]); - round(g, h, a, b, c, d, e, f, w[10], _K[10]); - round(f, g, h, a, b, c, d, e, w[11], _K[11]); - round(e, f, g, h, a, b, c, d, w[12], _K[12]); - round(d, e, f, g, h, a, b, c, w[13], _K[13]); - round(c, d, e, f, g, h, a, b, w[14], _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); - - a += _IV[0]; - b += _IV[1]; - c += _IV[2]; - d += _IV[3]; - e += _IV[4]; - f += _IV[5]; - g += _IV[6]; - h += _IV[7]; - - // store the intermediate hash value - unsigned int tmp[8]; - tmp[0] = a; - tmp[1] = b; - tmp[2] = c; - tmp[3] = d; - tmp[4] = e; - tmp[5] = f; - tmp[6] = g; - tmp[7] = h; - - w[0] = (y[7] << 24) | 0x00800000; - w[15] = 65 * 8; - - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, 0, _K[1]); - round(g, h, a, b, c, d, e, f, 0, _K[2]); - round(f, g, h, a, b, c, d, e, 0, _K[3]); - round(e, f, g, h, a, b, c, d, 0, _K[4]); - round(d, e, f, g, h, a, b, c, 0, _K[5]); - round(c, d, e, f, g, h, a, b, 0, _K[6]); - round(b, c, d, e, f, g, h, a, 0, _K[7]); - round(a, b, c, d, e, f, g, h, 0, _K[8]); - round(h, a, b, c, d, e, f, g, 0, _K[9]); - round(g, h, a, b, c, d, e, f, 0, _K[10]); - round(f, g, h, a, b, c, d, e, 0, _K[11]); - round(e, f, g, h, a, b, c, d, 0, _K[12]); - round(d, e, f, g, h, a, b, c, 0, _K[13]); - round(c, d, e, f, g, h, a, b, 0, _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); - - w[0] = w[0] + s0(0) + 0 + s1(0); - w[1] = 0 + s0(0) + 0 + s1(w[15]); - w[2] = 0 + s0(0) + 0 + s1(w[0]); - w[3] = 0 + s0(0) + 0 + s1(w[1]); - w[4] = 0 + s0(0) + 0 + s1(w[2]); - w[5] = 0 + s0(0) + 0 + s1(w[3]); - w[6] = 0 + s0(0) + w[15] + s1(w[4]); - w[7] = 0 + s0(0) + w[0] + s1(w[5]); - w[8] = 0 + s0(0) + w[1] + s1(w[6]); - w[9] = 0 + s0(0) + w[2] + s1(w[7]); - w[10] = 0 + s0(0) + w[3] + s1(w[8]); - w[11] = 0 + s0(0) + w[4] + s1(w[9]); - w[12] = 0 + s0(0) + w[5] + s1(w[10]); - w[13] = 0 + s0(0) + w[6] + s1(w[11]); - w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); - - digest[0] = tmp[0] + a; - digest[1] = tmp[1] + b; - digest[2] = tmp[2] + c; - digest[3] = tmp[3] + d; - digest[4] = tmp[4] + e; - digest[5] = tmp[5] + f; - digest[6] = tmp[6] + g; - digest[7] = tmp[7] + h; -} - -void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8]) -{ - unsigned int a, b, c, d, e, f, g, h; - unsigned int w[16]; - unsigned int t; - - // 0x03 || x or 0x02 || x - w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8); - - w[1] = (x[1] >> 8) | (x[0] << 24); - w[2] = (x[2] >> 8) | (x[1] << 24); - w[3] = (x[3] >> 8) | (x[2] << 24); - w[4] = (x[4] >> 8) | (x[3] << 24); - w[5] = (x[5] >> 8) | (x[4] << 24); - w[6] = (x[6] >> 8) | (x[5] << 24); - w[7] = (x[7] >> 8) | (x[6] << 24); - w[8] = (x[7] << 24) | 0x00800000; - w[15] = 33 * 8; - - a = _IV[0]; - b = _IV[1]; - c = _IV[2]; - d = _IV[3]; - e = _IV[4]; - f = _IV[5]; - g = _IV[6]; - h = _IV[7]; - - round(a, b, c, d, e, f, g, h, w[0], _K[0]); - round(h, a, b, c, d, e, f, g, w[1], _K[1]); - round(g, h, a, b, c, d, e, f, w[2], _K[2]); - round(f, g, h, a, b, c, d, e, w[3], _K[3]); - round(e, f, g, h, a, b, c, d, w[4], _K[4]); - round(d, e, f, g, h, a, b, c, w[5], _K[5]); - round(c, d, e, f, g, h, a, b, w[6], _K[6]); - round(b, c, d, e, f, g, h, a, w[7], _K[7]); - round(a, b, c, d, e, f, g, h, w[8], _K[8]); - round(h, a, b, c, d, e, f, g, 0, _K[9]); - round(g, h, a, b, c, d, e, f, 0, _K[10]); - round(f, g, h, a, b, c, d, e, 0, _K[11]); - round(e, f, g, h, a, b, c, d, 0, _K[12]); - round(d, e, f, g, h, a, b, c, 0, _K[13]); - round(c, d, e, f, g, h, a, b, 0, _K[14]); - round(b, c, d, e, f, g, h, a, w[15], _K[15]); - - w[0] = w[0] + s0(w[1]) + 0 + s1(0); - w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]); - w[2] = w[2] + s0(w[3]) + 0 + s1(w[0]); - w[3] = w[3] + s0(w[4]) + 0 + s1(w[1]); - w[4] = w[4] + s0(w[5]) + 0 + s1(w[2]); - w[5] = w[5] + s0(w[6]) + 0 + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(0) + w[1] + s1(w[6]); - w[9] = 0 + s0(0) + w[2] + s1(w[7]); - w[10] = 0 + s0(0) + w[3] + s1(w[8]); - w[11] = 0 + s0(0) + w[4] + s1(w[9]); - w[12] = 0 + s0(0) + w[5] + s1(w[10]); - w[13] = 0 + s0(0) + w[6] + s1(w[11]); - w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[16]); - round(h, a, b, c, d, e, f, g, w[1], _K[17]); - round(g, h, a, b, c, d, e, f, w[2], _K[18]); - round(f, g, h, a, b, c, d, e, w[3], _K[19]); - round(e, f, g, h, a, b, c, d, w[4], _K[20]); - round(d, e, f, g, h, a, b, c, w[5], _K[21]); - round(c, d, e, f, g, h, a, b, w[6], _K[22]); - round(b, c, d, e, f, g, h, a, w[7], _K[23]); - round(a, b, c, d, e, f, g, h, w[8], _K[24]); - round(h, a, b, c, d, e, f, g, w[9], _K[25]); - round(g, h, a, b, c, d, e, f, w[10], _K[26]); - round(f, g, h, a, b, c, d, e, w[11], _K[27]); - round(e, f, g, h, a, b, c, d, w[12], _K[28]); - round(d, e, f, g, h, a, b, c, w[13], _K[29]); - round(c, d, e, f, g, h, a, b, w[14], _K[30]); - round(b, c, d, e, f, g, h, a, w[15], _K[31]); - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[32]); - round(h, a, b, c, d, e, f, g, w[1], _K[33]); - round(g, h, a, b, c, d, e, f, w[2], _K[34]); - round(f, g, h, a, b, c, d, e, w[3], _K[35]); - round(e, f, g, h, a, b, c, d, w[4], _K[36]); - round(d, e, f, g, h, a, b, c, w[5], _K[37]); - round(c, d, e, f, g, h, a, b, w[6], _K[38]); - round(b, c, d, e, f, g, h, a, w[7], _K[39]); - round(a, b, c, d, e, f, g, h, w[8], _K[40]); - round(h, a, b, c, d, e, f, g, w[9], _K[41]); - round(g, h, a, b, c, d, e, f, w[10], _K[42]); - round(f, g, h, a, b, c, d, e, w[11], _K[43]); - round(e, f, g, h, a, b, c, d, w[12], _K[44]); - round(d, e, f, g, h, a, b, c, w[13], _K[45]); - round(c, d, e, f, g, h, a, b, w[14], _K[46]); - round(b, c, d, e, f, g, h, a, w[15], _K[47]); - - - w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); - w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); - w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); - w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); - w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); - w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); - w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); - w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); - w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); - w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); - w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); - w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); - w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); - w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); - w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); - w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); - - round(a, b, c, d, e, f, g, h, w[0], _K[48]); - round(h, a, b, c, d, e, f, g, w[1], _K[49]); - round(g, h, a, b, c, d, e, f, w[2], _K[50]); - round(f, g, h, a, b, c, d, e, w[3], _K[51]); - round(e, f, g, h, a, b, c, d, w[4], _K[52]); - round(d, e, f, g, h, a, b, c, w[5], _K[53]); - round(c, d, e, f, g, h, a, b, w[6], _K[54]); - round(b, c, d, e, f, g, h, a, w[7], _K[55]); - round(a, b, c, d, e, f, g, h, w[8], _K[56]); - round(h, a, b, c, d, e, f, g, w[9], _K[57]); - round(g, h, a, b, c, d, e, f, w[10], _K[58]); - round(f, g, h, a, b, c, d, e, w[11], _K[59]); - round(e, f, g, h, a, b, c, d, w[12], _K[60]); - round(d, e, f, g, h, a, b, c, w[13], _K[61]); - round(c, d, e, f, g, h, a, b, w[14], _K[62]); - round(b, c, d, e, f, g, h, a, w[15], _K[63]); - - a += _IV[0]; - b += _IV[1]; - c += _IV[2]; - d += _IV[3]; - e += _IV[4]; - f += _IV[5]; - g += _IV[6]; - h += _IV[7]; - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} -#endif #define COMPRESSED 0 #define UNCOMPRESSED 1 #define BOTH 2 diff --git a/CryptoUtil/CryptoUtil.vcxproj b/CryptoUtil/CryptoUtil.vcxproj index 8b0ed8b..32f9a37 100644 --- a/CryptoUtil/CryptoUtil.vcxproj +++ b/CryptoUtil/CryptoUtil.vcxproj @@ -20,6 +20,7 @@ + diff --git a/CryptoUtil/picosha2.h b/CryptoUtil/picosha2.h new file mode 100644 index 0000000..5e4c78f --- /dev/null +++ b/CryptoUtil/picosha2.h @@ -0,0 +1,379 @@ +/* +The MIT License (MIT) + +Copyright (C) 2017 okdshin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef PICOSHA2_H +#define PICOSHA2_H +// picosha2:20140213 + +#ifndef PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR +#define PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR \ + 1048576 //=1024*1024: default is 1MB memory +#endif + +#include +#include +#include +#include +#include +#include +namespace picosha2 { + typedef unsigned long word_t; + typedef unsigned char byte_t; + + static const size_t k_digest_size = 32; + + namespace detail { + inline byte_t mask_8bit(byte_t x) { return x & 0xff; } + + inline word_t mask_32bit(word_t x) { return x & 0xffffffff; } + + const word_t add_constant[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, + 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; + + const word_t initial_message_digest[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, + 0xa54ff53a, 0x510e527f, 0x9b05688c, + 0x1f83d9ab, 0x5be0cd19 }; + + inline word_t ch(word_t x, word_t y, word_t z) { return (x & y) ^ ((~x) & z); } + + inline word_t maj(word_t x, word_t y, word_t z) { + return (x & y) ^ (x & z) ^ (y & z); + } + + inline word_t rotr(word_t x, std::size_t n) { + assert(n < 32); + return mask_32bit((x >> n) | (x << (32 - n))); + } + + inline word_t bsig0(word_t x) { return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22); } + + inline word_t bsig1(word_t x) { return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25); } + + inline word_t shr(word_t x, std::size_t n) { + assert(n < 32); + return x >> n; + } + + inline word_t ssig0(word_t x) { return rotr(x, 7) ^ rotr(x, 18) ^ shr(x, 3); } + + inline word_t ssig1(word_t x) { return rotr(x, 17) ^ rotr(x, 19) ^ shr(x, 10); } + + template + void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 last) { + assert(first + 64 == last); + static_cast(last); // for avoiding unused-variable warning + word_t w[64]; + std::fill(w, w + 64, 0); + for (std::size_t i = 0; i < 16; ++i) { + w[i] = (static_cast(mask_8bit(*(first + i * 4))) << 24) | + (static_cast(mask_8bit(*(first + i * 4 + 1))) << 16) | + (static_cast(mask_8bit(*(first + i * 4 + 2))) << 8) | + (static_cast(mask_8bit(*(first + i * 4 + 3)))); + } + for (std::size_t i = 16; i < 64; ++i) { + w[i] = mask_32bit(ssig1(w[i - 2]) + w[i - 7] + ssig0(w[i - 15]) + + w[i - 16]); + } + + word_t a = *message_digest; + word_t b = *(message_digest + 1); + word_t c = *(message_digest + 2); + word_t d = *(message_digest + 3); + word_t e = *(message_digest + 4); + word_t f = *(message_digest + 5); + word_t g = *(message_digest + 6); + word_t h = *(message_digest + 7); + + for (std::size_t i = 0; i < 64; ++i) { + word_t temp1 = h + bsig1(e) + ch(e, f, g) + add_constant[i] + w[i]; + word_t temp2 = bsig0(a) + maj(a, b, c); + h = g; + g = f; + f = e; + e = mask_32bit(d + temp1); + d = c; + c = b; + b = a; + a = mask_32bit(temp1 + temp2); + } + *message_digest += a; + *(message_digest + 1) += b; + *(message_digest + 2) += c; + *(message_digest + 3) += d; + *(message_digest + 4) += e; + *(message_digest + 5) += f; + *(message_digest + 6) += g; + *(message_digest + 7) += h; + for (std::size_t i = 0; i < 8; ++i) { + *(message_digest + i) = mask_32bit(*(message_digest + i)); + } + } + + } // namespace detail + + template + void output_hex(InIter first, InIter last, std::ostream& os) { + os.setf(std::ios::hex, std::ios::basefield); + while (first != last) { + os.width(2); + os.fill('0'); + os << static_cast(*first); + ++first; + } + os.setf(std::ios::dec, std::ios::basefield); + } + + template + void bytes_to_hex_string(InIter first, InIter last, std::string& hex_str) { + std::ostringstream oss; + output_hex(first, last, oss); + hex_str.assign(oss.str()); + } + + template + void bytes_to_hex_string(const InContainer& bytes, std::string& hex_str) { + bytes_to_hex_string(bytes.begin(), bytes.end(), hex_str); + } + + template + std::string bytes_to_hex_string(InIter first, InIter last) { + std::string hex_str; + bytes_to_hex_string(first, last, hex_str); + return hex_str; + } + + template + std::string bytes_to_hex_string(const InContainer& bytes) { + std::string hex_str; + bytes_to_hex_string(bytes, hex_str); + return hex_str; + } + + class hash256_one_by_one { + public: + hash256_one_by_one() { init(); } + + void init() { + buffer_.clear(); + std::fill(data_length_digits_, data_length_digits_ + 4, 0); + std::copy(detail::initial_message_digest, + detail::initial_message_digest + 8, h_); + } + + template + void process(RaIter first, RaIter last) { + add_to_data_length(static_cast(std::distance(first, last))); + std::copy(first, last, std::back_inserter(buffer_)); + std::size_t i = 0; + for (; i + 64 <= buffer_.size(); i += 64) { + detail::hash256_block(h_, buffer_.begin() + i, + buffer_.begin() + i + 64); + } + buffer_.erase(buffer_.begin(), buffer_.begin() + i); + } + + void finish() { + byte_t temp[64]; + std::fill(temp, temp + 64, 0); + std::size_t remains = buffer_.size(); + std::copy(buffer_.begin(), buffer_.end(), temp); + temp[remains] = 0x80; + + if (remains > 55) { + std::fill(temp + remains + 1, temp + 64, 0); + detail::hash256_block(h_, temp, temp + 64); + std::fill(temp, temp + 64 - 4, 0); + } + else { + std::fill(temp + remains + 1, temp + 64 - 4, 0); + } + + write_data_bit_length(&(temp[56])); + detail::hash256_block(h_, temp, temp + 64); + } + + template + void get_hash_bytes(OutIter first, OutIter last) const { + for (const word_t* iter = h_; iter != h_ + 8; ++iter) { + for (std::size_t i = 0; i < 4 && first != last; ++i) { + *(first++) = detail::mask_8bit( + static_cast((*iter >> (24 - 8 * i)))); + } + } + } + + private: + void add_to_data_length(word_t n) { + word_t carry = 0; + data_length_digits_[0] += n; + for (std::size_t i = 0; i < 4; ++i) { + data_length_digits_[i] += carry; + if (data_length_digits_[i] >= 65536u) { + carry = data_length_digits_[i] >> 16; + data_length_digits_[i] &= 65535u; + } + else { + break; + } + } + } + void write_data_bit_length(byte_t* begin) { + word_t data_bit_length_digits[4]; + std::copy(data_length_digits_, data_length_digits_ + 4, + data_bit_length_digits); + + // convert byte length to bit length (multiply 8 or shift 3 times left) + word_t carry = 0; + for (std::size_t i = 0; i < 4; ++i) { + word_t before_val = data_bit_length_digits[i]; + data_bit_length_digits[i] <<= 3; + data_bit_length_digits[i] |= carry; + data_bit_length_digits[i] &= 65535u; + carry = (before_val >> (16 - 3)) & 65535u; + } + + // write data_bit_length + for (int i = 3; i >= 0; --i) { + (*begin++) = static_cast(data_bit_length_digits[i] >> 8); + (*begin++) = static_cast(data_bit_length_digits[i]); + } + } + std::vector buffer_; + word_t data_length_digits_[4]; // as 64bit integer (16bit x 4 integer) + word_t h_[8]; + }; + + inline void get_hash_hex_string(const hash256_one_by_one& hasher, + std::string& hex_str) { + byte_t hash[k_digest_size]; + hasher.get_hash_bytes(hash, hash + k_digest_size); + return bytes_to_hex_string(hash, hash + k_digest_size, hex_str); + } + + inline std::string get_hash_hex_string(const hash256_one_by_one& hasher) { + std::string hex_str; + get_hash_hex_string(hasher, hex_str); + return hex_str; + } + + namespace impl { + template + void hash256_impl(RaIter first, RaIter last, OutIter first2, OutIter last2, int, + std::random_access_iterator_tag) { + hash256_one_by_one hasher; + // hasher.init(); + hasher.process(first, last); + hasher.finish(); + hasher.get_hash_bytes(first2, last2); + } + + template + void hash256_impl(InputIter first, InputIter last, OutIter first2, + OutIter last2, int buffer_size, std::input_iterator_tag) { + std::vector buffer(buffer_size); + hash256_one_by_one hasher; + // hasher.init(); + while (first != last) { + int size = buffer_size; + for (int i = 0; i != buffer_size; ++i, ++first) { + if (first == last) { + size = i; + break; + } + buffer[i] = *first; + } + hasher.process(buffer.begin(), buffer.begin() + size); + } + hasher.finish(); + hasher.get_hash_bytes(first2, last2); + } + } + + template + void hash256(InIter first, InIter last, OutIter first2, OutIter last2, + int buffer_size = PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR) { + picosha2::impl::hash256_impl( + first, last, first2, last2, buffer_size, + typename std::iterator_traits::iterator_category()); + } + + template + void hash256(InIter first, InIter last, OutContainer& dst) { + hash256(first, last, dst.begin(), dst.end()); + } + + template + void hash256(const InContainer& src, OutIter first, OutIter last) { + hash256(src.begin(), src.end(), first, last); + } + + template + void hash256(const InContainer& src, OutContainer& dst) { + hash256(src.begin(), src.end(), dst.begin(), dst.end()); + } + + template + void hash256_hex_string(InIter first, InIter last, std::string& hex_str) { + byte_t hashed[k_digest_size]; + hash256(first, last, hashed, hashed + k_digest_size); + std::ostringstream oss; + output_hex(hashed, hashed + k_digest_size, oss); + hex_str.assign(oss.str()); + } + + template + std::string hash256_hex_string(InIter first, InIter last) { + std::string hex_str; + hash256_hex_string(first, last, hex_str); + return hex_str; + } + + inline void hash256_hex_string(const std::string& src, std::string& hex_str) { + hash256_hex_string(src.begin(), src.end(), hex_str); + } + + template + void hash256_hex_string(const InContainer& src, std::string& hex_str) { + hash256_hex_string(src.begin(), src.end(), hex_str); + } + + template + std::string hash256_hex_string(const InContainer& src) { + return hash256_hex_string(src.begin(), src.end()); + } + templatevoid hash256(std::ifstream& f, OutIter first, OutIter last) { + hash256(std::istreambuf_iterator(f), std::istreambuf_iterator(), first, last); + + } +}// namespace picosha2 +#endif // PICOSHA2_H \ No newline at end of file diff --git a/FileStringsToHash/FileStringsToHash.cpp b/FileStringsToHash/FileStringsToHash.cpp new file mode 100644 index 0000000..d8b50ab --- /dev/null +++ b/FileStringsToHash/FileStringsToHash.cpp @@ -0,0 +1,47 @@ +// FileStringsToHash.cpp : This file contains the 'main' function. Program execution begins and ends there. +// + +#include +#include +#include +#include "util.h" +#include +#include "picosha2.h" +#include +#include +#include + +using namespace std; + + + +std::vector ReadFileLines(const std::string& fileName) +{ + std::vector lines; + util::readLinesFromStream(fileName, lines); + return lines; +} + +int main() +{ + cout << "Hello World!\n"; + + //std::string path("C:/Users/avira/Documents/Passwords/example.txt"); + string path("C:/Users/avira/Documents/Passwords/10-million-password-list-top-100000.txt"); + vector lines = ReadFileLines(path); + + + + cudaError_t cudaStatus; + + // Choose which GPU to run on, change this on a multi-GPU system. + cudaStatus = cudaSetDevice(0); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?"); + //goto Error; + } + + + +} + diff --git a/FileStringsToHash/FileStringsToHash.vcxproj b/FileStringsToHash/FileStringsToHash.vcxproj new file mode 100644 index 0000000..b402d68 --- /dev/null +++ b/FileStringsToHash/FileStringsToHash.vcxproj @@ -0,0 +1,171 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 16.0 + {4C13BA07-5037-45D4-BBEA-F4AB74E3FC0B} + Win32Proj + FileStringsToHash + 10.0 + + + + Application + true + v142 + Unicode + + + Application + false + v142 + true + Unicode + + + Application + true + v142 + Unicode + + + Application + false + v142 + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + $(SolutionDir)secp256k1lib;$(SolutionDir)util;$(SolutionDir)AddressUtil;$(SolutionDir)CryptoUtil;$(SolutionDir)CmdParse;%(AdditionalIncludeDirectories) + %(AdditionalUsingDirectories) + + + Console + true + $(CUDA_LIB);%(AdditionalLibraryDirectories) + + + + + + + Level3 + Disabled + true + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + + + + + + + Level3 + MaxSpeed + true + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + $(SolutionDir)secp256k1lib;$(SolutionDir)util;$(SolutionDir)AddressUtil;$(SolutionDir)CryptoUtil;$(SolutionDir)CmdParse;%(AdditionalIncludeDirectories) + + + Console + true + true + true + + + + + + + + {ca46856a-1d1e-4f6f-a69c-6707d540bf36} + + + {93b89bf6-32b9-4eba-aa44-bcfec4836b76} + + + + + + \ No newline at end of file diff --git a/StringsHasher/CKSD.cpp b/StringsHasher/CKSD.cpp new file mode 100644 index 0000000..a871812 --- /dev/null +++ b/StringsHasher/CKSD.cpp @@ -0,0 +1,41 @@ +#include "CKSD.h" +#include "Logger.h" +#include "util.h" +#include "cudabridge.h" +#include "AddressUtil.h" + +void CKSD::cudaCall(cudaError_t err) +{ + if(err) { + std::string errStr = cudaGetErrorString(err); + + throw KeySearchException(errStr); + } +} + +CKSD::CKSD(int device, int threads, int pointsPerThread, int blocks) +{ + + _device = device; + + _pointsPerThread = pointsPerThread; +} + +void CKSD::init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride) +{ + if(start.cmp(secp256k1::N) >= 0) { + throw KeySearchException("Starting key is out of range"); + } + + _compression = compression; + + cudaCall(cudaSetDevice(_device)); + + // Block on kernel calls + cudaCall(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync)); + + // Use a larger portion of shared memory for L1 cache + cudaCall(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); + + +} diff --git a/StringsHasher/CKSD.cu b/StringsHasher/CKSD.cu new file mode 100644 index 0000000..6e3cacb --- /dev/null +++ b/StringsHasher/CKSD.cu @@ -0,0 +1,20 @@ +#include +#include +#include +#include "KeySearchTypes.h" +#include "CKSD.h" +#include "ptx.cuh" +#include "secp256k1.cuh" + +#include "sha256.cuh" +#include "ripemd160.cuh" + +#include "secp256k1.h" + +#include "CudaHashLookup.cuh" +#include "CudaAtomicList.cuh" +#include "CudaDeviceKeys.cuh" + +__constant__ unsigned int _INC_X[8]; + +__constant__ unsigned int _INC_Y[8]; diff --git a/StringsHasher/CKSD.h b/StringsHasher/CKSD.h new file mode 100644 index 0000000..0d0730d --- /dev/null +++ b/StringsHasher/CKSD.h @@ -0,0 +1,51 @@ +#ifndef _CUDA_KEY_SEARCH_DEVICE +#define _CUDA_KEY_SEARCH_DEVICE + +#include "KeySearchDevice.h" +#include +#include +#include "secp256k1.h" +#include "CudaDeviceKeys.h" +#include "CudaHashLookup.h" +#include "CudaAtomicList.h" +#include "cudaUtil.h" + +// Structures that exist on both host and device side +struct CudaDeviceResult { + int thread; + int block; + int idx; + bool compressed; + unsigned int x[8]; + unsigned int y[8]; + unsigned int digest[5]; +}; + +class CKSD : public KeySearchDevice { + +private: + + int _device; + + int _blocks; + + int _threads; + + int _pointsPerThread; + + int _compression; + + std::string _deviceName; + + void cudaCall(cudaError_t err); + + +public: + + CKSD(int device, int threads, int pointsPerThread, int blocks = 0); + + virtual void init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride); + +}; + +#endif \ No newline at end of file diff --git a/StringsHasher/StringsHasher.vcxproj b/StringsHasher/StringsHasher.vcxproj new file mode 100644 index 0000000..eeaf225 --- /dev/null +++ b/StringsHasher/StringsHasher.vcxproj @@ -0,0 +1,115 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + {ca46856a-1d1e-4f6f-a69c-6707d540bf36} + + + {cca3d02c-5e5a-4a24-b34b-5961dfa93946} + + + {150af404-1f80-4a13-855b-4383c4a3326f} + + + {93b89bf6-32b9-4eba-aa44-bcfec4836b76} + + + + + + + + + + + {691AB22B-0E94-4C8A-8C63-E33CF8768726} + StringsHasher + 10.0 + + + + Application + true + MultiByte + v142 + + + StaticLibrary + false + true + MultiByte + v142 + + + + + + + + + + + + + + + + true + + + + Level4 + Disabled + WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + %(AdditionalUsingDirectories) + $(SolutionDir)\secp256k1lib;$(SolutionDir)\KeyFinderLib;$(SolutionDir)\Logger;$(SolutionDir)\Util;$(SolutionDir)\CudaMath;$(SolutionDir)\cudaUtil;$(SolutionDir)\CryptoUtil;$(SolutionDir)\AddressUtil;$(SolutionDir)\CudaKeySearchDevice;$(CUDA_INCLUDE) + + + true + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + 64 + true + %(CodeGeneration) + + + + + Level4 + MaxSpeed + true + true + WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + %(AdditionalUsingDirectories) + $(SolutionDir)\secp256k1lib;$(SolutionDir)\KeyFinderLib;$(SolutionDir)\Logger;$(SolutionDir)\Util;$(SolutionDir)\CudaMath;$(SolutionDir)\cudaUtil;$(SolutionDir)\AddressUtil;$(CUDA_INCLUDE) + + + true + true + true + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + 64 + true + %(CodeGeneration) + + + + + + + \ No newline at end of file diff --git a/StringsHasher/getopt.c b/StringsHasher/getopt.c new file mode 100644 index 0000000..4ac1e44 --- /dev/null +++ b/StringsHasher/getopt.c @@ -0,0 +1,117 @@ +// Put this in a separate .h file (called "getopt.h"). +// The prototype for the header file is: +/* +#ifndef GETOPT_H +#define GETOPT_H + +int getopt(int nargc, char * const nargv[], const char *ostr) ; + +#endif +*/ + +#include "getopt.h" // make sure you construct the header file as dictated above + +/* +* Copyright (c) 1987, 1993, 1994 +* The Regents of the University of California. All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* 3. All advertising materials mentioning features or use of this software +* must display the following acknowledgement: +* This product includes software developed by the University of +* California, Berkeley and its contributors. +* 4. Neither the name of the University nor the names of its contributors +* may be used to endorse or promote products derived from this software +* without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGE. +*/ + +#include +#include + +int opterr = 1, /* if error message should be printed */ +optind = 1, /* index into parent argv vector */ +optopt, /* character checked for validity */ +optreset; /* reset getopt */ +char* optarg; /* argument associated with option */ + +#define BADCH (int)'?' +#define BADARG (int)':' +#define EMSG "" + +/* + * getopt -- + * Parse argc/argv argument vector. + */ +int getopt(int nargc, char* const nargv[], const char* ostr) +{ + static char* place = EMSG; /* option letter processing */ + const char* oli; /* option letter list index */ + + if (optreset || !*place) { /* update scanning pointer */ + optreset = 0; + if (optind >= nargc || *(place = nargv[optind]) != '-') { + place = EMSG; + return (-1); + } + if (place[1] && *++place == '-') { /* found "--" */ + ++optind; + place = EMSG; + return (-1); + } + } /* option letter okay? */ + if ((optopt = (int)*place++) == (int)':' || + !(oli = strchr(ostr, optopt))) { + /* + * if the user didn't specify '-' as an option, + * assume it means -1. + */ + if (optopt == (int)'-') + return (-1); + if (!*place) + ++optind; + if (opterr && *ostr != ':') + (void)printf("illegal option -- %c\n", optopt); + return (BADCH); + } + if (*++oli != ':') { /* don't need argument */ + optarg = NULL; + if (!*place) + ++optind; + } + else { /* need an argument */ + if (*place) /* no white space */ + optarg = place; + else if (nargc <= ++optind) { /* no arg */ + place = EMSG; + if (*ostr == ':') + return (BADARG); + if (opterr) + (void)printf("option requires an argument -- %c\n", optopt); + return (BADCH); + } + else /* white space */ + optarg = nargv[optind]; + place = EMSG; + ++optind; + } + return (optopt); /* dump back option letter */ +} diff --git a/StringsHasher/getopt.h b/StringsHasher/getopt.h new file mode 100644 index 0000000..eb418c3 --- /dev/null +++ b/StringsHasher/getopt.h @@ -0,0 +1,6 @@ +#ifndef GETOPT_H +#define GETOPT_H + +int getopt(int nargc, char * const nargv[], const char *ostr) ; + +#endif diff --git a/StringsHasher/kernal.cu b/StringsHasher/kernal.cu new file mode 100644 index 0000000..2ca5190 --- /dev/null +++ b/StringsHasher/kernal.cu @@ -0,0 +1,201 @@ + +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include +#include +#include "util.h" +#include +#include +#include "picosha2.h" + +cudaError_t addWithCuda(std::string* combined, std::vector* indexes); + + +__global__ void printStringKernel( + char* lines, //combined string data, + int* indexes, //indexes telling us the beginning and end of each string, + int indexes_size //number of strings being analyzed +) +{ + printf("Starting 'printStringKernel' on device\n"); + printf("lines value: %s \n", lines); + printf("indexes value: %d \n", indexes[0]); + + int i = threadIdx.x; + + size_t id = threadIdx.x;//"Which String are we examining?" + + if (id >= indexes_size) {//Bounds Checking + printf("thread id: %d EXIT.\n", id); + return; + } + char* string; //Beginning of the string + int string_length = 0; //Beginning of the string + if (id == 0) {//First String + string = lines; + string_length = indexes[0]; + } + else { + string_length = indexes[id] - indexes[id - 1]; + string = (lines + indexes[id - 1]); + } + printf("string length value: %d \n", string_length); + for (int i = 0 ; i ReadFileLines(const std::string& fileName) +{ + std::vector lines; + util::readLinesFromStream(fileName, lines); + return lines; +} + + +int main2() +{ + + + std::string path("C:/Users/avira/Documents/Passwords/example.txt"); + //std::string path("C:/Users/avira/Documents/Passwords/10-million-password-list-top-10000.txt"); + std::vector lines = ReadFileLines(path); + + + + std::string path2("SourceFiles/list.txt"); + std::vector lines2 = ReadFileLines(path2); + + + + std::string path3("list.txt"); + std::vector lines3 = ReadFileLines(path2); + + std::cout << "Before Loop." << std::endl; + std::string combined; //Works perfectly fine so long as it is contiguously allocated + std::vector indexes; //You *might* be able to use int instead of size_t to save space + for (std::string const& line : lines) { + std::cout << "In Loop." << std::endl; + combined += line; + indexes.emplace_back(combined.size()); + } + std::cout << "After Loop." << std::endl; + + /* + for(int i = 0 ; i < 100; i++) + { + std::string hash_hex_str; + picosha2::hash256_hex_string(lines[0], hash_hex_str); + } + */ + + /* If 'lines' initially consisted of ["Dog", "Cat", "Tree", "Yard"], 'combined' is now + * "DogCatTreeYard", and 'indexes' is now [3, 6, 10, 14]. + */ + + // Add vectors in parallel. + cudaError_t cudaStatus = addWithCuda(&combined, &indexes); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "addWithCuda failed!"); + return 1; + } + + // cudaDeviceReset must be called before exiting in order for profiling and + // tracing tools such as Nsight and Visual Profiler to show complete traces. + cudaStatus = cudaDeviceReset(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceReset failed!"); + return 1; + } + + return 0; +} + + + +// Helper function for using CUDA to add vectors in parallel. +cudaError_t addWithCuda(std::string* combined, std::vector* indexes) +{ + char* dev_combined = 0; + int* dev_indexes = 0; + cudaError_t cudaStatus; + + // Choose which GPU to run on, change this on a multi-GPU system. + cudaStatus = cudaSetDevice(0); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?"); + goto Error; + } + + + // Allocate GPU buffers for three vectors (two input, one output) . + + cudaStatus = cudaMalloc((void**)&dev_combined, combined->size() * sizeof(char)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + cudaStatus = cudaMalloc((void**)&dev_indexes, indexes->size() * sizeof(int)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + // Copy input vectors from host memory to GPU buffers. + cudaStatus = cudaMemcpy(dev_combined, combined->data(), combined->size() * sizeof(char), cudaMemcpyHostToDevice); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + + cudaStatus = cudaMemcpy(dev_indexes, indexes->data(), indexes->size() * sizeof(int), cudaMemcpyHostToDevice); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + + + + // Launch a kernel on the GPU with one thread for each element. + printStringKernel <<< 1, 8 >>> (dev_combined, dev_indexes, indexes->size()); + + // Check for any errors launching the kernel + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; + } + + // cudaDeviceSynchronize waits for the kernel to finish, and returns + // any errors encountered during the launch. + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus); + goto Error; + } + + + +Error: + cudaFree(dev_combined); + cudaFree(dev_indexes); + + return cudaStatus; +} diff --git a/StringsHasher/kernelb.cu b/StringsHasher/kernelb.cu new file mode 100644 index 0000000..6df1da6 --- /dev/null +++ b/StringsHasher/kernelb.cu @@ -0,0 +1,140 @@ + +#include "cuda_runtime.h" +#include "device_launch_parameters.h" +#include +#include +#include +#include +#include + +cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size); + +__global__ void addKernel(int *c, const int *a, const int *b) +{ + int i = threadIdx.x; + c[i] = a[i] + b[i]; +} + + + +std::vector ReadFileLines(const std::string& fileName) +{ + std::vector lines; + util::readLinesFromStream(fileName, lines); + return lines; +} + +int main() +{ + + + std::string path("C:/Users/avira/Documents/Passwords/example.txt"); + std::vector lines3 = ReadFileLines(path); + + return 0; + + const int arraySize = 5; + const int a[arraySize] = { 1, 2, 3, 4, 5 }; + const int b[arraySize] = { 10, 20, 30, 40, 50 }; + int c[arraySize] = { 0 }; + + // Add vectors in parallel. + cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "addWithCuda failed!"); + return 1; + } + + printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n", + c[0], c[1], c[2], c[3], c[4]); + + // cudaDeviceReset must be called before exiting in order for profiling and + // tracing tools such as Nsight and Visual Profiler to show complete traces. + cudaStatus = cudaDeviceReset(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceReset failed!"); + return 1; + } + + return 0; +} + +// Helper function for using CUDA to add vectors in parallel. +cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size) +{ + int *dev_a = 0; + int *dev_b = 0; + int *dev_c = 0; + cudaError_t cudaStatus; + + // Choose which GPU to run on, change this on a multi-GPU system. + cudaStatus = cudaSetDevice(0); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?"); + goto Error; + } + + // Allocate GPU buffers for three vectors (two input, one output) . + cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + // Copy input vectors from host memory to GPU buffers. + cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + + cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + + // Launch a kernel on the GPU with one thread for each element. + //addKernel<<<1, size>>>(dev_c, dev_a, dev_b); + + // Check for any errors launching the kernel + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; + } + + // cudaDeviceSynchronize waits for the kernel to finish, and returns + // any errors encountered during the launch. + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus); + goto Error; + } + + // Copy output vector from GPU buffer to host memory. + cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + +Error: + cudaFree(dev_c); + cudaFree(dev_a); + cudaFree(dev_b); + + return cudaStatus; +} diff --git a/StringsHasher/main.cu b/StringsHasher/main.cu new file mode 100644 index 0000000..625b845 --- /dev/null +++ b/StringsHasher/main.cu @@ -0,0 +1,225 @@ +// cd /home/hork/cuda-workspace/CudaSHA256/Debug/files +// time ~/Dropbox/FIIT/APS/Projekt/CpuSHA256/a.out -f ../file-list +// time ../CudaSHA256 -f ../file-list + + +#include +#include +#include +#include "sha256.cuh" +#include +#include +#include "cudabridge.h" +#include "util.h" +#include + +char* trim(char* str) { + size_t len = 0; + char* frontp = str; + char* endp = NULL; + + if (str == NULL) { return NULL; } + if (str[0] == '\0') { return str; } + + len = strlen(str); + endp = str + len; + + /* Move the front and back pointers to address the first non-whitespace + * characters from each end. + */ + while (isspace((unsigned char)*frontp)) { ++frontp; } + if (endp != frontp) + { + while (isspace((unsigned char)*(--endp)) && endp != frontp) {} + } + + if (str + len - 1 != endp) + *(endp + 1) = '\0'; + else if (frontp != str && endp == frontp) + *str = '\0'; + + /* Shift the string so that it starts at str so that if it's dynamically + * allocated, we can still free it on the returned pointer. Note the reuse + * of endp to mean the front of the string buffer now. + */ + endp = str; + if (frontp != str) + { + while (*frontp) { *endp++ = *frontp++; } + *endp = '\0'; + } + + + return str; +} + +__global__ void sha256_cuda(JOB** jobs, int n) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + // perform sha256 calculation here + if (i < n) { + SHA256_CTX ctx; + sha256_init(&ctx); + sha256_update(&ctx, jobs[i]->data, jobs[i]->size); + sha256_final(&ctx, jobs[i]->digest); + } +} + +void pre_sha256() { + // compy symbols + checkCudaErrors(cudaMemcpyToSymbol(dev_k, host_k, sizeof(host_k), 0, cudaMemcpyHostToDevice)); +} + + +void runJobs(JOB** jobs, int n) { + int blockSize = 4; + int numBlocks = (n + blockSize - 1) / blockSize; + sha256_cuda << < numBlocks, blockSize >> > (jobs, n); +} + + +JOB* JOB_init(BYTE* data, long size, char* fname) { + JOB* j; + checkCudaErrors(cudaMallocManaged(&j, sizeof(JOB))); //j = (JOB *)malloc(sizeof(JOB)); + checkCudaErrors(cudaMallocManaged(&(j->data), size)); + j->data = data; + j->size = size; + for (int i = 0; i < 64; i++) + { + j->digest[i] = 0xff; + } + strcpy(j->fname, fname); + return j; +} + + +BYTE* get_file_data(char* fname, unsigned long* size) { + FILE* f = 0; + BYTE* buffer = 0; + unsigned long fsize = 0; + + f = fopen(fname, "rb"); + if (!f) { + fprintf(stderr, "get_file_data Unable to open '%s'\n", fname); + return 0; + } + fflush(f); + + if (fseek(f, 0, SEEK_END)) { + fprintf(stderr, "Unable to fseek %s\n", fname); + return 0; + } + fflush(f); + fsize = ftell(f); + rewind(f); + + //buffer = (char *)malloc((fsize+1)*sizeof(char)); + checkCudaErrors(cudaMallocManaged(&buffer, (fsize + 1) * sizeof(char))); + fread(buffer, fsize, 1, f); + fclose(f); + *size = fsize; + return buffer; +} + +void print_usage() { + printf("Usage: CudaSHA256 [OPTION] [FILE]...\n"); + printf("Calculate sha256 hash of given FILEs\n\n"); + printf("OPTIONS:\n"); + printf("\t-f FILE1 \tRead a list of files (separeted by \\n) from FILE1, output hash for each file\n"); + printf("\t-h \tPrint this help\n"); + printf("\nIf no OPTIONS are supplied, then program reads the content of FILEs and outputs hash for each FILEs \n"); + printf("\nOutput format:\n"); + printf("Hash following by two spaces following by file name (same as sha256sum).\n"); + printf("\nNotes:\n"); + printf("Calculations are performed on GPU, each seperate file is hashed in its own thread\n"); +} + +int main(int argc, char** argv) { + int i = 0, n = 0; + size_t len; + unsigned long temp; + char* a_file = 0, * line = 0; + BYTE* buff = 0; + char option, index; + //ssize_t read; + JOB** jobs; + + /* + // parse input + while ((option = getopt(argc, argv, "hf:")) != -1) + switch (option) { + case 'h': + print_usage(); + break; + case 'f': + a_file = optarg; + break; + default: + break; + } + */ + + /* + FILE* f = 0; + f = fopen(a_file, "r"); + if (!f) { + fprintf(stderr, "Unable to open %s\n", a_file); + return 0; + } + */ + + std::string path("C:/Users/avira/Documents/Passwords/example.txt"); + std::vector lines = util::ReadFileLines(path); + + n = lines.size(); + checkCudaErrors(cudaMallocManaged(&jobs, n * sizeof(JOB*))); + //fseek(f, 0, SEEK_SET); + n = 0; + std::cout << "Before Loop." << std::endl; + //std::string combined; //Works perfectly fine so long as it is contiguously allocated + //std::vector indexes; //You *might* be able to use int instead of size_t to save space + for (std::string const& line : lines) { + std::cout << "In Loop." << std::endl; + //std::copy(line.begin(), line.end(), buff); + BYTE* buffer = 0; + size_t length = line.size() + 1; + checkCudaErrors(cudaMallocManaged(&buffer, length * sizeof(char))); + std::copy(line.begin(), line.end(), buffer); + jobs[n++] = JOB_init(buffer, length - 1, "test"); + } + std::cout << "After Loop." << std::endl; + + + //line = "C:/Users/avira/Documents/Passwords/example.txt"; + //line = trim(line); + //buff = get_file_data(line, &temp); + //jobs[n++] = JOB_init(buff, temp, line); + + pre_sha256(); + runJobs(jobs, n); + + + /* + else { + // get number of arguments = files = jobs + n = argc - optind; + if (n > 0) { + + checkCudaErrors(cudaMallocManaged(&jobs, n * sizeof(JOB*))); + + // iterate over file list - non optional arguments + for (i = 0, index = optind; index < argc; index++, i++) { + buff = get_file_data(argv[index], &temp); + jobs[i] = JOB_init(buff, temp, argv[index]); + } + + pre_sha256(); + runJobs(jobs, n); + } + } + */ + + cudaDeviceSynchronize(); + print_jobs(jobs, n); + cudaDeviceReset(); + return 0; +} diff --git a/StringsHasher/sha256.cuh b/StringsHasher/sha256.cuh new file mode 100644 index 0000000..9bd6977 --- /dev/null +++ b/StringsHasher/sha256.cuh @@ -0,0 +1,250 @@ +#ifndef SHA256_H +#define SHA256_H + + +/****************************** MACROS ******************************/ +#define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest + +#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b)))) +#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b)))) + +#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) +#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) +#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) +#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3)) +#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10)) + +#define checkCudaErrors(x) \ +{ \ + cudaGetLastError(); \ + x; \ + cudaError_t err = cudaGetLastError(); \ + if (err != cudaSuccess) \ + printf("GPU: cudaError %d (%s)\n", err, cudaGetErrorString(err)); \ +} +#include +#include +/**************************** DATA TYPES ****************************/ +typedef unsigned char BYTE; // 8-bit byte +typedef uint32_t WORD; // 32-bit word, change to "long" for 16-bit machines + +typedef struct JOB { + BYTE* data; + unsigned long long size; + BYTE digest[64]; + char fname[128]; +}JOB; + + +typedef struct { + BYTE data[64]; + WORD datalen; + unsigned long long bitlen; + WORD state[8]; +} SHA256_CTX; + +__constant__ WORD dev_k[64]; + +static const WORD host_k[64] = { + 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, + 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, + 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, + 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, + 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, + 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, + 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, + 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +}; + +/*********************** FUNCTION DECLARATIONS **********************/ +char* print_sha(BYTE* buff); +__device__ void sha256_init(SHA256_CTX* ctx); +__device__ void sha256_update(SHA256_CTX* ctx, const BYTE data[], size_t len); +__device__ void sha256_final(SHA256_CTX* ctx, BYTE hash[]); + + +char* hash_to_string(BYTE* buff) { + char* string = (char*)malloc(70); + int k, i; + for (i = 0, k = 0; i < 32; i++, k += 2) + { + sprintf(string + k, "%.2x", buff[i]); + //printf("%02x", buff[i]); + } + string[64] = 0; + return string; +} + +void print_job(JOB* j) { + printf("%s %s\n", hash_to_string(j->digest), j->data); +} + +void print_jobs(JOB** jobs, int n) { + for (int i = 0; i < n; i++) + { + print_job(jobs[i]); + // printf("@ %p JOB[%i] \n", jobs[i], i); + // printf("\t @ 0x%p data = %x \n", jobs[i]->data, (jobs[i]->data == 0)? 0 : jobs[i]->data[0]); + // printf("\t @ 0x%p size = %llu \n", &(jobs[i]->size), jobs[i]->size); + // printf("\t @ 0x%p fname = %s \n", &(jobs[i]->fname), jobs[i]->fname); + // printf("\t @ 0x%p digest = %s \n------\n", jobs[i]->digest, hash_to_string(jobs[i]->digest)); + } +} + +__device__ void mycpy12(uint32_t* d, const uint32_t* s) { +#pragma unroll 3 + for (int k = 0; k < 3; k++) d[k] = s[k]; +} + +__device__ void mycpy16(uint32_t* d, const uint32_t* s) { +#pragma unroll 4 + for (int k = 0; k < 4; k++) d[k] = s[k]; +} + +__device__ void mycpy32(uint32_t* d, const uint32_t* s) { +#pragma unroll 8 + for (int k = 0; k < 8; k++) d[k] = s[k]; +} + +__device__ void mycpy44(uint32_t* d, const uint32_t* s) { +#pragma unroll 11 + for (int k = 0; k < 11; k++) d[k] = s[k]; +} + +__device__ void mycpy48(uint32_t* d, const uint32_t* s) { +#pragma unroll 12 + for (int k = 0; k < 12; k++) d[k] = s[k]; +} + +__device__ void mycpy64(uint32_t* d, const uint32_t* s) { +#pragma unroll 16 + for (int k = 0; k < 16; k++) d[k] = s[k]; +} + +__device__ void sha256_transform(SHA256_CTX* ctx, const BYTE data[]) +{ + WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64]; + WORD S[8]; + + //mycpy32(S, ctx->state); + +#pragma unroll 16 + for (i = 0, j = 0; i < 16; ++i, j += 4) + m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]); + +#pragma unroll 64 + for (; i < 64; ++i) + m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16]; + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + f = ctx->state[5]; + g = ctx->state[6]; + h = ctx->state[7]; + +#pragma unroll 64 + for (i = 0; i < 64; ++i) { + t1 = h + EP1(e) + CH(e, f, g) + dev_k[i] + m[i]; + t2 = EP0(a) + MAJ(a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; + ctx->state[5] += f; + ctx->state[6] += g; + ctx->state[7] += h; +} + +__device__ void sha256_init(SHA256_CTX* ctx) +{ + ctx->datalen = 0; + ctx->bitlen = 0; + ctx->state[0] = 0x6a09e667; + ctx->state[1] = 0xbb67ae85; + ctx->state[2] = 0x3c6ef372; + ctx->state[3] = 0xa54ff53a; + ctx->state[4] = 0x510e527f; + ctx->state[5] = 0x9b05688c; + ctx->state[6] = 0x1f83d9ab; + ctx->state[7] = 0x5be0cd19; +} + +__device__ void sha256_update(SHA256_CTX* ctx, const BYTE data[], size_t len) +{ + WORD i; + + // for each byte in message + for (i = 0; i < len; ++i) { + // ctx->data == message 512 bit chunk + ctx->data[ctx->datalen] = data[i]; + ctx->datalen++; + if (ctx->datalen == 64) { + sha256_transform(ctx, ctx->data); + ctx->bitlen += 512; + ctx->datalen = 0; + } + } +} + +__device__ void sha256_final(SHA256_CTX* ctx, BYTE hash[]) +{ + WORD i; + + i = ctx->datalen; + + // Pad whatever data is left in the buffer. + if (ctx->datalen < 56) { + ctx->data[i++] = 0x80; + while (i < 56) + ctx->data[i++] = 0x00; + } + else { + ctx->data[i++] = 0x80; + while (i < 64) + ctx->data[i++] = 0x00; + sha256_transform(ctx, ctx->data); + memset(ctx->data, 0, 56); + } + + // Append to the padding the total message's length in bits and transform. + ctx->bitlen += ctx->datalen * 8; + ctx->data[63] = ctx->bitlen; + ctx->data[62] = ctx->bitlen >> 8; + ctx->data[61] = ctx->bitlen >> 16; + ctx->data[60] = ctx->bitlen >> 24; + ctx->data[59] = ctx->bitlen >> 32; + ctx->data[58] = ctx->bitlen >> 40; + ctx->data[57] = ctx->bitlen >> 48; + ctx->data[56] = ctx->bitlen >> 56; + sha256_transform(ctx, ctx->data); + + // Since this implementation uses little endian byte ordering and SHA uses big endian, + // reverse all the bytes when copying the final state to the output hash. + for (i = 0; i < 4; ++i) { + hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff; + hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff; + hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff; + hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff; + hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff; + hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff; + hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff; + hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff; + } +} + +#endif // SHA256_H diff --git a/StringsHasher/unistd.h b/StringsHasher/unistd.h new file mode 100644 index 0000000..3611d90 --- /dev/null +++ b/StringsHasher/unistd.h @@ -0,0 +1,56 @@ +#ifndef _UNISTD_H +#define _UNISTD_H 1 + +/* This is intended as a drop-in replacement for unistd.h on Windows. + * Please add functionality as neeeded. + * https://stackoverflow.com/a/826027/1202830 + */ + +#include +#include +#include "getopt.h" /* getopt at: https://gist.github.com/ashelly/7776712 */ +#include /* for getpid() and the exec..() family */ +#include /* for _getcwd() and _chdir() */ + +#define srandom srand +#define random rand + + /* Values for the second argument to access. + These may be OR'd together. */ +#define R_OK 4 /* Test for read permission. */ +#define W_OK 2 /* Test for write permission. */ + //#define X_OK 1 /* execute permission - unsupported in windows*/ +#define F_OK 0 /* Test for existence. */ + +#define access _access +#define dup2 _dup2 +#define execve _execve +#define ftruncate _chsize +#define unlink _unlink +#define fileno _fileno +#define getcwd _getcwd +#define chdir _chdir +#define isatty _isatty +#define lseek _lseek +/* read, write, and close are NOT being #defined here, because while there are file handle specific versions for Windows, they probably don't work for sockets. You need to look at your app and consider whether to call e.g. closesocket(). */ + +#ifdef _WIN64 +#define ssize_t __int64 +#else +#define ssize_t long +#endif + +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 +/* should be in some equivalent to */ +//typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; + +#endif /* unistd.h */ \ No newline at end of file diff --git a/TestLib/TestLib.vcxproj b/TestLib/TestLib.vcxproj new file mode 100644 index 0000000..87929b8 --- /dev/null +++ b/TestLib/TestLib.vcxproj @@ -0,0 +1,86 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {FEBBC324-1FF3-4393-9249-535549A85B80} + TestLib + + + + Application + true + MultiByte + v142 + + + Application + false + true + MultiByte + v142 + + + + + + + + + + + + + + true + + + + Level3 + Disabled + WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + 64 + + + + + Level3 + MaxSpeed + true + true + WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + 64 + + + + + + + + + + \ No newline at end of file diff --git a/TestLib/kernel.cu b/TestLib/kernel.cu new file mode 100644 index 0000000..90d489d --- /dev/null +++ b/TestLib/kernel.cu @@ -0,0 +1,121 @@ + +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include + +cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size); + +__global__ void addKernel(int *c, const int *a, const int *b) +{ + int i = threadIdx.x; + c[i] = a[i] + b[i]; +} + +int main() +{ + const int arraySize = 5; + const int a[arraySize] = { 1, 2, 3, 4, 5 }; + const int b[arraySize] = { 10, 20, 30, 40, 50 }; + int c[arraySize] = { 0 }; + + // Add vectors in parallel. + cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "addWithCuda failed!"); + return 1; + } + + printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n", + c[0], c[1], c[2], c[3], c[4]); + + // cudaDeviceReset must be called before exiting in order for profiling and + // tracing tools such as Nsight and Visual Profiler to show complete traces. + cudaStatus = cudaDeviceReset(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceReset failed!"); + return 1; + } + + return 0; +} + +// Helper function for using CUDA to add vectors in parallel. +cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size) +{ + int *dev_a = 0; + int *dev_b = 0; + int *dev_c = 0; + cudaError_t cudaStatus; + + // Choose which GPU to run on, change this on a multi-GPU system. + cudaStatus = cudaSetDevice(0); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?"); + goto Error; + } + + // Allocate GPU buffers for three vectors (two input, one output) . + cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + // Copy input vectors from host memory to GPU buffers. + cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + + cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + + // Launch a kernel on the GPU with one thread for each element. + addKernel<<<1, size>>>(dev_c, dev_a, dev_b); + + // Check for any errors launching the kernel + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; + } + + // cudaDeviceSynchronize waits for the kernel to finish, and returns + // any errors encountered during the launch. + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus); + goto Error; + } + + // Copy output vector from GPU buffer to host memory. + cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + +Error: + cudaFree(dev_c); + cudaFree(dev_a); + cudaFree(dev_b); + + return cudaStatus; +} diff --git a/util/util.cpp b/util/util.cpp index b48750b..42314c1 100644 --- a/util/util.cpp +++ b/util/util.cpp @@ -295,4 +295,11 @@ namespace util { return s.substr(left, right - left + 1); } + + std::vector ReadFileLines(const std::string& fileName) + { + std::vector lines; + util::readLinesFromStream(fileName, lines); + return lines; + } } \ No newline at end of file diff --git a/util/util.h b/util/util.h index 83b81b2..ce410ea 100644 --- a/util/util.h +++ b/util/util.h @@ -41,6 +41,7 @@ unsigned int endian(unsigned int x); std::string toLower(const std::string &s); std::string trim(const std::string &s, char c=' '); +std::vector ReadFileLines(const std::string& fileName); } #endif \ No newline at end of file From 77794b2b5e1f6b5a19147d1f668d714b415368f6 Mon Sep 17 00:00:00 2001 From: aviram fireberger Date: Tue, 29 Dec 2020 15:04:28 +0200 Subject: [PATCH 3/3] Measering time on GPU --- StringsHasher/main.cu | 137 ++++++++++++--------------------------- StringsHasher/sha256.cuh | 9 ++- 2 files changed, 49 insertions(+), 97 deletions(-) diff --git a/StringsHasher/main.cu b/StringsHasher/main.cu index 625b845..e49ecac 100644 --- a/StringsHasher/main.cu +++ b/StringsHasher/main.cu @@ -12,6 +12,7 @@ #include "cudabridge.h" #include "util.h" #include +#include char* trim(char* str) { size_t len = 0; @@ -61,6 +62,35 @@ __global__ void sha256_cuda(JOB** jobs, int n) { sha256_init(&ctx); sha256_update(&ctx, jobs[i]->data, jobs[i]->size); sha256_final(&ctx, jobs[i]->digest); + + if (i == 0) + { + printf("jobs[i]->data = \n"); + /* + char* string = (char*)malloc(70); + int k, i; + for (i = 0, k = 0; i < 32; i++, k += 2) + { + sprintf(string + k, "%.2x", buff[i]); + //printf("%02x", buff[i]); + } + string[64] = 0; + return string; + */ + + } + /* + SHA256_CTX ctx2; + sha256_init(&ctx2); + sha256_update(&ctx2, jobs[i]->digest, 64); + sha256_final(&ctx2, jobs[i]->digest2); + + + SHA256_CTX ctx3; + sha256_init(&ctx3); + sha256_update(&ctx3, jobs[i]->digest, 32); + sha256_final(&ctx3, jobs[i]->digest3); + */ } } @@ -86,53 +116,14 @@ JOB* JOB_init(BYTE* data, long size, char* fname) { for (int i = 0; i < 64; i++) { j->digest[i] = 0xff; + //j->digest2[i] = 0xff; + //j->digest3[i] = 0xff; } strcpy(j->fname, fname); return j; } -BYTE* get_file_data(char* fname, unsigned long* size) { - FILE* f = 0; - BYTE* buffer = 0; - unsigned long fsize = 0; - - f = fopen(fname, "rb"); - if (!f) { - fprintf(stderr, "get_file_data Unable to open '%s'\n", fname); - return 0; - } - fflush(f); - - if (fseek(f, 0, SEEK_END)) { - fprintf(stderr, "Unable to fseek %s\n", fname); - return 0; - } - fflush(f); - fsize = ftell(f); - rewind(f); - - //buffer = (char *)malloc((fsize+1)*sizeof(char)); - checkCudaErrors(cudaMallocManaged(&buffer, (fsize + 1) * sizeof(char))); - fread(buffer, fsize, 1, f); - fclose(f); - *size = fsize; - return buffer; -} - -void print_usage() { - printf("Usage: CudaSHA256 [OPTION] [FILE]...\n"); - printf("Calculate sha256 hash of given FILEs\n\n"); - printf("OPTIONS:\n"); - printf("\t-f FILE1 \tRead a list of files (separeted by \\n) from FILE1, output hash for each file\n"); - printf("\t-h \tPrint this help\n"); - printf("\nIf no OPTIONS are supplied, then program reads the content of FILEs and outputs hash for each FILEs \n"); - printf("\nOutput format:\n"); - printf("Hash following by two spaces following by file name (same as sha256sum).\n"); - printf("\nNotes:\n"); - printf("Calculations are performed on GPU, each seperate file is hashed in its own thread\n"); -} - int main(int argc, char** argv) { int i = 0, n = 0; size_t len; @@ -140,35 +131,12 @@ int main(int argc, char** argv) { char* a_file = 0, * line = 0; BYTE* buff = 0; char option, index; - //ssize_t read; JOB** jobs; - /* - // parse input - while ((option = getopt(argc, argv, "hf:")) != -1) - switch (option) { - case 'h': - print_usage(); - break; - case 'f': - a_file = optarg; - break; - default: - break; - } - */ - - /* - FILE* f = 0; - f = fopen(a_file, "r"); - if (!f) { - fprintf(stderr, "Unable to open %s\n", a_file); - return 0; - } - */ - std::string path("C:/Users/avira/Documents/Passwords/example.txt"); std::vector lines = util::ReadFileLines(path); + + auto t0 = std::chrono::high_resolution_clock::now(); n = lines.size(); checkCudaErrors(cudaMallocManaged(&jobs, n * sizeof(JOB*))); @@ -178,7 +146,6 @@ int main(int argc, char** argv) { //std::string combined; //Works perfectly fine so long as it is contiguously allocated //std::vector indexes; //You *might* be able to use int instead of size_t to save space for (std::string const& line : lines) { - std::cout << "In Loop." << std::endl; //std::copy(line.begin(), line.end(), buff); BYTE* buffer = 0; size_t length = line.size() + 1; @@ -187,39 +154,19 @@ int main(int argc, char** argv) { jobs[n++] = JOB_init(buffer, length - 1, "test"); } std::cout << "After Loop." << std::endl; - - - //line = "C:/Users/avira/Documents/Passwords/example.txt"; - //line = trim(line); - //buff = get_file_data(line, &temp); - //jobs[n++] = JOB_init(buff, temp, line); + auto t1 = std::chrono::high_resolution_clock::now(); pre_sha256(); runJobs(jobs, n); - - /* - else { - // get number of arguments = files = jobs - n = argc - optind; - if (n > 0) { - - checkCudaErrors(cudaMallocManaged(&jobs, n * sizeof(JOB*))); - - // iterate over file list - non optional arguments - for (i = 0, index = optind; index < argc; index++, i++) { - buff = get_file_data(argv[index], &temp); - jobs[i] = JOB_init(buff, temp, argv[index]); - } - - pre_sha256(); - runJobs(jobs, n); - } - } - */ - cudaDeviceSynchronize(); - print_jobs(jobs, n); + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration_total = std::chrono::duration_cast(t2 - t0).count(); + auto duration_gpu_work = std::chrono::duration_cast(t2 - t1).count(); + printf("\t duration_gpu_work = %d microseconds \n", duration_gpu_work); + printf("\t duration_total = %d microseconds \n", duration_total); + + //print_jobs(jobs, n); cudaDeviceReset(); return 0; } diff --git a/StringsHasher/sha256.cuh b/StringsHasher/sha256.cuh index 9bd6977..90768bf 100644 --- a/StringsHasher/sha256.cuh +++ b/StringsHasher/sha256.cuh @@ -26,6 +26,7 @@ #include #include /**************************** DATA TYPES ****************************/ +//typedef unsigned char BYTE; // 8-bit byte typedef unsigned char BYTE; // 8-bit byte typedef uint32_t WORD; // 32-bit word, change to "long" for 16-bit machines @@ -33,6 +34,8 @@ typedef struct JOB { BYTE* data; unsigned long long size; BYTE digest[64]; + //BYTE digest2[64]; + //BYTE digest3[64]; char fname[128]; }JOB; @@ -67,7 +70,7 @@ __device__ void sha256_final(SHA256_CTX* ctx, BYTE hash[]); char* hash_to_string(BYTE* buff) { char* string = (char*)malloc(70); int k, i; - for (i = 0, k = 0; i < 32; i++, k += 2) + for (i = 0, k = 0; i < 32; i++, k += 1) { sprintf(string + k, "%.2x", buff[i]); //printf("%02x", buff[i]); @@ -77,7 +80,9 @@ char* hash_to_string(BYTE* buff) { } void print_job(JOB* j) { - printf("%s %s\n", hash_to_string(j->digest), j->data); + printf("d1 %s %s\n", hash_to_string(j->digest), j->data); + //printf("d2 %s %s\n", hash_to_string(j->digest2), j->data); + //printf("d3 %s %s\n", hash_to_string(j->digest3), j->data); } void print_jobs(JOB** jobs, int n) {