Dateien nach "/" hochladen

This commit is contained in:
w12
2024-12-14 19:18:09 +01:00
commit ed17769e53
16 changed files with 3667 additions and 0 deletions

71
Makefile Normal file
View File

@@ -0,0 +1,71 @@
# Copyright 2013-2018 Alexander Peslyak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
CC = gcc
LD = $(CC)
RM = rm -f
OMPFLAGS = -fopenmp
CFLAGS = -Wall -O2 -fomit-frame-pointer
#CFLAGS = -Wall -msse -O2 -fomit-frame-pointer
#CFLAGS = -Wall -msse2 -O2 -fomit-frame-pointer
#CFLAGS = -Wall -O2 -march=native -fomit-frame-pointer
# -lrt is for benchmark's use of clock_gettime()
LDFLAGS = -s -lrt
PROJ = tests benchmark
OBJS_CORE = yespower-opt.o
OBJS_COMMON = sha256.o
OBJS_TESTS = $(OBJS_CORE) $(OBJS_COMMON) tests.o
OBJS_BENCHMARK = $(OBJS_CORE) $(OBJS_COMMON) benchmark.o
OBJS_RM = yespower-*.o
all: $(PROJ)
check: tests
@echo 'Running tests'
@time ./tests | tee TESTS-OUT
@diff -U0 TESTS-OK TESTS-OUT && echo PASSED || echo FAILED
ref:
$(MAKE) $(PROJ) OBJS_CORE=yespower-ref.o
check-ref:
$(MAKE) check OBJS_CORE=yespower-ref.o
tests: $(OBJS_TESTS)
$(LD) $(LDFLAGS) $(OBJS_TESTS) -o $@
benchmark: $(OBJS_BENCHMARK)
$(LD) $(LDFLAGS) $(OMPFLAGS) -lpthread $(OBJS_BENCHMARK) -o $@
benchmark.o: benchmark.c
$(CC) -c $(CFLAGS) $(OMPFLAGS) $*.c
.c.o:
$(CC) -c $(CFLAGS) $*.c
yespower-ref.o: yespower.h
yespower-opt.o: yespower-platform.c yespower.h
tests.o: yespower.h
benchmark.o: yespower.h
clean:
$(RM) $(PROJ)
$(RM) $(OBJS_TESTS) $(OBJS_BENCHMARK)
$(RM) $(OBJS_RM)
$(RM) TESTS-OUT

95
PERFORMANCE Normal file
View File

@@ -0,0 +1,95 @@
Included with yespower is the "benchmark" program, which is built by
simply invoking "make". When invoked without parameters, it tests
yespower 0.5 at N = 2048, r = 8, which appears to be the lowest setting
in use by existing cryptocurrencies. On an i7-4770K with 4x DDR3-1600
(on two memory channels) running CentOS 7 for x86-64 (and built with
CentOS 7's default version of gcc) and with thread affinity set, this
reports between 3700 and 3800 hashes per second for both SSE2 and AVX
builds, e.g.:
$ GOMP_CPU_AFFINITY=0-7 OMP_NUM_THREADS=4 ./benchmark
version=0.5 N=2048 r=8
Will use 2048.00 KiB RAM
a5 9f ec 4c 4f dd a1 6e 3b 14 05 ad da 66 d5 25 b6 8e 7c ad fc fe 6a c0 66 c7 ad 11 8c d8 05 90
Benchmarking 1 thread ...
1018 H/s real, 1018 H/s virtual (2047 hashes in 2.01 seconds)
Benchmarking 4 threads ...
3773 H/s real, 950 H/s virtual (8188 hashes in 2.17 seconds)
min 0.984 ms, avg 1.052 ms, max 1.074 ms
Running 8 threads (to match the logical rather than the physical CPU
core count) results in very slightly worse performance on this system,
but this might be the other way around on another and/or with other
parameters. Upgrading to yespower 1.0, performance at these parameters
improves to almost 4000 hashes per second:
$ GOMP_CPU_AFFINITY=0-7 OMP_NUM_THREADS=4 ./benchmark 10
version=1.0 N=2048 r=8
Will use 2048.00 KiB RAM
d0 78 cd d4 cf 3f 5a a8 4e 3c 4a 58 66 29 81 d8 2d 27 e5 67 36 37 c4 be 77 63 61 32 24 c1 8a 93
Benchmarking 1 thread ...
1080 H/s real, 1080 H/s virtual (4095 hashes in 3.79 seconds)
Benchmarking 4 threads ...
3995 H/s real, 1011 H/s virtual (16380 hashes in 4.10 seconds)
min 0.923 ms, avg 0.989 ms, max 1.137 ms
Running 8 threads results in substantial slowdown with this new version
(to between 3200 and 3400 hashes per second) because of cache thrashing.
For higher settings such as those achieving 8 MiB instead of the 2 MiB
above, this system performs at around 800 hashes per second for yespower
0.5 and at around 830 hashes per second for yespower 1.0:
$ GOMP_CPU_AFFINITY=0-7 OMP_NUM_THREADS=4 ./benchmark 5 2048 32
version=0.5 N=2048 r=32
Will use 8192.00 KiB RAM
56 0a 89 1b 5c a2 e1 c6 36 11 1a 9f f7 c8 94 a5 d0 a2 60 2f 43 fd cf a5 94 9b 95 e2 2f e4 46 1e
Benchmarking 1 thread ...
265 H/s real, 265 H/s virtual (1023 hashes in 3.85 seconds)
Benchmarking 4 threads ...
803 H/s real, 200 H/s virtual (4092 hashes in 5.09 seconds)
min 4.924 ms, avg 4.980 ms, max 5.074 ms
$ GOMP_CPU_AFFINITY=0-7 OMP_NUM_THREADS=4 ./benchmark 10 2048 32
version=1.0 N=2048 r=32
Will use 8192.00 KiB RAM
f7 69 26 ae 4a dc 56 53 90 2f f0 22 78 ea aa 39 eb 99 84 11 ac 3e a6 24 2e 19 6d fb c4 3d 68 25
Benchmarking 1 thread ...
275 H/s real, 275 H/s virtual (1023 hashes in 3.71 seconds)
Benchmarking 4 threads ...
831 H/s real, 209 H/s virtual (4092 hashes in 4.92 seconds)
min 3.614 ms, avg 4.769 ms, max 5.011 ms
Again, running 8 threads results in a slowdown, albeit not as bad as can
be seen for lower settings.
On x86(-64), the following code versions may reasonably be built: SSE2,
AVX, and XOP. (There's no reason to build for AVX2 and higher, which is
unsuitable for and thus unused by current yespower anyway. There's also
no reason to build yespower as-is for SSE4, although there's a disabled
by default 32-bit specific SSE4 code version that may be re-enabled and
given a try if someone is so inclined; it may perform slightly slower or
slightly faster across different systems.)
yescrypt and especially yespower 1.0 have been designed to fit the SSE2
instruction set almost perfectly, so there's very little benefit from
the AVX and XOP builds, yet even at yespower 1.0 there may be
performance differences between SSE2, AVX, and XOP builds within 2% or
so (and it is unclear which is the fastest on a given system until
tested, except that where XOP is supported it is almost always faster
than AVX).
Proper setting of thread affinities to run exactly one thread per
physical CPU core is non-trivial. In the above examples, it so happened
that the first 4 logical CPU numbers corresponded to different physical
cores, but this won't always be the case. This can vary even between
apparently similar systems. On Linux, the mapping of logical CPUs to
physical cores may be obtained from /proc/cpuinfo (on x86[-64] and MIC)
or sysfs, which an optimized implementation of e.g. a cryptocurrency
miner could use. If you do not bother obtaining this information from
the operating system, you might be better off not setting thread
affinities at all (in order to avoid the risk of doing this incorrectly,
which would have a greater negative performance impact) and/or running
as many threads as there are logical CPUs. Also, there's no certainty
whether different and future CPUs will run yespower faster using one or
maybe more threads per physical core.

203
README Normal file
View File

@@ -0,0 +1,203 @@
What is yespower?
yespower is a proof-of-work (PoW) focused fork of yescrypt. While
yescrypt is a password-based key derivation function (KDF) and password
hashing scheme, and thus is meant for processing passwords, yespower is
meant for processing trial inputs such as block headers (including
nonces) in PoW-based blockchains.
On its own, yespower isn't a complete proof-of-work system. Rather, in
the blockchain use case, yespower's return value is meant to be checked
for being numerically no greater than the blockchain's current target
(which is related to mining difficulty) or else the proof attempt
(yespower invocation) is to be repeated (with a different nonce) until
the condition is finally met (allowing a new block to be mined). This
process isn't specific to yespower and isn't part of yespower itself
(rather, it is similar in many PoW-based blockchains and is to be
defined and implemented externally to yespower) and thus isn't described
in here any further.
Why or why not yespower?
Different proof-of-work schemes in existence vary in many aspects,
including in friendliness to different types of hardware. There's
demand for all sorts of hardware (un)friendliness in those - for
different use cases and by different communities.
yespower in particular is designed to be CPU-friendly, GPU-unfriendly,
and FPGA/ASIC-neutral. In other words, it's meant to be relatively
efficient to compute on current CPUs and relatively inefficient on
current GPUs. Unfortunately, being GPU-unfriendly also means that
eventual FPGA and ASIC implementations will only compete with CPUs, and
at least ASICs will win over the CPUs (FPGAs might not because of this
market's peculiarities - large FPGAs are even more "over-priced" than
large CPUs are), albeit by far not to the extent they did e.g. for
Bitcoin and Litecoin.
There's a lot of talk about "ASIC resistance". What is (or should be)
meant by that is limiting the advantage of specialized ASICs. While
limiting the advantage at KDF to e.g. 10x and at password hashing to
e.g. 100x (talking orders of magnitude here, in whatever terms) may be
considered "ASIC resistant" (as compared to e.g. 100,000x we'd have
without trying), similar improvement factors are practically not "ASIC
resistant" for cryptocurrency mining where they can make all the
difference between CPU mining being profitable and not. There might
also exist in-between PoW use cases where moderate ASIC advantage is OK,
such as with non-cryptocurrency and/or private/permissioned blockchains.
Thus, current yespower may be considered either a short-term choice
(valid until one of its uses provides sufficient perceived incentive to
likely result in specialized ASICs) or a deliberate choice of a pro-CPU,
anti-GPU, moderately-pro-ASIC PoW scheme. It is also possible to
respond to known improvements in future GPUs/implementations and/or to
ASICs with new versions of yespower that users would need to switch to.
yespower versions.
yespower includes optimized and specialized re-implementation of the
obsolete yescrypt 0.5 (based off its first submission to Password
Hashing Competition back in 2014) now re-released as yespower 0.5, and
brand new proof-of-work specific variation known as yespower 1.0.
yespower 0.5 is intended as a compatible upgrade for cryptocurrencies
that already use yescrypt 0.5 (providing a few percent speedup), and
yespower 1.0 may be used as a further upgrade or a new choice of PoW by
those and other cryptocurrencies and other projects.
There are many significant differences between yespower 0.5 and 1.0
under the hood, but the main user visible difference is yespower 1.0
greatly improving on GPU-unfriendliness in light of improvements seen in
modern GPUs (up to and including NVIDIA Volta) and GPU implementations
of yescrypt 0.5. This is achieved mostly through greater use of CPUs'
L2 cache.
The version of algorithm to use is requested through parameters,
allowing for both algorithms to co-exist in client and miner
implementations (such as in preparation for a cryptocurrency hard-fork
and/or supporting multiple cryptocurrencies in one program).
Parameter selection.
For new uses of yespower, set the requested version to the highest
supported, and set N*r to the highest you can reasonably afford in terms
of proof verification time (which might in turn be determined by desired
share rate per mining pool server), using one of the following options:
1 MiB: N = 1024, r = 8
2 MiB: N = 2048, r = 8
4 MiB: N = 1024, r = 32
8 MiB: N = 2048, r = 32
16 MiB: N = 4096, r = 32
and so on for higher N keeping r=32.
You may also set the personalization string to your liking, but that is
not required (you can set its pointer to NULL and its length to 0). Its
support is provided mostly for compatibility with existing modifications
of yescrypt 0.5.
Performance.
Please refer to PERFORMANCE for some benchmarks and performance tuning.
How to test yespower for proper operation.
On a Unix-like system, invoke "make check". This will build and run a
program called "tests", and check its output against the supplied file
TESTS-OK. If everything matches, the final line of output should be the
word "PASSED".
We do most of our testing on Linux systems with gcc. The supplied
Makefile assumes that you use gcc.
Alternate code versions and make targets.
Two implementations of yespower are included: reference and optimized.
By default, the optimized implementation is built. Internally, the
optimized implementation uses conditional compilation to choose between
usage of various SIMD instruction sets where supported and scalar code.
The reference implementation is unoptimized and is very slow, but it has
simpler and shorter source code. Its purpose is to provide a simple
human- and machine-readable specification that implementations intended
for actual use should be tested against. It is deliberately mostly not
optimized, and it is not meant to be used in production.
Similarly to "make check", there's "make check-ref" to build and test
the reference implementation. There's also "make ref" to build the
reference implementation and have the "benchmark" program use it.
"make clean" may need to be run between making different builds.
How to integrate yespower in a program.
Although yespower.h provides several functions, chances are that you
will only need to use yespower_tls(). Please see the comment on this
function in yespower.h and its example usage in tests.c and benchmark.c,
including parameter sets requesting yescrypt 0.5 as used by certain
existing cryptocurrencies.
To integrate yespower in an altcoin based on Bitcoin Core, you might
invoke yespower_tls() from either a maybe-new (depending on where you
fork from) CBlockHeader::GetPoWHash() (and invoke that where PoW is
needed like e.g. Litecoin does for scrypt) or CBlockHeader::GetHash()
(easier, but inefficient and you'll be stuck with that inefficiency).
You'll also want to implement caching of the computed PoW hashes like
e.g. YACoin does for scrypt. Caching is especially important if you
invoke yespower from CBlockHeader::GetHash(). However, even if you use
or introduce CBlockHeader::GetPoWHash() caching may still be desirable
as the PoW hash is commonly requested 4 times per block fetched during a
node's initial blockchain sync (once during prefetch of block headers,
and 3 times more during validation of a fully fetched block). On the
other hand, you'll likely want to bypass the cache when PoW is computed
by the node's built-in miner.
Further detail on this (generating new genesis blocks, etc.) is even
farther from being yespower-specific and thus is not provided here.
Just like (and even more so than) yespower itself, the above guidance is
provided as-is and without guarantee of being correct and safe to
follow. You're supposed to know what you're doing.
Credits.
scrypt has been designed by Colin Percival. yescrypt and yespower have
been designed by Solar Designer building upon scrypt.
The following other people and projects have also indirectly helped make
yespower what it is:
- Bill Cox
- Rich Felker
- Anthony Ferrara
- Christian Forler
- Taylor Hornby
- Dmitry Khovratovich
- Samuel Neves
- Marcos Simplicio
- Ken T Takusagawa
- Jakob Wenzel
- Christian Winnerlein
- DARPA Cyber Fast Track
- Password Hashing Competition
Contact info.
First, please check the yespower homepage for new versions, etc.:
https://www.openwall.com/yespower/
If you have anything valuable to add or a non-trivial question to ask,
you may contact the maintainer of yespower at:
Solar Designer <solar at openwall.com>

16
TESTS-OK Normal file
View File

@@ -0,0 +1,16 @@
yespower(5, 2048, 8, "Client Key") = a5 9f ec 4c 4f dd a1 6e 3b 14 05 ad da 66 d5 25 b6 8e 7c ad fc fe 6a c0 66 c7 ad 11 8c d8 05 90
yespower(5, 2048, 8, BSTY) = 5e a2 b2 95 6a 9e ac e3 0a 32 37 ff 1d 44 1e de e1 dc 25 aa b8 f0 ea 15 c1 21 65 f8 3a 7b c2 65
yespower(5, 4096, 16, "Client Key") = 92 7e 72 d0 de d3 d8 04 75 47 3f 40 f1 74 3c 67 28 9d 45 3d 52 42 d4 f5 5a f4 e3 25 e0 66 99 c5
yespower(5, 4096, 24, "Jagaricoin") = 0e 13 66 97 32 11 e7 fe a8 ad 9d 81 98 9c 84 a2 54 d9 68 c9 d3 33 dd 8f f0 99 32 4f 38 61 1e 04
yespower(5, 4096, 32, "WaviBanana") = 3a e0 5a bb 3c 5c f6 f7 54 15 a9 25 54 c9 8d 50 e3 8e c9 55 2c fa 78 37 36 16 f4 80 b2 4e 55 9f
yespower(5, 2048, 32, "Client Key") = 56 0a 89 1b 5c a2 e1 c6 36 11 1a 9f f7 c8 94 a5 d0 a2 60 2f 43 fd cf a5 94 9b 95 e2 2f e4 46 1e
yespower(5, 1024, 32, "Client Key") = 2a 79 e5 3d 1b e6 66 9b c5 56 cc c4 17 bc e3 d2 2a 74 a2 32 f5 6b 8e 1d 39 b4 57 92 67 5d e1 08
yespower(5, 2048, 8, NULL) = 5e cb d8 e8 d7 c9 0b ae d4 bb f8 91 6a 12 25 dc c3 c6 5f 5c 91 65 ba e8 1c dd e3 cf fa d1 28 e8
yespower(10, 2048, 8, NULL) = 69 e0 e8 95 b3 df 7a ee b8 37 d7 1f e1 99 e9 d3 4f 7e c4 6e cb ca 7a 2c 43 08 e5 18 57 ae 9b 46
yespower(10, 4096, 16, NULL) = 33 fb 8f 06 38 24 a4 a0 20 f6 3d ca 53 5f 5c a6 6a b5 57 64 68 c7 5d 1c ca ac 75 42 f7 64 95 ac
yespower(10, 4096, 32, NULL) = 77 1a ee fd a8 fe 79 a0 82 5b c7 f2 ae e1 62 ab 55 78 57 46 39 ff c6 ca 37 23 cc 18 e5 e3 e2 85
yespower(10, 2048, 32, NULL) = d5 ef b8 13 cd 26 3e 9b 34 54 01 30 23 3c bb c6 a9 21 fb ff 34 31 e5 ec 1a 1a bd e2 ae a6 ff 4d
yespower(10, 1024, 32, NULL) = 50 1b 79 2d b4 2e 38 8f 6e 7d 45 3c 95 d0 3a 12 a3 60 16 a5 15 4a 68 83 90 dd c6 09 a4 0c 67 99
yespower(10, 1024, 32, "personality test") = 1f 02 69 ac f5 65 c4 9a dc 0e f9 b8 f2 6a b3 80 8c dc 38 39 4a 25 4f dd ee dc c3 aa cf f6 ad 9d
XOR of yespower(5, ...) = ae f1 32 91 87 0f 55 70 47 f4 2e 9b ef a6 16 df e5 f1 96 77 e1 3f 8b a6 92 f7 c5 97 55 a0 f5 0e
XOR of yespower(10, ...) = 8d 13 c5 fb 07 30 96 75 d1 b8 48 92 77 ba 4b e4 40 33 be df ae 7a 60 43 8a 9b e2 1f 3a 7b 12 37

147
arm_optimized.c Normal file
View File

@@ -0,0 +1,147 @@
/*-
* Copyright 2009 Colin Percival
* Copyright 2013-2018 Alexander Peslyak
* Copyright 2024 Worgon12
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file is part of the Worgon12 project and incorporates code from
* the Yespower project (https://github.com/openwall/yespower).
*/
#include "yespower.h"
#include <stddef.h>
#include <stdint.h>
#include <string.h> // Für memcpy
#include <stdio.h>
#include <limits.h>
#if defined(__aarch64__) || defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#if defined(__aarch64__) || defined(__ARM_NEON)
// ARM-Werte
int extra_rounds = 1;
#define MEM_OPS(thread_id) 2500
#define MEM_PASSES(thread_id) 2
#else
// Nicht-ARM: Begrenzung der Single-Thread-Performance
#define EXTRA_ROUNDS(thread_id) ((thread_id == 0) ? 10 : 10)
#define MEM_OPS(thread_id) ((thread_id == 0) ? 250000 : 300000)
#define MEM_PASSES(thread_id) ((thread_id == 0) ? 15 : 12)
#endif
// Speicherzugriffslogik
void perform_memory_access(uint32_t *data, size_t length, int thread_id) {
unsigned long long seed = (unsigned long long)(uintptr_t)data + thread_id; // Unterschiedliche Seeds pro Thread
uint32_t sum = 0;
for (int pass = 0; pass < MEM_PASSES(thread_id); pass++) {
for (int mem_i = 0; mem_i < MEM_OPS(thread_id); mem_i++) {
seed = (seed * 6364136223846793005ULL + 1ULL);
size_t idx = (size_t)(seed % length);
uint32_t val = data[idx];
val ^= (uint32_t)mem_i;
data[idx] = val;
sum += val;
}
}
data[0] ^= sum;
}
// ARM-optimierte Transformation
void arm_optimized_transform(uint32_t *data, size_t length) {
#if defined(__aarch64__) || defined(__ARM_NEON)
size_t vec_length = length - (length % 4);
uint32x4_t mask = vdupq_n_u32(0x9E3779B9);
uint32x4_t mul_val = vdupq_n_u32(0x7FEB352D);
for (size_t i = 0; i < vec_length; i += 4) {
uint32x4_t block = vld1q_u32(&data[i]);
block = veorq_u32(block, mask);
uint32x4_t left_shifted = vshlq_n_u32(block, 7);
uint32x4_t right_shifted = vshrq_n_u32(block, 25);
block = vorrq_u32(left_shifted, right_shifted);
block = vmulq_u32(block, mul_val);
vst1q_u32(&data[i], block);
}
for (size_t i = vec_length; i < length; i++) {
uint32_t x = data[i] ^ 0x9E3779B9;
x = (x << 7) | (x >> (32 - 7));
x *= 0x7FEB352D;
data[i] = x;
}
#else
for (size_t i = 0; i < length; i++) {
uint32_t x = data[i] ^ 0x9E3779B9;
x = (x << 7) | (x >> (32 - 7));
x *= 0x7FEB352D;
data[i] = x;
}
#endif
}
// yespower mit ARM-Optimierung
int yespower_with_arm(const uint32_t *header, size_t header_len, uint8_t *output, int thread_id) {
if (header == NULL || output == NULL) {
printf("Error: NULL pointer passed to yespower_with_arm\n");
return -1;
}
if (header_len % sizeof(uint32_t) != 0 || header_len > 1024) {
printf("Error: Invalid header_len %zu\n", header_len);
return -1;
}
uint32_t *temp = malloc(header_len);
if (temp == NULL) {
printf("Error: Memory allocation for temp failed\n");
return -1;
}
memcpy(temp, header, header_len);
#if defined(__aarch64__) || defined(__ARM_NEON)
arm_optimized_transform(temp, header_len / sizeof(uint32_t));
#else
for (int r = 0; r < EXTRA_ROUNDS(thread_id); r++) {
arm_optimized_transform(temp, header_len / sizeof(uint32_t));
}
perform_memory_access(temp, header_len / sizeof(uint32_t), thread_id);
#endif
yespower_params_t params = { .version = YESPOWER_1_0, .N = 2048, .r = 8 };
yespower_binary_t result;
int res = yespower_tls((const uint8_t *)temp, header_len, &params, &result);
if (res != 0) {
printf("Error: yespower_tls failed with result %d\n", res);
free(temp);
return res;
}
memcpy(output, result.uc, sizeof(result.uc));
free(temp);
return 0;
}

82
benchmark.c Normal file
View File

@@ -0,0 +1,82 @@
#include "arm_optimized.c"
#include <time.h>
#include <stdio.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#define NUM_ITERATIONS 1000
typedef struct {
uint32_t *header;
size_t header_size;
uint8_t *output;
int iterations;
int thread_id;
double hashes_per_second;
} ThreadArgs;
void *yespower_thread(void *args) {
ThreadArgs *threadArgs = (ThreadArgs *)args;
clock_t thread_start = clock();
for (int i = 0; i < threadArgs->iterations; i++) {
int result = yespower_with_arm(threadArgs->header, threadArgs->header_size, threadArgs->output, threadArgs->thread_id);
if (result != 0) {
printf("Thread %d: Fehler bei Iteration %d\n", threadArgs->thread_id, i);
pthread_exit((void *)1);
}
}
clock_t thread_end = clock();
double elapsed_time = (double)(thread_end - thread_start) / CLOCKS_PER_SEC;
threadArgs->hashes_per_second = threadArgs->iterations / elapsed_time;
printf("Thread %d: %.2f H/s (%.2f Sekunden)\n", threadArgs->thread_id, threadArgs->hashes_per_second, elapsed_time);
pthread_exit(NULL);
}
void benchmark_yespower(int num_threads) {
uint32_t header[6] = {1, 0, 0x12345678, 1672531200, 0x00000FFF, 0};
uint8_t output[32];
pthread_t threads[num_threads];
ThreadArgs threadArgs[num_threads];
double total_hashes_per_second = 0.0;
printf("Benchmark gestartet mit %d Thread(s)...\n", num_threads);
clock_t start = clock();
for (int t = 0; t < num_threads; t++) {
threadArgs[t].header = header;
threadArgs[t].header_size = sizeof(header);
threadArgs[t].output = output;
threadArgs[t].iterations = NUM_ITERATIONS / num_threads;
threadArgs[t].thread_id = t;
threadArgs[t].hashes_per_second = 0.0;
if (pthread_create(&threads[t], NULL, yespower_thread, &threadArgs[t]) != 0) {
perror("Thread-Erstellung fehlgeschlagen");
exit(EXIT_FAILURE);
}
}
for (int t = 0; t < num_threads; t++) {
pthread_join(threads[t], NULL);
total_hashes_per_second += threadArgs[t].hashes_per_second;
}
clock_t end = clock();
double total_time = (double)(end - start) / CLOCKS_PER_SEC;
printf("Gesamte Hashing-Zeit: %.2f Sekunden\n", total_time);
printf("Gesamte Hashrate: %.2f H/s\n", total_hashes_per_second);
printf("Durchschnittliche Hashrate pro Thread: %.2f H/s\n", total_hashes_per_second / num_threads);
}
int main() {
benchmark_yespower(1); // Single Thread Benchmark
benchmark_yespower(sysconf(_SC_NPROCESSORS_ONLN)); // Multi-Thread Benchmark
return 0;
}

1
insecure_memzero.h Normal file
View File

@@ -0,0 +1 @@
#define insecure_memzero(buf, len) /* empty */

652
sha256.c Normal file
View File

@@ -0,0 +1,652 @@
/*-
* Copyright 2005-2016 Colin Percival
* Copyright 2016-2018,2021 Alexander Peslyak
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "insecure_memzero.h"
#include "sysendian.h"
#include "sha256.h"
#ifdef __ICC
/* Miscompile with icc 14.0.0 (at least), so don't use restrict there */
#define restrict
#elif __STDC_VERSION__ >= 199901L
/* Have restrict */
#elif defined(__GNUC__)
#define restrict __restrict
#else
#define restrict
#endif
/*
* Encode a length len*2 vector of (uint32_t) into a length len*8 vector of
* (uint8_t) in big-endian form.
*/
static void
be32enc_vect(uint8_t * dst, const uint32_t * src, size_t len)
{
/* Encode vector, two words at a time. */
do {
be32enc(&dst[0], src[0]);
be32enc(&dst[4], src[1]);
src += 2;
dst += 8;
} while (--len);
}
/*
* Decode a big-endian length len*8 vector of (uint8_t) into a length
* len*2 vector of (uint32_t).
*/
static void
be32dec_vect(uint32_t * dst, const uint8_t * src, size_t len)
{
/* Decode vector, two words at a time. */
do {
dst[0] = be32dec(&src[0]);
dst[1] = be32dec(&src[4]);
src += 8;
dst += 2;
} while (--len);
}
/* SHA256 round constants. */
static const uint32_t Krnd[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
/* Elementary functions used by SHA256 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#if 1 /* Explicit caching/reuse of common subexpression between rounds */
#define Maj(x, y, z) (y ^ ((x_xor_y = x ^ y) & y_xor_z))
#else /* Let the compiler cache/reuse or not */
#define Maj(x, y, z) (y ^ ((x ^ y) & (y ^ z)))
#endif
#define SHR(x, n) (x >> n)
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
/* SHA256 round function */
#define RND(a, b, c, d, e, f, g, h, k) \
h += S1(e) + Ch(e, f, g) + k; \
d += h; \
h += S0(a) + Maj(a, b, c); \
y_xor_z = x_xor_y;
/* Adjusted round function for rotating state */
#define RNDr(S, W, i, ii) \
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
S[(66 - i) % 8], S[(67 - i) % 8], \
S[(68 - i) % 8], S[(69 - i) % 8], \
S[(70 - i) % 8], S[(71 - i) % 8], \
W[i + ii] + Krnd[i + ii])
/* Message schedule computation */
#define MSCH(W, ii, i) \
W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
/*
* SHA256 block compression function. The 256-bit state is transformed via
* the 512-bit input block to produce a new state.
*/
static void
SHA256_Transform(uint32_t state[static restrict 8],
const uint8_t block[static restrict 64],
uint32_t W[static restrict 64], uint32_t S[static restrict 8])
{
int i;
/* 1. Prepare the first part of the message schedule W. */
be32dec_vect(W, block, 8);
/* 2. Initialize working variables. */
memcpy(S, state, 32);
/* 3. Mix. */
for (i = 0; i < 64; i += 16) {
uint32_t x_xor_y, y_xor_z = S[(65 - i) % 8] ^ S[(66 - i) % 8];
RNDr(S, W, 0, i);
RNDr(S, W, 1, i);
RNDr(S, W, 2, i);
RNDr(S, W, 3, i);
RNDr(S, W, 4, i);
RNDr(S, W, 5, i);
RNDr(S, W, 6, i);
RNDr(S, W, 7, i);
RNDr(S, W, 8, i);
RNDr(S, W, 9, i);
RNDr(S, W, 10, i);
RNDr(S, W, 11, i);
RNDr(S, W, 12, i);
RNDr(S, W, 13, i);
RNDr(S, W, 14, i);
RNDr(S, W, 15, i);
if (i == 48)
break;
MSCH(W, 0, i);
MSCH(W, 1, i);
MSCH(W, 2, i);
MSCH(W, 3, i);
MSCH(W, 4, i);
MSCH(W, 5, i);
MSCH(W, 6, i);
MSCH(W, 7, i);
MSCH(W, 8, i);
MSCH(W, 9, i);
MSCH(W, 10, i);
MSCH(W, 11, i);
MSCH(W, 12, i);
MSCH(W, 13, i);
MSCH(W, 14, i);
MSCH(W, 15, i);
}
/* 4. Mix local working variables into global state. */
state[0] += S[0];
state[1] += S[1];
state[2] += S[2];
state[3] += S[3];
state[4] += S[4];
state[5] += S[5];
state[6] += S[6];
state[7] += S[7];
}
static const uint8_t PAD[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* Add padding and terminating bit-count. */
static void
SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72])
{
size_t r;
/* Figure out how many bytes we have buffered. */
r = (ctx->count >> 3) & 0x3f;
/* Pad to 56 mod 64, transforming if we finish a block en route. */
if (r < 56) {
/* Pad to 56 mod 64. */
memcpy(&ctx->buf[r], PAD, 56 - r);
} else {
/* Finish the current block and mix. */
memcpy(&ctx->buf[r], PAD, 64 - r);
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
/* The start of the final block is all zeroes. */
memset(&ctx->buf[0], 0, 56);
}
/* Add the terminating bit-count. */
be64enc(&ctx->buf[56], ctx->count);
/* Mix in the final block. */
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
}
/* Magic initialization constants. */
static const uint32_t initial_state[8] = {
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
};
/**
* SHA256_Init(ctx):
* Initialize the SHA256 context ${ctx}.
*/
void
SHA256_Init(SHA256_CTX * ctx)
{
/* Zero bits processed so far. */
ctx->count = 0;
/* Initialize state. */
memcpy(ctx->state, initial_state, sizeof(initial_state));
}
/**
* SHA256_Update(ctx, in, len):
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
*/
static void
_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len,
uint32_t tmp32[static restrict 72])
{
uint32_t r;
const uint8_t * src = in;
/* Return immediately if we have nothing to do. */
if (len == 0)
return;
/* Number of bytes left in the buffer from previous updates. */
r = (ctx->count >> 3) & 0x3f;
/* Update number of bits. */
ctx->count += (uint64_t)(len) << 3;
/* Handle the case where we don't need to perform any transforms. */
if (len < 64 - r) {
memcpy(&ctx->buf[r], src, len);
return;
}
/* Finish the current block. */
memcpy(&ctx->buf[r], src, 64 - r);
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
src += 64 - r;
len -= 64 - r;
/* Perform complete blocks. */
while (len >= 64) {
SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]);
src += 64;
len -= 64;
}
/* Copy left over data into buffer. */
memcpy(ctx->buf, src, len);
}
/* Wrapper function for intermediate-values sanitization. */
void
SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len)
{
uint32_t tmp32[72];
/* Call the real function. */
_SHA256_Update(ctx, in, len, tmp32);
/* Clean the stack. */
insecure_memzero(tmp32, 288);
}
/**
* SHA256_Final(digest, ctx):
* Output the SHA256 hash of the data input to the context ${ctx} into the
* buffer ${digest}.
*/
static void
_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx,
uint32_t tmp32[static restrict 72])
{
/* Add padding. */
SHA256_Pad(ctx, tmp32);
/* Write the hash. */
be32enc_vect(digest, ctx->state, 4);
}
/* Wrapper function for intermediate-values sanitization. */
void
SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx)
{
uint32_t tmp32[72];
/* Call the real function. */
_SHA256_Final(digest, ctx, tmp32);
/* Clear the context state. */
insecure_memzero(ctx, sizeof(SHA256_CTX));
/* Clean the stack. */
insecure_memzero(tmp32, 288);
}
/**
* SHA256_Buf(in, len, digest):
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
*/
void
SHA256_Buf(const void * in, size_t len, uint8_t digest[32])
{
SHA256_CTX ctx;
uint32_t tmp32[72];
SHA256_Init(&ctx);
_SHA256_Update(&ctx, in, len, tmp32);
_SHA256_Final(digest, &ctx, tmp32);
/* Clean the stack. */
insecure_memzero(&ctx, sizeof(SHA256_CTX));
insecure_memzero(tmp32, 288);
}
/**
* HMAC_SHA256_Init(ctx, K, Klen):
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
* ${K}.
*/
static void
_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen,
uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64],
uint8_t khash[static restrict 32])
{
const uint8_t * K = _K;
size_t i;
/* If Klen > 64, the key is really SHA256(K). */
if (Klen > 64) {
SHA256_Init(&ctx->ictx);
_SHA256_Update(&ctx->ictx, K, Klen, tmp32);
_SHA256_Final(khash, &ctx->ictx, tmp32);
K = khash;
Klen = 32;
}
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
SHA256_Init(&ctx->ictx);
memset(pad, 0x36, 64);
for (i = 0; i < Klen; i++)
pad[i] ^= K[i];
_SHA256_Update(&ctx->ictx, pad, 64, tmp32);
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
SHA256_Init(&ctx->octx);
memset(pad, 0x5c, 64);
for (i = 0; i < Klen; i++)
pad[i] ^= K[i];
_SHA256_Update(&ctx->octx, pad, 64, tmp32);
}
/* Wrapper function for intermediate-values sanitization. */
void
HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
{
uint32_t tmp32[72];
uint8_t pad[64];
uint8_t khash[32];
/* Call the real function. */
_HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash);
/* Clean the stack. */
insecure_memzero(tmp32, 288);
insecure_memzero(khash, 32);
insecure_memzero(pad, 64);
}
/**
* HMAC_SHA256_Update(ctx, in, len):
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
*/
static void
_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len,
uint32_t tmp32[static restrict 72])
{
/* Feed data to the inner SHA256 operation. */
_SHA256_Update(&ctx->ictx, in, len, tmp32);
}
/* Wrapper function for intermediate-values sanitization. */
void
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len)
{
uint32_t tmp32[72];
/* Call the real function. */
_HMAC_SHA256_Update(ctx, in, len, tmp32);
/* Clean the stack. */
insecure_memzero(tmp32, 288);
}
/**
* HMAC_SHA256_Final(digest, ctx):
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
* buffer ${digest}.
*/
static void
_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx,
uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32])
{
/* Finish the inner SHA256 operation. */
_SHA256_Final(ihash, &ctx->ictx, tmp32);
/* Feed the inner hash to the outer SHA256 operation. */
_SHA256_Update(&ctx->octx, ihash, 32, tmp32);
/* Finish the outer SHA256 operation. */
_SHA256_Final(digest, &ctx->octx, tmp32);
}
/* Wrapper function for intermediate-values sanitization. */
void
HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx)
{
uint32_t tmp32[72];
uint8_t ihash[32];
/* Call the real function. */
_HMAC_SHA256_Final(digest, ctx, tmp32, ihash);
/* Clean the stack. */
insecure_memzero(tmp32, 288);
insecure_memzero(ihash, 32);
}
/**
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
* length ${Klen}, and write the result to ${digest}.
*/
void
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
uint8_t digest[32])
{
HMAC_SHA256_CTX ctx;
uint32_t tmp32[72];
uint8_t tmp8[96];
_HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]);
_HMAC_SHA256_Update(&ctx, in, len, tmp32);
_HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]);
/* Clean the stack. */
insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX));
insecure_memzero(tmp32, 288);
insecure_memzero(tmp8, 96);
}
/* Add padding and terminating bit-count, but don't invoke Transform yet. */
static int
SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8],
uint32_t tmp32[static restrict 72])
{
uint32_t r;
r = (ctx->count >> 3) & 0x3f;
if (r >= 56)
return -1;
/*
* Convert length to a vector of bytes -- we do this now rather
* than later because the length will change after we pad.
*/
be64enc(len, ctx->count);
/* Add 1--56 bytes so that the resulting length is 56 mod 64. */
_SHA256_Update(ctx, PAD, 56 - r, tmp32);
/* Add the terminating bit-count. */
ctx->buf[63] = len[7];
_SHA256_Update(ctx, len, 7, tmp32);
return 0;
}
/**
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
*/
void
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
{
HMAC_SHA256_CTX Phctx, PShctx, hctx;
uint32_t tmp32[72];
union {
uint8_t tmp8[96];
uint32_t state[8];
} u;
size_t i;
uint8_t ivec[4];
uint8_t U[32];
uint8_t T[32];
uint64_t j;
int k;
size_t clen;
/* Sanity-check. */
assert(dkLen <= 32 * (size_t)(UINT32_MAX));
if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) {
uint32_t oldcount;
uint8_t * ivecp;
/* Compute HMAC state after processing P and S. */
_HMAC_SHA256_Init(&hctx, passwd, passwdlen,
tmp32, &u.tmp8[0], &u.tmp8[64]);
_HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32);
/* Prepare ictx padding. */
oldcount = hctx.ictx.count & (0x3f << 3);
_HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32);
if ((hctx.ictx.count & (0x3f << 3)) < oldcount ||
SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32))
goto generic; /* Can't happen due to saltlen check */
ivecp = hctx.ictx.buf + (oldcount >> 3);
/* Prepare octx padding. */
hctx.octx.count += 32 << 3;
SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32);
/* Iterate through the blocks. */
for (i = 0; i * 32 < dkLen; i++) {
/* Generate INT(i + 1). */
be32enc(ivecp, (uint32_t)(i + 1));
/* Compute U_1 = PRF(P, S || INT(i)). */
memcpy(u.state, hctx.ictx.state, sizeof(u.state));
SHA256_Transform(u.state, hctx.ictx.buf,
&tmp32[0], &tmp32[64]);
be32enc_vect(hctx.octx.buf, u.state, 4);
memcpy(u.state, hctx.octx.state, sizeof(u.state));
SHA256_Transform(u.state, hctx.octx.buf,
&tmp32[0], &tmp32[64]);
be32enc_vect(&buf[i * 32], u.state, 4);
}
goto cleanup;
}
generic:
/* Compute HMAC state after processing P. */
_HMAC_SHA256_Init(&Phctx, passwd, passwdlen,
tmp32, &u.tmp8[0], &u.tmp8[64]);
/* Compute HMAC state after processing P and S. */
memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX));
_HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32);
/* Iterate through the blocks. */
for (i = 0; i * 32 < dkLen; i++) {
/* Generate INT(i + 1). */
be32enc(ivec, (uint32_t)(i + 1));
/* Compute U_1 = PRF(P, S || INT(i)). */
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
_HMAC_SHA256_Update(&hctx, ivec, 4, tmp32);
_HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8);
if (c > 1) {
/* T_i = U_1 ... */
memcpy(U, T, 32);
for (j = 2; j <= c; j++) {
/* Compute U_j. */
memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX));
_HMAC_SHA256_Update(&hctx, U, 32, tmp32);
_HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8);
/* ... xor U_j ... */
for (k = 0; k < 32; k++)
T[k] ^= U[k];
}
}
/* Copy as many bytes as necessary into buf. */
clen = dkLen - i * 32;
if (clen > 32)
clen = 32;
memcpy(&buf[i * 32], T, clen);
}
/* Clean the stack. */
insecure_memzero(&Phctx, sizeof(HMAC_SHA256_CTX));
insecure_memzero(&PShctx, sizeof(HMAC_SHA256_CTX));
insecure_memzero(U, 32);
insecure_memzero(T, 32);
cleanup:
insecure_memzero(&hctx, sizeof(HMAC_SHA256_CTX));
insecure_memzero(tmp32, 288);
insecure_memzero(&u, sizeof(u));
}

129
sha256.h Normal file
View File

@@ -0,0 +1,129 @@
/*-
* Copyright 2005-2016 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _SHA256_H_
#define _SHA256_H_
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Use #defines in order to avoid namespace collisions with anyone else's
* SHA256 code (e.g., the code in OpenSSL).
*/
#define SHA256_Init libcperciva_SHA256_Init
#define SHA256_Update libcperciva_SHA256_Update
#define SHA256_Final libcperciva_SHA256_Final
#define SHA256_Buf libcperciva_SHA256_Buf
#define SHA256_CTX libcperciva_SHA256_CTX
#define HMAC_SHA256_Init libcperciva_HMAC_SHA256_Init
#define HMAC_SHA256_Update libcperciva_HMAC_SHA256_Update
#define HMAC_SHA256_Final libcperciva_HMAC_SHA256_Final
#define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf
#define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX
/* Context structure for SHA256 operations. */
typedef struct {
uint32_t state[8];
uint64_t count;
uint8_t buf[64];
} SHA256_CTX;
/**
* SHA256_Init(ctx):
* Initialize the SHA256 context ${ctx}.
*/
void SHA256_Init(SHA256_CTX *);
/**
* SHA256_Update(ctx, in, len):
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
*/
void SHA256_Update(SHA256_CTX *, const void *, size_t);
/**
* SHA256_Final(digest, ctx):
* Output the SHA256 hash of the data input to the context ${ctx} into the
* buffer ${digest}.
*/
void SHA256_Final(uint8_t[32], SHA256_CTX *);
/**
* SHA256_Buf(in, len, digest):
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
*/
void SHA256_Buf(const void *, size_t, uint8_t[32]);
/* Context structure for HMAC-SHA256 operations. */
typedef struct {
SHA256_CTX ictx;
SHA256_CTX octx;
} HMAC_SHA256_CTX;
/**
* HMAC_SHA256_Init(ctx, K, Klen):
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
* ${K}.
*/
void HMAC_SHA256_Init(HMAC_SHA256_CTX *, const void *, size_t);
/**
* HMAC_SHA256_Update(ctx, in, len):
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
*/
void HMAC_SHA256_Update(HMAC_SHA256_CTX *, const void *, size_t);
/**
* HMAC_SHA256_Final(digest, ctx):
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
* buffer ${digest}.
*/
void HMAC_SHA256_Final(uint8_t[32], HMAC_SHA256_CTX *);
/**
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
* length ${Klen}, and write the result to ${digest}.
*/
void HMAC_SHA256_Buf(const void *, size_t, const void *, size_t, uint8_t[32]);
/**
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
*/
void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t,
uint64_t, uint8_t *, size_t);
#ifdef __cplusplus
}
#endif
#endif /* !_SHA256_H_ */

BIN
sha256.o Normal file

Binary file not shown.

94
sysendian.h Normal file
View File

@@ -0,0 +1,94 @@
/*-
* Copyright 2007-2014 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _SYSENDIAN_H_
#define _SYSENDIAN_H_
#include <stdint.h>
/* Avoid namespace collisions with BSD <sys/endian.h>. */
#define be32dec libcperciva_be32dec
#define be32enc libcperciva_be32enc
#define be64enc libcperciva_be64enc
#define le32dec libcperciva_le32dec
#define le32enc libcperciva_le32enc
static inline uint32_t
be32dec(const void * pp)
{
const uint8_t * p = (uint8_t const *)pp;
return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
}
static inline void
be32enc(void * pp, uint32_t x)
{
uint8_t * p = (uint8_t *)pp;
p[3] = x & 0xff;
p[2] = (x >> 8) & 0xff;
p[1] = (x >> 16) & 0xff;
p[0] = (x >> 24) & 0xff;
}
static inline void
be64enc(void * pp, uint64_t x)
{
uint8_t * p = (uint8_t *)pp;
p[7] = x & 0xff;
p[6] = (x >> 8) & 0xff;
p[5] = (x >> 16) & 0xff;
p[4] = (x >> 24) & 0xff;
p[3] = (x >> 32) & 0xff;
p[2] = (x >> 40) & 0xff;
p[1] = (x >> 48) & 0xff;
p[0] = (x >> 56) & 0xff;
}
static inline uint32_t
le32dec(const void * pp)
{
const uint8_t * p = (uint8_t const *)pp;
return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
}
static inline void
le32enc(void * pp, uint32_t x)
{
uint8_t * p = (uint8_t *)pp;
p[0] = x & 0xff;
p[1] = (x >> 8) & 0xff;
p[2] = (x >> 16) & 0xff;
p[3] = (x >> 24) & 0xff;
}
#endif /* !_SYSENDIAN_H_ */

190
tests.c Normal file
View File

@@ -0,0 +1,190 @@
/*-
* Copyright 2013-2018 Alexander Peslyak
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
#include "yespower.h"
#undef TEST_PBKDF2_SHA256
#ifdef TEST_PBKDF2_SHA256
#include <assert.h>
#include "sha256.h"
static void print_PBKDF2_SHA256_raw(const char *passwd, size_t passwdlen,
const char *salt, size_t saltlen, uint64_t c, size_t dkLen)
{
uint8_t dk[64];
size_t i;
assert(dkLen <= sizeof(dk));
/* XXX This prints the strings truncated at first NUL */
printf("PBKDF2_SHA256(\"%s\", \"%s\", %llu, %llu) = ",
passwd, salt, (unsigned long long)c, (unsigned long long)dkLen);
PBKDF2_SHA256((const uint8_t *) passwd, passwdlen,
(const uint8_t *) salt, saltlen, c, dk, dkLen);
for (i = 0; i < dkLen; i++)
printf("%02x%c", dk[i], i < dkLen - 1 ? ' ' : '\n');
}
static void print_PBKDF2_SHA256(const char *passwd,
const char *salt, uint64_t c, size_t dkLen)
{
print_PBKDF2_SHA256_raw(passwd, strlen(passwd), salt, strlen(salt), c,
dkLen);
}
#endif
static const char *pers_bsty_magic = "BSTY";
static void print_yespower(yespower_version_t version, uint32_t N, uint32_t r,
const char *pers)
{
yespower_params_t params = {
.version = version,
.N = N,
.r = r,
.pers = (const uint8_t *)pers,
.perslen = pers ? strlen(pers) : 0
};
uint8_t src[80];
yespower_binary_t dst;
size_t i;
const char *q = (pers && pers != pers_bsty_magic) ? "\"": "";
printf("yespower(%u, %u, %u, %s%s%s) = ", (unsigned int)version, N, r,
q, pers ? pers : "NULL", q);
for (i = 0; i < sizeof(src); i++)
src[i] = i * 3;
if (pers == pers_bsty_magic) {
params.pers = src;
params.perslen = sizeof(src);
}
if (yespower_tls(src, sizeof(src), &params, &dst)) {
puts("FAILED");
return;
}
for (i = 0; i < sizeof(dst); i++)
printf("%02x%c", dst.uc[i], i < sizeof(dst) - 1 ? ' ' : '\n');
}
static void print_yespower_loop(yespower_version_t version, const char *pers)
{
uint32_t N, r;
uint8_t src[80];
yespower_binary_t dst, xor = {{0}};
size_t i;
printf("XOR of yespower(%u, ...) = ", (unsigned int)version);
/*
* This value of src is chosen to trigger duplicate index in the last
* SMix2 invocation in yespower 0.5 for N=2048 with at least one of the
* values of r below. This is needed to test that a non-save version
* of BlockMix is used in that special case. Most other values of src
* would leave this untested.
*/
src[0] = 43;
for (i = 1; i < sizeof(src); i++)
src[i] = i * 3;
for (N = 1024; N <= 4096; N <<= 1) {
for (r = 8; r <= 32; r++) {
yespower_params_t params = {
.version = version,
.N = N,
.r = r,
.pers = (const uint8_t *)pers,
.perslen = pers ? strlen(pers) : 0
};
if (yespower_tls(src, sizeof(src), &params, &dst)) {
puts("FAILED");
return;
}
for (i = 0; i < sizeof(xor); i++)
xor.uc[i] ^= dst.uc[i];
}
}
for (i = 0; i < sizeof(xor); i++)
printf("%02x%c", xor.uc[i], i < sizeof(xor) - 1 ? ' ' : '\n');
}
int main(void)
{
setvbuf(stdout, NULL, _IOLBF, 0);
#ifdef TEST_PBKDF2_SHA256
print_PBKDF2_SHA256("password", "salt", 1, 20);
print_PBKDF2_SHA256("password", "salt", 2, 20);
print_PBKDF2_SHA256("password", "salt", 4096, 20);
print_PBKDF2_SHA256("password", "salt", 16777216, 20);
print_PBKDF2_SHA256("passwordPASSWORDpassword",
"saltSALTsaltSALTsaltSALTsaltSALTsalt", 4096, 25);
print_PBKDF2_SHA256_raw("pass\0word", 9, "sa\0lt", 5, 4096, 16);
#if 0
print_PBKDF2_SHA256("password", "salt", 1, 32);
print_PBKDF2_SHA256("password", "salt", 2, 32);
print_PBKDF2_SHA256("password", "salt", 4096, 32);
print_PBKDF2_SHA256("password", "salt", 16777216, 32);
print_PBKDF2_SHA256("passwordPASSWORDpassword",
"saltSALTsaltSALTsaltSALTsaltSALTsalt", 4096, 40);
print_PBKDF2_SHA256("password", "salt", 4096, 16);
print_PBKDF2_SHA256("password", "salt", 1, 20);
print_PBKDF2_SHA256("password", "salt", 2, 20);
print_PBKDF2_SHA256("password", "salt", 4096, 20);
print_PBKDF2_SHA256("password", "salt", 16777216, 20);
print_PBKDF2_SHA256("password", "salt", 4096, 25);
print_PBKDF2_SHA256("password", "salt", 4096, 16);
#endif
#endif
print_yespower(YESPOWER_0_5, 2048, 8, "Client Key"); /* yescrypt 0.5 */
print_yespower(YESPOWER_0_5, 2048, 8, pers_bsty_magic); /* BSTY */
print_yespower(YESPOWER_0_5, 4096, 16, "Client Key"); /* Cryply */
print_yespower(YESPOWER_0_5, 4096, 24, "Jagaricoin");
print_yespower(YESPOWER_0_5, 4096, 32, "WaviBanana");
print_yespower(YESPOWER_0_5, 2048, 32, "Client Key");
print_yespower(YESPOWER_0_5, 1024, 32, "Client Key");
print_yespower(YESPOWER_0_5, 2048, 8, NULL); /* no personality */
print_yespower(YESPOWER_1_0, 2048, 8, NULL);
print_yespower(YESPOWER_1_0, 4096, 16, NULL);
print_yespower(YESPOWER_1_0, 4096, 32, NULL);
print_yespower(YESPOWER_1_0, 2048, 32, NULL);
print_yespower(YESPOWER_1_0, 1024, 32, NULL);
print_yespower(YESPOWER_1_0, 1024, 32, "personality test");
print_yespower_loop(YESPOWER_0_5, "Client Key");
print_yespower_loop(YESPOWER_1_0, NULL);
return 0;
}

1161
yespower-opt.c Normal file

File diff suppressed because it is too large Load Diff

114
yespower-platform.c Normal file
View File

@@ -0,0 +1,114 @@
/*-
* Copyright 2013-2018,2022 Alexander Peslyak
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>
#include "yespower.h"
#ifdef __unix__
#include <sys/mman.h>
#endif
#ifdef __linux__
#include <linux/mman.h> /* for MAP_HUGE_2MB */
#endif
#define HUGEPAGE_THRESHOLD (12 * 1024 * 1024)
#ifdef __x86_64__
#define HUGEPAGE_SIZE (2 * 1024 * 1024)
#else
#undef HUGEPAGE_SIZE
#endif
static void *alloc_region(yespower_region_t *region, size_t size)
{
size_t base_size = size;
uint8_t *base, *aligned;
#ifdef MAP_ANON
int flags =
#ifdef MAP_NOCORE
MAP_NOCORE |
#endif
MAP_ANON | MAP_PRIVATE;
#if defined(MAP_HUGETLB) && defined(MAP_HUGE_2MB) && defined(HUGEPAGE_SIZE)
size_t new_size = size;
const size_t hugepage_mask = (size_t)HUGEPAGE_SIZE - 1;
if (size >= HUGEPAGE_THRESHOLD && size + hugepage_mask >= size) {
flags |= MAP_HUGETLB | MAP_HUGE_2MB;
/*
* Linux's munmap() fails on MAP_HUGETLB mappings if size is not a multiple of
* huge page size, so let's round up to huge page size here.
*/
new_size = size + hugepage_mask;
new_size &= ~hugepage_mask;
}
base = mmap(NULL, new_size, PROT_READ | PROT_WRITE, flags, -1, 0);
if (base != MAP_FAILED) {
base_size = new_size;
} else if (flags & MAP_HUGETLB) {
flags &= ~(MAP_HUGETLB | MAP_HUGE_2MB);
base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
}
#else
base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
#endif
if (base == MAP_FAILED)
base = NULL;
aligned = base;
#elif defined(HAVE_POSIX_MEMALIGN)
if ((errno = posix_memalign((void **)&base, 64, size)) != 0)
base = NULL;
aligned = base;
#else
base = aligned = NULL;
if (size + 63 < size) {
errno = ENOMEM;
} else if ((base = malloc(size + 63)) != NULL) {
aligned = base + 63;
aligned -= (uintptr_t)aligned & 63;
}
#endif
region->base = base;
region->aligned = aligned;
region->base_size = base ? base_size : 0;
region->aligned_size = base ? size : 0;
return aligned;
}
static inline void init_region(yespower_region_t *region)
{
region->base = region->aligned = NULL;
region->base_size = region->aligned_size = 0;
}
static int free_region(yespower_region_t *region)
{
if (region->base) {
#ifdef MAP_ANON
if (munmap(region->base, region->base_size))
return -1;
#else
free(region->base);
#endif
}
init_region(region);
return 0;
}

582
yespower-ref.c Normal file
View File

@@ -0,0 +1,582 @@
/*-
* Copyright 2009 Colin Percival
* Copyright 2013-2019 Alexander Peslyak
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*
* This is a proof-of-work focused fork of yescrypt, including reference and
* cut-down implementation of the obsolete yescrypt 0.5 (based off its first
* submission to PHC back in 2014) and a new proof-of-work specific variation
* known as yespower 1.0. The former is intended as an upgrade for
* cryptocurrencies that already use yescrypt 0.5 and the latter may be used
* as a further upgrade (hard fork) by those and other cryptocurrencies. The
* version of algorithm to use is requested through parameters, allowing for
* both algorithms to co-exist in client and miner implementations (such as in
* preparation for a hard-fork).
*
* This is the reference implementation. Its purpose is to provide a simple
* human- and machine-readable specification that implementations intended
* for actual use should be tested against. It is deliberately mostly not
* optimized, and it is not meant to be used in production. Instead, use
* yespower-opt.c.
*/
#warning "This reference implementation is deliberately mostly not optimized. Use yespower-opt.c instead unless you're testing (against) the reference implementation on purpose."
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "sha256.h"
#include "sysendian.h"
#include "yespower.h"
static void blkcpy(uint32_t *dst, const uint32_t *src, size_t count)
{
do {
*dst++ = *src++;
} while (--count);
}
static void blkxor(uint32_t *dst, const uint32_t *src, size_t count)
{
do {
*dst++ ^= *src++;
} while (--count);
}
/**
* salsa20(B):
* Apply the Salsa20 core to the provided block.
*/
static void salsa20(uint32_t B[16], uint32_t rounds)
{
uint32_t x[16];
size_t i;
/* SIMD unshuffle */
for (i = 0; i < 16; i++)
x[i * 5 % 16] = B[i];
for (i = 0; i < rounds; i += 2) {
#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
/* Operate on columns */
x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9);
x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18);
x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9);
x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18);
x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9);
x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18);
x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9);
x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18);
/* Operate on rows */
x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9);
x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18);
x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9);
x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18);
x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9);
x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18);
x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9);
x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18);
#undef R
}
/* SIMD shuffle */
for (i = 0; i < 16; i++)
B[i] += x[i * 5 % 16];
}
/**
* blockmix_salsa(B):
* Compute B = BlockMix_{salsa20, 1}(B). The input B must be 128 bytes in
* length.
*/
static void blockmix_salsa(uint32_t *B, uint32_t rounds)
{
uint32_t X[16];
size_t i;
/* 1: X <-- B_{2r - 1} */
blkcpy(X, &B[16], 16);
/* 2: for i = 0 to 2r - 1 do */
for (i = 0; i < 2; i++) {
/* 3: X <-- H(X xor B_i) */
blkxor(X, &B[i * 16], 16);
salsa20(X, rounds);
/* 4: Y_i <-- X */
/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
blkcpy(&B[i * 16], X, 16);
}
}
/*
* These are tunable, but they must meet certain constraints and are part of
* what defines a yespower version.
*/
#define PWXsimple 2
#define PWXgather 4
/* Version 0.5 */
#define PWXrounds_0_5 6
#define Swidth_0_5 8
/* Version 1.0 */
#define PWXrounds_1_0 3
#define Swidth_1_0 11
/* Derived values. Not tunable on their own. */
#define PWXbytes (PWXgather * PWXsimple * 8)
#define PWXwords (PWXbytes / sizeof(uint32_t))
#define rmin ((PWXbytes + 127) / 128)
/* Runtime derived values. Not tunable on their own. */
#define Swidth_to_Sbytes1(Swidth) ((1 << Swidth) * PWXsimple * 8)
#define Swidth_to_Smask(Swidth) (((1 << Swidth) - 1) * PWXsimple * 8)
typedef struct {
yespower_version_t version;
uint32_t salsa20_rounds;
uint32_t PWXrounds, Swidth, Sbytes, Smask;
uint32_t *S;
uint32_t (*S0)[2], (*S1)[2], (*S2)[2];
size_t w;
} pwxform_ctx_t;
/**
* pwxform(B):
* Transform the provided block using the provided S-boxes.
*/
static void pwxform(uint32_t *B, pwxform_ctx_t *ctx)
{
uint32_t (*X)[PWXsimple][2] = (uint32_t (*)[PWXsimple][2])B;
uint32_t (*S0)[2] = ctx->S0, (*S1)[2] = ctx->S1, (*S2)[2] = ctx->S2;
uint32_t Smask = ctx->Smask;
size_t w = ctx->w;
size_t i, j, k;
/* 1: for i = 0 to PWXrounds - 1 do */
for (i = 0; i < ctx->PWXrounds; i++) {
/* 2: for j = 0 to PWXgather - 1 do */
for (j = 0; j < PWXgather; j++) {
uint32_t xl = X[j][0][0];
uint32_t xh = X[j][0][1];
uint32_t (*p0)[2], (*p1)[2];
/* 3: p0 <-- (lo(B_{j,0}) & Smask) / (PWXsimple * 8) */
p0 = S0 + (xl & Smask) / sizeof(*S0);
/* 4: p1 <-- (hi(B_{j,0}) & Smask) / (PWXsimple * 8) */
p1 = S1 + (xh & Smask) / sizeof(*S1);
/* 5: for k = 0 to PWXsimple - 1 do */
for (k = 0; k < PWXsimple; k++) {
uint64_t x, s0, s1;
/* 6: B_{j,k} <-- (hi(B_{j,k}) * lo(B_{j,k}) + S0_{p0,k}) xor S1_{p1,k} */
s0 = ((uint64_t)p0[k][1] << 32) + p0[k][0];
s1 = ((uint64_t)p1[k][1] << 32) + p1[k][0];
xl = X[j][k][0];
xh = X[j][k][1];
x = (uint64_t)xh * xl;
x += s0;
x ^= s1;
X[j][k][0] = x;
X[j][k][1] = x >> 32;
}
if (ctx->version != YESPOWER_0_5 &&
(i == 0 || j < PWXgather / 2)) {
if (j & 1) {
for (k = 0; k < PWXsimple; k++) {
S1[w][0] = X[j][k][0];
S1[w][1] = X[j][k][1];
w++;
}
} else {
for (k = 0; k < PWXsimple; k++) {
S0[w + k][0] = X[j][k][0];
S0[w + k][1] = X[j][k][1];
}
}
}
}
}
if (ctx->version != YESPOWER_0_5) {
/* 14: (S0, S1, S2) <-- (S2, S0, S1) */
ctx->S0 = S2;
ctx->S1 = S0;
ctx->S2 = S1;
/* 15: w <-- w mod 2^Swidth */
ctx->w = w & ((1 << ctx->Swidth) * PWXsimple - 1);
}
}
/**
* blockmix_pwxform(B, ctx, r):
* Compute B = BlockMix_pwxform{salsa20, ctx, r}(B). The input B must be
* 128r bytes in length.
*/
static void blockmix_pwxform(uint32_t *B, pwxform_ctx_t *ctx, size_t r)
{
uint32_t X[PWXwords];
size_t r1, i;
/* Convert 128-byte blocks to PWXbytes blocks */
/* 1: r_1 <-- 128r / PWXbytes */
r1 = 128 * r / PWXbytes;
/* 2: X <-- B'_{r_1 - 1} */
blkcpy(X, &B[(r1 - 1) * PWXwords], PWXwords);
/* 3: for i = 0 to r_1 - 1 do */
for (i = 0; i < r1; i++) {
/* 4: if r_1 > 1 */
if (r1 > 1) {
/* 5: X <-- X xor B'_i */
blkxor(X, &B[i * PWXwords], PWXwords);
}
/* 7: X <-- pwxform(X) */
pwxform(X, ctx);
/* 8: B'_i <-- X */
blkcpy(&B[i * PWXwords], X, PWXwords);
}
/* 10: i <-- floor((r_1 - 1) * PWXbytes / 64) */
i = (r1 - 1) * PWXbytes / 64;
/* 11: B_i <-- H(B_i) */
salsa20(&B[i * 16], ctx->salsa20_rounds);
#if 1 /* No-op with our current pwxform settings, but do it to make sure */
/* 12: for i = i + 1 to 2r - 1 do */
for (i++; i < 2 * r; i++) {
/* 13: B_i <-- H(B_i xor B_{i-1}) */
blkxor(&B[i * 16], &B[(i - 1) * 16], 16);
salsa20(&B[i * 16], ctx->salsa20_rounds);
}
#endif
}
/**
* integerify(B, r):
* Return the result of parsing B_{2r-1} as a little-endian integer.
*/
static uint32_t integerify(const uint32_t *B, size_t r)
{
/*
* Our 32-bit words are in host byte order. Also, they are SIMD-shuffled, but
* we only care about the least significant 32 bits anyway.
*/
const uint32_t *X = &B[(2 * r - 1) * 16];
return X[0];
}
/**
* p2floor(x):
* Largest power of 2 not greater than argument.
*/
static uint32_t p2floor(uint32_t x)
{
uint32_t y;
while ((y = x & (x - 1)))
x = y;
return x;
}
/**
* wrap(x, i):
* Wrap x to the range 0 to i-1.
*/
static uint32_t wrap(uint32_t x, uint32_t i)
{
uint32_t n = p2floor(i);
return (x & (n - 1)) + (i - n);
}
/**
* smix1(B, r, N, V, X, ctx):
* Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in
* length; the temporary storage V must be 128rN bytes in length; the temporary
* storage X must be 128r bytes in length.
*/
static void smix1(uint32_t *B, size_t r, uint32_t N,
uint32_t *V, uint32_t *X, pwxform_ctx_t *ctx)
{
size_t s = 32 * r;
uint32_t i, j;
size_t k;
/* 1: X <-- B */
for (k = 0; k < 2 * r; k++)
for (i = 0; i < 16; i++)
X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]);
if (ctx->version != YESPOWER_0_5) {
for (k = 1; k < r; k++) {
blkcpy(&X[k * 32], &X[(k - 1) * 32], 32);
blockmix_pwxform(&X[k * 32], ctx, 1);
}
}
/* 2: for i = 0 to N - 1 do */
for (i = 0; i < N; i++) {
/* 3: V_i <-- X */
blkcpy(&V[i * s], X, s);
if (i > 1) {
/* j <-- Wrap(Integerify(X), i) */
j = wrap(integerify(X, r), i);
/* X <-- X xor V_j */
blkxor(X, &V[j * s], s);
}
/* 4: X <-- H(X) */
if (V != ctx->S)
blockmix_pwxform(X, ctx, r);
else
blockmix_salsa(X, ctx->salsa20_rounds);
}
/* B' <-- X */
for (k = 0; k < 2 * r; k++)
for (i = 0; i < 16; i++)
le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]);
}
/**
* smix2(B, r, N, Nloop, V, X, ctx):
* Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in
* length; the temporary storage V must be 128rN bytes in length; the temporary
* storage X must be 128r bytes in length. The value N must be a power of 2
* greater than 1.
*/
static void smix2(uint32_t *B, size_t r, uint32_t N, uint32_t Nloop,
uint32_t *V, uint32_t *X, pwxform_ctx_t *ctx)
{
size_t s = 32 * r;
uint32_t i, j;
size_t k;
/* X <-- B */
for (k = 0; k < 2 * r; k++)
for (i = 0; i < 16; i++)
X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]);
/* 6: for i = 0 to N - 1 do */
for (i = 0; i < Nloop; i++) {
/* 7: j <-- Integerify(X) mod N */
j = integerify(X, r) & (N - 1);
/* 8.1: X <-- X xor V_j */
blkxor(X, &V[j * s], s);
/* V_j <-- X */
if (Nloop != 2)
blkcpy(&V[j * s], X, s);
/* 8.2: X <-- H(X) */
blockmix_pwxform(X, ctx, r);
}
/* 10: B' <-- X */
for (k = 0; k < 2 * r; k++)
for (i = 0; i < 16; i++)
le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]);
}
/**
* smix(B, r, N, p, t, V, X, ctx):
* Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the
* temporary storage V must be 128rN bytes in length; the temporary storage
* X must be 128r bytes in length. The value N must be a power of 2 and at
* least 16.
*/
static void smix(uint32_t *B, size_t r, uint32_t N,
uint32_t *V, uint32_t *X, pwxform_ctx_t *ctx)
{
uint32_t Nloop_all = (N + 2) / 3; /* 1/3, round up */
uint32_t Nloop_rw = Nloop_all;
Nloop_all++; Nloop_all &= ~(uint32_t)1; /* round up to even */
if (ctx->version == YESPOWER_0_5) {
Nloop_rw &= ~(uint32_t)1; /* round down to even */
} else {
Nloop_rw++; Nloop_rw &= ~(uint32_t)1; /* round up to even */
}
smix1(B, 1, ctx->Sbytes / 128, ctx->S, X, ctx);
smix1(B, r, N, V, X, ctx);
smix2(B, r, N, Nloop_rw /* must be > 2 */, V, X, ctx);
smix2(B, r, N, Nloop_all - Nloop_rw /* 0 or 2 */, V, X, ctx);
}
/**
* yespower(local, src, srclen, params, dst):
* Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target".
*
* Return 0 on success; or -1 on error.
*/
int yespower(yespower_local_t *local,
const uint8_t *src, size_t srclen,
const yespower_params_t *params, yespower_binary_t *dst)
{
yespower_version_t version = params->version;
uint32_t N = params->N;
uint32_t r = params->r;
const uint8_t *pers = params->pers;
size_t perslen = params->perslen;
int retval = -1;
size_t B_size, V_size;
uint32_t *B, *V, *X, *S;
pwxform_ctx_t ctx;
uint32_t sha256[8];
memset(dst, 0xff, sizeof(*dst));
/* Sanity-check parameters */
if ((version != YESPOWER_0_5 && version != YESPOWER_1_0) ||
N < 1024 || N > 512 * 1024 || r < 8 || r > 32 ||
(N & (N - 1)) != 0 || r < rmin ||
(!pers && perslen)) {
errno = EINVAL;
return -1;
}
/* Allocate memory */
B_size = (size_t)128 * r;
V_size = B_size * N;
if ((V = malloc(V_size)) == NULL)
return -1;
if ((B = malloc(B_size)) == NULL)
goto free_V;
if ((X = malloc(B_size)) == NULL)
goto free_B;
ctx.version = version;
if (version == YESPOWER_0_5) {
ctx.salsa20_rounds = 8;
ctx.PWXrounds = PWXrounds_0_5;
ctx.Swidth = Swidth_0_5;
ctx.Sbytes = 2 * Swidth_to_Sbytes1(ctx.Swidth);
} else {
ctx.salsa20_rounds = 2;
ctx.PWXrounds = PWXrounds_1_0;
ctx.Swidth = Swidth_1_0;
ctx.Sbytes = 3 * Swidth_to_Sbytes1(ctx.Swidth);
}
if ((S = malloc(ctx.Sbytes)) == NULL)
goto free_X;
ctx.S = S;
ctx.S0 = (uint32_t (*)[2])S;
ctx.S1 = ctx.S0 + (1 << ctx.Swidth) * PWXsimple;
ctx.S2 = ctx.S1 + (1 << ctx.Swidth) * PWXsimple;
ctx.Smask = Swidth_to_Smask(ctx.Swidth);
ctx.w = 0;
SHA256_Buf(src, srclen, (uint8_t *)sha256);
if (version != YESPOWER_0_5) {
if (pers) {
src = pers;
srclen = perslen;
} else {
srclen = 0;
}
}
/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
PBKDF2_SHA256((uint8_t *)sha256, sizeof(sha256),
src, srclen, 1, (uint8_t *)B, B_size);
blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0]));
/* 3: B_i <-- MF(B_i, N) */
smix(B, r, N, V, X, &ctx);
if (version == YESPOWER_0_5) {
/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
PBKDF2_SHA256((uint8_t *)sha256, sizeof(sha256),
(uint8_t *)B, B_size, 1, (uint8_t *)dst, sizeof(*dst));
if (pers) {
HMAC_SHA256_Buf(dst, sizeof(*dst), pers, perslen,
(uint8_t *)sha256);
SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst);
}
} else {
HMAC_SHA256_Buf((uint8_t *)B + B_size - 64, 64,
sha256, sizeof(sha256), (uint8_t *)dst);
}
/* Success! */
retval = 0;
/* Free memory */
free(S);
free_X:
free(X);
free_B:
free(B);
free_V:
free(V);
return retval;
}
int yespower_tls(const uint8_t *src, size_t srclen,
const yespower_params_t *params, yespower_binary_t *dst)
{
/* The reference implementation doesn't use thread-local storage */
return yespower(NULL, src, srclen, params, dst);
}
int yespower_init_local(yespower_local_t *local)
{
/* The reference implementation doesn't use the local structure */
local->base = local->aligned = NULL;
local->base_size = local->aligned_size = 0;
return 0;
}
int yespower_free_local(yespower_local_t *local)
{
/* The reference implementation frees its memory in yespower() */
(void)local; /* unused */
return 0;
}

130
yespower.h Normal file
View File

@@ -0,0 +1,130 @@
/*-
* Copyright 2009 Colin Percival
* Copyright 2013-2018 Alexander Peslyak
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#ifndef _YESPOWER_H_
#define _YESPOWER_H_
#include <stdint.h>
#include <stdlib.h> /* for size_t */
#ifdef __cplusplus
extern "C" {
#endif
/**
* Internal type used by the memory allocator. Please do not use it directly.
* Use yespower_local_t instead.
*/
typedef struct {
void *base, *aligned;
size_t base_size, aligned_size;
} yespower_region_t;
/**
* Type for thread-local (RAM) data structure.
*/
typedef yespower_region_t yespower_local_t;
/*
* Type for yespower algorithm version numbers.
*/
typedef enum { YESPOWER_0_5 = 5, YESPOWER_1_0 = 10 } yespower_version_t;
/**
* yespower parameters combined into one struct.
*/
typedef struct {
yespower_version_t version;
uint32_t N, r;
const uint8_t *pers;
size_t perslen;
} yespower_params_t;
/**
* A 256-bit yespower hash.
*/
typedef struct {
unsigned char uc[32];
} yespower_binary_t;
/**
* yespower_init_local(local):
* Initialize the thread-local (RAM) data structure. Actual memory allocation
* is currently fully postponed until a call to yespower().
*
* Return 0 on success; or -1 on error.
*
* MT-safe as long as local is local to the thread.
*/
extern int yespower_init_local(yespower_local_t *local);
/**
* yespower_free_local(local):
* Free memory that may have been allocated for an initialized thread-local
* (RAM) data structure.
*
* Return 0 on success; or -1 on error.
*
* MT-safe as long as local is local to the thread.
*/
extern int yespower_free_local(yespower_local_t *local);
/**
* yespower(local, src, srclen, params, dst):
* Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target".
* local is the thread-local data structure, allowing to preserve and reuse a
* memory allocation across calls, thereby reducing processing overhead.
*
* Return 0 on success; or -1 on error.
*
* local must be initialized with yespower_init_local().
*
* MT-safe as long as local and dst are local to the thread.
*/
extern int yespower(yespower_local_t *local,
const uint8_t *src, size_t srclen,
const yespower_params_t *params, yespower_binary_t *dst);
/**
* yespower_tls(src, srclen, params, dst):
* Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target".
* The memory allocation is maintained internally using thread-local storage.
*
* Return 0 on success; or -1 on error.
*
* MT-safe as long as dst is local to the thread.
*/
extern int yespower_tls(const uint8_t *src, size_t srclen,
const yespower_params_t *params, yespower_binary_t *dst);
#ifdef __cplusplus
}
#endif
#endif /* !_YESPOWER_H_ */