148 lines
5.1 KiB
C
148 lines
5.1 KiB
C
/*-
|
|
* Copyright 2009 Colin Percival
|
|
* Copyright 2013-2018 Alexander Peslyak
|
|
* Copyright 2024 Worgon12
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* This file is part of the Worgon12 project and incorporates code from
|
|
* the Yespower project (https://github.com/openwall/yespower).
|
|
*/
|
|
|
|
#include "yespower.h"

#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memcpy */

#if defined(__aarch64__) || defined(__ARM_NEON)
#include <arm_neon.h>
#endif
|
|
|
|
/*
 * Per-platform tuning knobs.
 *
 * Each macro takes a thread id so that thread 0 can be given a different
 * workload from the other threads. Macro parameters are fully parenthesized
 * so that any expression may be passed as the thread id.
 */
#if defined(__aarch64__) || defined(__ARM_NEON)
/* ARM values. */
int extra_rounds = 1;
#define MEM_OPS(thread_id) 2500
#define MEM_PASSES(thread_id) 2
#else
/* Non-ARM: limit single-thread performance. */
/* Both branches of EXTRA_ROUNDS are currently equal; the shape is kept so the
 * thread-0 value can be tuned independently like the other two knobs. */
#define EXTRA_ROUNDS(thread_id) (((thread_id) == 0) ? 10 : 10)
#define MEM_OPS(thread_id) (((thread_id) == 0) ? 250000 : 300000)
#define MEM_PASSES(thread_id) (((thread_id) == 0) ? 15 : 12)
#endif
|
|
|
|
/*
 * Memory access logic.
 *
 * Runs MEM_PASSES(thread_id) passes of MEM_OPS(thread_id) read-modify-write
 * operations over data[0 .. length-1]. Each operation advances a 64-bit
 * linear congruential generator, picks the word at index (seed % length),
 * XORs it with the operation counter and stores it back. The sum of all
 * values written is finally XORed into data[0].
 *
 * data      - buffer of 32-bit words, modified in place
 * length    - number of words in data
 * thread_id - selects the workload and is added to the initial seed
 *
 * A NULL or empty buffer is left untouched.
 *
 * NOTE(review): the seed is derived from the address of `data`, so the index
 * sequence (and therefore the resulting buffer contents) depends on where the
 * buffer is located in memory. Confirm that this is intended.
 */
void perform_memory_access(uint32_t *data, size_t length, int thread_id) {
    /* With no elements there is no valid index: `seed % length` would divide
     * by zero and the final write to data[0] would be out of bounds. */
    if (data == NULL || length == 0) {
        return;
    }

    /* The workload only depends on thread_id, so evaluate it once. */
    const int passes = MEM_PASSES(thread_id);
    const int ops_per_pass = MEM_OPS(thread_id);

    /* Different seeds per thread. */
    unsigned long long seed = (unsigned long long)(uintptr_t)data + thread_id;
    uint32_t sum = 0;

    for (int pass = 0; pass < passes; pass++) {
        for (int mem_i = 0; mem_i < ops_per_pass; mem_i++) {
            seed = (seed * 6364136223846793005ULL + 1ULL);
            size_t idx = (size_t)(seed % length);

            uint32_t val = data[idx];
            val ^= (uint32_t)mem_i;
            data[idx] = val;
            sum += val;
        }
    }

    /* Fold the running sum back into the buffer. */
    data[0] ^= sum;
}
|
|
|
|
/*
 * Scalar form of the per-word mixing step: XOR with 0x9E3779B9, rotate left
 * by 7 bits, then multiply by 0x7FEB352D (modulo 2^32).
 */
static inline uint32_t arm_transform_mix_word(uint32_t w) {
    w ^= 0x9E3779B9;
    w = (w << 7) | (w >> (32 - 7));
    w *= 0x7FEB352D;
    return w;
}

/*
 * ARM-optimized transformation.
 *
 * Applies the mixing step above, in place, to every word of
 * data[0 .. length-1]. When NEON is available, full groups of four words are
 * processed with vector instructions and the remaining 0-3 words use the
 * scalar helper; otherwise all words use the scalar helper.
 *
 * data   - buffer of 32-bit words, modified in place
 * length - number of words in data
 */
void arm_optimized_transform(uint32_t *data, size_t length) {
    size_t pos = 0;

#if defined(__aarch64__) || defined(__ARM_NEON)
    /* Largest multiple of four that does not exceed length. */
    const size_t simd_end = length - (length % 4);
    const uint32x4_t xor_key = vdupq_n_u32(0x9E3779B9);
    const uint32x4_t factor = vdupq_n_u32(0x7FEB352D);

    while (pos < simd_end) {
        uint32x4_t lanes = veorq_u32(vld1q_u32(&data[pos]), xor_key);

        /* Rotate left by 7: combine the two complementary shifts. */
        lanes = vorrq_u32(vshlq_n_u32(lanes, 7), vshrq_n_u32(lanes, 25));

        vst1q_u32(&data[pos], vmulq_u32(lanes, factor));
        pos += 4;
    }
#endif

    /* Scalar path: the whole buffer without NEON, otherwise just the tail. */
    while (pos < length) {
        data[pos] = arm_transform_mix_word(data[pos]);
        pos++;
    }
}
|
|
|
|
// yespower mit ARM-Optimierung
|
|
int yespower_with_arm(const uint32_t *header, size_t header_len, uint8_t *output, int thread_id) {
|
|
if (header == NULL || output == NULL) {
|
|
printf("Error: NULL pointer passed to yespower_with_arm\n");
|
|
return -1;
|
|
}
|
|
if (header_len % sizeof(uint32_t) != 0 || header_len > 1024) {
|
|
printf("Error: Invalid header_len %zu\n", header_len);
|
|
return -1;
|
|
}
|
|
|
|
uint32_t *temp = malloc(header_len);
|
|
if (temp == NULL) {
|
|
printf("Error: Memory allocation for temp failed\n");
|
|
return -1;
|
|
}
|
|
memcpy(temp, header, header_len);
|
|
|
|
#if defined(__aarch64__) || defined(__ARM_NEON)
|
|
arm_optimized_transform(temp, header_len / sizeof(uint32_t));
|
|
#else
|
|
for (int r = 0; r < EXTRA_ROUNDS(thread_id); r++) {
|
|
arm_optimized_transform(temp, header_len / sizeof(uint32_t));
|
|
}
|
|
perform_memory_access(temp, header_len / sizeof(uint32_t), thread_id);
|
|
#endif
|
|
|
|
yespower_params_t params = { .version = YESPOWER_1_0, .N = 2048, .r = 8 };
|
|
yespower_binary_t result;
|
|
|
|
int res = yespower_tls((const uint8_t *)temp, header_len, ¶ms, &result);
|
|
if (res != 0) {
|
|
printf("Error: yespower_tls failed with result %d\n", res);
|
|
free(temp);
|
|
return res;
|
|
}
|
|
|
|
memcpy(output, result.uc, sizeof(result.uc));
|
|
free(temp);
|
|
return 0;
|
|
}
|