Files
yespower_arm_algo_optimized/yespower-arm.c
2024-12-17 21:53:04 +01:00

148 lines
5.1 KiB
C

/*-
* Copyright 2009 Colin Percival
* Copyright 2013-2018 Alexander Peslyak
* Copyright 2024 Worgon12
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file is part of the Worgon12 project and incorporates code from
* the Yespower project (https://github.com/openwall/yespower).
*/
#include "yespower.h"
#include <stddef.h>
#include <stdint.h>
#include <string.h> // Für memcpy
#include <stdio.h>
#include <limits.h>
#if defined(__aarch64__) || defined(__ARM_NEON)
#include <arm_neon.h>
#endif
/*
 * Per-platform tuning knobs for the pre-processing stage.
 *
 * Every macro takes the caller's thread id. The ARM variants ignore it and
 * expand to a constant; the non-ARM variants pick a value depending on
 * whether the id is 0. Macro parameters are parenthesized so that any
 * expression (e.g. `a ? b : c` or `x | y`) can be passed safely.
 */
#if defined(__aarch64__) || defined(__ARM_NEON)
// ARM values
// NOTE(review): extra_rounds is not referenced by the code visible in this
// file; it is kept because it has external linkage and may be used elsewhere.
int extra_rounds = 1;
#define MEM_OPS(thread_id) 2500
#define MEM_PASSES(thread_id) 2
#else
// Non-ARM: limit single-thread performance
// Both branches of EXTRA_ROUNDS currently yield 10; the conditional form is
// kept so thread 0 can be tuned separately like the other two macros.
#define EXTRA_ROUNDS(thread_id) (((thread_id) == 0) ? 10 : 10)
#define MEM_OPS(thread_id) (((thread_id) == 0) ? 250000 : 300000)
#define MEM_PASSES(thread_id) (((thread_id) == 0) ? 15 : 12)
#endif
// Memory access logic
/*
 * Scramble `data` in place with pseudo-random read-modify-write accesses.
 *
 * Runs MEM_PASSES(thread_id) passes of MEM_OPS(thread_id) operations. Each
 * operation advances a 64-bit linear congruential generator, selects the
 * word at index (seed % length), XORs it with the operation counter, stores
 * it back and adds the new value to a running 32-bit sum. After all passes
 * the sum is XORed into data[0].
 *
 * data      - buffer of `length` 32-bit words, modified in place.
 * length    - number of words in `data`.
 * thread_id - added to the initial seed and forwarded to the MEM_* macros.
 *
 * The call is a no-op when `data` is NULL or `length` is 0 (the index
 * computation would otherwise divide by zero).
 *
 * NOTE(review): the initial seed is derived from the address of `data`, so
 * for length > 1 the result depends on where the buffer lives in memory —
 * confirm that this is intended.
 */
void perform_memory_access(uint32_t *data, size_t length, int thread_id) {
    if (data == NULL || length == 0) {
        return;
    }

    // Different seeds per thread
    unsigned long long seed = (unsigned long long)(uintptr_t)data + thread_id;
    uint32_t sum = 0;

    for (int pass = 0; pass < MEM_PASSES(thread_id); pass++) {
        for (int mem_i = 0; mem_i < MEM_OPS(thread_id); mem_i++) {
            // Unsigned 64-bit arithmetic wraps by definition.
            seed = (seed * 6364136223846793005ULL + 1ULL);
            size_t idx = (size_t)(seed % length);
            uint32_t val = data[idx];
            val ^= (uint32_t)mem_i;
            data[idx] = val;
            sum += val;
        }
    }

    data[0] ^= sum;
}
// ARM-optimized transformation
/*
 * Mix every 32-bit word of data[0..length) in place:
 *   w = data[i] XOR 0x9E3779B9; w = rotate-left(w, 7); data[i] = w * 0x7FEB352D
 * (multiplication modulo 2^32).
 *
 * When NEON is available, whole groups of four words are processed with
 * vector instructions; any remaining words (and all words on other targets)
 * go through the scalar loop, which computes the same result per word.
 *
 * data   - buffer of `length` 32-bit words, modified in place.
 * length - number of words; 0 leaves the buffer untouched.
 */
void arm_optimized_transform(uint32_t *data, size_t length) {
    size_t pos = 0;

#if defined(__aarch64__) || defined(__ARM_NEON)
    const uint32x4_t xor_lanes = vdupq_n_u32(0x9E3779B9);
    const uint32x4_t mul_lanes = vdupq_n_u32(0x7FEB352D);
    const size_t simd_end = length - (length % 4);

    while (pos < simd_end) {
        uint32x4_t lanes = veorq_u32(vld1q_u32(data + pos), xor_lanes);
        // (x << 7) | (x >> 25) is a 32-bit rotate-left by 7 in each lane.
        lanes = vorrq_u32(vshlq_n_u32(lanes, 7), vshrq_n_u32(lanes, 25));
        vst1q_u32(data + pos, vmulq_u32(lanes, mul_lanes));
        pos += 4;
    }
#endif

    // Scalar path: the tail on NEON builds, the whole buffer otherwise.
    for (; pos < length; pos++) {
        uint32_t w = data[pos] ^ 0x9E3779B9;
        w = (w << 7) | (w >> (32 - 7));
        w *= 0x7FEB352D;
        data[pos] = w;
    }
}
// yespower with ARM optimization
/*
 * Pre-process a header and hash it with yespower 1.0 (N = 2048, r = 8).
 *
 * The header is copied into a scratch buffer, run through
 * arm_optimized_transform() (once on ARM/NEON builds; EXTRA_ROUNDS(thread_id)
 * times followed by perform_memory_access() on other builds) and the result
 * is passed to yespower_tls().
 *
 * header     - input words; must not be NULL.
 * header_len - input size in BYTES; must be a multiple of sizeof(uint32_t)
 *              and at most 1024.
 * output     - receives sizeof(yespower_binary_t.uc) bytes of digest; must
 *              not be NULL and must be large enough for them.
 * thread_id  - forwarded to the non-ARM tuning macros and memory pass;
 *              unused on ARM/NEON builds.
 *
 * Returns 0 on success, -1 on invalid arguments, or the non-zero value
 * returned by yespower_tls() on hashing failure. Errors are reported on
 * stdout.
 *
 * NOTE(review): ARM and non-ARM builds feed differently pre-processed data
 * to yespower_tls(), so they will not produce the same digest for the same
 * header — confirm that this is intended.
 */
int yespower_with_arm(const uint32_t *header, size_t header_len, uint8_t *output, int thread_id) {
    // Upper bound on header_len; also sizes the on-stack scratch buffer.
    enum { max_header_len = 1024 };

    if (header == NULL || output == NULL) {
        printf("Error: NULL pointer passed to yespower_with_arm\n");
        return -1;
    }
    if (header_len % sizeof(uint32_t) != 0 || header_len > max_header_len) {
        printf("Error: Invalid header_len %zu\n", header_len);
        return -1;
    }

    // The input is bounded, so a fixed stack buffer replaces the former
    // malloc/free pair (no allocation failure path, no <stdlib.h> needed).
    uint32_t temp[max_header_len / sizeof(uint32_t)];
    const size_t words = header_len / sizeof(uint32_t);
    memcpy(temp, header, header_len);

#if defined(__aarch64__) || defined(__ARM_NEON)
    (void)thread_id; // only used by the non-ARM path
    arm_optimized_transform(temp, words);
#else
    for (int r = 0; r < EXTRA_ROUNDS(thread_id); r++) {
        arm_optimized_transform(temp, words);
    }
    // An empty buffer has no word to index; skipping avoids a modulo by zero.
    if (words != 0) {
        perform_memory_access(temp, words, thread_id);
    }
#endif

    yespower_params_t params = { .version = YESPOWER_1_0, .N = 2048, .r = 8 };
    yespower_binary_t result;
    int res = yespower_tls((const uint8_t *)temp, header_len, &params, &result);
    if (res != 0) {
        printf("Error: yespower_tls failed with result %d\n", res);
        return res;
    }

    memcpy(output, result.uc, sizeof(result.uc));
    return 0;
}