/*-
 * Copyright 2009 Colin Percival
 * Copyright 2013-2018 Alexander Peslyak
 * Copyright 2024 Worgon12
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file is part of the Worgon12 project and incorporates code from
 * the Yespower project (https://github.com/openwall/yespower).
 */

#include "yespower.h"

#include <stdio.h>  /* printf */
#include <stdint.h> /* uint8_t, uint32_t, uintptr_t */
#include <string.h> /* memcpy */
#include <stdlib.h> /* malloc, free */
#include <stddef.h> /* size_t */

#if defined(__aarch64__) || defined(__ARM_NEON)
#include <arm_neon.h>
#endif

#if defined(__aarch64__) || defined(__ARM_NEON)
/* ARM values. */
/* Not referenced anywhere in this file; kept because it has external linkage. */
int extra_rounds = 1;
#define MEM_OPS(thread_id) 2500
#define MEM_PASSES(thread_id) 2
#else
/* Non-ARM: limit single-thread performance. */
#define EXTRA_ROUNDS(thread_id) (((thread_id) == 0) ? 10 : 10)
#define MEM_OPS(thread_id) (((thread_id) == 0) ? 250000 : 300000)
#define MEM_PASSES(thread_id) (((thread_id) == 0) ? 15 : 12)
#endif

/* Largest header size, in bytes, that yespower_with_arm() accepts. */
enum { MAX_HEADER_LEN = 1024 };

/*
 * Memory access logic.
 *
 * Runs MEM_PASSES(thread_id) passes of MEM_OPS(thread_id) read-modify-write
 * operations over data[0..length). Each operation picks an index with a
 * 64-bit linear congruential generator, XORs the word at that index with the
 * inner loop counter and writes it back. The running sum of all written
 * values is XORed into data[0] at the end.
 *
 * data      - buffer that is modified in place
 * length    - number of 32-bit words in data
 * thread_id - added to the seed and used to select the pass/op counts
 *
 * Does nothing when data is NULL or length is 0; without this guard the
 * index computation would divide by zero and data[0] would be out of bounds.
 *
 * NOTE(review): the seed is derived from the address of data, so which words
 * get modified depends on where the buffer lives in memory. Confirm whether
 * this address dependence is intended.
 */
void perform_memory_access(uint32_t *data, size_t length, int thread_id)
{
    if (data == NULL || length == 0) {
        return;
    }

    /* Different seed per thread. */
    unsigned long long seed = (unsigned long long)(uintptr_t)data + thread_id;
    uint32_t sum = 0;

    for (int pass = 0; pass < MEM_PASSES(thread_id); pass++) {
        for (int mem_i = 0; mem_i < MEM_OPS(thread_id); mem_i++) {
            seed = seed * 6364136223846793005ULL + 1ULL;

            size_t idx = (size_t)(seed % length);
            uint32_t val = data[idx] ^ (uint32_t)mem_i;

            data[idx] = val;
            sum += val;
        }
    }

    data[0] ^= sum;
}

/*
 * Scalar form of the per-word mix: XOR with 0x9E3779B9, rotate left by 7
 * bits, multiply by 0x7FEB352D (modulo 2^32).
 */
static inline uint32_t mix_word(uint32_t x)
{
    x ^= UINT32_C(0x9E3779B9);
    x = (x << 7) | (x >> (32 - 7));
    x *= UINT32_C(0x7FEB352D);
    return x;
}

/*
 * ARM-optimized transformation.
 *
 * Applies the per-word mix (see mix_word) to every element of
 * data[0..length) in place. When NEON is available the words are processed
 * four at a time and only the remaining 0-3 words go through the scalar
 * path; otherwise every word goes through the scalar path.
 *
 * data   - buffer that is modified in place
 * length - number of 32-bit words in data
 */
void arm_optimized_transform(uint32_t *data, size_t length)
{
    size_t i = 0;

#if defined(__aarch64__) || defined(__ARM_NEON)
    const size_t vec_length = length - (length % 4);
    const uint32x4_t mask = vdupq_n_u32(0x9E3779B9);
    const uint32x4_t mul_val = vdupq_n_u32(0x7FEB352D);

    for (; i < vec_length; i += 4) {
        uint32x4_t block = vld1q_u32(&data[i]);

        block = veorq_u32(block, mask);
        /* Rotate left by 7: combine the two shifted halves. */
        block = vorrq_u32(vshlq_n_u32(block, 7), vshrq_n_u32(block, 25));
        block = vmulq_u32(block, mul_val);

        vst1q_u32(&data[i], block);
    }
#endif

    /* Scalar tail (or the whole buffer when NEON is not available). */
    for (; i < length; i++) {
        data[i] = mix_word(data[i]);
    }
}

/*
 * yespower with ARM optimization.
 *
 * Copies the header into a scratch buffer, pre-processes the copy and hashes
 * it with yespower_tls() using version YESPOWER_1_0, N = 2048, r = 8.
 *
 * Pre-processing differs per build:
 *   - ARM/NEON builds: one call to arm_optimized_transform().
 *   - other builds:    EXTRA_ROUNDS(thread_id) calls to
 *                      arm_optimized_transform(), then
 *                      perform_memory_access().
 *
 * header     - input data; not modified
 * header_len - size of header in bytes; must be a multiple of
 *              sizeof(uint32_t) and at most MAX_HEADER_LEN
 * output     - receives sizeof(yespower_binary_t.uc) bytes on success
 * thread_id  - selects the tuning values on non-ARM builds; unused on ARM
 *
 * Returns 0 on success, -1 on invalid arguments or allocation failure, or
 * the non-zero result of yespower_tls() if hashing fails. Errors are
 * reported on stdout.
 */
int yespower_with_arm(const uint32_t *header, size_t header_len, uint8_t *output, int thread_id)
{
    if (header == NULL || output == NULL) {
        printf("Error: NULL pointer passed to yespower_with_arm\n");
        return -1;
    }

    if (header_len % sizeof(uint32_t) != 0 || header_len > MAX_HEADER_LEN) {
        printf("Error: Invalid header_len %zu\n", header_len);
        return -1;
    }

    const size_t words = header_len / sizeof(uint32_t);

    /*
     * malloc(0) may legally return NULL, which would be misreported as an
     * allocation failure, so always request at least one byte.
     */
    uint32_t *temp = malloc(header_len > 0 ? header_len : 1);
    if (temp == NULL) {
        printf("Error: Memory allocation for temp failed\n");
        return -1;
    }

    memcpy(temp, header, header_len);

#if defined(__aarch64__) || defined(__ARM_NEON)
    (void)thread_id; /* no per-thread tuning on ARM */
    arm_optimized_transform(temp, words);
#else
    for (int r = 0; r < EXTRA_ROUNDS(thread_id); r++) {
        arm_optimized_transform(temp, words);
    }
    perform_memory_access(temp, words, thread_id);
#endif

    yespower_params_t params = {
        .version = YESPOWER_1_0,
        .N = 2048,
        .r = 8
    };
    yespower_binary_t result;

    int res = yespower_tls((const uint8_t *)temp, header_len, &params, &result);
    if (res != 0) {
        printf("Error: yespower_tls failed with result %d\n", res);
    } else {
        memcpy(output, result.uc, sizeof(result.uc));
    }

    /* Single cleanup point for both the success and the failure path. */
    free(temp);
    return res;
}