Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,14 @@ endif()
if(${OQS_USE_SHA2_OPENSSL})
set(SHA2_IMPL sha2/sha2_ossl.c)
else()
set(SHA2_IMPL sha2/sha2_c.c)
set(SHA2_IMPL sha2/sha2.c sha2/sha2_c.c)
if (OQS_DIST_ARM64v8_BUILD)
set(SHA2_IMPL ${SHA2_IMPL} sha2/sha2_ni.c)
set_source_files_properties(sha2/sha2_ni.c PROPERTIES COMPILE_FLAGS -mcpu=cortex-a53+crypto)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I'm just realizing that this line might be problematic. It's setting -mcpu=cortex-a53+crypto but is guarded only by if (OQS_DIST_ARM64v8_BUILD); there will certainly be many ARM64v8 CPUs other than the Cortex-A53, and even some which don't have crypto extensions (e.g., our Raspberry Pi 3Bs, which don't, versus our ROCK64s, which do). Have you tried compiling and running this on other ARM64v8 processors? We have the rasp3b's, the rock64's, as well as an Apple Silicon machine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have tried it on the roc, the raspis, and an aws instance. I'd be happy to test on apple silicon, but I would need access to a machine since I only have an intel mac. The reason I picked that flag, in particular, is that I have to specify a core in order to "turn on" crypto extensions when compiling on a system that doesn't have them (the +crypto portion of -mcpu), and a53 is the generic core used by liboqs. Without that compile flag the compiler will error saying that the extension isn't supported, but since we do a runtime check when OQS_DIST_ARM64v8_BUILD is specified, we need to have those intrinsics compiled into the library.

Copy link
Member Author

@Martyrshot Martyrshot Jul 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like there is an unrelated compile issue: when using OQS_DIST_BUILD, I get the following error:

/Users/jgoertzen/stock-oqs/liboqs/src/common/common.c:74:10: fatal error: 'sys/auxv.h' file not found
#include <sys/auxv.h>
         ^~~~~~~~~~~~
1 error generated.

If I don't use OQS_DIST_BUILD my branch builds and all tests pass. (I have tried this on both the liboqs main branch and my branch with the same results). If this is intentional then the above concern isn't an issue, otherwise, I can take a look into finding a solution.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we do want to be able to compile in that configuration, so can you look into it? I think what might be going on there is that the lines 74-92 of common.c are protected by OQS_DIST_ARM64v8_BUILD which in principle would include both Linux ARM (e.g., Raspberry Pi's) and Apple ARM, but the code in that section is solely Linux-focused; for example I think neither <sys/auxv.h> nor getauxval is available on macOS. This site shows one example of reading the CPU features on macOS. Although for Apple chips it seems like the features we want are always available (at least for now), so maybe we can simplify and just set them automatically without testing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, I'll see what I can figure out. Would you like me to create a new PR or issue specifically for detecting apple M1 cpu features?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I've done it. It runs for me on our M1, but I haven't tested to make sure it doesn't break things on our other ARMs. Can you give it a try?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, for sure! Thanks for doing that! I'll try it out right away.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made some small changes and tested on the roc, raspi, aws, and apple silicon machines, and all tests passed. I also verified on the apple silicon machine that the sha256 instructions were being used rather than the C implementation. So it should be good to go!

elseif (OQS_USE_ARM_SHA2_INSTRUCTIONS)
# Assume we are compiling native
set(SHA2_IMPL ${SHA2_IMPL} sha2/sha2_ni.c)
endif()
endif()

if(${OQS_USE_SHA3_OPENSSL})
Expand Down
22 changes: 22 additions & 0 deletions src/common/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,27 @@ static void set_available_cpu_extensions(void) {
cpu_ext_data[OQS_CPU_EXT_INIT] = 1;
}
#elif defined(OQS_DIST_ARM64v8_BUILD)
#if defined(__APPLE__)
#include <sys/sysctl.h>
/*
 * Query a macOS sysctl entry by name and reduce it to a 0/1 flag.
 *
 * feature_name: sysctl name passed verbatim to sysctlbyname().
 * Returns 0 when the sysctlbyname() call fails; otherwise 1 if the
 * integer it wrote is non-zero and 0 if it is zero.
 */
static unsigned int macos_feature_detection(const char *feature_name) {
    int value;
    size_t value_len = sizeof(value);

    /* A failed lookup is treated the same as "feature not present". */
    if (sysctlbyname(feature_name, &value, &value_len, NULL, 0) != 0) {
        return 0;
    }
    return (value != 0) ? 1 : 0;
}
/*
 * Apple ARM64 variant: fill cpu_ext_data with the extension flags.
 *
 * AES and SHA2 are set to 1 unconditionally (they are not probed here);
 * SHA3 and NEON are taken from the corresponding hw.optional.* sysctls.
 */
static void set_available_cpu_extensions(void) {
    /* Probe the optional features first, in the same order as the writes below. */
    const unsigned int sha3_present = macos_feature_detection("hw.optional.armv8_2_sha3");
    const unsigned int neon_present = macos_feature_detection("hw.optional.neon");

    /* Not queried: assumed to be present on this platform. */
    cpu_ext_data[OQS_CPU_EXT_ARM_AES] = 1;
    cpu_ext_data[OQS_CPU_EXT_ARM_SHA2] = 1;

    cpu_ext_data[OQS_CPU_EXT_ARM_SHA3] = sha3_present;
    cpu_ext_data[OQS_CPU_EXT_ARM_NEON] = neon_present;

    /* mark that this function has been called */
    cpu_ext_data[OQS_CPU_EXT_INIT] = 1;
}
#else
#include <sys/auxv.h>
#include <asm/hwcap.h>
static void set_available_cpu_extensions(void) {
Expand All @@ -90,6 +111,7 @@ static void set_available_cpu_extensions(void) {
cpu_ext_data[OQS_CPU_EXT_ARM_NEON] = 1;
}
}
#endif
#elif defined(OQS_DIST_ARM32v7_BUILD)
#include <sys/auxv.h>
#include <asm/hwcap.h>
Expand Down
132 changes: 132 additions & 0 deletions src/common/sha2/sha2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// SPDX-License-Identifier: MIT
#include <stdio.h>

#include <oqs/common.h>

#include "sha2.h"
#include "sha2_local.h"

/*
 * C_OR_NI(stmt_c, stmt_ni): execute exactly one of the two given statements.
 *
 *  - OQS_DIST_ARM64v8_BUILD: chosen at run time; stmt_ni runs when
 *    OQS_CPU_has_extension(OQS_CPU_EXT_ARM_SHA2) is true, stmt_c otherwise.
 *  - OQS_USE_ARM_SHA2_INSTRUCTIONS: always stmt_ni.
 *  - otherwise: always stmt_c.
 *
 * Every variant is wrapped in do { ... } while(0) so the macro is a single
 * statement in all build configurations (previously only the run-time
 * variant was, while the other two expanded to a bare expression). Invoke
 * it as `C_OR_NI(a, b);` -- it cannot be used as an expression.
 */
#if defined(OQS_DIST_ARM64v8_BUILD)
#define C_OR_NI(stmt_c, stmt_ni) \
    do { \
        if (OQS_CPU_has_extension(OQS_CPU_EXT_ARM_SHA2)) { \
            stmt_ni; \
        } else { \
            stmt_c; \
        } \
    } while(0)
#elif defined(OQS_USE_ARM_SHA2_INSTRUCTIONS)
#define C_OR_NI(stmt_c, stmt_ni) \
    do { \
        stmt_ni; \
    } while(0)
#else
#define C_OR_NI(stmt_c, stmt_ni) \
    do { \
        stmt_c; \
    } while(0)
#endif
/*
 * SHA-224 incremental API: init.
 * Delegates to oqs_sha2_sha224_inc_init_c, passing `state` reinterpreted
 * as a sha224ctx pointer.
 */
void OQS_SHA2_sha224_inc_init(OQS_SHA2_sha224_ctx *state) {
    sha224ctx *ctx = (sha224ctx *) state;
    oqs_sha2_sha224_inc_init_c(ctx);
}

/*
 * SHA-224 incremental API: clone.
 * Delegates to oqs_sha2_sha224_inc_ctx_clone_c with `dest` and `src`
 * reinterpreted as sha224ctx pointers (`src` stays const).
 */
void OQS_SHA2_sha224_inc_ctx_clone(OQS_SHA2_sha224_ctx *dest, const OQS_SHA2_sha224_ctx *src) {
    sha224ctx *to = (sha224ctx *) dest;
    const sha224ctx *from = (const sha224ctx *) src;
    oqs_sha2_sha224_inc_ctx_clone_c(to, from);
}

/*
 * SHA-224 incremental API: process blocks.
 * Uses C_OR_NI to pick between oqs_sha2_sha224_inc_blocks_c and
 * oqs_sha2_sha224_inc_blocks_ni; both receive the same arguments, with
 * `state` reinterpreted as a sha224ctx pointer.
 */
void OQS_SHA2_sha224_inc_blocks(OQS_SHA2_sha224_ctx *state, const uint8_t *in, size_t inblocks) {
    sha224ctx *ctx = (sha224ctx *) state;
    C_OR_NI(
        oqs_sha2_sha224_inc_blocks_c(ctx, in, inblocks),
        oqs_sha2_sha224_inc_blocks_ni(ctx, in, inblocks)
    );
}

/*
 * SHA-224 incremental API: finalize.
 * Delegates to oqs_sha2_sha224_inc_finalize_c; the `_c` routine is called
 * unconditionally (no C_OR_NI dispatch here).
 */
void OQS_SHA2_sha224_inc_finalize(uint8_t *out, OQS_SHA2_sha224_ctx *state, const uint8_t *in, size_t inlen) {
    sha224ctx *ctx = (sha224ctx *) state;
    oqs_sha2_sha224_inc_finalize_c(out, ctx, in, inlen);
}

/*
 * SHA-224 incremental API: release.
 * Delegates to oqs_sha2_sha224_inc_ctx_release_c, passing `state`
 * reinterpreted as a sha224ctx pointer.
 */
void OQS_SHA2_sha224_inc_ctx_release(OQS_SHA2_sha224_ctx *state) {
    sha224ctx *ctx = (sha224ctx *) state;
    oqs_sha2_sha224_inc_ctx_release_c(ctx);
}

/*
 * SHA-256 incremental API: init.
 * Delegates to oqs_sha2_sha256_inc_init_c, passing `state` reinterpreted
 * as a sha256ctx pointer.
 */
void OQS_SHA2_sha256_inc_init(OQS_SHA2_sha256_ctx *state) {
    sha256ctx *ctx = (sha256ctx *) state;
    oqs_sha2_sha256_inc_init_c(ctx);
}

/*
 * SHA-256 incremental API: clone.
 * Delegates to oqs_sha2_sha256_inc_ctx_clone_c with `dest` and `src`
 * reinterpreted as sha256ctx pointers (`src` stays const).
 */
void OQS_SHA2_sha256_inc_ctx_clone(OQS_SHA2_sha256_ctx *dest, const OQS_SHA2_sha256_ctx *src) {
    sha256ctx *to = (sha256ctx *) dest;
    const sha256ctx *from = (const sha256ctx *) src;
    oqs_sha2_sha256_inc_ctx_clone_c(to, from);
}

/*
 * SHA-256 incremental API: process blocks.
 * Uses C_OR_NI to pick between oqs_sha2_sha256_inc_blocks_c and
 * oqs_sha2_sha256_inc_blocks_ni; both receive the same arguments, with
 * `state` reinterpreted as a sha256ctx pointer.
 */
void OQS_SHA2_sha256_inc_blocks(OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inblocks) {
    sha256ctx *ctx = (sha256ctx *) state;
    C_OR_NI(
        oqs_sha2_sha256_inc_blocks_c(ctx, in, inblocks),
        oqs_sha2_sha256_inc_blocks_ni(ctx, in, inblocks)
    );
}

/*
 * SHA-256 incremental API: finalize.
 * Delegates to oqs_sha2_sha256_inc_finalize_c; the `_c` routine is called
 * unconditionally (no C_OR_NI dispatch here).
 */
void OQS_SHA2_sha256_inc_finalize(uint8_t *out, OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inlen) {
    sha256ctx *ctx = (sha256ctx *) state;
    oqs_sha2_sha256_inc_finalize_c(out, ctx, in, inlen);
}

/*
 * SHA-256 incremental API: release.
 * Delegates to oqs_sha2_sha256_inc_ctx_release_c, passing `state`
 * reinterpreted as a sha256ctx pointer.
 */
void OQS_SHA2_sha256_inc_ctx_release(OQS_SHA2_sha256_ctx *state) {
    sha256ctx *ctx = (sha256ctx *) state;
    oqs_sha2_sha256_inc_ctx_release_c(ctx);
}

/*
 * SHA-384 incremental API: init.
 * Delegates to oqs_sha2_sha384_inc_init_c, passing `state` reinterpreted
 * as a sha384ctx pointer.
 */
void OQS_SHA2_sha384_inc_init(OQS_SHA2_sha384_ctx *state) {
    sha384ctx *ctx = (sha384ctx *) state;
    oqs_sha2_sha384_inc_init_c(ctx);
}

/*
 * SHA-384 incremental API: clone.
 * Delegates to oqs_sha2_sha384_inc_ctx_clone_c with `dest` and `src`
 * reinterpreted as sha384ctx pointers (`src` stays const).
 */
void OQS_SHA2_sha384_inc_ctx_clone(OQS_SHA2_sha384_ctx *dest, const OQS_SHA2_sha384_ctx *src) {
    sha384ctx *to = (sha384ctx *) dest;
    const sha384ctx *from = (const sha384ctx *) src;
    oqs_sha2_sha384_inc_ctx_clone_c(to, from);
}

/*
 * SHA-384 incremental API: process blocks.
 * Delegates to oqs_sha2_sha384_inc_blocks_c; unlike the SHA-224/256
 * counterparts in this file, no C_OR_NI dispatch is used here.
 */
void OQS_SHA2_sha384_inc_blocks(OQS_SHA2_sha384_ctx *state, const uint8_t *in, size_t inblocks) {
    sha384ctx *ctx = (sha384ctx *) state;
    oqs_sha2_sha384_inc_blocks_c(ctx, in, inblocks);
}

/*
 * SHA-384 incremental API: finalize.
 * Delegates to oqs_sha2_sha384_inc_finalize_c, passing `state`
 * reinterpreted as a sha384ctx pointer and the other arguments unchanged.
 */
void OQS_SHA2_sha384_inc_finalize(uint8_t *out, OQS_SHA2_sha384_ctx *state, const uint8_t *in, size_t inlen) {
    sha384ctx *ctx = (sha384ctx *) state;
    oqs_sha2_sha384_inc_finalize_c(out, ctx, in, inlen);
}

/*
 * SHA-384 incremental API: release.
 * Delegates to oqs_sha2_sha384_inc_ctx_release_c, passing `state`
 * reinterpreted as a sha384ctx pointer.
 */
void OQS_SHA2_sha384_inc_ctx_release(OQS_SHA2_sha384_ctx *state) {
    sha384ctx *ctx = (sha384ctx *) state;
    oqs_sha2_sha384_inc_ctx_release_c(ctx);
}

/*
 * SHA-512 incremental API: init.
 * Delegates to oqs_sha2_sha512_inc_init_c, passing `state` reinterpreted
 * as a sha512ctx pointer.
 */
void OQS_SHA2_sha512_inc_init(OQS_SHA2_sha512_ctx *state) {
    sha512ctx *ctx = (sha512ctx *) state;
    oqs_sha2_sha512_inc_init_c(ctx);
}

/*
 * SHA-512 incremental API: clone.
 * Delegates to oqs_sha2_sha512_inc_ctx_clone_c with `dest` and `src`
 * reinterpreted as sha512ctx pointers (`src` stays const).
 */
void OQS_SHA2_sha512_inc_ctx_clone(OQS_SHA2_sha512_ctx *dest, const OQS_SHA2_sha512_ctx *src) {
    sha512ctx *to = (sha512ctx *) dest;
    const sha512ctx *from = (const sha512ctx *) src;
    oqs_sha2_sha512_inc_ctx_clone_c(to, from);
}

/*
 * SHA-512 incremental API: process blocks.
 * Delegates to oqs_sha2_sha512_inc_blocks_c; unlike the SHA-224/256
 * counterparts in this file, no C_OR_NI dispatch is used here.
 */
void OQS_SHA2_sha512_inc_blocks(OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inblocks) {
    sha512ctx *ctx = (sha512ctx *) state;
    oqs_sha2_sha512_inc_blocks_c(ctx, in, inblocks);
}

/*
 * SHA-512 incremental API: finalize.
 * Delegates to oqs_sha2_sha512_inc_finalize_c, passing `state`
 * reinterpreted as a sha512ctx pointer and the other arguments unchanged.
 */
void OQS_SHA2_sha512_inc_finalize(uint8_t *out, OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inlen) {
    sha512ctx *ctx = (sha512ctx *) state;
    oqs_sha2_sha512_inc_finalize_c(out, ctx, in, inlen);
}

/*
 * SHA-512 incremental API: release.
 * Delegates to oqs_sha2_sha512_inc_ctx_release_c, passing `state`
 * reinterpreted as a sha512ctx pointer.
 */
void OQS_SHA2_sha512_inc_ctx_release(OQS_SHA2_sha512_ctx *state) {
    sha512ctx *ctx = (sha512ctx *) state;
    oqs_sha2_sha512_inc_ctx_release_c(ctx);
}

/*
 * One-shot SHA-224 entry point.
 * Uses C_OR_NI to pick between oqs_sha2_sha224_c and oqs_sha2_sha224_ni;
 * both are called with (out, in, inlen) unchanged.
 */
void OQS_SHA2_sha224(uint8_t *out, const uint8_t *in, size_t inlen) {
    C_OR_NI(oqs_sha2_sha224_c(out, in, inlen), oqs_sha2_sha224_ni(out, in, inlen));
}

/*
 * One-shot SHA-256 entry point.
 * Uses C_OR_NI to pick between oqs_sha2_sha256_c and oqs_sha2_sha256_ni;
 * both are called with (out, in, inlen) unchanged.
 */
void OQS_SHA2_sha256(uint8_t *out, const uint8_t *in, size_t inlen) {
    C_OR_NI(oqs_sha2_sha256_c(out, in, inlen), oqs_sha2_sha256_ni(out, in, inlen));
}

/*
 * One-shot SHA-384 entry point.
 * Forwards (out, in, inlen) unchanged to oqs_sha2_sha384_c; no C_OR_NI
 * dispatch is used for this function.
 */
void OQS_SHA2_sha384(uint8_t *out, const uint8_t *in, size_t inlen) {
oqs_sha2_sha384_c(out, in, inlen);
}

/*
 * One-shot SHA-512 entry point.
 * Forwards (out, in, inlen) unchanged to oqs_sha2_sha512_c; no C_OR_NI
 * dispatch is used for this function.
 */
void OQS_SHA2_sha512(uint8_t *out, const uint8_t *in, size_t inlen) {
oqs_sha2_sha512_c(out, in, inlen);
}

6 changes: 6 additions & 0 deletions src/common/sha2/sha2.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@
extern "C" {
#endif

/**
 * Data structure for the state of the SHA-224 incremental hashing API.
 *
 * The struct holds a single untyped pointer.
 * NOTE(review): allocation and ownership of `ctx` are not visible from this
 * declaration -- confirm against the SHA-224 incremental functions.
 */
typedef struct {
/** Internal state (untyped pointer) */
void *ctx;
} OQS_SHA2_sha224_ctx;

/**
* \brief Process a message with SHA-256 and return the hash code in the output byte array.
*
Expand Down
Loading