Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 20 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
cmake_minimum_required(VERSION 3.0)

if(APPLE)
execute_process(COMMAND uname -m OUTPUT_VARIABLE arch OUTPUT_STRIP_TRAILING_WHITESPACE)
set(HOST_ARCHITECTURE "${arch}")
string(TOLOWER ${HOST_ARCHITECTURE} HOST_ARCHITECTURE)
endif()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not the correct location for this code, and at least with CMake 3.21 this code also isn't necessary for Apple ARM64 support; CMAKE_SYSTEM_PROCESSOR is correctly set to arm64 just as you're doing here. IDK about prior CMake, except that the variable CMAKE_SYSTEM_PROCESSOR has been around for a long time and seems to represent what you're trying to do here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record, CMake as added the M1 support in 3.19 according to https://cmake.org/cmake/help/latest/release/3.19.html#platforms

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While that may be true, still with CMake 3.22.0, I get this build error on an M1 MacBook Pro:

npm ERR! gyp info using node-gyp@8.4.1
npm ERR! gyp info using node@17.2.0 | darwin | arm64
npm ERR! gyp info find Python using Python version 3.9.9 found at "/opt/homebrew/opt/python@3.9/bin/python3.9"
npm ERR! gyp info spawn /opt/homebrew/opt/python@3.9/bin/python3.9
npm ERR! gyp info spawn args [
npm ERR! gyp info spawn args   '/Users/jwatte/source/repos/cloud-instance/node_modules/node-gyp/gyp/gyp_main.py',
npm ERR! gyp info spawn args   'binding.gyp',
npm ERR! gyp info spawn args   '-f',
npm ERR! gyp info spawn args   'make',
npm ERR! gyp info spawn args   '-I',
npm ERR! gyp info spawn args   '/Users/jwatte/source/repos/cloud-instance/node_modules/cpu-features/build/config.gypi',
npm ERR! gyp info spawn args   '-I',
npm ERR! gyp info spawn args   '/Users/jwatte/source/repos/cloud-instance/node_modules/node-gyp/addon.gypi',
npm ERR! gyp info spawn args   '-I',
npm ERR! gyp info spawn args   '/Users/jwatte/Library/Caches/node-gyp/17.2.0/include/node/common.gypi',
npm ERR! gyp info spawn args   '-Dlibrary=shared_library',
npm ERR! gyp info spawn args   '-Dvisibility=default',
npm ERR! gyp info spawn args   '-Dnode_root_dir=/Users/jwatte/Library/Caches/node-gyp/17.2.0',
npm ERR! gyp info spawn args   '-Dnode_gyp_dir=/Users/jwatte/source/repos/cloud-instance/node_modules/node-gyp',
npm ERR! gyp info spawn args   '-Dnode_lib_file=/Users/jwatte/Library/Caches/node-gyp/17.2.0/<(target_arch)/node.lib',
npm ERR! gyp info spawn args   '-Dmodule_root_dir=/Users/jwatte/source/repos/cloud-instance/node_modules/cpu-features',
npm ERR! gyp info spawn args   '-Dnode_engine=v8',
npm ERR! gyp info spawn args   '--depth=.',
npm ERR! gyp info spawn args   '--no-parallel',
npm ERR! gyp info spawn args   '--generator-output',
npm ERR! gyp info spawn args   'build',
npm ERR! gyp info spawn args   '-Goutput_dir=.'
npm ERR! gyp info spawn args ]
npm ERR! gyp info spawn make
npm ERR! gyp info spawn args [ 'BUILDTYPE=Release', '-C', 'build' ]
npm ERR! In file included from /Users/jwatte/source/repos/cloud-instance/node_modules/cpu-features/deps/cpu_features/src/cpuinfo_arm.c:15:
npm ERR! /Users/jwatte/source/repos/cloud-instance/node_modules/cpu-features/deps/cpu_features/include/cpuinfo_arm.h:118:2: error: "Including cpuinfo_arm.h from a non-arm target."
npm ERR! #error "Including cpuinfo_arm.h from a non-arm target."
npm ERR!  ^
npm ERR! 1 error generated.
npm ERR! make[3]: *** [CMakeFiles/cpu_features.dir/src/cpuinfo_arm.c.o] Error 1

# option() honors normal variables.
# see: https://cmake.org/cmake/help/git-stage/policy/CMP0077.html
if(POLICY CMP0077)
Expand Down Expand Up @@ -55,16 +61,20 @@ set(PROCESSOR_IS_AARCH64 FALSE)
set(PROCESSOR_IS_X86 FALSE)
set(PROCESSOR_IS_POWER FALSE)

if(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips")
set(PROCESSOR_IS_MIPS TRUE)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
set(PROCESSOR_IS_ARM TRUE)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64")
if(HOST_ARCHITECTURE MATCHES "^arm64")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing the above code, this chunk can be reverted back to original ... with 2 changes as noted below

set(PROCESSOR_IS_AARCH64 TRUE)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
set(PROCESSOR_IS_X86 TRUE)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
set(PROCESSOR_IS_POWER TRUE)
else()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips")
set(PROCESSOR_IS_MIPS TRUE)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

swap this and the next elseif entry

set(PROCESSOR_IS_ARM TRUE)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change this to (^aarch64)|(arm64)

set(PROCESSOR_IS_AARCH64 TRUE)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
set(PROCESSOR_IS_X86 TRUE)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
set(PROCESSOR_IS_POWER TRUE)
endif()
endif()

macro(add_cpu_features_headers_and_sources HDRS_LIST_NAME SRCS_LIST_NAME)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been editing things to get this building on M1 (actually Universal) and I had to change the way files are included because generating a project as it stands currently, it'll pick one of the architecture's sets of files to include, but then if you build as universal it obviously does not have the required files for the other architecture. So I changed it locally so that all the files are always included and then added #if per platform in the various platform files.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, and there are also issues with including the libraries because it decides based on the host architecture for some of it (unix_based_hardware_detection) so it can fail to build in those cases too.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah CMake universal builds are a bit of a pain. They are 1 build, making universal targets, so anything done at configure time is probably incorrect. I found that you basically have to move all this stuff ot compile time with ifdefs.

Copy link
Collaborator

@Mizux Mizux Jan 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my 2 cents,
CMAKE_SYSTEM_PROCESSOR is for target system and it's usually part of a cmake cross-toolchain otherwise you have the CMAKE_HOST_SYSTEM_PROCESSOR...
ref: https://cmake.org/cmake/help/latest/variable/CMAKE_SYSTEM_PROCESSOR.html

For universal platform, i don't know if we could/should introduce a "universal" cmake system processor and propagate it accordingly than including everything and using preprocessor everywhere...

Copy link

@ghost ghost Jan 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way to build for universal currently means invoking xcodebuild with dual architecture. This is well after cmake has decided which files to include. I know the CMake guys have been doing some work on all this, but its still early days. Think we just have to suck it up for now.

Besides, I've always found writing cross-platform code is far easier when its all visible in a project.

Copy link
Collaborator

@Mizux Mizux Jan 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way to build for universal currently means invoking xcodebuild with dual architecture. This is well after cmake has decided which files to include.

I means, instead of adding all files we could simply add files supported by their "universal 2" build, I'm not sure Apple support (mips, ppc, arm, arm64 and x86)...
EDIT: seems there is universal(i386,ppc) and universal2 (x86_64; arm64)
ref: https://developer.apple.com/documentation/xcode/building_a_universal_macos_binary

I know the CMake guys have been doing some work on all this, but its still early days. Think we just have to suck it up for now.

I think you must talk about CMAKE_OSX_ARCHITECTURES

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I've tried using that to target a specific target and that's ok, but it does not quite work yet. ie. you can't tell it x86_64;arm64 as it complains. Hence the xcodebuild route.

Expand Down Expand Up @@ -146,7 +156,7 @@ set_property(TARGET cpu_features PROPERTY POSITION_INDEPENDENT_CODE ${BUILD_PIC}
target_include_directories(cpu_features
PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/cpu_features>
)
if(PROCESSOR_IS_X86)
if(PROCESSOR_IS_X86 OR PROCESSOR_IS_AARCH64)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know you're following the original code here, but it would make more sense to instead do just a single if here since the clauses are just about APPLE, e.g.:

if(APPLE AND (PROCESSOR_IS_X86 OR PROCESSOR_IS_AARCH64))

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I'm not great with Cmake yet. I'm sure there will be more changes, but currently it passes the build checks so I'll probably make the change locally and wait on it to submit until we get any more feedback.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My comment here is still valid

if(APPLE)
target_compile_definitions(cpu_features PRIVATE HAVE_SYSCTLBYNAME)
endif()
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ flags : aes,avx,cx16,smx,sse4_1,sse4_2,ssse3
| Android | yes² | yes¹ | yes¹ | yes¹ | N/A |
| iOS | N/A | not yet | not yet | N/A | N/A |
| Linux | yes² | yes¹ | yes¹ | yes¹ | yes¹ |
| MacOs | yes² | N/A | not yet | N/A | no |
| MacOs | yes² | N/A | yes² | N/A | no |

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since we're modifying this line, let's use macOS instead of MacOs

| Windows | yes² | not yet | not yet | N/A | N/A |

1. **Features revealed from Linux.** We gather data from several sources
Expand Down
4 changes: 4 additions & 0 deletions include/cpu_features_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
#define CPU_FEATURES_ARCH_ARM
#endif

#if (defined(__APPLE__) && defined(__arm64__))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I for one would prefer to join this into the next statement, as

#if (defined(__aarch64__) || (defined(__APPLE__) && defined(__arm64__)))

#define CPU_FEATURES_ARCH_AARCH64
#endif

#if defined(__aarch64__)
#define CPU_FEATURES_ARCH_AARCH64
#endif
Expand Down
48 changes: 48 additions & 0 deletions src/cpuinfo_aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,35 @@
#include "internal/stack_line_reader.h"
#include "internal/string_view.h"

// The following includes are necessary to provide SSE detections on pre-AVX
// microarchitectures.
#if defined(CPU_FEATURES_OS_DARWIN)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prefer to join this #if (for CPU_FEATURES_OS_DARWIN) and the next for CPU_FEATURES_OS_DARWIN together into 1 clump. Yes, there are places elsewhere in the code where these are separate, but there is also other code between then. Here, it's simple to join them together.

#if !defined(HAVE_SYSCTLBYNAME)
#error "Darwin needs support for sysctlbyname"
#endif
#include <sys/sysctl.h>
#endif // CPU_FEATURES_OS

#if defined(CPU_FEATURES_OS_DARWIN)
#if defined(CPU_FEATURES_MOCK_CPUID_ARM64)
extern bool GetDarwinSysCtlByName(const char*);
extern int GetDarwinSysCtlByNameValue(const char*);
#else // CPU_FEATURES_MOCK_CPUID_ARM64
static bool GetDarwinSysCtlByName(const char* name) {
int enabled;
size_t enabled_len = sizeof(enabled);
const int failure = sysctlbyname(name, &enabled, &enabled_len, NULL, 0);
return failure ? false : enabled;
}
static int GetDarwinSysCtlByNameValue(const char* name) {
int enabled;
size_t enabled_len = sizeof(enabled);
const int failure = sysctlbyname(name, &enabled, &enabled_len, NULL, 0);
return failure ? 0 : enabled;
}
#endif
#endif // CPU_FEATURES_OS_DARWIN

// Generation of feature's getters/setters functions and kGetters, kSetters,
// kCpuInfoFlags and kHardwareCapabilities global tables.
#define DEFINE_TABLE_FEATURES \
Expand Down Expand Up @@ -125,6 +154,23 @@ Aarch64Info GetAarch64Info(void) {
// /proc/cpuinfo).
Aarch64Info info = kEmptyAarch64Info;

#if defined(CPU_FEATURES_OS_DARWIN)
// Handling Darwin platform through sysctlbyname.
info.implementer = GetDarwinSysCtlByNameValue("hw.cputype");
info.variant = GetDarwinSysCtlByNameValue("hw.cpusubtype");
info.part = GetDarwinSysCtlByNameValue("hw.cpufamily");
info.revision = GetDarwinSysCtlByNameValue("hw.cpusubfamily");

info.features.fp = GetDarwinSysCtlByName("hw.optional.floatingpoint");
info.features.fphp = GetDarwinSysCtlByName("hw.optional.neon_hpfp");
info.features.sha512 = GetDarwinSysCtlByName("hw.optional.armv8_2_sha512");
info.features.atomics = GetDarwinSysCtlByName("hw.optional.armv8_1_atomics");
info.features.asimdfhm = GetDarwinSysCtlByName("hw.optional.armv8_2_fhm");
info.features.sha3 = GetDarwinSysCtlByName("hw.optional.armv8_2_sha3");
info.features.crc32 = GetDarwinSysCtlByName("hw.optional.armv8_crc32");

#else

FillProcCpuInfoData(&info);
const HardwareCapabilities hwcaps = CpuFeatures_GetHardwareCapabilities();
for (size_t i = 0; i < AARCH64_LAST_; ++i) {
Expand All @@ -133,6 +179,8 @@ Aarch64Info GetAarch64Info(void) {
}
}

#endif

return info;
}

Expand Down
4 changes: 4 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ endif()
## cpuinfo_aarch64_test
if(PROCESSOR_IS_AARCH64)
add_executable(cpuinfo_aarch64_test cpuinfo_aarch64_test.cc ../src/cpuinfo_aarch64.c)
if(APPLE)
target_compile_definitions(cpuinfo_aarch64_test PUBLIC CPU_FEATURES_MOCK_CPUID_ARM64)
target_compile_definitions(cpuinfo_aarch64_test PRIVATE HAVE_SYSCTLBYNAME)
endif()
target_link_libraries(cpuinfo_aarch64_test all_libraries)
add_test(NAME cpuinfo_aarch64_test COMMAND cpuinfo_aarch64_test)
endif()
Expand Down
112 changes: 112 additions & 0 deletions test/cpuinfo_aarch64_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,116 @@
namespace cpu_features {
namespace {

#if defined(CPU_FEATURES_OS_DARWIN)

class FakeCpu {
public:
bool GetDarwinSysCtlByName(std::string name) const {
return darwin_sysctlbyname_.count(name);
}

int GetDarwinSysCtlByNameValue(std::string name) const {
std::map<std::string, int>::const_iterator iter =
darwin_sysctlbynamevalue_.find(name);
if (iter != std::end(darwin_sysctlbynamevalue_)) {
return iter->second;
}

return 0;
}

void SetDarwinSysCtlByName(std::string name) {
darwin_sysctlbyname_.insert(name);
}

void SetDarwinSysCtlByNameValue(std::string name, int value) {
darwin_sysctlbynamevalue_[name] = value;
}

private:
std::set<std::string> darwin_sysctlbyname_;
std::map<std::string, int> darwin_sysctlbynamevalue_;
};

FakeCpu* g_fake_cpu = nullptr;

extern "C" bool GetDarwinSysCtlByName(const char* name) {
return g_fake_cpu->GetDarwinSysCtlByName(name);
}

extern "C" int GetDarwinSysCtlByNameValue(const char* name) {
return g_fake_cpu->GetDarwinSysCtlByNameValue(name);
}

class CpuinfoAarch64Test : public ::testing::Test {
protected:
void SetUp() override { g_fake_cpu = new FakeCpu(); }
void TearDown() override { delete g_fake_cpu; }
};

TEST_F(CpuinfoAarch64Test, FromDarwinSysctlFromName) {
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.floatingpoint");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.neon");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.neon_hpfp");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.neon_fp16");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.armv8_1_atomics");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.armv8_crc32");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.armv8_2_fhm");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.armv8_2_sha512");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.armv8_2_sha3");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.amx_version");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.ucnormal_mem");
g_fake_cpu->SetDarwinSysCtlByName("hw.optional.arm64");

g_fake_cpu->SetDarwinSysCtlByNameValue("hw.cputype", 16777228);
g_fake_cpu->SetDarwinSysCtlByNameValue("hw.cpusubtype", 2);
g_fake_cpu->SetDarwinSysCtlByNameValue("hw.cpu64bit", 1);
g_fake_cpu->SetDarwinSysCtlByNameValue("hw.cpufamily", 458787763);
g_fake_cpu->SetDarwinSysCtlByNameValue("hw.cpusubfamily", 2);

const auto info = GetAarch64Info();

EXPECT_EQ(info.implementer, 0x100000C);
EXPECT_EQ(info.variant, 2);
EXPECT_EQ(info.part, 0x1B588BB3);
EXPECT_EQ(info.revision, 2);

EXPECT_TRUE(info.features.fp);
EXPECT_FALSE(info.features.asimd);
EXPECT_FALSE(info.features.evtstrm);
EXPECT_FALSE(info.features.aes);
EXPECT_FALSE(info.features.pmull);
EXPECT_FALSE(info.features.sha1);
EXPECT_FALSE(info.features.sha2);
EXPECT_TRUE(info.features.crc32);
EXPECT_TRUE(info.features.atomics);
EXPECT_TRUE(info.features.fphp);
EXPECT_FALSE(info.features.asimdhp);
EXPECT_FALSE(info.features.cpuid);
EXPECT_FALSE(info.features.asimdrdm);
EXPECT_FALSE(info.features.jscvt);
EXPECT_FALSE(info.features.fcma);
EXPECT_FALSE(info.features.lrcpc);
EXPECT_FALSE(info.features.dcpop);
EXPECT_TRUE(info.features.sha3);
EXPECT_FALSE(info.features.sm3);
EXPECT_FALSE(info.features.sm4);
EXPECT_FALSE(info.features.asimddp);
EXPECT_TRUE(info.features.sha512);
EXPECT_FALSE(info.features.sve);
EXPECT_TRUE(info.features.asimdfhm);
EXPECT_FALSE(info.features.dit);
EXPECT_FALSE(info.features.uscat);
EXPECT_FALSE(info.features.ilrcpc);
EXPECT_FALSE(info.features.flagm);
EXPECT_FALSE(info.features.ssbs);
EXPECT_FALSE(info.features.sb);
EXPECT_FALSE(info.features.paca);
EXPECT_FALSE(info.features.pacg);
}

#else

void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }

TEST(CpuinfoAarch64Test, FromHardwareCap) {
Expand Down Expand Up @@ -168,5 +278,7 @@ CPU revision : 3)");
EXPECT_FALSE(info.features.mte);
}

#endif

} // namespace
} // namespace cpu_features