initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
This commit is contained in:
377
thirdparty/embree/common/sys/alloc.cpp
vendored
Normal file
377
thirdparty/embree/common/sys/alloc.cpp
vendored
Normal file
@@ -0,0 +1,377 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "alloc.h"
|
||||
#include "intrinsics.h"
|
||||
#include "sysinfo.h"
|
||||
#include "mutex.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// All Platforms
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace embree
|
||||
{
|
||||
void* alignedMalloc(size_t size, size_t align)
|
||||
{
|
||||
if (size == 0)
|
||||
return nullptr;
|
||||
|
||||
assert((align & (align-1)) == 0);
|
||||
void* ptr = _mm_malloc(size,align);
|
||||
if (size != 0 && ptr == nullptr)
|
||||
abort(); //throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void alignedFree(void* ptr)
|
||||
{
|
||||
if (ptr) {
|
||||
_mm_free(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode)
|
||||
{
|
||||
assert(context);
|
||||
assert(device);
|
||||
|
||||
if (size == 0)
|
||||
return nullptr;
|
||||
|
||||
assert((align & (align-1)) == 0);
|
||||
|
||||
void* ptr = nullptr;
|
||||
if (mode == EmbreeUSMMode::DEVICE_READ_ONLY)
|
||||
ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
|
||||
else
|
||||
ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
|
||||
|
||||
if (size != 0 && ptr == nullptr)
|
||||
abort(); //throw std::bad_alloc();
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type)
|
||||
{
|
||||
assert(context);
|
||||
assert(device);
|
||||
|
||||
if (size == 0)
|
||||
return nullptr;
|
||||
|
||||
assert((align & (align-1)) == 0);
|
||||
|
||||
void* ptr = nullptr;
|
||||
if (type == EmbreeMemoryType::USM_SHARED) {
|
||||
if (mode == EmbreeUSMMode::DEVICE_READ_ONLY)
|
||||
ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
|
||||
else
|
||||
ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
|
||||
}
|
||||
else if (type == EmbreeMemoryType::USM_HOST) {
|
||||
ptr = sycl::aligned_alloc_host(align,size,*context);
|
||||
}
|
||||
else if (type == EmbreeMemoryType::USM_DEVICE) {
|
||||
ptr = sycl::aligned_alloc_device(align,size,*device,*context);
|
||||
}
|
||||
else {
|
||||
ptr = alignedMalloc(size,align);
|
||||
}
|
||||
|
||||
if (size != 0 && ptr == nullptr)
|
||||
abort(); //throw std::bad_alloc();
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void alignedSYCLFree(sycl::context* context, void* ptr)
|
||||
{
|
||||
assert(context);
|
||||
if (ptr) {
|
||||
sycl::usm::alloc type = sycl::get_pointer_type(ptr, *context);
|
||||
if (type == sycl::usm::alloc::host || type == sycl::usm::alloc::device || type == sycl::usm::alloc::shared)
|
||||
sycl::free(ptr,*context);
|
||||
else {
|
||||
alignedFree(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static bool huge_pages_enabled = false;
|
||||
static MutexSys os_init_mutex;
|
||||
|
||||
__forceinline bool isHugePageCandidate(const size_t bytes)
|
||||
{
|
||||
if (!huge_pages_enabled)
|
||||
return false;
|
||||
|
||||
/* use huge pages only when memory overhead is low */
|
||||
const size_t hbytes = (bytes+PAGE_SIZE_2M-1) & ~size_t(PAGE_SIZE_2M-1);
|
||||
return 66*(hbytes-bytes) < bytes; // at most 1.5% overhead
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Windows Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#include <malloc.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
bool win_enable_selockmemoryprivilege (bool verbose)
|
||||
{
|
||||
HANDLE hToken;
|
||||
if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY | TOKEN_ADJUST_PRIVILEGES, &hToken)) {
|
||||
if (verbose) std::cout << "WARNING: OpenProcessToken failed while trying to enable SeLockMemoryPrivilege: " << GetLastError() << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
TOKEN_PRIVILEGES tp;
|
||||
tp.PrivilegeCount = 1;
|
||||
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
|
||||
|
||||
if (!LookupPrivilegeValueW(nullptr, L"SeLockMemoryPrivilege", &tp.Privileges[0].Luid)) {
|
||||
if (verbose) std::cout << "WARNING: LookupPrivilegeValue failed while trying to enable SeLockMemoryPrivilege: " << GetLastError() << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
SetLastError(ERROR_SUCCESS);
|
||||
if (!AdjustTokenPrivileges(hToken, FALSE, &tp, sizeof(tp), nullptr, 0)) {
|
||||
if (verbose) std::cout << "WARNING: AdjustTokenPrivileges failed while trying to enable SeLockMemoryPrivilege" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (GetLastError() == ERROR_NOT_ALL_ASSIGNED) {
|
||||
if (verbose) std::cout << "WARNING: AdjustTokenPrivileges failed to enable SeLockMemoryPrivilege: Add SeLockMemoryPrivilege for current user and run process in elevated mode (Run as administrator)." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool os_init(bool hugepages, bool verbose)
|
||||
{
|
||||
Lock<MutexSys> lock(os_init_mutex);
|
||||
|
||||
if (!hugepages) {
|
||||
huge_pages_enabled = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (GetLargePageMinimum() != PAGE_SIZE_2M) {
|
||||
huge_pages_enabled = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
huge_pages_enabled = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void* os_malloc(size_t bytes, bool& hugepages)
|
||||
{
|
||||
if (bytes == 0) {
|
||||
hugepages = false;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* try direct huge page allocation first */
|
||||
if (isHugePageCandidate(bytes))
|
||||
{
|
||||
int flags = MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES;
|
||||
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
|
||||
if (ptr != nullptr) {
|
||||
hugepages = true;
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
/* fall back to 4k pages */
|
||||
int flags = MEM_COMMIT | MEM_RESERVE;
|
||||
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
|
||||
if (ptr == nullptr) abort(); //throw std::bad_alloc();
|
||||
hugepages = false;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages)
|
||||
{
|
||||
if (hugepages) // decommitting huge pages seems not to work under Windows
|
||||
return bytesOld;
|
||||
|
||||
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
|
||||
bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1);
|
||||
bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1);
|
||||
if (bytesNew >= bytesOld)
|
||||
return bytesOld;
|
||||
|
||||
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
|
||||
abort(); //throw std::bad_alloc();
|
||||
|
||||
return bytesNew;
|
||||
}
|
||||
|
||||
void os_free(void* ptr, size_t bytes, bool hugepages)
|
||||
{
|
||||
if (bytes == 0)
|
||||
return;
|
||||
|
||||
if (!VirtualFree(ptr,0,MEM_RELEASE))
|
||||
abort(); //throw std::bad_alloc();
|
||||
}
|
||||
|
||||
void os_advise(void *ptr, size_t bytes)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unix Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__UNIX__)
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sstream>
|
||||
|
||||
#if defined(__MACOSX__)
|
||||
#include <mach/vm_statistics.h>
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
bool os_init(bool hugepages, bool verbose)
|
||||
{
|
||||
Lock<MutexSys> lock(os_init_mutex);
|
||||
|
||||
if (!hugepages) {
|
||||
huge_pages_enabled = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
#if defined(__LINUX__)
|
||||
|
||||
int hugepagesize = 0;
|
||||
|
||||
std::ifstream file;
|
||||
file.open("/proc/meminfo",std::ios::in);
|
||||
if (!file.is_open()) {
|
||||
if (verbose) std::cout << "WARNING: Could not open /proc/meminfo. Huge page support cannot get enabled!" << std::endl;
|
||||
huge_pages_enabled = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string line;
|
||||
while (getline(file,line))
|
||||
{
|
||||
std::stringstream sline(line);
|
||||
while (!sline.eof() && sline.peek() == ' ') sline.ignore();
|
||||
std::string tag; getline(sline,tag,' ');
|
||||
while (!sline.eof() && sline.peek() == ' ') sline.ignore();
|
||||
std::string val; getline(sline,val,' ');
|
||||
while (!sline.eof() && sline.peek() == ' ') sline.ignore();
|
||||
std::string unit; getline(sline,unit,' ');
|
||||
if (tag == "Hugepagesize:" && unit == "kB") {
|
||||
hugepagesize = std::stoi(val)*1024;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (hugepagesize != PAGE_SIZE_2M)
|
||||
{
|
||||
if (verbose) std::cout << "WARNING: Only 2MB huge pages supported. Huge page support cannot get enabled!" << std::endl;
|
||||
huge_pages_enabled = false;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
huge_pages_enabled = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void* os_malloc(size_t bytes, bool& hugepages)
|
||||
{
|
||||
if (bytes == 0) {
|
||||
hugepages = false;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* try direct huge page allocation first */
|
||||
if (isHugePageCandidate(bytes))
|
||||
{
|
||||
#if defined(__MACOSX__)
|
||||
void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
|
||||
if (ptr != MAP_FAILED) {
|
||||
hugepages = true;
|
||||
return ptr;
|
||||
}
|
||||
#elif defined(MAP_HUGETLB)
|
||||
void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_HUGETLB, -1, 0);
|
||||
if (ptr != MAP_FAILED) {
|
||||
hugepages = true;
|
||||
return ptr;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* fallback to 4k pages */
|
||||
void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
if (ptr == MAP_FAILED) abort(); //throw std::bad_alloc();
|
||||
hugepages = false;
|
||||
|
||||
/* advise huge page hint for THP */
|
||||
os_advise(ptr,bytes);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages)
|
||||
{
|
||||
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
|
||||
bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1);
|
||||
bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1);
|
||||
if (bytesNew >= bytesOld)
|
||||
return bytesOld;
|
||||
|
||||
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
|
||||
abort(); //throw std::bad_alloc();
|
||||
|
||||
return bytesNew;
|
||||
}
|
||||
|
||||
void os_free(void* ptr, size_t bytes, bool hugepages)
|
||||
{
|
||||
if (bytes == 0)
|
||||
return;
|
||||
|
||||
/* for hugepages we need to also align the size */
|
||||
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
|
||||
bytes = (bytes+pageSize-1) & ~(pageSize-1);
|
||||
if (munmap(ptr,bytes) == -1)
|
||||
abort(); //throw std::bad_alloc();
|
||||
}
|
||||
|
||||
/* hint for transparent huge pages (THP) */
|
||||
void os_advise(void* pptr, size_t bytes)
|
||||
{
|
||||
#if defined(MADV_HUGEPAGE)
|
||||
madvise(pptr,bytes,MADV_HUGEPAGE);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
215
thirdparty/embree/common/sys/alloc.h
vendored
Normal file
215
thirdparty/embree/common/sys/alloc.h
vendored
Normal file
@@ -0,0 +1,215 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
#define ALIGNED_STRUCT_(align) \
|
||||
void* operator new(size_t size) { return alignedMalloc(size,align); } \
|
||||
void operator delete(void* ptr) { alignedFree(ptr); } \
|
||||
void* operator new[](size_t size) { return alignedMalloc(size,align); } \
|
||||
void operator delete[](void* ptr) { alignedFree(ptr); }
|
||||
|
||||
#define ALIGNED_CLASS_(align) \
|
||||
public: \
|
||||
ALIGNED_STRUCT_(align) \
|
||||
private:
|
||||
|
||||
/*! aligned allocation */
|
||||
void* alignedMalloc(size_t size, size_t align);
|
||||
void alignedFree(void* ptr);
|
||||
|
||||
|
||||
enum class EmbreeUSMMode {
|
||||
DEFAULT = 0,
|
||||
DEVICE_READ_WRITE = 0,
|
||||
DEVICE_READ_ONLY = 1
|
||||
};
|
||||
|
||||
enum class EmbreeMemoryType {
|
||||
USM_HOST = 0,
|
||||
USM_DEVICE = 1,
|
||||
USM_SHARED = 2,
|
||||
MALLOC = 3
|
||||
};
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
/*! aligned allocation using SYCL USM */
|
||||
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode);
|
||||
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type);
|
||||
void alignedSYCLFree(sycl::context* context, void* ptr);
|
||||
|
||||
#endif
|
||||
|
||||
/*! allocator that performs aligned allocations */
|
||||
template<typename T, size_t alignment>
|
||||
struct aligned_allocator
|
||||
{
|
||||
typedef T value_type;
|
||||
typedef T* pointer;
|
||||
typedef const T* const_pointer;
|
||||
typedef T& reference;
|
||||
typedef const T& const_reference;
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
|
||||
__forceinline pointer allocate( size_type n ) {
|
||||
return (pointer) alignedMalloc(n*sizeof(value_type),alignment);
|
||||
}
|
||||
|
||||
__forceinline void deallocate( pointer p, size_type n ) {
|
||||
return alignedFree(p);
|
||||
}
|
||||
|
||||
__forceinline void construct( pointer p, const_reference val ) {
|
||||
new (p) T(val);
|
||||
}
|
||||
|
||||
__forceinline void destroy( pointer p ) {
|
||||
p->~T();
|
||||
}
|
||||
};
|
||||
|
||||
/*! allocates pages directly from OS */
|
||||
bool win_enable_selockmemoryprivilege(bool verbose);
|
||||
bool os_init(bool hugepages, bool verbose);
|
||||
void* os_malloc (size_t bytes, bool& hugepages);
|
||||
size_t os_shrink (void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages);
|
||||
void os_free (void* ptr, size_t bytes, bool hugepages);
|
||||
void os_advise (void* ptr, size_t bytes);
|
||||
|
||||
/*! allocator that performs OS allocations */
|
||||
template<typename T>
|
||||
struct os_allocator
|
||||
{
|
||||
typedef T value_type;
|
||||
typedef T* pointer;
|
||||
typedef const T* const_pointer;
|
||||
typedef T& reference;
|
||||
typedef const T& const_reference;
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
|
||||
__forceinline os_allocator ()
|
||||
: hugepages(false) {}
|
||||
|
||||
__forceinline pointer allocate( size_type n ) {
|
||||
return (pointer) os_malloc(n*sizeof(value_type),hugepages);
|
||||
}
|
||||
|
||||
__forceinline void deallocate( pointer p, size_type n ) {
|
||||
return os_free(p,n*sizeof(value_type),hugepages);
|
||||
}
|
||||
|
||||
__forceinline void construct( pointer p, const_reference val ) {
|
||||
new (p) T(val);
|
||||
}
|
||||
|
||||
__forceinline void destroy( pointer p ) {
|
||||
p->~T();
|
||||
}
|
||||
|
||||
bool hugepages;
|
||||
};
|
||||
|
||||
/*! allocator that newer performs allocations */
|
||||
template<typename T>
|
||||
struct no_allocator
|
||||
{
|
||||
typedef T value_type;
|
||||
typedef T* pointer;
|
||||
typedef const T* const_pointer;
|
||||
typedef T& reference;
|
||||
typedef const T& const_reference;
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
|
||||
__forceinline pointer allocate( size_type n ) {
|
||||
abort(); //throw std::runtime_error("no allocation supported");
|
||||
}
|
||||
|
||||
__forceinline void deallocate( pointer p, size_type n ) {
|
||||
}
|
||||
|
||||
__forceinline void construct( pointer p, const_reference val ) {
|
||||
new (p) T(val);
|
||||
}
|
||||
|
||||
__forceinline void destroy( pointer p ) {
|
||||
p->~T();
|
||||
}
|
||||
};
|
||||
|
||||
/*! allocator for IDs */
|
||||
template<typename T, size_t max_id>
|
||||
struct IDPool
|
||||
{
|
||||
typedef T value_type;
|
||||
|
||||
IDPool ()
|
||||
: nextID(0) {}
|
||||
|
||||
T allocate()
|
||||
{
|
||||
/* return ID from list */
|
||||
if (!IDs.empty())
|
||||
{
|
||||
T id = *IDs.begin();
|
||||
IDs.erase(IDs.begin());
|
||||
return id;
|
||||
}
|
||||
|
||||
/* allocate new ID */
|
||||
else
|
||||
{
|
||||
if (size_t(nextID)+1 > max_id)
|
||||
return -1;
|
||||
|
||||
return nextID++;
|
||||
}
|
||||
}
|
||||
|
||||
/* adds an ID provided by the user */
|
||||
bool add(T id)
|
||||
{
|
||||
if (id > max_id)
|
||||
return false;
|
||||
|
||||
/* check if ID should be in IDs set */
|
||||
if (id < nextID) {
|
||||
auto p = IDs.find(id);
|
||||
if (p == IDs.end()) return false;
|
||||
IDs.erase(p);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* otherwise increase ID set */
|
||||
else
|
||||
{
|
||||
for (T i=nextID; i<id; i++) {
|
||||
IDs.insert(i);
|
||||
}
|
||||
nextID = id+1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void deallocate( T id )
|
||||
{
|
||||
assert(id < nextID);
|
||||
MAYBE_UNUSED auto done = IDs.insert(id).second;
|
||||
assert(done);
|
||||
}
|
||||
|
||||
private:
|
||||
std::set<T> IDs; //!< stores deallocated IDs to be reused
|
||||
T nextID; //!< next ID to use when IDs vector is empty
|
||||
};
|
||||
}
|
||||
|
222
thirdparty/embree/common/sys/array.h
vendored
Normal file
222
thirdparty/embree/common/sys/array.h
vendored
Normal file
@@ -0,0 +1,222 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
#include "alloc.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! static array with static size */
|
||||
template<typename T, size_t N>
|
||||
class array_t
|
||||
{
|
||||
public:
|
||||
|
||||
/********************** Iterators ****************************/
|
||||
|
||||
__forceinline T* begin() const { return items; };
|
||||
__forceinline T* end () const { return items+N; };
|
||||
|
||||
|
||||
/********************** Capacity ****************************/
|
||||
|
||||
__forceinline bool empty () const { return N == 0; }
|
||||
__forceinline size_t size () const { return N; }
|
||||
__forceinline size_t max_size () const { return N; }
|
||||
|
||||
|
||||
/******************** Element access **************************/
|
||||
|
||||
__forceinline T& operator[](size_t i) { assert(i < N); return items[i]; }
|
||||
__forceinline const T& operator[](size_t i) const { assert(i < N); return items[i]; }
|
||||
|
||||
__forceinline T& at(size_t i) { assert(i < N); return items[i]; }
|
||||
__forceinline const T& at(size_t i) const { assert(i < N); return items[i]; }
|
||||
|
||||
__forceinline T& front() const { assert(N > 0); return items[0]; };
|
||||
__forceinline T& back () const { assert(N > 0); return items[N-1]; };
|
||||
|
||||
__forceinline T* data() { return items; };
|
||||
__forceinline const T* data() const { return items; };
|
||||
|
||||
private:
|
||||
T items[N];
|
||||
};
|
||||
|
||||
/*! static array with dynamic size */
|
||||
template<typename T, size_t N>
|
||||
class darray_t
|
||||
{
|
||||
public:
|
||||
|
||||
__forceinline darray_t () : M(0) {}
|
||||
|
||||
__forceinline darray_t (const T& v) : M(0) {
|
||||
for (size_t i=0; i<N; i++) items[i] = v;
|
||||
}
|
||||
|
||||
/********************** Iterators ****************************/
|
||||
|
||||
__forceinline T* begin() const { return (T*)items; };
|
||||
__forceinline T* end () const { return (T*)items+M; };
|
||||
|
||||
|
||||
/********************** Capacity ****************************/
|
||||
|
||||
__forceinline bool empty () const { return M == 0; }
|
||||
__forceinline size_t size () const { return M; }
|
||||
__forceinline size_t capacity () const { return N; }
|
||||
__forceinline size_t max_size () const { return N; }
|
||||
|
||||
void resize(size_t new_size) {
|
||||
assert(new_size < max_size());
|
||||
M = new_size;
|
||||
}
|
||||
|
||||
/******************** Modifiers **************************/
|
||||
|
||||
__forceinline void push_back(const T& v)
|
||||
{
|
||||
assert(M+1 < max_size());
|
||||
items[M++] = v;
|
||||
}
|
||||
|
||||
__forceinline void pop_back()
|
||||
{
|
||||
assert(!empty());
|
||||
M--;
|
||||
}
|
||||
|
||||
__forceinline void clear() {
|
||||
M = 0;
|
||||
}
|
||||
|
||||
/******************** Element access **************************/
|
||||
|
||||
__forceinline T& operator[](size_t i) { assert(i < M); return items[i]; }
|
||||
__forceinline const T& operator[](size_t i) const { assert(i < M); return items[i]; }
|
||||
|
||||
__forceinline T& at(size_t i) { assert(i < M); return items[i]; }
|
||||
__forceinline const T& at(size_t i) const { assert(i < M); return items[i]; }
|
||||
|
||||
__forceinline T& front() { assert(M > 0); return items[0]; };
|
||||
__forceinline T& back () { assert(M > 0); return items[M-1]; };
|
||||
|
||||
__forceinline T* data() { return items; };
|
||||
__forceinline const T* data() const { return items; };
|
||||
|
||||
private:
|
||||
size_t M;
|
||||
T items[N];
|
||||
};
|
||||
|
||||
/*! dynamic sized array that is allocated on the stack */
|
||||
#define dynamic_large_stack_array(Ty,Name,N,max_stack_bytes) StackArray<Ty,max_stack_bytes> Name(N)
|
||||
template<typename Ty, size_t max_stack_bytes>
|
||||
struct __aligned(64) StackArray
|
||||
{
|
||||
__forceinline StackArray (const size_t N)
|
||||
: N(N)
|
||||
{
|
||||
if (N*sizeof(Ty) <= max_stack_bytes)
|
||||
data = &arr[0];
|
||||
else
|
||||
data = (Ty*) alignedMalloc(N*sizeof(Ty),64);
|
||||
}
|
||||
|
||||
__forceinline ~StackArray () {
|
||||
if (data != &arr[0]) alignedFree(data);
|
||||
}
|
||||
|
||||
__forceinline operator Ty* () { return data; }
|
||||
__forceinline operator const Ty* () const { return data; }
|
||||
|
||||
__forceinline Ty& operator[](const int i) { assert(i>=0 && i<N); return data[i]; }
|
||||
__forceinline const Ty& operator[](const int i) const { assert(i>=0 && i<N); return data[i]; }
|
||||
|
||||
__forceinline Ty& operator[](const unsigned i) { assert(i<N); return data[i]; }
|
||||
__forceinline const Ty& operator[](const unsigned i) const { assert(i<N); return data[i]; }
|
||||
|
||||
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
|
||||
__forceinline Ty& operator[](const size_t i) { assert(i<N); return data[i]; }
|
||||
__forceinline const Ty& operator[](const size_t i) const { assert(i<N); return data[i]; }
|
||||
#endif
|
||||
|
||||
private:
|
||||
Ty arr[max_stack_bytes/sizeof(Ty)];
|
||||
Ty* data;
|
||||
size_t N;
|
||||
|
||||
private:
|
||||
StackArray (const StackArray& other) DELETED; // do not implement
|
||||
StackArray& operator= (const StackArray& other) DELETED; // do not implement
|
||||
|
||||
};
|
||||
|
||||
/*! dynamic sized array that is allocated on the stack */
|
||||
template<typename Ty, size_t max_stack_elements, size_t max_total_elements>
|
||||
struct __aligned(64) DynamicStackArray
|
||||
{
|
||||
__forceinline DynamicStackArray ()
|
||||
: data(&arr[0]) {}
|
||||
|
||||
__forceinline ~DynamicStackArray ()
|
||||
{
|
||||
if (!isStackAllocated())
|
||||
delete[] data;
|
||||
}
|
||||
|
||||
__forceinline bool isStackAllocated() const {
|
||||
return data == &arr[0];
|
||||
}
|
||||
|
||||
__forceinline size_t size() const
|
||||
{
|
||||
if (isStackAllocated()) return max_stack_elements;
|
||||
else return max_total_elements;
|
||||
}
|
||||
|
||||
__forceinline void resize(size_t M)
|
||||
{
|
||||
assert(M <= max_total_elements);
|
||||
if (likely(M <= max_stack_elements)) return;
|
||||
if (likely(!isStackAllocated())) return;
|
||||
|
||||
data = new Ty[max_total_elements];
|
||||
|
||||
for (size_t i=0; i<max_stack_elements; i++)
|
||||
data[i] = arr[i];
|
||||
}
|
||||
|
||||
__forceinline operator Ty* () { return data; }
|
||||
__forceinline operator const Ty* () const { return data; }
|
||||
|
||||
__forceinline Ty& operator[](const int i) { assert(i>=0 && i<max_total_elements); resize(i+1); return data[i]; }
|
||||
__forceinline Ty& operator[](const unsigned i) { assert(i<max_total_elements); resize(i+1); return data[i]; }
|
||||
|
||||
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
|
||||
__forceinline Ty& operator[](const size_t i) { assert(i<max_total_elements); resize(i+1); return data[i]; }
|
||||
#endif
|
||||
|
||||
__forceinline DynamicStackArray (const DynamicStackArray& other)
|
||||
: data(&arr[0])
|
||||
{
|
||||
for (size_t i=0; i<other.size(); i++)
|
||||
this->operator[] (i) = other[i];
|
||||
}
|
||||
|
||||
DynamicStackArray& operator= (const DynamicStackArray& other)
|
||||
{
|
||||
for (size_t i=0; i<other.size(); i++)
|
||||
this->operator[] (i) = other[i];
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
Ty arr[max_stack_elements];
|
||||
Ty* data;
|
||||
};
|
||||
}
|
59
thirdparty/embree/common/sys/atomic.h
vendored
Normal file
59
thirdparty/embree/common/sys/atomic.h
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include "intrinsics.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* compiler memory barriers */
|
||||
#if defined(__INTEL_COMPILER)
|
||||
//#define __memory_barrier() __memory_barrier()
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
# define __memory_barrier() asm volatile("" ::: "memory")
|
||||
#elif defined(_MSC_VER)
|
||||
# define __memory_barrier() _ReadWriteBarrier()
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
struct atomic : public std::atomic<T>
|
||||
{
|
||||
atomic () {}
|
||||
|
||||
atomic (const T& a)
|
||||
: std::atomic<T>(a) {}
|
||||
|
||||
atomic (const atomic<T>& a) {
|
||||
this->store(a.load());
|
||||
}
|
||||
|
||||
atomic& operator=(const atomic<T>& other) {
|
||||
this->store(other.load());
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
__forceinline void _atomic_min(std::atomic<T>& aref, const T& bref)
|
||||
{
|
||||
const T b = bref.load();
|
||||
while (true) {
|
||||
T a = aref.load();
|
||||
if (a <= b) break;
|
||||
if (aref.compare_exchange_strong(a,b)) break;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline void _atomic_max(std::atomic<T>& aref, const T& bref)
|
||||
{
|
||||
const T b = bref.load();
|
||||
while (true) {
|
||||
T a = aref.load();
|
||||
if (a >= b) break;
|
||||
if (aref.compare_exchange_strong(a,b)) break;
|
||||
}
|
||||
}
|
||||
}
|
289
thirdparty/embree/common/sys/barrier.cpp
vendored
Normal file
289
thirdparty/embree/common/sys/barrier.cpp
vendored
Normal file
@@ -0,0 +1,289 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "barrier.h"
|
||||
#include "condition.h"
|
||||
#include "regression.h"
|
||||
#include "thread.h"
|
||||
|
||||
#if defined (__WIN32__)
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct BarrierSysImplementation
|
||||
{
|
||||
__forceinline BarrierSysImplementation (size_t N)
|
||||
: i(0), enterCount(0), exitCount(0), barrierSize(0)
|
||||
{
|
||||
events[0] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
|
||||
events[1] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
|
||||
init(N);
|
||||
}
|
||||
|
||||
__forceinline ~BarrierSysImplementation ()
|
||||
{
|
||||
CloseHandle(events[0]);
|
||||
CloseHandle(events[1]);
|
||||
}
|
||||
|
||||
__forceinline void init(size_t N)
|
||||
{
|
||||
barrierSize = N;
|
||||
enterCount.store(N);
|
||||
exitCount.store(N);
|
||||
}
|
||||
|
||||
__forceinline void wait()
|
||||
{
|
||||
/* every thread entering the barrier decrements this count */
|
||||
size_t i0 = i;
|
||||
size_t cnt0 = enterCount--;
|
||||
|
||||
/* all threads except the last one are wait in the barrier */
|
||||
if (cnt0 > 1)
|
||||
{
|
||||
if (WaitForSingleObject(events[i0], INFINITE) != WAIT_OBJECT_0)
|
||||
THROW_RUNTIME_ERROR("WaitForSingleObjects failed");
|
||||
}
|
||||
|
||||
/* the last thread starts all threads waiting at the barrier */
|
||||
else
|
||||
{
|
||||
i = 1-i;
|
||||
enterCount.store(barrierSize);
|
||||
if (SetEvent(events[i0]) == 0)
|
||||
THROW_RUNTIME_ERROR("SetEvent failed");
|
||||
}
|
||||
|
||||
/* every thread leaving the barrier decrements this count */
|
||||
size_t cnt1 = exitCount--;
|
||||
|
||||
/* the last thread that left the barrier resets the event again */
|
||||
if (cnt1 == 1)
|
||||
{
|
||||
exitCount.store(barrierSize);
|
||||
if (ResetEvent(events[i0]) == 0)
|
||||
THROW_RUNTIME_ERROR("ResetEvent failed");
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
HANDLE events[2];
|
||||
atomic<size_t> i;
|
||||
atomic<size_t> enterCount;
|
||||
atomic<size_t> exitCount;
|
||||
size_t barrierSize;
|
||||
};
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct BarrierSysImplementation
|
||||
{
|
||||
__forceinline BarrierSysImplementation (size_t N)
|
||||
: count(0), barrierSize(0)
|
||||
{
|
||||
init(N);
|
||||
}
|
||||
|
||||
__forceinline void init(size_t N)
|
||||
{
|
||||
assert(count == 0);
|
||||
count = 0;
|
||||
barrierSize = N;
|
||||
}
|
||||
|
||||
__forceinline void wait()
|
||||
{
|
||||
mutex.lock();
|
||||
count++;
|
||||
|
||||
if (count == barrierSize) {
|
||||
count = 0;
|
||||
cond.notify_all();
|
||||
mutex.unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
cond.wait(mutex);
|
||||
mutex.unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
public:
|
||||
MutexSys mutex;
|
||||
ConditionSys cond;
|
||||
volatile size_t count;
|
||||
volatile size_t barrierSize;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
BarrierSys::BarrierSys (size_t N) {
|
||||
opaque = new BarrierSysImplementation(N);
|
||||
}
|
||||
|
||||
BarrierSys::~BarrierSys () {
|
||||
delete (BarrierSysImplementation*) opaque;
|
||||
}
|
||||
|
||||
void BarrierSys::init(size_t count) {
|
||||
((BarrierSysImplementation*) opaque)->init(count);
|
||||
}
|
||||
|
||||
void BarrierSys::wait() {
|
||||
((BarrierSysImplementation*) opaque)->wait();
|
||||
}
|
||||
|
||||
LinearBarrierActive::LinearBarrierActive (size_t N)
|
||||
: count0(nullptr), count1(nullptr), mode(0), flag0(0), flag1(0), threadCount(0)
|
||||
{
|
||||
if (N == 0) N = getNumberOfLogicalThreads();
|
||||
init(N);
|
||||
}
|
||||
|
||||
LinearBarrierActive::~LinearBarrierActive()
|
||||
{
|
||||
delete[] count0;
|
||||
delete[] count1;
|
||||
}
|
||||
|
||||
void LinearBarrierActive::init(size_t N)
|
||||
{
|
||||
if (threadCount != N) {
|
||||
threadCount = N;
|
||||
if (count0) delete[] count0; count0 = new unsigned char[N];
|
||||
if (count1) delete[] count1; count1 = new unsigned char[N];
|
||||
}
|
||||
mode = 0;
|
||||
flag0 = 0;
|
||||
flag1 = 0;
|
||||
for (size_t i=0; i<N; i++) count0[i] = 0;
|
||||
for (size_t i=0; i<N; i++) count1[i] = 0;
|
||||
}
|
||||
|
||||
void LinearBarrierActive::wait (const size_t threadIndex)
|
||||
{
|
||||
if (mode == 0)
|
||||
{
|
||||
if (threadIndex == 0)
|
||||
{
|
||||
for (size_t i=0; i<threadCount; i++)
|
||||
count1[i] = 0;
|
||||
|
||||
for (size_t i=1; i<threadCount; i++)
|
||||
{
|
||||
while (likely(count0[i] == 0))
|
||||
pause_cpu();
|
||||
}
|
||||
mode = 1;
|
||||
flag1 = 0;
|
||||
__memory_barrier();
|
||||
flag0 = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
count0[threadIndex] = 1;
|
||||
{
|
||||
while (likely(flag0 == 0))
|
||||
pause_cpu();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (threadIndex == 0)
|
||||
{
|
||||
for (size_t i=0; i<threadCount; i++)
|
||||
count0[i] = 0;
|
||||
|
||||
for (size_t i=1; i<threadCount; i++)
|
||||
{
|
||||
while (likely(count1[i] == 0))
|
||||
pause_cpu();
|
||||
}
|
||||
|
||||
mode = 0;
|
||||
flag0 = 0;
|
||||
__memory_barrier();
|
||||
flag1 = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
count1[threadIndex] = 1;
|
||||
{
|
||||
while (likely(flag1 == 0))
|
||||
pause_cpu();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct barrier_sys_regression_test : public RegressionTest
|
||||
{
|
||||
BarrierSys barrier;
|
||||
std::atomic<size_t> threadID;
|
||||
std::atomic<size_t> numFailed;
|
||||
std::vector<size_t> threadResults;
|
||||
|
||||
barrier_sys_regression_test()
|
||||
: RegressionTest("barrier_sys_regression_test"), threadID(0), numFailed(0)
|
||||
{
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
static void thread_alloc(barrier_sys_regression_test* This)
|
||||
{
|
||||
size_t tid = This->threadID++;
|
||||
for (size_t j=0; j<1000; j++)
|
||||
{
|
||||
This->barrier.wait();
|
||||
This->threadResults[tid] = tid;
|
||||
This->barrier.wait();
|
||||
}
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
threadID.store(0);
|
||||
numFailed.store(0);
|
||||
|
||||
size_t numThreads = getNumberOfLogicalThreads();
|
||||
threadResults.resize(numThreads);
|
||||
barrier.init(numThreads+1);
|
||||
|
||||
/* create threads */
|
||||
std::vector<thread_t> threads;
|
||||
for (size_t i=0; i<numThreads; i++)
|
||||
threads.push_back(createThread((thread_func)thread_alloc,this));
|
||||
|
||||
/* run test */
|
||||
for (size_t i=0; i<1000; i++)
|
||||
{
|
||||
for (size_t i=0; i<numThreads; i++) threadResults[i] = 0;
|
||||
barrier.wait();
|
||||
barrier.wait();
|
||||
for (size_t i=0; i<numThreads; i++) numFailed += threadResults[i] != i;
|
||||
}
|
||||
|
||||
/* destroy threads */
|
||||
for (size_t i=0; i<numThreads; i++)
|
||||
join(threads[i]);
|
||||
|
||||
return numFailed == 0;
|
||||
}
|
||||
};
|
||||
|
||||
barrier_sys_regression_test barrier_sys_regression_test;
|
||||
}
|
||||
|
||||
|
112
thirdparty/embree/common/sys/barrier.h
vendored
Normal file
112
thirdparty/embree/common/sys/barrier.h
vendored
Normal file
@@ -0,0 +1,112 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "intrinsics.h"
|
||||
#include "sysinfo.h"
|
||||
#include "atomic.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! system barrier using operating system */
|
||||
class BarrierSys
|
||||
{
|
||||
public:
|
||||
|
||||
/*! construction / destruction */
|
||||
BarrierSys (size_t N = 0);
|
||||
~BarrierSys ();
|
||||
|
||||
private:
|
||||
/*! class in non-copyable */
|
||||
BarrierSys (const BarrierSys& other) DELETED; // do not implement
|
||||
BarrierSys& operator= (const BarrierSys& other) DELETED; // do not implement
|
||||
|
||||
public:
|
||||
/*! initializes the barrier with some number of threads */
|
||||
void init(size_t count);
|
||||
|
||||
/*! lets calling thread wait in barrier */
|
||||
void wait();
|
||||
|
||||
private:
|
||||
void* opaque;
|
||||
};
|
||||
|
||||
/*! fast active barrier using atomic counter */
|
||||
struct BarrierActive
|
||||
{
|
||||
public:
|
||||
BarrierActive ()
|
||||
: cntr(0) {}
|
||||
|
||||
void reset() {
|
||||
cntr.store(0);
|
||||
}
|
||||
|
||||
void wait (size_t numThreads)
|
||||
{
|
||||
cntr++;
|
||||
while (cntr.load() != numThreads)
|
||||
pause_cpu();
|
||||
}
|
||||
|
||||
private:
|
||||
std::atomic<size_t> cntr;
|
||||
};
|
||||
|
||||
/*! fast active barrier that does not require initialization to some number of threads */
|
||||
struct BarrierActiveAutoReset
|
||||
{
|
||||
public:
|
||||
BarrierActiveAutoReset ()
|
||||
: cntr0(0), cntr1(0) {}
|
||||
|
||||
void wait (size_t threadCount)
|
||||
{
|
||||
cntr0.fetch_add(1);
|
||||
while (cntr0 != threadCount) pause_cpu();
|
||||
cntr1.fetch_add(1);
|
||||
while (cntr1 != threadCount) pause_cpu();
|
||||
cntr0.fetch_add(-1);
|
||||
while (cntr0 != 0) pause_cpu();
|
||||
cntr1.fetch_add(-1);
|
||||
while (cntr1 != 0) pause_cpu();
|
||||
}
|
||||
|
||||
private:
|
||||
std::atomic<size_t> cntr0;
|
||||
std::atomic<size_t> cntr1;
|
||||
};
|
||||
|
||||
class LinearBarrierActive
|
||||
{
|
||||
public:
|
||||
|
||||
/*! construction and destruction */
|
||||
LinearBarrierActive (size_t threadCount = 0);
|
||||
~LinearBarrierActive();
|
||||
|
||||
private:
|
||||
/*! class in non-copyable */
|
||||
LinearBarrierActive (const LinearBarrierActive& other) DELETED; // do not implement
|
||||
LinearBarrierActive& operator= (const LinearBarrierActive& other) DELETED; // do not implement
|
||||
|
||||
public:
|
||||
/*! initializes the barrier with some number of threads */
|
||||
void init(size_t threadCount);
|
||||
|
||||
/*! thread with threadIndex waits in the barrier */
|
||||
void wait (const size_t threadIndex);
|
||||
|
||||
private:
|
||||
volatile unsigned char* count0;
|
||||
volatile unsigned char* count1;
|
||||
volatile unsigned int mode;
|
||||
volatile unsigned int flag0;
|
||||
volatile unsigned int flag1;
|
||||
volatile size_t threadCount;
|
||||
};
|
||||
}
|
||||
|
85
thirdparty/embree/common/sys/condition.cpp
vendored
Normal file
85
thirdparty/embree/common/sys/condition.cpp
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "condition.h"
|
||||
|
||||
#if defined(__WIN32__) && !defined(PTHREADS_WIN32)
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct ConditionImplementation
|
||||
{
|
||||
__forceinline ConditionImplementation () {
|
||||
InitializeConditionVariable(&cond);
|
||||
}
|
||||
|
||||
__forceinline ~ConditionImplementation () {
|
||||
}
|
||||
|
||||
__forceinline void wait(MutexSys& mutex_in) {
|
||||
SleepConditionVariableCS(&cond, (LPCRITICAL_SECTION)mutex_in.mutex, INFINITE);
|
||||
}
|
||||
|
||||
__forceinline void notify_all() {
|
||||
WakeAllConditionVariable(&cond);
|
||||
}
|
||||
|
||||
public:
|
||||
CONDITION_VARIABLE cond;
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__UNIX__) || defined(PTHREADS_WIN32)
|
||||
#include <pthread.h>
|
||||
namespace embree
|
||||
{
|
||||
struct ConditionImplementation
|
||||
{
|
||||
__forceinline ConditionImplementation () {
|
||||
if (pthread_cond_init(&cond,nullptr) != 0)
|
||||
THROW_RUNTIME_ERROR("pthread_cond_init failed");
|
||||
}
|
||||
|
||||
__forceinline ~ConditionImplementation() {
|
||||
MAYBE_UNUSED bool ok = pthread_cond_destroy(&cond) == 0;
|
||||
assert(ok);
|
||||
}
|
||||
|
||||
__forceinline void wait(MutexSys& mutex) {
|
||||
if (pthread_cond_wait(&cond, (pthread_mutex_t*)mutex.mutex) != 0)
|
||||
THROW_RUNTIME_ERROR("pthread_cond_wait failed");
|
||||
}
|
||||
|
||||
__forceinline void notify_all() {
|
||||
if (pthread_cond_broadcast(&cond) != 0)
|
||||
THROW_RUNTIME_ERROR("pthread_cond_broadcast failed");
|
||||
}
|
||||
|
||||
public:
|
||||
pthread_cond_t cond;
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
ConditionSys::ConditionSys () {
|
||||
cond = new ConditionImplementation;
|
||||
}
|
||||
|
||||
ConditionSys::~ConditionSys() {
|
||||
delete (ConditionImplementation*) cond;
|
||||
}
|
||||
|
||||
void ConditionSys::wait(MutexSys& mutex) {
|
||||
((ConditionImplementation*) cond)->wait(mutex);
|
||||
}
|
||||
|
||||
void ConditionSys::notify_all() {
|
||||
((ConditionImplementation*) cond)->notify_all();
|
||||
}
|
||||
}
|
31
thirdparty/embree/common/sys/condition.h
vendored
Normal file
31
thirdparty/embree/common/sys/condition.h
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mutex.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class ConditionSys
|
||||
{
|
||||
public:
|
||||
ConditionSys();
|
||||
~ConditionSys();
|
||||
void wait( class MutexSys& mutex );
|
||||
void notify_all();
|
||||
|
||||
template<typename Predicate>
|
||||
__forceinline void wait( class MutexSys& mutex, const Predicate& pred )
|
||||
{
|
||||
while (!pred()) wait(mutex);
|
||||
}
|
||||
|
||||
private:
|
||||
ConditionSys (const ConditionSys& other) DELETED; // do not implement
|
||||
ConditionSys& operator= (const ConditionSys& other) DELETED; // do not implement
|
||||
|
||||
protected:
|
||||
void* cond;
|
||||
};
|
||||
}
|
42
thirdparty/embree/common/sys/estring.cpp
vendored
Normal file
42
thirdparty/embree/common/sys/estring.cpp
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "estring.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <ctype.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
char to_lower(char c) { return char(tolower(int(c))); }
|
||||
char to_upper(char c) { return char(toupper(int(c))); }
|
||||
std::string toLowerCase(const std::string& s) { std::string dst(s); std::transform(dst.begin(), dst.end(), dst.begin(), to_lower); return dst; }
|
||||
std::string toUpperCase(const std::string& s) { std::string dst(s); std::transform(dst.begin(), dst.end(), dst.begin(), to_upper); return dst; }
|
||||
|
||||
Vec2f string_to_Vec2f ( std::string str )
|
||||
{
|
||||
size_t next = 0;
|
||||
const float x = std::stof(str,&next); str = str.substr(next+1);
|
||||
const float y = std::stof(str,&next);
|
||||
return Vec2f(x,y);
|
||||
}
|
||||
|
||||
Vec3f string_to_Vec3f ( std::string str )
|
||||
{
|
||||
size_t next = 0;
|
||||
const float x = std::stof(str,&next); str = str.substr(next+1);
|
||||
const float y = std::stof(str,&next); str = str.substr(next+1);
|
||||
const float z = std::stof(str,&next);
|
||||
return Vec3f(x,y,z);
|
||||
}
|
||||
|
||||
Vec4f string_to_Vec4f ( std::string str )
|
||||
{
|
||||
size_t next = 0;
|
||||
const float x = std::stof(str,&next); str = str.substr(next+1);
|
||||
const float y = std::stof(str,&next); str = str.substr(next+1);
|
||||
const float z = std::stof(str,&next); str = str.substr(next+1);
|
||||
const float w = std::stof(str,&next);
|
||||
return Vec4f(x,y,z,w);
|
||||
}
|
||||
}
|
73
thirdparty/embree/common/sys/estring.h
vendored
Normal file
73
thirdparty/embree/common/sys/estring.h
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
#include "../math/vec2.h"
|
||||
#include "../math/vec3.h"
|
||||
#include "../math/vec4.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class IOStreamStateRestorer
|
||||
{
|
||||
public:
|
||||
IOStreamStateRestorer(std::ostream& iostream)
|
||||
: iostream(iostream), flags(iostream.flags()), precision(iostream.precision()) {
|
||||
}
|
||||
|
||||
~IOStreamStateRestorer() {
|
||||
iostream.flags(flags);
|
||||
iostream.precision(precision);
|
||||
}
|
||||
|
||||
private:
|
||||
std::ostream& iostream;
|
||||
std::ios::fmtflags flags;
|
||||
std::streamsize precision;
|
||||
};
|
||||
|
||||
struct IndentOStream : public std::streambuf
|
||||
{
|
||||
explicit IndentOStream(std::ostream &ostream, int indent = 2)
|
||||
: streambuf(ostream.rdbuf())
|
||||
, start_of_line(true)
|
||||
, ident_str(indent, ' ')
|
||||
, stream(&ostream)
|
||||
{
|
||||
// set streambuf of ostream to this and save original streambuf
|
||||
stream->rdbuf(this);
|
||||
}
|
||||
|
||||
virtual ~IndentOStream()
|
||||
{
|
||||
if (stream != NULL) {
|
||||
// restore old streambuf
|
||||
stream->rdbuf(streambuf);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual int overflow(int ch) {
|
||||
if (start_of_line && ch != '\n') {
|
||||
streambuf->sputn(ident_str.data(), ident_str.size());
|
||||
}
|
||||
start_of_line = ch == '\n';
|
||||
return streambuf->sputc(ch);
|
||||
}
|
||||
|
||||
private:
|
||||
std::streambuf *streambuf;
|
||||
bool start_of_line;
|
||||
std::string ident_str;
|
||||
std::ostream *stream;
|
||||
};
|
||||
|
||||
std::string toLowerCase(const std::string& s);
|
||||
std::string toUpperCase(const std::string& s);
|
||||
|
||||
Vec2f string_to_Vec2f ( std::string str );
|
||||
Vec3f string_to_Vec3f ( std::string str );
|
||||
Vec4f string_to_Vec4f ( std::string str );
|
||||
}
|
126
thirdparty/embree/common/sys/filename.cpp
vendored
Normal file
126
thirdparty/embree/common/sys/filename.cpp
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "filename.h"
|
||||
#include "sysinfo.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
#ifdef __WIN32__
|
||||
const char path_sep = '\\';
|
||||
#else
|
||||
const char path_sep = '/';
|
||||
#endif
|
||||
|
||||
/*! create an empty filename */
|
||||
FileName::FileName () {}
|
||||
|
||||
/*! create a valid filename from a string */
|
||||
FileName::FileName (const char* in) {
|
||||
filename = in;
|
||||
for (size_t i=0; i<filename.size(); i++)
|
||||
if (filename[i] == '\\' || filename[i] == '/')
|
||||
filename[i] = path_sep;
|
||||
while (!filename.empty() && filename[filename.size()-1] == path_sep)
|
||||
filename.resize(filename.size()-1);
|
||||
}
|
||||
|
||||
/*! create a valid filename from a string */
|
||||
FileName::FileName (const std::string& in) {
|
||||
filename = in;
|
||||
for (size_t i=0; i<filename.size(); i++)
|
||||
if (filename[i] == '\\' || filename[i] == '/')
|
||||
filename[i] = path_sep;
|
||||
while (!filename.empty() && filename[filename.size()-1] == path_sep)
|
||||
filename.resize(filename.size()-1);
|
||||
}
|
||||
|
||||
/*! returns path to executable */
|
||||
FileName FileName::executableFolder() {
|
||||
return FileName(getExecutableFileName()).path();
|
||||
}
|
||||
|
||||
/*! returns the path */
|
||||
FileName FileName::path() const {
|
||||
size_t pos = filename.find_last_of(path_sep);
|
||||
if (pos == std::string::npos) return FileName();
|
||||
return filename.substr(0,pos);
|
||||
}
|
||||
|
||||
/*! returns the basename */
|
||||
std::string FileName::base() const {
|
||||
size_t pos = filename.find_last_of(path_sep);
|
||||
if (pos == std::string::npos) return filename;
|
||||
return filename.substr(pos+1);
|
||||
}
|
||||
|
||||
/*! returns the extension */
|
||||
std::string FileName::ext() const {
|
||||
size_t pos = filename.find_last_of('.');
|
||||
if (pos == std::string::npos) return "";
|
||||
return filename.substr(pos+1);
|
||||
}
|
||||
|
||||
/*! returns the extension */
|
||||
FileName FileName::dropExt() const {
|
||||
size_t pos = filename.find_last_of('.');
|
||||
if (pos == std::string::npos) return filename;
|
||||
return filename.substr(0,pos);
|
||||
}
|
||||
|
||||
/*! returns the basename without extension */
|
||||
std::string FileName::name() const {
|
||||
size_t start = filename.find_last_of(path_sep);
|
||||
if (start == std::string::npos) start = 0; else start++;
|
||||
size_t end = filename.find_last_of('.');
|
||||
if (end == std::string::npos || end < start) end = filename.size();
|
||||
return filename.substr(start, end - start);
|
||||
}
|
||||
|
||||
/*! replaces the extension */
|
||||
FileName FileName::setExt(const std::string& ext) const {
|
||||
size_t start = filename.find_last_of(path_sep);
|
||||
if (start == std::string::npos) start = 0; else start++;
|
||||
size_t end = filename.find_last_of('.');
|
||||
if (end == std::string::npos || end < start) return FileName(filename+ext);
|
||||
return FileName(filename.substr(0,end)+ext);
|
||||
}
|
||||
|
||||
/*! adds the extension */
|
||||
FileName FileName::addExt(const std::string& ext) const {
|
||||
return FileName(filename+ext);
|
||||
}
|
||||
|
||||
/*! concatenates two filenames to this/other */
|
||||
FileName FileName::operator +( const FileName& other ) const {
|
||||
if (filename == "") return FileName(other);
|
||||
else return FileName(filename + path_sep + other.filename);
|
||||
}
|
||||
|
||||
/*! concatenates two filenames to this/other */
|
||||
FileName FileName::operator +( const std::string& other ) const {
|
||||
return operator+(FileName(other));
|
||||
}
|
||||
|
||||
/*! removes the base from a filename (if possible) */
|
||||
FileName FileName::operator -( const FileName& base ) const {
|
||||
size_t pos = filename.find_first_of(base);
|
||||
if (pos == std::string::npos) return *this;
|
||||
return FileName(filename.substr(pos+1));
|
||||
}
|
||||
|
||||
/*! == operator */
|
||||
bool operator== (const FileName& a, const FileName& b) {
|
||||
return a.filename == b.filename;
|
||||
}
|
||||
|
||||
/*! != operator */
|
||||
bool operator!= (const FileName& a, const FileName& b) {
|
||||
return a.filename != b.filename;
|
||||
}
|
||||
|
||||
/*! output operator */
|
||||
std::ostream& operator<<(std::ostream& cout, const FileName& filename) {
|
||||
return cout << filename.filename;
|
||||
}
|
||||
}
|
78
thirdparty/embree/common/sys/filename.h
vendored
Normal file
78
thirdparty/embree/common/sys/filename.h
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Convenience class for handling file names and paths. */
|
||||
class FileName
|
||||
{
|
||||
public:
|
||||
|
||||
/*! create an empty filename */
|
||||
FileName ();
|
||||
|
||||
/*! create a valid filename from a string */
|
||||
FileName (const char* filename);
|
||||
|
||||
/*! create a valid filename from a string */
|
||||
FileName (const std::string& filename);
|
||||
|
||||
/*! returns path to executable */
|
||||
static FileName executableFolder();
|
||||
|
||||
/*! auto convert into a string */
|
||||
operator std::string() const { return filename; }
|
||||
|
||||
/*! returns a string of the filename */
|
||||
const std::string str() const { return filename; }
|
||||
|
||||
/*! returns a c-string of the filename */
|
||||
const char* c_str() const { return filename.c_str(); }
|
||||
|
||||
/*! returns the path of a filename */
|
||||
FileName path() const;
|
||||
|
||||
/*! returns the file of a filename */
|
||||
std::string base() const;
|
||||
|
||||
/*! returns the base of a filename without extension */
|
||||
std::string name() const;
|
||||
|
||||
/*! returns the file extension */
|
||||
std::string ext() const;
|
||||
|
||||
/*! drops the file extension */
|
||||
FileName dropExt() const;
|
||||
|
||||
/*! replaces the file extension */
|
||||
FileName setExt(const std::string& ext = "") const;
|
||||
|
||||
/*! adds file extension */
|
||||
FileName addExt(const std::string& ext = "") const;
|
||||
|
||||
/*! concatenates two filenames to this/other */
|
||||
FileName operator +( const FileName& other ) const;
|
||||
|
||||
/*! concatenates two filenames to this/other */
|
||||
FileName operator +( const std::string& other ) const;
|
||||
|
||||
/*! removes the base from a filename (if possible) */
|
||||
FileName operator -( const FileName& base ) const;
|
||||
|
||||
/*! == operator */
|
||||
friend bool operator==(const FileName& a, const FileName& b);
|
||||
|
||||
/*! != operator */
|
||||
friend bool operator!=(const FileName& a, const FileName& b);
|
||||
|
||||
/*! output operator */
|
||||
friend std::ostream& operator<<(std::ostream& cout, const FileName& filename);
|
||||
|
||||
private:
|
||||
std::string filename;
|
||||
};
|
||||
}
|
566
thirdparty/embree/common/sys/intrinsics.h
vendored
Normal file
566
thirdparty/embree/common/sys/intrinsics.h
vendored
Normal file
@@ -0,0 +1,566 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
#if defined(__WIN32__)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_NEON)
|
||||
#include "../simd/arm/emulation.h"
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
#include "../simd/wasm/emulation.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__BMI__) && defined(__GNUC__) && !defined(__INTEL_COMPILER)
|
||||
#if !defined(_tzcnt_u32)
|
||||
#define _tzcnt_u32 __tzcnt_u32
|
||||
#endif
|
||||
#if !defined(_tzcnt_u64)
|
||||
#define _tzcnt_u64 __tzcnt_u64
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#if !defined(_lzcnt_u32)
|
||||
#define _lzcnt_u32 __builtin_clz
|
||||
#endif
|
||||
#else
|
||||
#if defined(__LZCNT__)
|
||||
#if !defined(_lzcnt_u32)
|
||||
#define _lzcnt_u32 __lzcnt32
|
||||
#endif
|
||||
#if !defined(_lzcnt_u64)
|
||||
#define _lzcnt_u64 __lzcnt64
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__)
|
||||
# if !defined(NOMINMAX)
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
/* normally defined in pmmintrin.h, but we always need this */
|
||||
#if !defined(_MM_SET_DENORMALS_ZERO_MODE)
|
||||
#define _MM_DENORMALS_ZERO_ON (0x0040)
|
||||
#define _MM_DENORMALS_ZERO_OFF (0x0000)
|
||||
#define _MM_DENORMALS_ZERO_MASK (0x0040)
|
||||
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Windows Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__WIN32__) && !defined(__INTEL_LLVM_COMPILER)
|
||||
|
||||
__forceinline size_t read_tsc()
|
||||
{
|
||||
LARGE_INTEGER li;
|
||||
QueryPerformanceCounter(&li);
|
||||
return (size_t)li.QuadPart;
|
||||
}
|
||||
|
||||
__forceinline int bsf(int v) {
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
return _tzcnt_u32(v);
|
||||
#else
|
||||
unsigned long r = 0; _BitScanForward(&r,v); return r;
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline unsigned bsf(unsigned v) {
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
return _tzcnt_u32(v);
|
||||
#else
|
||||
unsigned long r = 0; _BitScanForward(&r,v); return r;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__X86_64__) || defined (__aarch64__)
|
||||
__forceinline size_t bsf(size_t v) {
|
||||
#if defined(__AVX2__)
|
||||
return _tzcnt_u64(v);
|
||||
#else
|
||||
unsigned long r = 0; _BitScanForward64(&r,v); return r;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline int bscf(int& v)
|
||||
{
|
||||
int i = bsf(v);
|
||||
v &= v-1;
|
||||
return i;
|
||||
}
|
||||
|
||||
__forceinline unsigned bscf(unsigned& v)
|
||||
{
|
||||
unsigned i = bsf(v);
|
||||
v &= v-1;
|
||||
return i;
|
||||
}
|
||||
|
||||
#if defined(__X86_64__) || defined (__aarch64__)
|
||||
__forceinline size_t bscf(size_t& v)
|
||||
{
|
||||
size_t i = bsf(v);
|
||||
v &= v-1;
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline int bsr(int v) {
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
return 31 - _lzcnt_u32(v);
|
||||
#else
|
||||
unsigned long r = 0; _BitScanReverse(&r,v); return r;
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline unsigned bsr(unsigned v) {
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
return 31 - _lzcnt_u32(v);
|
||||
#else
|
||||
unsigned long r = 0; _BitScanReverse(&r,v); return r;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__X86_64__) || defined (__aarch64__)
|
||||
__forceinline size_t bsr(size_t v) {
|
||||
#if defined(__AVX2__)
|
||||
return 63 -_lzcnt_u64(v);
|
||||
#else
|
||||
unsigned long r = 0; _BitScanReverse64(&r, v); return r;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline int lzcnt(const int x)
|
||||
{
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
return _lzcnt_u32(x);
|
||||
#else
|
||||
if (unlikely(x == 0)) return 32;
|
||||
return 31 - bsr(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline int btc(int v, int i) {
|
||||
long r = v; _bittestandcomplement(&r,i); return r;
|
||||
}
|
||||
|
||||
__forceinline int bts(int v, int i) {
|
||||
long r = v; _bittestandset(&r,i); return r;
|
||||
}
|
||||
|
||||
__forceinline int btr(int v, int i) {
|
||||
long r = v; _bittestandreset(&r,i); return r;
|
||||
}
|
||||
|
||||
#if defined(__X86_64__)
|
||||
|
||||
__forceinline size_t btc(size_t v, size_t i) {
|
||||
size_t r = v; _bittestandcomplement64((__int64*)&r,i); return r;
|
||||
}
|
||||
|
||||
__forceinline size_t bts(size_t v, size_t i) {
|
||||
__int64 r = v; _bittestandset64(&r,i); return r;
|
||||
}
|
||||
|
||||
__forceinline size_t btr(size_t v, size_t i) {
|
||||
__int64 r = v; _bittestandreset64(&r,i); return r;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
__forceinline int32_t atomic_cmpxchg(volatile int32_t* p, const int32_t c, const int32_t v) {
|
||||
return _InterlockedCompareExchange((volatile long*)p,v,c);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unix Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#else
|
||||
|
||||
__forceinline uint64_t read_tsc() {
|
||||
#if defined(__X86_ASM__)
|
||||
uint32_t high,low;
|
||||
asm volatile ("rdtsc" : "=d"(high), "=a"(low));
|
||||
return (((uint64_t)high) << 32) + (uint64_t)low;
|
||||
#else
|
||||
/* Not supported yet, meaning measuring traversal cost per pixel does not work. */
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline int bsf(int v) {
|
||||
#if defined(__ARM_NEON)
|
||||
return __builtin_ctz(v);
|
||||
#else
|
||||
#if defined(__AVX2__)
|
||||
return _tzcnt_u32(v);
|
||||
#elif defined(__X86_ASM__)
|
||||
int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
|
||||
#else
|
||||
return __builtin_ctz(v);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
__forceinline unsigned int bsf(unsigned v) {
|
||||
return sycl::ctz(v);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__64BIT__)
|
||||
__forceinline unsigned bsf(unsigned v)
|
||||
{
|
||||
#if defined(__ARM_NEON)
|
||||
return __builtin_ctz(v);
|
||||
#else
|
||||
#if defined(__AVX2__)
|
||||
return _tzcnt_u32(v);
|
||||
#elif defined(__X86_ASM__)
|
||||
unsigned r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
|
||||
#else
|
||||
return __builtin_ctz(v);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
__forceinline size_t bsf(size_t v) {
|
||||
return sycl::ctz(v);
|
||||
}
|
||||
#else
|
||||
|
||||
__forceinline size_t bsf(size_t v) {
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
#if defined(__X86_64__)
|
||||
return _tzcnt_u64(v);
|
||||
#else
|
||||
return _tzcnt_u32(v);
|
||||
#endif
|
||||
#elif defined(__X86_ASM__)
|
||||
size_t r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
|
||||
#else
|
||||
return __builtin_ctzl(v);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline int bscf(int& v)
|
||||
{
|
||||
int i = bsf(v);
|
||||
v &= v-1;
|
||||
return i;
|
||||
}
|
||||
|
||||
#if defined(__64BIT__)
|
||||
__forceinline unsigned int bscf(unsigned int& v)
|
||||
{
|
||||
unsigned int i = bsf(v);
|
||||
v &= v-1;
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline size_t bscf(size_t& v)
|
||||
{
|
||||
size_t i = bsf(v);
|
||||
v &= v-1;
|
||||
return i;
|
||||
}
|
||||
|
||||
__forceinline int bsr(int v) {
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
return 31 - _lzcnt_u32(v);
|
||||
#elif defined(__X86_ASM__)
|
||||
int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
|
||||
#else
|
||||
return __builtin_clz(v) ^ 31;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
|
||||
__forceinline unsigned bsr(unsigned v) {
|
||||
#if defined(__AVX2__)
|
||||
return 31 - _lzcnt_u32(v);
|
||||
#elif defined(__X86_ASM__)
|
||||
unsigned r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
|
||||
#else
|
||||
return __builtin_clz(v) ^ 31;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline size_t bsr(size_t v) {
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
#if defined(__X86_64__)
|
||||
return 63 - _lzcnt_u64(v);
|
||||
#else
|
||||
return 31 - _lzcnt_u32(v);
|
||||
#endif
|
||||
#elif defined(__X86_ASM__)
|
||||
size_t r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
|
||||
#else
|
||||
return (sizeof(v) * 8 - 1) - __builtin_clzl(v);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline int lzcnt(const int x)
|
||||
{
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
return _lzcnt_u32(x);
|
||||
#else
|
||||
if (unlikely(x == 0)) return 32;
|
||||
return 31 - bsr(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline size_t blsr(size_t v) {
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
#if defined(__INTEL_COMPILER)
|
||||
return _blsr_u64(v);
|
||||
#else
|
||||
#if defined(__X86_64__)
|
||||
return __blsr_u64(v);
|
||||
#else
|
||||
return __blsr_u32(v);
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
return v & (v-1);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline int btc(int v, int i) {
|
||||
#if defined(__X86_ASM__)
|
||||
int r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r;
|
||||
#else
|
||||
return (v ^ (1 << i));
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline int bts(int v, int i) {
|
||||
#if defined(__X86_ASM__)
|
||||
int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
|
||||
#else
|
||||
return (v | (1 << i));
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline int btr(int v, int i) {
|
||||
#if defined(__X86_ASM__)
|
||||
int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
|
||||
#else
|
||||
return (v & ~(1 << i));
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline size_t btc(size_t v, size_t i) {
|
||||
#if defined(__X86_ASM__)
|
||||
size_t r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r;
|
||||
#else
|
||||
return (v ^ (1 << i));
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline size_t bts(size_t v, size_t i) {
|
||||
#if defined(__X86_ASM__)
|
||||
size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
|
||||
#else
|
||||
return (v | (1 << i));
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline size_t btr(size_t v, size_t i) {
|
||||
#if defined(__X86_ASM__)
|
||||
size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
|
||||
#else
|
||||
return (v & ~(1 << i));
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline int32_t atomic_cmpxchg(int32_t volatile* value, int32_t comparand, const int32_t input) {
|
||||
return __sync_val_compare_and_swap(value, comparand, input);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__)
|
||||
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
|
||||
__forceinline void __cpuid(int out[4], int op)
|
||||
{
|
||||
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchg{l}\t{%%}ebx, %1\n\t"
|
||||
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "0"(op));
|
||||
}
|
||||
|
||||
__forceinline void __cpuid_count(int out[4], int op1, int op2)
|
||||
{
|
||||
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchg{l}\t{%%}ebx, %1\n\t"
|
||||
: "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3])
|
||||
: "0" (op1), "2" (op2));
|
||||
}
|
||||
|
||||
#elif defined(__X86_ASM__)
|
||||
|
||||
__forceinline void __cpuid(int out[4], int op) {
|
||||
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
|
||||
}
|
||||
|
||||
__forceinline void __cpuid_count(int out[4], int op1, int op2) {
|
||||
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2));
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// All Platforms
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#if !defined(_mm_undefined_ps)
|
||||
__forceinline __m128 _mm_undefined_ps() { return _mm_setzero_ps(); }
|
||||
#endif
|
||||
#if !defined(_mm_undefined_si128)
|
||||
__forceinline __m128i _mm_undefined_si128() { return _mm_setzero_si128(); }
|
||||
#endif
|
||||
#if !defined(_mm256_undefined_ps) && defined(__AVX__)
|
||||
__forceinline __m256 _mm256_undefined_ps() { return _mm256_setzero_ps(); }
|
||||
#endif
|
||||
#if !defined(_mm256_undefined_si256) && defined(__AVX__)
|
||||
__forceinline __m256i _mm256_undefined_si256() { return _mm256_setzero_si256(); }
|
||||
#endif
|
||||
#if !defined(_mm512_undefined_ps) && defined(__AVX512F__)
|
||||
__forceinline __m512 _mm512_undefined_ps() { return _mm512_setzero_ps(); }
|
||||
#endif
|
||||
#if !defined(_mm512_undefined_epi32) && defined(__AVX512F__)
|
||||
__forceinline __m512i _mm512_undefined_epi32() { return _mm512_setzero_si512(); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
__forceinline unsigned int popcnt(unsigned int in) {
|
||||
return sycl::popcount(in);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__SSE4_2__) || defined(__ARM_NEON)
|
||||
|
||||
__forceinline int popcnt(int in) {
|
||||
return _mm_popcnt_u32(in);
|
||||
}
|
||||
|
||||
__forceinline unsigned popcnt(unsigned in) {
|
||||
return _mm_popcnt_u32(in);
|
||||
}
|
||||
|
||||
#if defined(__64BIT__)
|
||||
__forceinline size_t popcnt(size_t in) {
|
||||
return _mm_popcnt_u64(in);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__X86_ASM__)
|
||||
__forceinline uint64_t rdtsc()
|
||||
{
|
||||
int dummy[4];
|
||||
__cpuid(dummy,0);
|
||||
uint64_t clock = read_tsc();
|
||||
__cpuid(dummy,0);
|
||||
return clock;
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline void pause_cpu(const size_t N = 8)
|
||||
{
|
||||
for (size_t i=0; i<N; i++)
|
||||
_mm_pause();
|
||||
}
|
||||
|
||||
/* prefetches */
|
||||
__forceinline void prefetchL1 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T0); }
|
||||
__forceinline void prefetchL2 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T1); }
|
||||
__forceinline void prefetchL3 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T2); }
|
||||
__forceinline void prefetchNTA(const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_NTA); }
|
||||
__forceinline void prefetchEX (const void* ptr) {
|
||||
#if defined(__INTEL_COMPILER)
|
||||
_mm_prefetch((const char*)ptr,_MM_HINT_ET0);
|
||||
#else
|
||||
_mm_prefetch((const char*)ptr,_MM_HINT_T0);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline void prefetchL1EX(const void* ptr) {
|
||||
prefetchEX(ptr);
|
||||
}
|
||||
|
||||
__forceinline void prefetchL2EX(const void* ptr) {
|
||||
prefetchEX(ptr);
|
||||
}
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
__forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); }
|
||||
__forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); }
|
||||
#if defined(__X86_64__)
|
||||
__forceinline size_t pext(size_t a, size_t b) { return _pext_u64(a, b); }
|
||||
__forceinline size_t pdep(size_t a, size_t b) { return _pdep_u64(a, b); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
#if defined(__INTEL_COMPILER)
|
||||
__forceinline float mm512_cvtss_f32(__m512 v) {
|
||||
return _mm512_cvtss_f32(v);
|
||||
}
|
||||
__forceinline int mm512_mask2int(__mmask16 k1) {
|
||||
return _mm512_mask2int(k1);
|
||||
}
|
||||
__forceinline __mmask16 mm512_int2mask(int mask) {
|
||||
return _mm512_int2mask(mask);
|
||||
}
|
||||
#else
|
||||
__forceinline float mm512_cvtss_f32(__m512 v) { // FIXME: _mm512_cvtss_f32 neither supported by clang v4.0.0 nor GCC 6.3
|
||||
return _mm_cvtss_f32(_mm512_castps512_ps128(v));
|
||||
}
|
||||
__forceinline int mm512_mask2int(__mmask16 k1) { // FIXME: _mm512_mask2int not yet supported by GCC 6.3
|
||||
return (int)k1;
|
||||
}
|
||||
__forceinline __mmask16 mm512_int2mask(int mask) { // FIXME: _mm512_int2mask not yet supported by GCC 6.3
|
||||
return (__mmask16)mask;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
83
thirdparty/embree/common/sys/library.cpp
vendored
Normal file
83
thirdparty/embree/common/sys/library.cpp
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "library.h"
|
||||
#include "sysinfo.h"
|
||||
#include "filename.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Windows Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__WIN32__)
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* opens a shared library */
|
||||
lib_t openLibrary(const std::string& file)
|
||||
{
|
||||
std::string fullName = file+".dll";
|
||||
FileName executable = getExecutableFileName();
|
||||
HANDLE handle = LoadLibrary((executable.path() + fullName).c_str());
|
||||
return lib_t(handle);
|
||||
}
|
||||
|
||||
/* returns address of a symbol from the library */
|
||||
void* getSymbol(lib_t lib, const std::string& sym) {
|
||||
return (void*)GetProcAddress(HMODULE(lib),sym.c_str());
|
||||
}
|
||||
|
||||
/* closes the shared library */
|
||||
void closeLibrary(lib_t lib) {
|
||||
FreeLibrary(HMODULE(lib));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unix Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__UNIX__)
|
||||
|
||||
#include <dlfcn.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* opens a shared library */
|
||||
lib_t openLibrary(const std::string& file)
|
||||
{
|
||||
#if defined(__MACOSX__)
|
||||
std::string fullName = "lib"+file+".dylib";
|
||||
#else
|
||||
std::string fullName = "lib"+file+".so";
|
||||
#endif
|
||||
void* lib = dlopen(fullName.c_str(), RTLD_NOW);
|
||||
if (lib) return lib_t(lib);
|
||||
FileName executable = getExecutableFileName();
|
||||
lib = dlopen((executable.path() + fullName).c_str(),RTLD_NOW);
|
||||
if (lib == nullptr) {
|
||||
const char* error = dlerror();
|
||||
if (error) {
|
||||
THROW_RUNTIME_ERROR(error);
|
||||
} else {
|
||||
THROW_RUNTIME_ERROR("could not load library "+executable.str());
|
||||
}
|
||||
}
|
||||
return lib_t(lib);
|
||||
}
|
||||
|
||||
/* returns address of a symbol from the library */
|
||||
void* getSymbol(lib_t lib, const std::string& sym) {
|
||||
return dlsym(lib,sym.c_str());
|
||||
}
|
||||
|
||||
/* closes the shared library */
|
||||
void closeLibrary(lib_t lib) {
|
||||
dlclose(lib);
|
||||
}
|
||||
}
|
||||
#endif
|
21
thirdparty/embree/common/sys/library.h
vendored
Normal file
21
thirdparty/embree/common/sys/library.h
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! type for shared library */
|
||||
typedef struct opaque_lib_t* lib_t;
|
||||
|
||||
/*! loads a shared library */
|
||||
lib_t openLibrary(const std::string& file);
|
||||
|
||||
/*! returns address of a symbol from the library */
|
||||
void* getSymbol(lib_t lib, const std::string& sym);
|
||||
|
||||
/*! unloads a shared library */
|
||||
void closeLibrary(lib_t lib);
|
||||
}
|
58
thirdparty/embree/common/sys/mutex.cpp
vendored
Normal file
58
thirdparty/embree/common/sys/mutex.cpp
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "mutex.h"
|
||||
#include "regression.h"
|
||||
|
||||
#if defined(__WIN32__) && !defined(PTHREADS_WIN32)
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
MutexSys::MutexSys() { mutex = new CRITICAL_SECTION; InitializeCriticalSection((CRITICAL_SECTION*)mutex); }
|
||||
MutexSys::~MutexSys() { DeleteCriticalSection((CRITICAL_SECTION*)mutex); delete (CRITICAL_SECTION*)mutex; }
|
||||
void MutexSys::lock() { EnterCriticalSection((CRITICAL_SECTION*)mutex); }
|
||||
bool MutexSys::try_lock() { return TryEnterCriticalSection((CRITICAL_SECTION*)mutex) != 0; }
|
||||
void MutexSys::unlock() { LeaveCriticalSection((CRITICAL_SECTION*)mutex); }
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__UNIX__) || defined(PTHREADS_WIN32)
|
||||
#include <pthread.h>
|
||||
namespace embree
|
||||
{
|
||||
/*! system mutex using pthreads */
|
||||
MutexSys::MutexSys()
|
||||
{
|
||||
mutex = new pthread_mutex_t;
|
||||
if (pthread_mutex_init((pthread_mutex_t*)mutex, nullptr) != 0)
|
||||
THROW_RUNTIME_ERROR("pthread_mutex_init failed");
|
||||
}
|
||||
|
||||
MutexSys::~MutexSys()
|
||||
{
|
||||
MAYBE_UNUSED bool ok = pthread_mutex_destroy((pthread_mutex_t*)mutex) == 0;
|
||||
assert(ok);
|
||||
delete (pthread_mutex_t*)mutex;
|
||||
mutex = nullptr;
|
||||
}
|
||||
|
||||
void MutexSys::lock()
|
||||
{
|
||||
if (pthread_mutex_lock((pthread_mutex_t*)mutex) != 0)
|
||||
THROW_RUNTIME_ERROR("pthread_mutex_lock failed");
|
||||
}
|
||||
|
||||
bool MutexSys::try_lock() {
|
||||
return pthread_mutex_trylock((pthread_mutex_t*)mutex) == 0;
|
||||
}
|
||||
|
||||
void MutexSys::unlock()
|
||||
{
|
||||
if (pthread_mutex_unlock((pthread_mutex_t*)mutex) != 0)
|
||||
THROW_RUNTIME_ERROR("pthread_mutex_unlock failed");
|
||||
}
|
||||
};
|
||||
#endif
|
104
thirdparty/embree/common/sys/mutex.h
vendored
Normal file
104
thirdparty/embree/common/sys/mutex.h
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
#include "intrinsics.h"
|
||||
#include "atomic.h"
|
||||
|
||||
#define CPU_CACHELINE_SIZE 64
|
||||
namespace embree
|
||||
{
|
||||
/*! system mutex */
|
||||
class MutexSys {
|
||||
friend struct ConditionImplementation;
|
||||
public:
|
||||
MutexSys();
|
||||
~MutexSys();
|
||||
|
||||
private:
|
||||
MutexSys (const MutexSys& other) DELETED; // do not implement
|
||||
MutexSys& operator= (const MutexSys& other) DELETED; // do not implement
|
||||
|
||||
public:
|
||||
void lock();
|
||||
bool try_lock();
|
||||
void unlock();
|
||||
|
||||
protected:
|
||||
void* mutex;
|
||||
};
|
||||
|
||||
/*! spinning mutex */
|
||||
class SpinLock
|
||||
{
|
||||
public:
|
||||
|
||||
SpinLock ()
|
||||
: flag(false) {}
|
||||
|
||||
__forceinline bool isLocked() {
|
||||
return flag.load();
|
||||
}
|
||||
|
||||
__forceinline void lock()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
while (flag.load())
|
||||
{
|
||||
_mm_pause();
|
||||
_mm_pause();
|
||||
}
|
||||
|
||||
bool expected = false;
|
||||
if (flag.compare_exchange_strong(expected,true,std::memory_order_acquire))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline bool try_lock()
|
||||
{
|
||||
bool expected = false;
|
||||
if (flag.load() != expected) {
|
||||
return false;
|
||||
}
|
||||
return flag.compare_exchange_strong(expected,true,std::memory_order_acquire);
|
||||
}
|
||||
|
||||
__forceinline void unlock() {
|
||||
flag.store(false,std::memory_order_release);
|
||||
}
|
||||
|
||||
__forceinline void wait_until_unlocked()
|
||||
{
|
||||
while(flag.load())
|
||||
{
|
||||
_mm_pause();
|
||||
_mm_pause();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
atomic<bool> flag;
|
||||
};
|
||||
|
||||
class PaddedSpinLock : public SpinLock
|
||||
{
|
||||
private:
|
||||
MAYBE_UNUSED char padding[CPU_CACHELINE_SIZE - sizeof(SpinLock)];
|
||||
};
|
||||
/*! safe mutex lock and unlock helper */
|
||||
template<typename Mutex> class Lock {
|
||||
public:
|
||||
Lock (Mutex& mutex) : mutex(mutex), locked(true) { mutex.lock(); }
|
||||
Lock (Mutex& mutex, bool locked) : mutex(mutex), locked(locked) {}
|
||||
~Lock() { if (locked) mutex.unlock(); }
|
||||
__forceinline void lock() { assert(!locked); locked = true; mutex.lock(); }
|
||||
__forceinline bool isLocked() const { return locked; }
|
||||
protected:
|
||||
Mutex& mutex;
|
||||
bool locked;
|
||||
};
|
||||
}
|
616
thirdparty/embree/common/sys/platform.h
vendored
Normal file
616
thirdparty/embree/common/sys/platform.h
vendored
Normal file
@@ -0,0 +1,616 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !defined(_CRT_SECURE_NO_WARNINGS)
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#include <cstddef>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <stdint.h>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
#define __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#pragma clang diagnostic ignored "-W#pragma-messages"
|
||||
|
||||
#include <sycl/sycl.hpp>
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
#include "sycl.h"
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
#define CONSTANT __attribute__((opencl_constant))
|
||||
#else
|
||||
#define CONSTANT
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// detect platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* detect 32 or 64 Intel platform */
|
||||
#if defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
|
||||
#define __X86_64__
|
||||
#define __X86_ASM__
|
||||
#elif defined(__i386__) || defined(_M_IX86)
|
||||
#define __X86_ASM__
|
||||
#endif
|
||||
|
||||
/* detect 64 bit platform */
|
||||
#if defined(__X86_64__) || defined(__aarch64__)
|
||||
#define __64BIT__
|
||||
#endif
|
||||
|
||||
/* detect Linux platform */
|
||||
#if defined(linux) || defined(__linux__) || defined(__LINUX__)
|
||||
# if !defined(__LINUX__)
|
||||
# define __LINUX__
|
||||
# endif
|
||||
# if !defined(__UNIX__)
|
||||
# define __UNIX__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* detect FreeBSD platform */
|
||||
#if defined(__FreeBSD__) || defined(__FREEBSD__)
|
||||
# if !defined(__FREEBSD__)
|
||||
# define __FREEBSD__
|
||||
# endif
|
||||
# if !defined(__UNIX__)
|
||||
# define __UNIX__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* detect Windows 95/98/NT/2000/XP/Vista/7/8/10 platform */
|
||||
#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)) && !defined(__CYGWIN__)
|
||||
# if !defined(__WIN32__)
|
||||
# define __WIN32__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* detect Cygwin platform */
|
||||
#if defined(__CYGWIN__)
|
||||
# if !defined(__UNIX__)
|
||||
# define __UNIX__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* detect MAC OS X platform */
|
||||
#if defined(__APPLE__) || defined(MACOSX) || defined(__MACOSX__)
|
||||
# if !defined(__MACOSX__)
|
||||
# define __MACOSX__
|
||||
# endif
|
||||
# if !defined(__UNIX__)
|
||||
# define __UNIX__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* try to detect other Unix systems */
|
||||
#if defined(__unix__) || defined (unix) || defined(__unix) || defined(_unix)
|
||||
# if !defined(__UNIX__)
|
||||
# define __UNIX__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Macros
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef __WIN32__
|
||||
# if defined(EMBREE_STATIC_LIB)
|
||||
# define dll_export
|
||||
# define dll_import
|
||||
# else
|
||||
# define dll_export __declspec(dllexport)
|
||||
# define dll_import __declspec(dllimport)
|
||||
# endif
|
||||
#else
|
||||
# define dll_export __attribute__ ((visibility ("default")))
|
||||
# define dll_import
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) && !defined(__MINGW32__)
|
||||
#if !defined(__noinline)
|
||||
#define __noinline __declspec(noinline)
|
||||
#endif
|
||||
//#define __forceinline __forceinline
|
||||
//#define __restrict __restrict
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#define __restrict__ __restrict
|
||||
#else
|
||||
#define __restrict__ //__restrict // causes issues with MSVC
|
||||
#endif
|
||||
#if !defined(__thread) && !defined(__INTEL_LLVM_COMPILER)
|
||||
#define __thread __declspec(thread)
|
||||
#endif
|
||||
#if !defined(__aligned)
|
||||
#define __aligned(...) __declspec(align(__VA_ARGS__))
|
||||
#endif
|
||||
//#define __FUNCTION__ __FUNCTION__
|
||||
#define debugbreak() __debugbreak()
|
||||
|
||||
#else
|
||||
#if !defined(__noinline)
|
||||
#define __noinline __attribute__((noinline))
|
||||
#endif
|
||||
#if !defined(__forceinline)
|
||||
#define __forceinline inline __attribute__((always_inline))
|
||||
#endif
|
||||
//#define __restrict __restrict
|
||||
//#define __thread __thread
|
||||
#if !defined(__aligned)
|
||||
#define __aligned(...) __attribute__((aligned(__VA_ARGS__)))
|
||||
#endif
|
||||
#if !defined(__FUNCTION__)
|
||||
#define __FUNCTION__ __PRETTY_FUNCTION__
|
||||
#endif
|
||||
#define debugbreak() asm ("int $3")
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define MAYBE_UNUSED __attribute__((unused))
|
||||
#else
|
||||
#define MAYBE_UNUSED
|
||||
#endif
|
||||
|
||||
#if !defined(_unused)
|
||||
#define _unused(x) ((void)(x))
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900) // before VS2015 deleted functions are not supported properly
|
||||
#define DELETED
|
||||
#else
|
||||
#define DELETED = delete
|
||||
#endif
|
||||
|
||||
#if !defined(likely)
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) || defined(__SYCL_DEVICE_ONLY__)
|
||||
#define likely(expr) (expr)
|
||||
#define unlikely(expr) (expr)
|
||||
#else
|
||||
#define likely(expr) __builtin_expect((bool)(expr),true )
|
||||
#define unlikely(expr) __builtin_expect((bool)(expr),false)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Error handling and debugging
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* debug printing macros */
|
||||
#define STRING(x) #x
|
||||
#define TOSTRING(x) STRING(x)
|
||||
#define PING embree_cout_uniform << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl
|
||||
#define PRINT(x) embree_cout << STRING(x) << " = " << (x) << embree_endl
|
||||
#define PRINT2(x,y) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
|
||||
#define PRINT3(x,y,z) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
|
||||
#define PRINT4(x,y,z,w) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
|
||||
|
||||
#define UPRINT(x) embree_cout_uniform << STRING(x) << " = " << (x) << embree_endl
|
||||
#define UPRINT2(x,y) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
|
||||
#define UPRINT3(x,y,z) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
|
||||
#define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
|
||||
|
||||
#if defined(DEBUG) // only report file and line in debug mode
|
||||
#define THROW_RUNTIME_ERROR(str) \
|
||||
printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
|
||||
//throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
|
||||
#else
|
||||
#define THROW_RUNTIME_ERROR(str) \
|
||||
abort(); //throw std::runtime_error(str);
|
||||
#endif
|
||||
|
||||
#define FATAL(x) THROW_RUNTIME_ERROR(x)
|
||||
#define WARNING(x) { std::cerr << "Warning: " << x << embree_endl << std::flush; }
|
||||
|
||||
#define NOT_IMPLEMENTED FATAL(std::string(__FUNCTION__) + " not implemented")
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Basic types
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* default floating-point type */
|
||||
namespace embree {
|
||||
typedef float real;
|
||||
}
|
||||
|
||||
/* windows does not have ssize_t */
|
||||
#if defined(__WIN32__)
|
||||
#if defined(__64BIT__)
|
||||
typedef int64_t ssize_t;
|
||||
#else
|
||||
typedef int32_t ssize_t;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Basic utility functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline std::string toString(long long value) {
|
||||
return std::to_string(value);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Disable some compiler warnings
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
//#pragma warning(disable:265 ) // floating-point operation result is out of range
|
||||
//#pragma warning(disable:383 ) // value copied to temporary, reference to temporary used
|
||||
//#pragma warning(disable:869 ) // parameter was never referenced
|
||||
//#pragma warning(disable:981 ) // operands are evaluated in unspecified order
|
||||
//#pragma warning(disable:1418) // external function definition with no prior declaration
|
||||
//#pragma warning(disable:1419) // external declaration in primary source file
|
||||
//#pragma warning(disable:1572) // floating-point equality and inequality comparisons are unreliable
|
||||
//#pragma warning(disable:94 ) // the size of an array must be greater than zero
|
||||
//#pragma warning(disable:1599) // declaration hides parameter
|
||||
//#pragma warning(disable:424 ) // extra ";" ignored
|
||||
#pragma warning(disable:2196) // routine is both "inline" and "noinline"
|
||||
//#pragma warning(disable:177 ) // label was declared but never referenced
|
||||
//#pragma warning(disable:114 ) // function was referenced but not defined
|
||||
//#pragma warning(disable:819 ) // template nesting depth does not match the previous declaration of function
|
||||
#pragma warning(disable:15335) // was not vectorized: vectorization possible but seems inefficient
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
//#pragma warning(disable:4200) // nonstandard extension used : zero-sized array in struct/union
|
||||
#pragma warning(disable:4800) // forcing value to bool 'true' or 'false' (performance warning)
|
||||
//#pragma warning(disable:4267) // '=' : conversion from 'size_t' to 'unsigned long', possible loss of data
|
||||
#pragma warning(disable:4244) // 'argument' : conversion from 'ssize_t' to 'unsigned int', possible loss of data
|
||||
#pragma warning(disable:4267) // conversion from 'size_t' to 'const int', possible loss of data
|
||||
//#pragma warning(disable:4355) // 'this' : used in base member initializer list
|
||||
//#pragma warning(disable:391 ) // '<=' : signed / unsigned mismatch
|
||||
//#pragma warning(disable:4018) // '<' : signed / unsigned mismatch
|
||||
//#pragma warning(disable:4305) // 'initializing' : truncation from 'double' to 'float'
|
||||
//#pragma warning(disable:4068) // unknown pragma
|
||||
//#pragma warning(disable:4146) // unary minus operator applied to unsigned type, result still unsigned
|
||||
//#pragma warning(disable:4838) // conversion from 'unsigned int' to 'const int' requires a narrowing conversion)
|
||||
//#pragma warning(disable:4227) // anachronism used : qualifiers on reference are ignored
|
||||
#pragma warning(disable:4503) // decorated name length exceeded, name was truncated
|
||||
#pragma warning(disable:4180) // qualifier applied to function type has no meaning; ignored
|
||||
#pragma warning(disable:4258) // definition from the for loop is ignored; the definition from the enclosing scope is used
|
||||
|
||||
# if _MSC_VER < 1910 // prior to Visual studio 2017 (V141)
|
||||
# pragma warning(disable:4101) // warning C4101: 'x': unreferenced local variable // a compiler bug issues wrong warnings
|
||||
# pragma warning(disable:4789) // buffer '' of size 8 bytes will be overrun; 32 bytes will be written starting at offset 0
|
||||
# endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) && !defined(__INTEL_COMPILER)
|
||||
//#pragma clang diagnostic ignored "-Wunknown-pragmas"
|
||||
//#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
//#pragma clang diagnostic ignored "-Wreorder"
|
||||
//#pragma clang diagnostic ignored "-Wmicrosoft"
|
||||
//#pragma clang diagnostic ignored "-Wunused-private-field"
|
||||
//#pragma clang diagnostic ignored "-Wunused-local-typedef"
|
||||
//#pragma clang diagnostic ignored "-Wunused-function"
|
||||
//#pragma clang diagnostic ignored "-Wnarrowing"
|
||||
//#pragma clang diagnostic ignored "-Wc++11-narrowing"
|
||||
//#pragma clang diagnostic ignored "-Wdeprecated-register"
|
||||
//#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wpragmas"
|
||||
//#pragma GCC diagnostic ignored "-Wnarrowing"
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
//#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
//#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#pragma GCC diagnostic ignored "-Wattributes"
|
||||
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
|
||||
#pragma GCC diagnostic ignored "-Wsign-compare"
|
||||
#pragma GCC diagnostic ignored "-Wparentheses"
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) && defined(__WIN32__)
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
#pragma clang diagnostic ignored "-Wmicrosoft-cast"
|
||||
#pragma clang diagnostic ignored "-Wmicrosoft-enum-value"
|
||||
#pragma clang diagnostic ignored "-Wmicrosoft-include"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wunknown-pragmas"
|
||||
#endif
|
||||
|
||||
/* disabling deprecated warning, please use only where use of deprecated Embree API functions is desired */
|
||||
#if defined(__WIN32__) && defined(__INTEL_COMPILER)
|
||||
#define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 1478)) // warning: function was declared deprecated
|
||||
#define ENABLE_DEPRECATED_WARNING __pragma(warning (enable: 1478)) // warning: function was declared deprecated
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#define DISABLE_DEPRECATED_WARNING _Pragma("warning (disable: 1478)") // warning: function was declared deprecated
|
||||
#define ENABLE_DEPRECATED_WARNING _Pragma("warning (enable : 1478)") // warning: function was declared deprecated
|
||||
#elif defined(__clang__)
|
||||
#define DISABLE_DEPRECATED_WARNING _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") // warning: xxx is deprecated
|
||||
#define ENABLE_DEPRECATED_WARNING _Pragma("clang diagnostic warning \"-Wdeprecated-declarations\"") // warning: xxx is deprecated
|
||||
#elif defined(__GNUC__)
|
||||
#define DISABLE_DEPRECATED_WARNING _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") // warning: xxx is deprecated
|
||||
#define ENABLE_DEPRECATED_WARNING _Pragma("GCC diagnostic warning \"-Wdeprecated-declarations\"") // warning: xxx is deprecated
|
||||
#elif defined(_MSC_VER)
|
||||
#define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 4996)) // warning: function was declared deprecated
|
||||
#define ENABLE_DEPRECATED_WARNING __pragma(warning (enable : 4996)) // warning: function was declared deprecated
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SYCL specific
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
#define sycl_printf0(format, ...) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
|
||||
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
|
||||
}
|
||||
|
||||
#define sycl_printf0_(format) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
|
||||
sycl::ext::oneapi::experimental::printf(fmt); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define sycl_printf0(format, ...) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
|
||||
}
|
||||
|
||||
#define sycl_printf0_(format) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
sycl::ext::oneapi::experimental::printf(fmt); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define sycl_printf(format, ...) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
|
||||
}
|
||||
|
||||
#define sycl_printf_(format) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
sycl::ext::oneapi::experimental::printf(fmt); \
|
||||
}
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct sycl_ostream_ {
|
||||
sycl_ostream_ (bool uniform) : uniform(uniform) {}
|
||||
bool uniform = false;
|
||||
};
|
||||
struct sycl_endl_ {};
|
||||
|
||||
#define embree_ostream embree::sycl_ostream_
|
||||
#define embree_cout embree::sycl_ostream_(false)
|
||||
#define embree_cout_uniform embree::sycl_ostream_(true)
|
||||
#define embree_endl embree::sycl_endl_()
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, int i)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%i",i);
|
||||
}
|
||||
else
|
||||
sycl_printf("%i ",i);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, unsigned int i)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%u",i);
|
||||
} else
|
||||
sycl_printf("%u ",i);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, float f)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%f",f);
|
||||
} else
|
||||
sycl_printf("%f ",f);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, double d)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%f",d);
|
||||
} else
|
||||
sycl_printf("%f ",d);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, uint64_t l)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%lu",l);
|
||||
} else
|
||||
sycl_printf("%lu ",l);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, long l)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%l",l);
|
||||
} else
|
||||
sycl_printf("%l ",l);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, void* p)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%p",p);
|
||||
} else
|
||||
sycl_printf("%p ",p);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, const char* c)
|
||||
{
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%s",c);
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, sycl_endl_)
|
||||
{
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf_("\n");
|
||||
return cout;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define embree_ostream std::ostream&
|
||||
#define embree_cout std::cout
|
||||
#define embree_cout_uniform std::cout
|
||||
#define embree_endl std::endl
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
/* printing out sycle vector types */
|
||||
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float4& v) {
|
||||
return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
|
||||
}
|
||||
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float3& v) {
|
||||
return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
|
||||
}
|
||||
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float2& v) {
|
||||
return out << "(" << v.x() << "," << v.y() << ")";
|
||||
}
|
||||
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int4& v) {
|
||||
return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
|
||||
}
|
||||
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int3& v) {
|
||||
return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
|
||||
}
|
||||
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int2& v) {
|
||||
return out << "(" << v.x() << "," << v.y() << ")";
|
||||
}
|
||||
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint4& v) {
|
||||
return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
|
||||
}
|
||||
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint3& v) {
|
||||
return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
|
||||
}
|
||||
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint2& v) {
|
||||
return out << "(" << v.x() << "," << v.y() << ")";
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
inline void tab(std::ostream& cout, int n) {
|
||||
for (int i=0; i<n; i++) cout << " ";
|
||||
}
|
||||
|
||||
inline std::string tab(int depth) {
|
||||
return std::string(2*depth,' ');
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Some macros for static profiling
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined (__GNUC__)
|
||||
#define IACA_SSC_MARK( MARK_ID ) \
|
||||
__asm__ __volatile__ ( \
|
||||
"\n\t movl $"#MARK_ID", %%ebx" \
|
||||
"\n\t .byte 0x64, 0x67, 0x90" \
|
||||
: : : "memory" );
|
||||
|
||||
#define IACA_UD_BYTES __asm__ __volatile__ ("\n\t .byte 0x0F, 0x0B");
|
||||
|
||||
#else
|
||||
#define IACA_UD_BYTES {__asm _emit 0x0F \
|
||||
__asm _emit 0x0B}
|
||||
|
||||
#define IACA_SSC_MARK(x) {__asm mov ebx, x\
|
||||
__asm _emit 0x64 \
|
||||
__asm _emit 0x67 \
|
||||
__asm _emit 0x90 }
|
||||
|
||||
#define IACA_VC64_START __writegsbyte(111, 111);
|
||||
#define IACA_VC64_END __writegsbyte(222, 222);
|
||||
|
||||
#endif
|
||||
|
||||
#define IACA_START {IACA_UD_BYTES \
|
||||
IACA_SSC_MARK(111)}
|
||||
#define IACA_END {IACA_SSC_MARK(222) \
|
||||
IACA_UD_BYTES}
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Closure>
|
||||
struct OnScopeExitHelper
|
||||
{
|
||||
OnScopeExitHelper (const Closure f) : active(true), f(f) {}
|
||||
~OnScopeExitHelper() { if (active) f(); }
|
||||
void deactivate() { active = false; }
|
||||
bool active;
|
||||
const Closure f;
|
||||
};
|
||||
|
||||
template <typename Closure>
|
||||
OnScopeExitHelper<Closure> OnScopeExit(const Closure f) {
|
||||
return OnScopeExitHelper<Closure>(f);
|
||||
}
|
||||
|
||||
#define STRING_JOIN2(arg1, arg2) DO_STRING_JOIN2(arg1, arg2)
|
||||
#define DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
|
||||
#define ON_SCOPE_EXIT(code) \
|
||||
auto STRING_JOIN2(on_scope_exit_, __LINE__) = OnScopeExit([&](){code;})
|
||||
|
||||
template<typename Ty>
|
||||
std::unique_ptr<Ty> make_unique(Ty* ptr) {
|
||||
return std::unique_ptr<Ty>(ptr);
|
||||
}
|
||||
|
||||
}
|
122
thirdparty/embree/common/sys/ref.h
vendored
Normal file
122
thirdparty/embree/common/sys/ref.h
vendored
Normal file
@@ -0,0 +1,122 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "atomic.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct NullTy {
|
||||
};
|
||||
|
||||
extern MAYBE_UNUSED NullTy null;
|
||||
|
||||
class RefCount
|
||||
{
|
||||
public:
|
||||
RefCount(int val = 0) : refCounter(val) {}
|
||||
virtual ~RefCount() {};
|
||||
|
||||
virtual RefCount* refInc() { refCounter.fetch_add(1); return this; }
|
||||
virtual void refDec() { if (refCounter.fetch_add(-1) == 1) delete this; }
|
||||
private:
|
||||
std::atomic<size_t> refCounter;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Reference to single object
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<typename Type>
|
||||
class Ref
|
||||
{
|
||||
public:
|
||||
Type* ptr;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Constructors, Assignment & Cast Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline Ref() : ptr(nullptr) {}
|
||||
__forceinline Ref(NullTy) : ptr(nullptr) {}
|
||||
__forceinline Ref(const Ref& input) : ptr(input.ptr) { if (ptr) ptr->refInc(); }
|
||||
__forceinline Ref(Ref&& input) : ptr(input.ptr) { input.ptr = nullptr; }
|
||||
|
||||
__forceinline Ref(Type* const input) : ptr(input)
|
||||
{
|
||||
if (ptr)
|
||||
ptr->refInc();
|
||||
}
|
||||
|
||||
__forceinline ~Ref()
|
||||
{
|
||||
if (ptr)
|
||||
ptr->refDec();
|
||||
}
|
||||
|
||||
__forceinline Ref& operator =(const Ref& input)
|
||||
{
|
||||
if (input.ptr)
|
||||
input.ptr->refInc();
|
||||
if (ptr)
|
||||
ptr->refDec();
|
||||
ptr = input.ptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__forceinline Ref& operator =(Ref&& input)
|
||||
{
|
||||
if (ptr)
|
||||
ptr->refDec();
|
||||
ptr = input.ptr;
|
||||
input.ptr = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__forceinline Ref& operator =(Type* const input)
|
||||
{
|
||||
if (input)
|
||||
input->refInc();
|
||||
if (ptr)
|
||||
ptr->refDec();
|
||||
ptr = input;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__forceinline Ref& operator =(NullTy)
|
||||
{
|
||||
if (ptr)
|
||||
ptr->refDec();
|
||||
ptr = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__forceinline operator bool() const { return ptr != nullptr; }
|
||||
|
||||
__forceinline const Type& operator *() const { return *ptr; }
|
||||
__forceinline Type& operator *() { return *ptr; }
|
||||
__forceinline const Type* operator ->() const { return ptr; }
|
||||
__forceinline Type* operator ->() { return ptr; }
|
||||
|
||||
template<typename TypeOut>
|
||||
__forceinline Ref<TypeOut> cast() { return Ref<TypeOut>(static_cast<TypeOut*>(ptr)); }
|
||||
template<typename TypeOut>
|
||||
__forceinline const Ref<TypeOut> cast() const { return Ref<TypeOut>(static_cast<TypeOut*>(ptr)); }
|
||||
|
||||
template<typename TypeOut>
|
||||
__forceinline Ref<TypeOut> dynamicCast() { return Ref<TypeOut>(dynamic_cast<TypeOut*>(ptr)); }
|
||||
template<typename TypeOut>
|
||||
__forceinline const Ref<TypeOut> dynamicCast() const { return Ref<TypeOut>(dynamic_cast<TypeOut*>(ptr)); }
|
||||
};
|
||||
|
||||
template<typename Type> __forceinline bool operator < (const Ref<Type>& a, const Ref<Type>& b) { return a.ptr < b.ptr; }
|
||||
|
||||
template<typename Type> __forceinline bool operator ==(const Ref<Type>& a, NullTy ) { return a.ptr == nullptr; }
|
||||
template<typename Type> __forceinline bool operator ==(NullTy , const Ref<Type>& b) { return nullptr == b.ptr; }
|
||||
template<typename Type> __forceinline bool operator ==(const Ref<Type>& a, const Ref<Type>& b) { return a.ptr == b.ptr; }
|
||||
|
||||
template<typename Type> __forceinline bool operator !=(const Ref<Type>& a, NullTy ) { return a.ptr != nullptr; }
|
||||
template<typename Type> __forceinline bool operator !=(NullTy , const Ref<Type>& b) { return nullptr != b.ptr; }
|
||||
template<typename Type> __forceinline bool operator !=(const Ref<Type>& a, const Ref<Type>& b) { return a.ptr != b.ptr; }
|
||||
}
|
30
thirdparty/embree/common/sys/regression.cpp
vendored
Normal file
30
thirdparty/embree/common/sys/regression.cpp
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* registerRegressionTest is invoked from static initializers, thus
|
||||
* we cannot have the regression_tests variable as global static
|
||||
* variable due to issues with static variable initialization
|
||||
* order. */
|
||||
std::vector<RegressionTest*>& get_regression_tests()
|
||||
{
|
||||
static std::vector<RegressionTest*> regression_tests;
|
||||
return regression_tests;
|
||||
}
|
||||
|
||||
void registerRegressionTest(RegressionTest* test)
|
||||
{
|
||||
get_regression_tests().push_back(test);
|
||||
}
|
||||
|
||||
RegressionTest* getRegressionTest(size_t index)
|
||||
{
|
||||
if (index >= get_regression_tests().size())
|
||||
return nullptr;
|
||||
|
||||
return get_regression_tests()[index];
|
||||
}
|
||||
}
|
25
thirdparty/embree/common/sys/regression.h
vendored
Normal file
25
thirdparty/embree/common/sys/regression.h
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! virtual interface for all regression tests */
|
||||
struct RegressionTest
|
||||
{
|
||||
RegressionTest (std::string name) : name(name) {}
|
||||
virtual bool run() = 0;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
/*! registers a regression test */
|
||||
void registerRegressionTest(RegressionTest* test);
|
||||
|
||||
/*! run all regression tests */
|
||||
RegressionTest* getRegressionTest(size_t index);
|
||||
}
|
302
thirdparty/embree/common/sys/sycl.h
vendored
Normal file
302
thirdparty/embree/common/sys/sycl.h
vendored
Normal file
@@ -0,0 +1,302 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
using sycl::float16;
|
||||
using sycl::float8;
|
||||
using sycl::float4;
|
||||
using sycl::float3;
|
||||
using sycl::float2;
|
||||
using sycl::int16;
|
||||
using sycl::int8;
|
||||
using sycl::int4;
|
||||
using sycl::int3;
|
||||
using sycl::int2;
|
||||
using sycl::uint16;
|
||||
using sycl::uint8;
|
||||
using sycl::uint4;
|
||||
using sycl::uint3;
|
||||
using sycl::uint2;
|
||||
using sycl::uchar16;
|
||||
using sycl::uchar8;
|
||||
using sycl::uchar4;
|
||||
using sycl::uchar3;
|
||||
using sycl::uchar2;
|
||||
using sycl::ushort16;
|
||||
using sycl::ushort8;
|
||||
using sycl::ushort4;
|
||||
using sycl::ushort3;
|
||||
using sycl::ushort2;
|
||||
|
||||
#ifdef __SYCL_DEVICE_ONLY__
|
||||
#define GLOBAL __attribute__((opencl_global))
|
||||
#define LOCAL __attribute__((opencl_local))
|
||||
|
||||
SYCL_EXTERNAL extern int work_group_reduce_add(int x);
|
||||
SYCL_EXTERNAL extern float work_group_reduce_min(float x);
|
||||
SYCL_EXTERNAL extern float work_group_reduce_max(float x);
|
||||
|
||||
SYCL_EXTERNAL extern float atomic_min(volatile GLOBAL float *p, float val);
|
||||
SYCL_EXTERNAL extern float atomic_min(volatile LOCAL float *p, float val);
|
||||
SYCL_EXTERNAL extern float atomic_max(volatile GLOBAL float *p, float val);
|
||||
SYCL_EXTERNAL extern float atomic_max(volatile LOCAL float *p, float val);
|
||||
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_sub_group_ballot(bool valid);
|
||||
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_assume_uniform(void *p);
|
||||
|
||||
// Load message caching control
|
||||
|
||||
enum LSC_LDCC {
|
||||
LSC_LDCC_DEFAULT,
|
||||
LSC_LDCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
|
||||
LSC_LDCC_L1UC_L3C, // Override to L1 uncached and L3 cached
|
||||
LSC_LDCC_L1C_L3UC, // Override to L1 cached and L3 uncached
|
||||
LSC_LDCC_L1C_L3C, // Override to L1 cached and L3 cached
|
||||
LSC_LDCC_L1S_L3UC, // Override to L1 streaming load and L3 uncached
|
||||
LSC_LDCC_L1S_L3C, // Override to L1 streaming load and L3 cached
|
||||
LSC_LDCC_L1IAR_L3C, // Override to L1 invalidate-after-read, and L3 cached
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Store message caching control (also used for atomics)
|
||||
|
||||
enum LSC_STCC {
|
||||
LSC_STCC_DEFAULT,
|
||||
LSC_STCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
|
||||
LSC_STCC_L1UC_L3WB, // Override to L1 uncached and L3 written back
|
||||
LSC_STCC_L1WT_L3UC, // Override to L1 written through and L3 uncached
|
||||
LSC_STCC_L1WT_L3WB, // Override to L1 written through and L3 written back
|
||||
LSC_STCC_L1S_L3UC, // Override to L1 streaming and L3 uncached
|
||||
LSC_STCC_L1S_L3WB, // Override to L1 streaming and L3 written back
|
||||
LSC_STCC_L1WB_L3WB, // Override to L1 written through and L3 written back
|
||||
};
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
// LSC Loads
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uchar_to_uint (const GLOBAL uint8_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D8U32
|
||||
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_ushort_to_uint(const GLOBAL uint16_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D16U32
|
||||
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uint (const GLOBAL uint32_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V1
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::uint2 __builtin_IB_lsc_load_global_uint2 (const GLOBAL sycl::uint2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V2
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::uint3 __builtin_IB_lsc_load_global_uint3 (const GLOBAL sycl::uint3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V3
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::uint4 __builtin_IB_lsc_load_global_uint4 (const GLOBAL sycl::uint4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V4
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::uint8 __builtin_IB_lsc_load_global_uint8 (const GLOBAL sycl::uint8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V8
|
||||
SYCL_EXTERNAL /* extern "C" */ uint64_t __builtin_IB_lsc_load_global_ulong (const GLOBAL uint64_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V1
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::ulong2 __builtin_IB_lsc_load_global_ulong2 (const GLOBAL sycl::ulong2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V2
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::ulong3 __builtin_IB_lsc_load_global_ulong3 (const GLOBAL sycl::ulong3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V3
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::ulong4 __builtin_IB_lsc_load_global_ulong4 (const GLOBAL sycl::ulong4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V4
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::ulong8 __builtin_IB_lsc_load_global_ulong8 (const GLOBAL sycl::ulong8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V8
|
||||
|
||||
// global address space
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uchar_from_uint (GLOBAL uint8_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D8U32
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ushort_from_uint(GLOBAL uint16_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D16U32
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint (GLOBAL uint32_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D32V1
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint2 (GLOBAL sycl::uint2 *base, int immElemOff, sycl::uint2 val, enum LSC_STCC cacheOpt); //D32V2
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint3 (GLOBAL sycl::uint3 *base, int immElemOff, sycl::uint3 val, enum LSC_STCC cacheOpt); //D32V3
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint4 (GLOBAL sycl::uint4 *base, int immElemOff, sycl::uint4 val, enum LSC_STCC cacheOpt); //D32V4
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint8 (GLOBAL sycl::uint8 *base, int immElemOff, sycl::uint8 val, enum LSC_STCC cacheOpt); //D32V8
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong (GLOBAL uint64_t *base, int immElemOff, uint64_t val, enum LSC_STCC cacheOpt); //D64V1
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong2 (GLOBAL sycl::ulong2 *base, int immElemOff, sycl::ulong2 val, enum LSC_STCC cacheOpt); //D64V2
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong3 (GLOBAL sycl::ulong3 *base, int immElemOff, sycl::ulong3 val, enum LSC_STCC cacheOpt); //D64V3
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong4 (GLOBAL sycl::ulong4 *base, int immElemOff, sycl::ulong4 val, enum LSC_STCC cacheOpt); //D64V4
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong8 (GLOBAL sycl::ulong8 *base, int immElemOff, sycl::ulong8 val, enum LSC_STCC cacheOpt); //D64V8
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// prefetching
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// LSC Pre-Fetch Load functions with CacheControls
|
||||
// global address space
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uchar (const GLOBAL uint8_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D8U32
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ushort(const GLOBAL uint16_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D16U32
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint (const GLOBAL uint32_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V1
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint2 (const GLOBAL sycl::uint2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V2
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint3 (const GLOBAL sycl::uint3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V3
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint4 (const GLOBAL sycl::uint4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V4
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint8 (const GLOBAL sycl::uint8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V8
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong (const GLOBAL uint64_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V1
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong2(const GLOBAL sycl::ulong2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V2
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong3(const GLOBAL sycl::ulong3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V3
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong4(const GLOBAL sycl::ulong4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V4
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong8(const GLOBAL sycl::ulong8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V8
|
||||
|
||||
#else
|
||||
|
||||
#define GLOBAL
|
||||
#define LOCAL
|
||||
|
||||
/* dummy functions for host */
|
||||
inline int work_group_reduce_add(int x) { return x; }
|
||||
inline float work_group_reduce_min(float x) { return x; }
|
||||
inline float work_group_reduce_max(float x) { return x; }
|
||||
|
||||
inline float atomic_min(volatile float *p, float val) { return val; };
|
||||
inline float atomic_max(volatile float *p, float val) { return val; };
|
||||
|
||||
inline uint32_t intel_sub_group_ballot(bool valid) { return 0; }
|
||||
|
||||
#endif
|
||||
|
||||
/* creates a temporary that is enforced to be uniform */
|
||||
#define SYCL_UNIFORM_VAR(Ty,tmp,k) \
|
||||
Ty tmp##_data; \
|
||||
Ty* p##tmp##_data = (Ty*) sub_group_broadcast((uint64_t)&tmp##_data,k); \
|
||||
Ty& tmp = *p##tmp##_data;
|
||||
|
||||
#if !defined(__forceinline)
|
||||
#define __forceinline inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
#if __SYCL_COMPILER_VERSION < 20210801
|
||||
#define all_of_group all_of
|
||||
#define any_of_group any_of
|
||||
#define none_of_group none_of
|
||||
#define group_broadcast broadcast
|
||||
#define reduce_over_group reduce
|
||||
#define exclusive_scan_over_group exclusive_scan
|
||||
#define inclusive_scan_over_group inclusive_scan
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename T>
|
||||
__forceinline T cselect(const bool mask, const T &a, const T &b)
|
||||
{
|
||||
return sycl::select(b,a,(int)mask);
|
||||
}
|
||||
|
||||
template<typename T, typename M>
|
||||
__forceinline T cselect(const M &mask, const T &a, const T &b)
|
||||
{
|
||||
return sycl::select(b,a,mask);
|
||||
}
|
||||
|
||||
#define XSTR(x) STR(x)
|
||||
#define STR(x) #x
|
||||
|
||||
__forceinline const sycl::sub_group this_sub_group() {
|
||||
#if __LIBSYCL_MAJOR_VERSION >= 8
|
||||
return sycl::ext::oneapi::this_work_item::get_sub_group();
|
||||
#else
|
||||
return sycl::ext::oneapi::experimental::this_sub_group();
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline const uint32_t get_sub_group_local_id() {
|
||||
return this_sub_group().get_local_id()[0];
|
||||
}
|
||||
|
||||
__forceinline const uint32_t get_sub_group_size() {
|
||||
return this_sub_group().get_max_local_range().size();
|
||||
}
|
||||
|
||||
__forceinline const uint32_t get_sub_group_id() {
|
||||
return this_sub_group().get_group_id()[0];
|
||||
}
|
||||
|
||||
__forceinline const uint32_t get_num_sub_groups() {
|
||||
return this_sub_group().get_group_range().size();
|
||||
}
|
||||
|
||||
__forceinline uint32_t sub_group_ballot(bool pred) {
|
||||
return intel_sub_group_ballot(pred);
|
||||
}
|
||||
|
||||
__forceinline bool sub_group_all_of(bool pred) {
|
||||
return sycl::all_of_group(this_sub_group(),pred);
|
||||
}
|
||||
|
||||
__forceinline bool sub_group_any_of(bool pred) {
|
||||
return sycl::any_of_group(this_sub_group(),pred);
|
||||
}
|
||||
|
||||
__forceinline bool sub_group_none_of(bool pred) {
|
||||
return sycl::none_of_group(this_sub_group(),pred);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_broadcast(T x, sycl::id<1> local_id) {
|
||||
return sycl::group_broadcast<sycl::sub_group>(this_sub_group(),x,local_id);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_make_uniform(T x) {
|
||||
return sub_group_broadcast(x,sycl::ctz(intel_sub_group_ballot(true)));
|
||||
}
|
||||
|
||||
__forceinline void assume_uniform_array(void* ptr) {
|
||||
#ifdef __SYCL_DEVICE_ONLY__
|
||||
__builtin_IB_assume_uniform(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, BinaryOperation binary_op) {
|
||||
return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,binary_op);
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, T init, BinaryOperation binary_op) {
|
||||
return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,init,binary_op);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_reduce_min(T x, T init) {
|
||||
return sub_group_reduce(x, init, sycl::ext::oneapi::minimum<T>());
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_reduce_min(T x) {
|
||||
return sub_group_reduce(x, sycl::ext::oneapi::minimum<T>());
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_reduce_max(T x) {
|
||||
return sub_group_reduce(x, sycl::ext::oneapi::maximum<T>());
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_reduce_add(T x) {
|
||||
return sub_group_reduce(x, sycl::ext::oneapi::plus<T>());
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, BinaryOperation binary_op) {
|
||||
return sycl::exclusive_scan_over_group(this_sub_group(),x,binary_op);
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan_min(T x) {
|
||||
return sub_group_exclusive_scan(x,sycl::ext::oneapi::minimum<T>());
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, T init, BinaryOperation binary_op) {
|
||||
return sycl::exclusive_scan_over_group(this_sub_group(),x,init,binary_op);
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op) {
|
||||
return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op);
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op, T init) {
|
||||
return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op,init);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_load(const void* src) {
|
||||
return this_sub_group().load(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)src));
|
||||
}
|
||||
|
||||
template <typename T> __forceinline void sub_group_store(void* dst, const T& x) {
|
||||
this_sub_group().store(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)dst),x);
|
||||
}
|
||||
}
|
||||
|
||||
#if __SYCL_COMPILER_VERSION < 20210801
|
||||
#undef all_of_group
|
||||
#undef any_of_group
|
||||
#undef none_of_group
|
||||
#undef group_broadcast
|
||||
#undef reduce_over_group
|
||||
#undef exclusive_scan_over_group
|
||||
#undef inclusive_scan_over_group
|
||||
#endif
|
727
thirdparty/embree/common/sys/sysinfo.cpp
vendored
Normal file
727
thirdparty/embree/common/sys/sysinfo.cpp
vendored
Normal file
@@ -0,0 +1,727 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER)
|
||||
// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
#include "sysinfo.h"
|
||||
#include "intrinsics.h"
|
||||
#include "estring.h"
|
||||
#include "ref.h"
|
||||
#if defined(__FREEBSD__)
|
||||
#include <sys/cpuset.h>
|
||||
#include <pthread_np.h>
|
||||
typedef cpuset_t cpu_set_t;
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// All Platforms
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace embree
|
||||
{
|
||||
NullTy null;
|
||||
|
||||
std::string getPlatformName()
|
||||
{
|
||||
#if defined(__ANDROID__) && !defined(__64BIT__)
|
||||
return "Android (32bit)";
|
||||
#elif defined(__ANDROID__) && defined(__64BIT__)
|
||||
return "Android (64bit)";
|
||||
#elif defined(__LINUX__) && !defined(__64BIT__)
|
||||
return "Linux (32bit)";
|
||||
#elif defined(__LINUX__) && defined(__64BIT__)
|
||||
return "Linux (64bit)";
|
||||
#elif defined(__FREEBSD__) && !defined(__64BIT__)
|
||||
return "FreeBSD (32bit)";
|
||||
#elif defined(__FREEBSD__) && defined(__64BIT__)
|
||||
return "FreeBSD (64bit)";
|
||||
#elif defined(__CYGWIN__) && !defined(__64BIT__)
|
||||
return "Cygwin (32bit)";
|
||||
#elif defined(__CYGWIN__) && defined(__64BIT__)
|
||||
return "Cygwin (64bit)";
|
||||
#elif defined(__WIN32__) && !defined(__64BIT__)
|
||||
return "Windows (32bit)";
|
||||
#elif defined(__WIN32__) && defined(__64BIT__)
|
||||
return "Windows (64bit)";
|
||||
#elif defined(__MACOSX__) && !defined(__64BIT__)
|
||||
return "Mac OS X (32bit)";
|
||||
#elif defined(__MACOSX__) && defined(__64BIT__)
|
||||
return "Mac OS X (64bit)";
|
||||
#elif defined(__UNIX__) && !defined(__64BIT__)
|
||||
return "Unix (32bit)";
|
||||
#elif defined(__UNIX__) && defined(__64BIT__)
|
||||
return "Unix (64bit)";
|
||||
#else
|
||||
return "Unknown";
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string getCompilerName()
|
||||
{
|
||||
#if defined(__INTEL_COMPILER)
|
||||
int icc_mayor = __INTEL_COMPILER / 100 % 100;
|
||||
int icc_minor = __INTEL_COMPILER % 100;
|
||||
std::string version = "Intel Compiler ";
|
||||
version += toString(icc_mayor);
|
||||
version += "." + toString(icc_minor);
|
||||
#if defined(__INTEL_COMPILER_UPDATE)
|
||||
version += "." + toString(__INTEL_COMPILER_UPDATE);
|
||||
#endif
|
||||
return version;
|
||||
#elif defined(__clang__)
|
||||
return "CLANG " __clang_version__;
|
||||
#elif defined (__GNUC__)
|
||||
return "GCC " __VERSION__;
|
||||
#elif defined(_MSC_VER)
|
||||
std::string version = toString(_MSC_FULL_VER);
|
||||
version.insert(4,".");
|
||||
version.insert(9,".");
|
||||
version.insert(2,".");
|
||||
return "Visual C++ Compiler " + version;
|
||||
#else
|
||||
return "Unknown Compiler";
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string getCPUVendor()
|
||||
{
|
||||
#if defined(__X86_ASM__)
|
||||
int cpuinfo[4];
|
||||
__cpuid (cpuinfo, 0);
|
||||
int name[4];
|
||||
name[0] = cpuinfo[1];
|
||||
name[1] = cpuinfo[3];
|
||||
name[2] = cpuinfo[2];
|
||||
name[3] = 0;
|
||||
return (char*)name;
|
||||
#elif defined(__ARM_NEON)
|
||||
return "ARM";
|
||||
#else
|
||||
return "Unknown";
|
||||
#endif
|
||||
}
|
||||
|
||||
CPU getCPUModel()
|
||||
{
|
||||
#if defined(__X86_ASM__)
|
||||
if (getCPUVendor() != "GenuineIntel")
|
||||
return CPU::UNKNOWN;
|
||||
|
||||
int out[4];
|
||||
__cpuid(out, 0);
|
||||
if (out[0] < 1) return CPU::UNKNOWN;
|
||||
__cpuid(out, 1);
|
||||
|
||||
/* please see CPUID documentation for these formulas */
|
||||
uint32_t family_ID = (out[0] >> 8) & 0x0F;
|
||||
uint32_t extended_family_ID = (out[0] >> 20) & 0xFF;
|
||||
|
||||
uint32_t model_ID = (out[0] >> 4) & 0x0F;
|
||||
uint32_t extended_model_ID = (out[0] >> 16) & 0x0F;
|
||||
|
||||
uint32_t DisplayFamily = family_ID;
|
||||
if (family_ID == 0x0F)
|
||||
DisplayFamily += extended_family_ID;
|
||||
|
||||
uint32_t DisplayModel = model_ID;
|
||||
if (family_ID == 0x06 || family_ID == 0x0F)
|
||||
DisplayModel += extended_model_ID << 4;
|
||||
|
||||
uint32_t DisplayFamily_DisplayModel = (DisplayFamily << 8) + (DisplayModel << 0);
|
||||
|
||||
// Data from Intel® 64 and IA-32 Architectures, Volume 4, Chapter 2, Table 2-1 (CPUID Signature Values of DisplayFamily_DisplayModel)
|
||||
if (DisplayFamily_DisplayModel == 0x067D) return CPU::CORE_ICE_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x067E) return CPU::CORE_ICE_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x068C) return CPU::CORE_TIGER_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x06A5) return CPU::CORE_COMET_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x06A6) return CPU::CORE_COMET_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x0666) return CPU::CORE_CANNON_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x068E) return CPU::CORE_KABY_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x069E) return CPU::CORE_KABY_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x066A) return CPU::XEON_ICE_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x066C) return CPU::XEON_ICE_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x0655) return CPU::XEON_SKY_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x064E) return CPU::CORE_SKY_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x065E) return CPU::CORE_SKY_LAKE;
|
||||
if (DisplayFamily_DisplayModel == 0x0656) return CPU::XEON_BROADWELL;
|
||||
if (DisplayFamily_DisplayModel == 0x064F) return CPU::XEON_BROADWELL;
|
||||
if (DisplayFamily_DisplayModel == 0x0647) return CPU::CORE_BROADWELL;
|
||||
if (DisplayFamily_DisplayModel == 0x063D) return CPU::CORE_BROADWELL;
|
||||
if (DisplayFamily_DisplayModel == 0x063F) return CPU::XEON_HASWELL;
|
||||
if (DisplayFamily_DisplayModel == 0x063C) return CPU::CORE_HASWELL;
|
||||
if (DisplayFamily_DisplayModel == 0x0645) return CPU::CORE_HASWELL;
|
||||
if (DisplayFamily_DisplayModel == 0x0646) return CPU::CORE_HASWELL;
|
||||
if (DisplayFamily_DisplayModel == 0x063E) return CPU::XEON_IVY_BRIDGE;
|
||||
if (DisplayFamily_DisplayModel == 0x063A) return CPU::CORE_IVY_BRIDGE;
|
||||
if (DisplayFamily_DisplayModel == 0x062D) return CPU::SANDY_BRIDGE;
|
||||
if (DisplayFamily_DisplayModel == 0x062F) return CPU::SANDY_BRIDGE;
|
||||
if (DisplayFamily_DisplayModel == 0x062A) return CPU::SANDY_BRIDGE;
|
||||
if (DisplayFamily_DisplayModel == 0x062E) return CPU::NEHALEM;
|
||||
if (DisplayFamily_DisplayModel == 0x0625) return CPU::NEHALEM;
|
||||
if (DisplayFamily_DisplayModel == 0x062C) return CPU::NEHALEM;
|
||||
if (DisplayFamily_DisplayModel == 0x061E) return CPU::NEHALEM;
|
||||
if (DisplayFamily_DisplayModel == 0x061F) return CPU::NEHALEM;
|
||||
if (DisplayFamily_DisplayModel == 0x061A) return CPU::NEHALEM;
|
||||
if (DisplayFamily_DisplayModel == 0x061D) return CPU::NEHALEM;
|
||||
if (DisplayFamily_DisplayModel == 0x0617) return CPU::CORE2;
|
||||
if (DisplayFamily_DisplayModel == 0x060F) return CPU::CORE2;
|
||||
if (DisplayFamily_DisplayModel == 0x060E) return CPU::CORE1;
|
||||
|
||||
if (DisplayFamily_DisplayModel == 0x0685) return CPU::XEON_PHI_KNIGHTS_MILL;
|
||||
if (DisplayFamily_DisplayModel == 0x0657) return CPU::XEON_PHI_KNIGHTS_LANDING;
|
||||
|
||||
#elif defined(__ARM_NEON)
|
||||
return CPU::ARM;
|
||||
#endif
|
||||
|
||||
return CPU::UNKNOWN;
|
||||
}
|
||||
|
||||
std::string stringOfCPUModel(CPU model)
|
||||
{
|
||||
switch (model) {
|
||||
case CPU::XEON_ICE_LAKE : return "Xeon Ice Lake";
|
||||
case CPU::CORE_ICE_LAKE : return "Core Ice Lake";
|
||||
case CPU::CORE_TIGER_LAKE : return "Core Tiger Lake";
|
||||
case CPU::CORE_COMET_LAKE : return "Core Comet Lake";
|
||||
case CPU::CORE_CANNON_LAKE : return "Core Cannon Lake";
|
||||
case CPU::CORE_KABY_LAKE : return "Core Kaby Lake";
|
||||
case CPU::XEON_SKY_LAKE : return "Xeon Sky Lake";
|
||||
case CPU::CORE_SKY_LAKE : return "Core Sky Lake";
|
||||
case CPU::XEON_PHI_KNIGHTS_MILL : return "Xeon Phi Knights Mill";
|
||||
case CPU::XEON_PHI_KNIGHTS_LANDING: return "Xeon Phi Knights Landing";
|
||||
case CPU::XEON_BROADWELL : return "Xeon Broadwell";
|
||||
case CPU::CORE_BROADWELL : return "Core Broadwell";
|
||||
case CPU::XEON_HASWELL : return "Xeon Haswell";
|
||||
case CPU::CORE_HASWELL : return "Core Haswell";
|
||||
case CPU::XEON_IVY_BRIDGE : return "Xeon Ivy Bridge";
|
||||
case CPU::CORE_IVY_BRIDGE : return "Core Ivy Bridge";
|
||||
case CPU::SANDY_BRIDGE : return "Sandy Bridge";
|
||||
case CPU::NEHALEM : return "Nehalem";
|
||||
case CPU::CORE2 : return "Core2";
|
||||
case CPU::CORE1 : return "Core";
|
||||
case CPU::ARM : return "ARM";
|
||||
case CPU::UNKNOWN : return "Unknown CPU";
|
||||
}
|
||||
return "Unknown CPU (error)";
|
||||
}
|
||||
|
||||
#if defined(__X86_ASM__)
|
||||
/* constants to access destination registers of CPUID instruction */
|
||||
static const int EAX = 0;
|
||||
static const int EBX = 1;
|
||||
static const int ECX = 2;
|
||||
static const int EDX = 3;
|
||||
|
||||
/* cpuid[eax=1].ecx */
|
||||
static const int CPU_FEATURE_BIT_SSE3 = 1 << 0;
|
||||
static const int CPU_FEATURE_BIT_SSSE3 = 1 << 9;
|
||||
static const int CPU_FEATURE_BIT_FMA3 = 1 << 12;
|
||||
static const int CPU_FEATURE_BIT_SSE4_1 = 1 << 19;
|
||||
static const int CPU_FEATURE_BIT_SSE4_2 = 1 << 20;
|
||||
//static const int CPU_FEATURE_BIT_MOVBE = 1 << 22;
|
||||
static const int CPU_FEATURE_BIT_POPCNT = 1 << 23;
|
||||
//static const int CPU_FEATURE_BIT_XSAVE = 1 << 26;
|
||||
static const int CPU_FEATURE_BIT_OXSAVE = 1 << 27;
|
||||
static const int CPU_FEATURE_BIT_AVX = 1 << 28;
|
||||
static const int CPU_FEATURE_BIT_F16C = 1 << 29;
|
||||
static const int CPU_FEATURE_BIT_RDRAND = 1 << 30;
|
||||
|
||||
/* cpuid[eax=1].edx */
|
||||
static const int CPU_FEATURE_BIT_SSE = 1 << 25;
|
||||
static const int CPU_FEATURE_BIT_SSE2 = 1 << 26;
|
||||
|
||||
/* cpuid[eax=0x80000001].ecx */
|
||||
static const int CPU_FEATURE_BIT_LZCNT = 1 << 5;
|
||||
|
||||
/* cpuid[eax=7,ecx=0].ebx */
|
||||
static const int CPU_FEATURE_BIT_BMI1 = 1 << 3;
|
||||
static const int CPU_FEATURE_BIT_AVX2 = 1 << 5;
|
||||
static const int CPU_FEATURE_BIT_BMI2 = 1 << 8;
|
||||
static const int CPU_FEATURE_BIT_AVX512F = 1 << 16; // AVX512F (foundation)
|
||||
static const int CPU_FEATURE_BIT_AVX512DQ = 1 << 17; // AVX512DQ (doubleword and quadword instructions)
|
||||
static const int CPU_FEATURE_BIT_AVX512PF = 1 << 26; // AVX512PF (prefetch gather/scatter instructions)
|
||||
static const int CPU_FEATURE_BIT_AVX512ER = 1 << 27; // AVX512ER (exponential and reciprocal instructions)
|
||||
static const int CPU_FEATURE_BIT_AVX512CD = 1 << 28; // AVX512CD (conflict detection instructions)
|
||||
static const int CPU_FEATURE_BIT_AVX512BW = 1 << 30; // AVX512BW (byte and word instructions)
|
||||
static const int CPU_FEATURE_BIT_AVX512VL = 1 << 31; // AVX512VL (vector length extensions)
|
||||
static const int CPU_FEATURE_BIT_AVX512IFMA = 1 << 21; // AVX512IFMA (integer fused multiple-add instructions)
|
||||
|
||||
/* cpuid[eax=7,ecx=0].ecx */
|
||||
static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1; // AVX512VBMI (vector bit manipulation instructions)
|
||||
#endif
|
||||
|
||||
#if defined(__X86_ASM__)
|
||||
__noinline int64_t get_xcr0()
|
||||
{
|
||||
#if defined (__WIN32__) && !defined (__MINGW32__) && defined(_XCR_XFEATURE_ENABLED_MASK)
|
||||
int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32
|
||||
xcr0 = _xgetbv(0);
|
||||
return xcr0;
|
||||
#else
|
||||
int xcr0 = 0;
|
||||
__asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
|
||||
return xcr0;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
int getCPUFeatures()
|
||||
{
|
||||
#if defined(__X86_ASM__)
|
||||
/* cache CPU features access */
|
||||
static int cpu_features = 0;
|
||||
if (cpu_features)
|
||||
return cpu_features;
|
||||
|
||||
/* get number of CPUID leaves */
|
||||
int cpuid_leaf0[4];
|
||||
__cpuid(cpuid_leaf0, 0x00000000);
|
||||
unsigned nIds = cpuid_leaf0[EAX];
|
||||
|
||||
/* get number of extended CPUID leaves */
|
||||
int cpuid_leafe[4];
|
||||
__cpuid(cpuid_leafe, 0x80000000);
|
||||
unsigned nExIds = cpuid_leafe[EAX];
|
||||
|
||||
/* get CPUID leaves for EAX = 1,7, and 0x80000001 */
|
||||
int cpuid_leaf_1[4] = { 0,0,0,0 };
|
||||
int cpuid_leaf_7[4] = { 0,0,0,0 };
|
||||
int cpuid_leaf_e1[4] = { 0,0,0,0 };
|
||||
if (nIds >= 1) __cpuid (cpuid_leaf_1,0x00000001);
|
||||
#if _WIN32
|
||||
#if _MSC_VER && (_MSC_FULL_VER < 160040219)
|
||||
#elif defined(_MSC_VER)
|
||||
if (nIds >= 7) __cpuidex(cpuid_leaf_7,0x00000007,0);
|
||||
#endif
|
||||
#else
|
||||
if (nIds >= 7) __cpuid_count(cpuid_leaf_7,0x00000007,0);
|
||||
#endif
|
||||
if (nExIds >= 0x80000001) __cpuid(cpuid_leaf_e1,0x80000001);
|
||||
|
||||
/* detect if OS saves XMM, YMM, and ZMM states */
|
||||
bool xmm_enabled = true;
|
||||
bool ymm_enabled = false;
|
||||
bool zmm_enabled = false;
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_OXSAVE) {
|
||||
int64_t xcr0 = get_xcr0();
|
||||
xmm_enabled = ((xcr0 & 0x02) == 0x02); /* checks if xmm are enabled in XCR0 */
|
||||
ymm_enabled = xmm_enabled && ((xcr0 & 0x04) == 0x04); /* checks if ymm state are enabled in XCR0 */
|
||||
zmm_enabled = ymm_enabled && ((xcr0 & 0xE0) == 0xE0); /* checks if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled in XCR0 */
|
||||
}
|
||||
if (xmm_enabled) cpu_features |= CPU_FEATURE_XMM_ENABLED;
|
||||
if (ymm_enabled) cpu_features |= CPU_FEATURE_YMM_ENABLED;
|
||||
if (zmm_enabled) cpu_features |= CPU_FEATURE_ZMM_ENABLED;
|
||||
|
||||
if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE ) cpu_features |= CPU_FEATURE_SSE;
|
||||
if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE2 ) cpu_features |= CPU_FEATURE_SSE2;
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE3 ) cpu_features |= CPU_FEATURE_SSE3;
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSSE3 ) cpu_features |= CPU_FEATURE_SSSE3;
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_1) cpu_features |= CPU_FEATURE_SSE41;
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_2) cpu_features |= CPU_FEATURE_SSE42;
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_POPCNT) cpu_features |= CPU_FEATURE_POPCNT;
|
||||
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_AVX ) cpu_features |= CPU_FEATURE_AVX;
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_F16C ) cpu_features |= CPU_FEATURE_F16C;
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_RDRAND) cpu_features |= CPU_FEATURE_RDRAND;
|
||||
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX2 ) cpu_features |= CPU_FEATURE_AVX2;
|
||||
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_FMA3 ) cpu_features |= CPU_FEATURE_FMA3;
|
||||
if (cpuid_leaf_e1[ECX] & CPU_FEATURE_BIT_LZCNT) cpu_features |= CPU_FEATURE_LZCNT;
|
||||
if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI1 ) cpu_features |= CPU_FEATURE_BMI1;
|
||||
if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI2 ) cpu_features |= CPU_FEATURE_BMI2;
|
||||
|
||||
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512F ) cpu_features |= CPU_FEATURE_AVX512F;
|
||||
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512DQ ) cpu_features |= CPU_FEATURE_AVX512DQ;
|
||||
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512PF ) cpu_features |= CPU_FEATURE_AVX512PF;
|
||||
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER ) cpu_features |= CPU_FEATURE_AVX512ER;
|
||||
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512CD ) cpu_features |= CPU_FEATURE_AVX512CD;
|
||||
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512BW ) cpu_features |= CPU_FEATURE_AVX512BW;
|
||||
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) cpu_features |= CPU_FEATURE_AVX512IFMA;
|
||||
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL ) cpu_features |= CPU_FEATURE_AVX512VL;
|
||||
if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI;
|
||||
|
||||
#if defined(__MACOSX__)
|
||||
if ( (cpu_features & CPU_FEATURE_AVX512F)
|
||||
|| (cpu_features & CPU_FEATURE_AVX512DQ)
|
||||
|| (cpu_features & CPU_FEATURE_AVX512CD)
|
||||
|| (cpu_features & CPU_FEATURE_AVX512BW)
|
||||
|| (cpu_features & CPU_FEATURE_AVX512VL) )
|
||||
{
|
||||
// on macOS AVX512 will be enabled automatically by the kernel when the first AVX512 instruction is called
|
||||
// see https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176
|
||||
// therefore we ignore the state of XCR0
|
||||
cpu_features |= CPU_FEATURE_ZMM_ENABLED;
|
||||
}
|
||||
#endif
|
||||
return cpu_features;
|
||||
|
||||
#elif defined(__ARM_NEON) || defined(__EMSCRIPTEN__)
|
||||
|
||||
int cpu_features = CPU_FEATURE_NEON|CPU_FEATURE_SSE|CPU_FEATURE_SSE2;
|
||||
cpu_features |= CPU_FEATURE_SSE3|CPU_FEATURE_SSSE3|CPU_FEATURE_SSE42;
|
||||
cpu_features |= CPU_FEATURE_XMM_ENABLED;
|
||||
cpu_features |= CPU_FEATURE_YMM_ENABLED;
|
||||
cpu_features |= CPU_FEATURE_SSE41 | CPU_FEATURE_RDRAND | CPU_FEATURE_F16C;
|
||||
cpu_features |= CPU_FEATURE_POPCNT;
|
||||
cpu_features |= CPU_FEATURE_AVX;
|
||||
cpu_features |= CPU_FEATURE_AVX2;
|
||||
cpu_features |= CPU_FEATURE_FMA3;
|
||||
cpu_features |= CPU_FEATURE_LZCNT;
|
||||
cpu_features |= CPU_FEATURE_BMI1;
|
||||
cpu_features |= CPU_FEATURE_BMI2;
|
||||
cpu_features |= CPU_FEATURE_NEON_2X;
|
||||
return cpu_features;
|
||||
|
||||
#else
|
||||
/* Unknown CPU. */
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string stringOfCPUFeatures(int features)
|
||||
{
|
||||
std::string str;
|
||||
if (features & CPU_FEATURE_XMM_ENABLED) str += "XMM ";
|
||||
if (features & CPU_FEATURE_YMM_ENABLED) str += "YMM ";
|
||||
if (features & CPU_FEATURE_ZMM_ENABLED) str += "ZMM ";
|
||||
if (features & CPU_FEATURE_SSE ) str += "SSE ";
|
||||
if (features & CPU_FEATURE_SSE2 ) str += "SSE2 ";
|
||||
if (features & CPU_FEATURE_SSE3 ) str += "SSE3 ";
|
||||
if (features & CPU_FEATURE_SSSE3 ) str += "SSSE3 ";
|
||||
if (features & CPU_FEATURE_SSE41 ) str += "SSE4.1 ";
|
||||
if (features & CPU_FEATURE_SSE42 ) str += "SSE4.2 ";
|
||||
if (features & CPU_FEATURE_POPCNT) str += "POPCNT ";
|
||||
if (features & CPU_FEATURE_AVX ) str += "AVX ";
|
||||
if (features & CPU_FEATURE_F16C ) str += "F16C ";
|
||||
if (features & CPU_FEATURE_RDRAND) str += "RDRAND ";
|
||||
if (features & CPU_FEATURE_AVX2 ) str += "AVX2 ";
|
||||
if (features & CPU_FEATURE_FMA3 ) str += "FMA3 ";
|
||||
if (features & CPU_FEATURE_LZCNT ) str += "LZCNT ";
|
||||
if (features & CPU_FEATURE_BMI1 ) str += "BMI1 ";
|
||||
if (features & CPU_FEATURE_BMI2 ) str += "BMI2 ";
|
||||
if (features & CPU_FEATURE_AVX512F) str += "AVX512F ";
|
||||
if (features & CPU_FEATURE_AVX512DQ) str += "AVX512DQ ";
|
||||
if (features & CPU_FEATURE_AVX512PF) str += "AVX512PF ";
|
||||
if (features & CPU_FEATURE_AVX512ER) str += "AVX512ER ";
|
||||
if (features & CPU_FEATURE_AVX512CD) str += "AVX512CD ";
|
||||
if (features & CPU_FEATURE_AVX512BW) str += "AVX512BW ";
|
||||
if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL ";
|
||||
if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA ";
|
||||
if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI ";
|
||||
if (features & CPU_FEATURE_NEON) str += "NEON ";
|
||||
if (features & CPU_FEATURE_NEON_2X) str += "2xNEON ";
|
||||
return str;
|
||||
}
|
||||
|
||||
std::string stringOfISA (int isa)
|
||||
{
|
||||
if (isa == SSE) return "SSE";
|
||||
if (isa == SSE2) return "SSE2";
|
||||
if (isa == SSE3) return "SSE3";
|
||||
if (isa == SSSE3) return "SSSE3";
|
||||
if (isa == SSE41) return "SSE4.1";
|
||||
if (isa == SSE42) return "SSE4.2";
|
||||
if (isa == AVX) return "AVX";
|
||||
if (isa == AVX2) return "AVX2";
|
||||
if (isa == AVX512) return "AVX512";
|
||||
|
||||
if (isa == NEON) return "NEON";
|
||||
if (isa == NEON_2X) return "2xNEON";
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
bool hasISA(int features, int isa) {
|
||||
return (features & isa) == isa;
|
||||
}
|
||||
|
||||
std::string supportedTargetList (int features)
|
||||
{
|
||||
std::string v;
|
||||
if (hasISA(features,SSE)) v += "SSE ";
|
||||
if (hasISA(features,SSE2)) v += "SSE2 ";
|
||||
if (hasISA(features,SSE3)) v += "SSE3 ";
|
||||
if (hasISA(features,SSSE3)) v += "SSSE3 ";
|
||||
if (hasISA(features,SSE41)) v += "SSE4.1 ";
|
||||
if (hasISA(features,SSE42)) v += "SSE4.2 ";
|
||||
if (hasISA(features,AVX)) v += "AVX ";
|
||||
if (hasISA(features,AVXI)) v += "AVXI ";
|
||||
if (hasISA(features,AVX2)) v += "AVX2 ";
|
||||
if (hasISA(features,AVX512)) v += "AVX512 ";
|
||||
|
||||
if (hasISA(features,NEON)) v += "NEON ";
|
||||
if (hasISA(features,NEON_2X)) v += "2xNEON ";
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Windows Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__WIN32__)
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#include <psapi.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
std::string getExecutableFileName() {
|
||||
char filename[1024];
|
||||
if (!GetModuleFileName(nullptr, filename, sizeof(filename)))
|
||||
return std::string();
|
||||
return std::string(filename);
|
||||
}
|
||||
|
||||
unsigned int getNumberOfLogicalThreads()
|
||||
{
|
||||
static int nThreads = -1;
|
||||
if (nThreads != -1) return nThreads;
|
||||
|
||||
typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
|
||||
typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
|
||||
HMODULE hlib = LoadLibrary("Kernel32");
|
||||
GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
|
||||
GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc) GetProcAddress(hlib, "GetActiveProcessorCount");
|
||||
|
||||
if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount)
|
||||
{
|
||||
int groups = pGetActiveProcessorGroupCount();
|
||||
int totalProcessors = 0;
|
||||
for (int i = 0; i < groups; i++)
|
||||
totalProcessors += pGetActiveProcessorCount(i);
|
||||
nThreads = totalProcessors;
|
||||
}
|
||||
else
|
||||
{
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetSystemInfo(&sysinfo);
|
||||
nThreads = sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
assert(nThreads);
|
||||
return nThreads;
|
||||
}
|
||||
|
||||
int getTerminalWidth()
|
||||
{
|
||||
HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE);
|
||||
if (handle == INVALID_HANDLE_VALUE) return 80;
|
||||
CONSOLE_SCREEN_BUFFER_INFO info;
|
||||
memset(&info,0,sizeof(info));
|
||||
GetConsoleScreenBufferInfo(handle, &info);
|
||||
return info.dwSize.X;
|
||||
}
|
||||
|
||||
double getSeconds()
|
||||
{
|
||||
LARGE_INTEGER freq, val;
|
||||
QueryPerformanceFrequency(&freq);
|
||||
QueryPerformanceCounter(&val);
|
||||
return (double)val.QuadPart / (double)freq.QuadPart;
|
||||
}
|
||||
|
||||
void sleepSeconds(double t) {
|
||||
Sleep(DWORD(1000.0*t));
|
||||
}
|
||||
|
||||
size_t getVirtualMemoryBytes()
|
||||
{
|
||||
PROCESS_MEMORY_COUNTERS info;
|
||||
GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
|
||||
return (size_t)info.QuotaPeakPagedPoolUsage;
|
||||
}
|
||||
|
||||
size_t getResidentMemoryBytes()
|
||||
{
|
||||
PROCESS_MEMORY_COUNTERS info;
|
||||
GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
|
||||
return (size_t)info.WorkingSetSize;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Linux Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__LINUX__)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
std::string getExecutableFileName()
|
||||
{
|
||||
std::string pid = "/proc/" + toString(getpid()) + "/exe";
|
||||
char buf[4096];
|
||||
memset(buf,0,sizeof(buf));
|
||||
if (readlink(pid.c_str(), buf, sizeof(buf)-1) == -1)
|
||||
return std::string();
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
size_t getVirtualMemoryBytes()
|
||||
{
|
||||
size_t virt, resident, shared;
|
||||
std::ifstream buffer("/proc/self/statm");
|
||||
buffer >> virt >> resident >> shared;
|
||||
return virt*sysconf(_SC_PAGE_SIZE);
|
||||
}
|
||||
|
||||
size_t getResidentMemoryBytes()
|
||||
{
|
||||
size_t virt, resident, shared;
|
||||
std::ifstream buffer("/proc/self/statm");
|
||||
buffer >> virt >> resident >> shared;
|
||||
return resident*sysconf(_SC_PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// FreeBSD Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined (__FreeBSD__)
|
||||
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
std::string getExecutableFileName()
|
||||
{
|
||||
const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
|
||||
char buf[4096];
|
||||
memset(buf,0,sizeof(buf));
|
||||
size_t len = sizeof(buf)-1;
|
||||
if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1)
|
||||
return std::string();
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
size_t getVirtualMemoryBytes() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t getResidentMemoryBytes() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Mac OS X Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__MACOSX__)
|
||||
|
||||
#include <mach-o/dyld.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
std::string getExecutableFileName()
|
||||
{
|
||||
char buf[4096];
|
||||
uint32_t size = sizeof(buf);
|
||||
if (_NSGetExecutablePath(buf, &size) != 0)
|
||||
return std::string();
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
size_t getVirtualMemoryBytes() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t getResidentMemoryBytes() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unix Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__UNIX__)
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/time.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
#include <emscripten.h>
|
||||
|
||||
extern "C" {
|
||||
extern int godot_js_os_hw_concurrency_get();
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
unsigned int getNumberOfLogicalThreads()
|
||||
{
|
||||
static int nThreads = -1;
|
||||
if (nThreads != -1) return nThreads;
|
||||
|
||||
#if defined(__MACOSX__) || defined(__ANDROID__)
|
||||
nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
|
||||
assert(nThreads);
|
||||
#elif defined(__EMSCRIPTEN__)
|
||||
nThreads = godot_js_os_hw_concurrency_get();
|
||||
#if 0
|
||||
// WebAssembly supports pthreads, but not pthread_getaffinity_np. Get the number of logical
|
||||
// threads from the browser or Node.js using JavaScript.
|
||||
nThreads = MAIN_THREAD_EM_ASM_INT({
|
||||
const isBrowser = typeof window !== 'undefined';
|
||||
const isNode = typeof process !== 'undefined' && process.versions != null &&
|
||||
process.versions.node != null;
|
||||
if (isBrowser) {
|
||||
// Return 1 if the browser does not expose hardwareConcurrency.
|
||||
return window.navigator.hardwareConcurrency || 1;
|
||||
} else if (isNode) {
|
||||
return require('os').cpus().length;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
});
|
||||
#endif
|
||||
#else
|
||||
cpu_set_t set;
|
||||
if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
|
||||
nThreads = CPU_COUNT(&set);
|
||||
#endif
|
||||
|
||||
assert(nThreads);
|
||||
return nThreads;
|
||||
}
|
||||
|
||||
int getTerminalWidth()
|
||||
{
|
||||
struct winsize info;
|
||||
if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &info) < 0) return 80;
|
||||
return info.ws_col;
|
||||
}
|
||||
|
||||
double getSeconds() {
|
||||
struct timeval tp; gettimeofday(&tp,nullptr);
|
||||
return double(tp.tv_sec) + double(tp.tv_usec)/1E6;
|
||||
}
|
||||
|
||||
void sleepSeconds(double t) {
|
||||
usleep(1000000.0*t);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER)
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
187
thirdparty/embree/common/sys/sysinfo.h
vendored
Normal file
187
thirdparty/embree/common/sys/sysinfo.h
vendored
Normal file
@@ -0,0 +1,187 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#define CACHELINE_SIZE 64
|
||||
|
||||
#if !defined(PAGE_SIZE)
|
||||
#define PAGE_SIZE 4096
|
||||
#endif
|
||||
|
||||
#define PAGE_SIZE_2M (2*1024*1024)
|
||||
#define PAGE_SIZE_4K (4*1024)
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
/* define isa namespace and ISA bitvector */
|
||||
#if defined (__AVX512VL__)
|
||||
# define isa avx512
|
||||
# define ISA AVX512
|
||||
# define ISA_STR "AVX512"
|
||||
#elif defined (__AVX2__)
|
||||
# define isa avx2
|
||||
# define ISA AVX2
|
||||
# define ISA_STR "AVX2"
|
||||
#elif defined(__AVXI__)
|
||||
# define isa avxi
|
||||
# define ISA AVXI
|
||||
# define ISA_STR "AVXI"
|
||||
#elif defined(__AVX__)
|
||||
# define isa avx
|
||||
# define ISA AVX
|
||||
# define ISA_STR "AVX"
|
||||
#elif defined (__SSE4_2__)
|
||||
# define isa sse42
|
||||
# define ISA SSE42
|
||||
# define ISA_STR "SSE4.2"
|
||||
//#elif defined (__SSE4_1__) // we demote this to SSE2, MacOSX code compiles with SSE41 by default with XCode 11
|
||||
//# define isa sse41
|
||||
//# define ISA SSE41
|
||||
//# define ISA_STR "SSE4.1"
|
||||
//#elif defined(__SSSE3__) // we demote this to SSE2, MacOSX code compiles with SSSE3 by default with ICC
|
||||
//# define isa ssse3
|
||||
//# define ISA SSSE3
|
||||
//# define ISA_STR "SSSE3"
|
||||
//#elif defined(__SSE3__) // we demote this to SSE2, MacOSX code compiles with SSE3 by default with clang
|
||||
//# define isa sse3
|
||||
//# define ISA SSE3
|
||||
//# define ISA_STR "SSE3"
|
||||
#elif defined(__SSE2__) || defined(__SSE3__) || defined(__SSSE3__)
|
||||
# define isa sse2
|
||||
# define ISA SSE2
|
||||
# define ISA_STR "SSE2"
|
||||
#elif defined(__SSE__)
|
||||
# define isa sse
|
||||
# define ISA SSE
|
||||
# define ISA_STR "SSE"
|
||||
#elif defined(__ARM_NEON)
|
||||
// NOTE(LTE): Use sse2 for `isa` for the compatibility at the moment.
|
||||
#define isa sse2
|
||||
#define ISA NEON
|
||||
#define ISA_STR "NEON"
|
||||
#else
|
||||
#error Unknown ISA
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
enum class CPU
|
||||
{
|
||||
XEON_ICE_LAKE,
|
||||
CORE_ICE_LAKE,
|
||||
CORE_TIGER_LAKE,
|
||||
CORE_COMET_LAKE,
|
||||
CORE_CANNON_LAKE,
|
||||
CORE_KABY_LAKE,
|
||||
XEON_SKY_LAKE,
|
||||
CORE_SKY_LAKE,
|
||||
XEON_PHI_KNIGHTS_MILL,
|
||||
XEON_PHI_KNIGHTS_LANDING,
|
||||
XEON_BROADWELL,
|
||||
CORE_BROADWELL,
|
||||
XEON_HASWELL,
|
||||
CORE_HASWELL,
|
||||
XEON_IVY_BRIDGE,
|
||||
CORE_IVY_BRIDGE,
|
||||
SANDY_BRIDGE,
|
||||
NEHALEM,
|
||||
CORE2,
|
||||
CORE1,
|
||||
ARM,
|
||||
UNKNOWN,
|
||||
};
|
||||
|
||||
/*! get the full path to the running executable */
|
||||
std::string getExecutableFileName();
|
||||
|
||||
/*! return platform name */
|
||||
std::string getPlatformName();
|
||||
|
||||
/*! get the full name of the compiler */
|
||||
std::string getCompilerName();
|
||||
|
||||
/*! return the name of the CPU */
|
||||
std::string getCPUVendor();
|
||||
|
||||
/*! get microprocessor model */
|
||||
CPU getCPUModel();
|
||||
|
||||
/*! converts CPU model into string */
|
||||
std::string stringOfCPUModel(CPU model);
|
||||
|
||||
/*! CPU features */
|
||||
static const int CPU_FEATURE_SSE = 1 << 0;
|
||||
static const int CPU_FEATURE_SSE2 = 1 << 1;
|
||||
static const int CPU_FEATURE_SSE3 = 1 << 2;
|
||||
static const int CPU_FEATURE_SSSE3 = 1 << 3;
|
||||
static const int CPU_FEATURE_SSE41 = 1 << 4;
|
||||
static const int CPU_FEATURE_SSE42 = 1 << 5;
|
||||
static const int CPU_FEATURE_POPCNT = 1 << 6;
|
||||
static const int CPU_FEATURE_AVX = 1 << 7;
|
||||
static const int CPU_FEATURE_F16C = 1 << 8;
|
||||
static const int CPU_FEATURE_RDRAND = 1 << 9;
|
||||
static const int CPU_FEATURE_AVX2 = 1 << 10;
|
||||
static const int CPU_FEATURE_FMA3 = 1 << 11;
|
||||
static const int CPU_FEATURE_LZCNT = 1 << 12;
|
||||
static const int CPU_FEATURE_BMI1 = 1 << 13;
|
||||
static const int CPU_FEATURE_BMI2 = 1 << 14;
|
||||
static const int CPU_FEATURE_AVX512F = 1 << 16;
|
||||
static const int CPU_FEATURE_AVX512DQ = 1 << 17;
|
||||
static const int CPU_FEATURE_AVX512PF = 1 << 18;
|
||||
static const int CPU_FEATURE_AVX512ER = 1 << 19;
|
||||
static const int CPU_FEATURE_AVX512CD = 1 << 20;
|
||||
static const int CPU_FEATURE_AVX512BW = 1 << 21;
|
||||
static const int CPU_FEATURE_AVX512VL = 1 << 22;
|
||||
static const int CPU_FEATURE_AVX512IFMA = 1 << 23;
|
||||
static const int CPU_FEATURE_AVX512VBMI = 1 << 24;
|
||||
static const int CPU_FEATURE_XMM_ENABLED = 1 << 25;
|
||||
static const int CPU_FEATURE_YMM_ENABLED = 1 << 26;
|
||||
static const int CPU_FEATURE_ZMM_ENABLED = 1 << 27;
|
||||
static const int CPU_FEATURE_NEON = 1 << 28;
|
||||
static const int CPU_FEATURE_NEON_2X = 1 << 29;
|
||||
|
||||
/*! get CPU features */
|
||||
int getCPUFeatures();
|
||||
|
||||
/*! convert CPU features into a string */
|
||||
std::string stringOfCPUFeatures(int features);
|
||||
|
||||
/*! creates a string of all supported targets that are supported */
|
||||
std::string supportedTargetList (int isa);
|
||||
|
||||
/*! ISAs */
|
||||
static const int SSE = CPU_FEATURE_SSE | CPU_FEATURE_XMM_ENABLED;
|
||||
static const int SSE2 = SSE | CPU_FEATURE_SSE2;
|
||||
static const int SSE3 = SSE2 | CPU_FEATURE_SSE3;
|
||||
static const int SSSE3 = SSE3 | CPU_FEATURE_SSSE3;
|
||||
static const int SSE41 = SSSE3 | CPU_FEATURE_SSE41;
|
||||
static const int SSE42 = SSE41 | CPU_FEATURE_SSE42 | CPU_FEATURE_POPCNT;
|
||||
static const int AVX = SSE42 | CPU_FEATURE_AVX | CPU_FEATURE_YMM_ENABLED;
|
||||
static const int AVXI = AVX | CPU_FEATURE_F16C;
|
||||
static const int AVX2 = AVXI | CPU_FEATURE_AVX2 | CPU_FEATURE_FMA3 | CPU_FEATURE_BMI1 | CPU_FEATURE_BMI2 | CPU_FEATURE_LZCNT;
|
||||
static const int AVX512 = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED;
|
||||
static const int NEON = CPU_FEATURE_NEON | CPU_FEATURE_SSE | CPU_FEATURE_SSE2;
|
||||
static const int NEON_2X = CPU_FEATURE_NEON_2X | AVX2;
|
||||
|
||||
/*! converts ISA bitvector into a string */
|
||||
std::string stringOfISA(int features);
|
||||
|
||||
/*! return the number of logical threads of the system */
|
||||
unsigned int getNumberOfLogicalThreads();
|
||||
|
||||
/*! returns the size of the terminal window in characters */
|
||||
int getTerminalWidth();
|
||||
|
||||
/*! returns performance counter in seconds */
|
||||
double getSeconds();
|
||||
|
||||
/*! sleeps the specified number of seconds */
|
||||
void sleepSeconds(double t);
|
||||
|
||||
/*! returns virtual address space occupied by process */
|
||||
size_t getVirtualMemoryBytes();
|
||||
|
||||
/*! returns resident memory required by process */
|
||||
size_t getResidentMemoryBytes();
|
||||
}
|
485
thirdparty/embree/common/sys/thread.cpp
vendored
Normal file
485
thirdparty/embree/common/sys/thread.cpp
vendored
Normal file
@@ -0,0 +1,485 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "thread.h"
|
||||
#include "sysinfo.h"
|
||||
#include "estring.h"
|
||||
|
||||
#include <iostream>
|
||||
#if defined(__ARM_NEON)
|
||||
#include "../simd/arm/emulation.h"
|
||||
#else
|
||||
#include <xmmintrin.h>
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
#include "../simd/wasm/emulation.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(PTHREADS_WIN32)
|
||||
#pragma comment (lib, "pthreadVC.lib")
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Windows Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__WIN32__)
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! set the affinity of a given thread */
|
||||
void setAffinity(HANDLE thread, ssize_t affinity)
|
||||
{
|
||||
typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
|
||||
typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
|
||||
typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
|
||||
typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
|
||||
HMODULE hlib = LoadLibrary("Kernel32");
|
||||
GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
|
||||
GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
|
||||
SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
|
||||
SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
|
||||
if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
|
||||
{
|
||||
int groups = pGetActiveProcessorGroupCount();
|
||||
int totalProcessors = 0, group = 0, number = 0;
|
||||
for (int i = 0; i<groups; i++) {
|
||||
int processors = pGetActiveProcessorCount(i);
|
||||
if (totalProcessors + processors > affinity) {
|
||||
group = i;
|
||||
number = (int)affinity - totalProcessors;
|
||||
break;
|
||||
}
|
||||
totalProcessors += processors;
|
||||
}
|
||||
|
||||
GROUP_AFFINITY groupAffinity;
|
||||
groupAffinity.Group = (WORD)group;
|
||||
groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
|
||||
groupAffinity.Reserved[0] = 0;
|
||||
groupAffinity.Reserved[1] = 0;
|
||||
groupAffinity.Reserved[2] = 0;
|
||||
if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
|
||||
WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning
|
||||
|
||||
PROCESSOR_NUMBER processorNumber;
|
||||
processorNumber.Group = group;
|
||||
processorNumber.Number = number;
|
||||
processorNumber.Reserved = 0;
|
||||
if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
|
||||
WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
|
||||
WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
|
||||
if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
|
||||
WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
|
||||
}
|
||||
}
|
||||
|
||||
/*! set affinity of the calling thread */
|
||||
void setAffinity(ssize_t affinity) {
|
||||
setAffinity(GetCurrentThread(), affinity);
|
||||
}
|
||||
|
||||
struct ThreadStartupData
|
||||
{
|
||||
public:
|
||||
ThreadStartupData (thread_func f, void* arg)
|
||||
: f(f), arg(arg) {}
|
||||
public:
|
||||
thread_func f;
|
||||
void* arg;
|
||||
};
|
||||
|
||||
DWORD WINAPI threadStartup(LPVOID ptr)
|
||||
{
|
||||
ThreadStartupData* parg = (ThreadStartupData*) ptr;
|
||||
_mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
|
||||
parg->f(parg->arg);
|
||||
delete parg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if !defined(PTHREADS_WIN32)
|
||||
|
||||
/*! creates a hardware thread running on specific core */
|
||||
thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
|
||||
{
|
||||
HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);
|
||||
if (thread == nullptr) FATAL("CreateThread failed");
|
||||
if (threadID >= 0) setAffinity(thread, threadID);
|
||||
return thread_t(thread);
|
||||
}
|
||||
|
||||
/*! the thread calling this function gets yielded */
|
||||
void yield() {
|
||||
SwitchToThread();
|
||||
}
|
||||
|
||||
/*! waits until the given thread has terminated */
|
||||
void join(thread_t tid) {
|
||||
WaitForSingleObject(HANDLE(tid), INFINITE);
|
||||
CloseHandle(HANDLE(tid));
|
||||
}
|
||||
|
||||
/*! destroy a hardware thread by its handle */
|
||||
void destroyThread(thread_t tid) {
|
||||
TerminateThread(HANDLE(tid),0);
|
||||
CloseHandle(HANDLE(tid));
|
||||
}
|
||||
|
||||
/*! creates thread local storage */
|
||||
tls_t createTls() {
|
||||
return tls_t(size_t(TlsAlloc()));
|
||||
}
|
||||
|
||||
/*! set the thread local storage pointer */
|
||||
void setTls(tls_t tls, void* const ptr) {
|
||||
TlsSetValue(DWORD(size_t(tls)), ptr);
|
||||
}
|
||||
|
||||
/*! return the thread local storage pointer */
|
||||
void* getTls(tls_t tls) {
|
||||
return TlsGetValue(DWORD(size_t(tls)));
|
||||
}
|
||||
|
||||
/*! destroys thread local storage identifier */
|
||||
void destroyTls(tls_t tls) {
|
||||
TlsFree(DWORD(size_t(tls)));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Linux Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__LINUX__) && !defined(__ANDROID__)
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
static MutexSys mutex;
|
||||
static std::vector<size_t> threadIDs;
|
||||
|
||||
/* changes thread ID mapping such that we first fill up all thread on one core */
|
||||
size_t mapThreadID(size_t threadID)
|
||||
{
|
||||
Lock<MutexSys> lock(mutex);
|
||||
|
||||
if (threadIDs.size() == 0)
|
||||
{
|
||||
/* parse thread/CPU topology */
|
||||
for (size_t cpuID=0;;cpuID++)
|
||||
{
|
||||
std::fstream fs;
|
||||
std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list");
|
||||
fs.open (cpu.c_str(), std::fstream::in);
|
||||
if (fs.fail()) break;
|
||||
|
||||
int i;
|
||||
while (fs >> i)
|
||||
{
|
||||
if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; }))
|
||||
threadIDs.push_back(i);
|
||||
if (fs.peek() == ',')
|
||||
fs.ignore();
|
||||
}
|
||||
fs.close();
|
||||
}
|
||||
|
||||
#if 0
|
||||
for (size_t i=0;i<threadIDs.size();i++)
|
||||
std::cout << i << " -> " << threadIDs[i] << std::endl;
|
||||
#endif
|
||||
|
||||
/* verify the mapping and do not use it if the mapping has errors */
|
||||
for (size_t i=0;i<threadIDs.size();i++) {
|
||||
for (size_t j=0;j<threadIDs.size();j++) {
|
||||
if (i != j && threadIDs[i] == threadIDs[j]) {
|
||||
threadIDs.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* re-map threadIDs if mapping is available */
|
||||
size_t ID = threadID;
|
||||
if (threadID < threadIDs.size())
|
||||
ID = threadIDs[threadID];
|
||||
|
||||
/* find correct thread to affinitize to */
|
||||
cpu_set_t set;
|
||||
CPU_ZERO(&set);
|
||||
|
||||
if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
|
||||
{
|
||||
for (int i=0, j=0; i<CPU_SETSIZE; i++)
|
||||
{
|
||||
if (!CPU_ISSET(i,&set)) continue;
|
||||
|
||||
if (j == ID) {
|
||||
ID = i;
|
||||
break;
|
||||
}
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
return ID;
|
||||
}
|
||||
|
||||
/*! set affinity of the calling thread */
|
||||
void setAffinity(ssize_t affinity)
|
||||
{
|
||||
cpu_set_t cset;
|
||||
CPU_ZERO(&cset);
|
||||
//size_t threadID = mapThreadID(affinity); // this is not working properly in LXC containers when some processors are disabled
|
||||
size_t threadID = affinity;
|
||||
CPU_SET(threadID, &cset);
|
||||
|
||||
pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Android Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! set affinity of the calling thread */
|
||||
void setAffinity(ssize_t affinity)
|
||||
{
|
||||
cpu_set_t cset;
|
||||
CPU_ZERO(&cset);
|
||||
CPU_SET(affinity, &cset);
|
||||
|
||||
sched_setaffinity(0, sizeof(cset), &cset);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// FreeBSD Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__FreeBSD__)
|
||||
|
||||
#include <pthread_np.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! set affinity of the calling thread */
|
||||
void setAffinity(ssize_t affinity)
|
||||
{
|
||||
cpuset_t cset;
|
||||
CPU_ZERO(&cset);
|
||||
CPU_SET(affinity, &cset);
|
||||
|
||||
pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// WebAssembly Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
namespace embree
|
||||
{
|
||||
/*! set affinity of the calling thread */
|
||||
void setAffinity(ssize_t affinity)
|
||||
{
|
||||
// Setting thread affinity is not supported in WASM.
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// MacOSX Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__MACOSX__)
|
||||
|
||||
#include <mach/thread_act.h>
|
||||
#include <mach/thread_policy.h>
|
||||
#include <mach/mach_init.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! set affinity of the calling thread */
|
||||
void setAffinity(ssize_t affinity)
|
||||
{
|
||||
#if !defined(__ARM_NEON) // affinity seems not supported on M1 chip
|
||||
|
||||
thread_affinity_policy ap;
|
||||
ap.affinity_tag = affinity;
|
||||
if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
|
||||
WARNING("setting thread affinity failed"); // on purpose only a warning
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unix Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__UNIX__) || defined(PTHREADS_WIN32)
|
||||
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
|
||||
#if defined(__USE_NUMA__)
|
||||
#include <numa.h>
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct ThreadStartupData
|
||||
{
|
||||
public:
|
||||
ThreadStartupData (thread_func f, void* arg, int affinity)
|
||||
: f(f), arg(arg), affinity(affinity) {}
|
||||
public:
|
||||
thread_func f;
|
||||
void* arg;
|
||||
ssize_t affinity;
|
||||
};
|
||||
|
||||
static void* threadStartup(ThreadStartupData* parg)
|
||||
{
|
||||
_mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
|
||||
|
||||
/*! Mac OS X does not support setting affinity at thread creation time */
|
||||
#if defined(__MACOSX__)
|
||||
if (parg->affinity >= 0)
|
||||
setAffinity(parg->affinity);
|
||||
#endif
|
||||
|
||||
parg->f(parg->arg);
|
||||
delete parg;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/*! creates a hardware thread running on specific core */
|
||||
thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
|
||||
{
|
||||
/* set stack size */
|
||||
pthread_attr_t attr;
|
||||
pthread_attr_init(&attr);
|
||||
if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size);
|
||||
|
||||
/* create thread */
|
||||
pthread_t* tid = new pthread_t;
|
||||
if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) {
|
||||
pthread_attr_destroy(&attr);
|
||||
delete tid;
|
||||
FATAL("pthread_create failed");
|
||||
}
|
||||
pthread_attr_destroy(&attr);
|
||||
|
||||
/* set affinity */
|
||||
#if defined(__LINUX__) && !defined(__ANDROID__)
|
||||
if (threadID >= 0) {
|
||||
cpu_set_t cset;
|
||||
CPU_ZERO(&cset);
|
||||
threadID = mapThreadID(threadID);
|
||||
CPU_SET(threadID, &cset);
|
||||
pthread_setaffinity_np(*tid, sizeof(cset), &cset);
|
||||
}
|
||||
#elif defined(__FreeBSD__)
|
||||
if (threadID >= 0) {
|
||||
cpuset_t cset;
|
||||
CPU_ZERO(&cset);
|
||||
CPU_SET(threadID, &cset);
|
||||
pthread_setaffinity_np(*tid, sizeof(cset), &cset);
|
||||
}
|
||||
#elif defined(__ANDROID__)
|
||||
if (threadID >= 0) {
|
||||
cpu_set_t cset;
|
||||
CPU_ZERO(&cset);
|
||||
CPU_SET(threadID, &cset);
|
||||
sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset);
|
||||
}
|
||||
#endif
|
||||
|
||||
return thread_t(tid);
|
||||
}
|
||||
|
||||
/*! the thread calling this function gets yielded */
|
||||
void yield() {
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
/*! waits until the given thread has terminated */
|
||||
void join(thread_t tid) {
|
||||
if (pthread_join(*(pthread_t*)tid, nullptr) != 0)
|
||||
FATAL("pthread_join failed");
|
||||
delete (pthread_t*)tid;
|
||||
}
|
||||
|
||||
/*! destroy a hardware thread by its handle */
|
||||
void destroyThread(thread_t tid) {
|
||||
#if defined(__ANDROID__)
|
||||
FATAL("Can't destroy threads on Android."); // pthread_cancel not implemented.
|
||||
#else
|
||||
pthread_cancel(*(pthread_t*)tid);
|
||||
delete (pthread_t*)tid;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! creates thread local storage */
|
||||
tls_t createTls()
|
||||
{
|
||||
pthread_key_t* key = new pthread_key_t;
|
||||
if (pthread_key_create(key,nullptr) != 0) {
|
||||
delete key;
|
||||
FATAL("pthread_key_create failed");
|
||||
}
|
||||
|
||||
return tls_t(key);
|
||||
}
|
||||
|
||||
/*! return the thread local storage pointer */
|
||||
void* getTls(tls_t tls)
|
||||
{
|
||||
assert(tls);
|
||||
return pthread_getspecific(*(pthread_key_t*)tls);
|
||||
}
|
||||
|
||||
/*! set the thread local storage pointer */
|
||||
void setTls(tls_t tls, void* const ptr)
|
||||
{
|
||||
assert(tls);
|
||||
if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0)
|
||||
FATAL("pthread_setspecific failed");
|
||||
}
|
||||
|
||||
/*! destroys thread local storage identifier */
|
||||
void destroyTls(tls_t tls)
|
||||
{
|
||||
assert(tls);
|
||||
if (pthread_key_delete(*(pthread_key_t*)tls) != 0)
|
||||
FATAL("pthread_key_delete failed");
|
||||
delete (pthread_key_t*)tls;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
49
thirdparty/embree/common/sys/thread.h
vendored
Normal file
49
thirdparty/embree/common/sys/thread.h
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
#include "mutex.h"
|
||||
#include "alloc.h"
|
||||
#include "vector.h"
|
||||
#include <vector>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! type for thread */
|
||||
typedef struct opaque_thread_t* thread_t;
|
||||
|
||||
/*! signature of thread start function */
|
||||
typedef void (*thread_func)(void*);
|
||||
|
||||
/*! creates a hardware thread running on specific logical thread */
|
||||
thread_t createThread(thread_func f, void* arg, size_t stack_size = 0, ssize_t threadID = -1);
|
||||
|
||||
/*! set affinity of the calling thread */
|
||||
void setAffinity(ssize_t affinity);
|
||||
|
||||
/*! the thread calling this function gets yielded */
|
||||
void yield();
|
||||
|
||||
/*! waits until the given thread has terminated */
|
||||
void join(thread_t tid);
|
||||
|
||||
/*! destroy handle of a thread */
|
||||
void destroyThread(thread_t tid);
|
||||
|
||||
/*! type for handle to thread local storage */
|
||||
typedef struct opaque_tls_t* tls_t;
|
||||
|
||||
/*! creates thread local storage */
|
||||
tls_t createTls();
|
||||
|
||||
/*! set the thread local storage pointer */
|
||||
void setTls(tls_t tls, void* const ptr);
|
||||
|
||||
/*! return the thread local storage pointer */
|
||||
void* getTls(tls_t tls);
|
||||
|
||||
/*! destroys thread local storage identifier */
|
||||
void destroyTls(tls_t tls);
|
||||
}
|
266
thirdparty/embree/common/sys/vector.h
vendored
Normal file
266
thirdparty/embree/common/sys/vector.h
vendored
Normal file
@@ -0,0 +1,266 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "alloc.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class Device;
|
||||
|
||||
template<typename T, typename allocator>
|
||||
class vector_t
|
||||
{
|
||||
public:
|
||||
typedef T value_type;
|
||||
typedef T* iterator;
|
||||
typedef const T* const_iterator;
|
||||
|
||||
__forceinline vector_t ()
|
||||
: size_active(0), size_alloced(0), items(nullptr) {}
|
||||
|
||||
__forceinline explicit vector_t (size_t sz)
|
||||
: size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); }
|
||||
|
||||
template<typename M>
|
||||
__forceinline explicit vector_t (M alloc, size_t sz)
|
||||
: alloc(alloc), size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); }
|
||||
|
||||
__forceinline vector_t (Device* alloc)
|
||||
: vector_t(alloc,0) {}
|
||||
|
||||
__forceinline vector_t(void* data, size_t bytes)
|
||||
: size_active(0), size_alloced(bytes/sizeof(T)), items((T*)data) {}
|
||||
|
||||
__forceinline ~vector_t() {
|
||||
clear();
|
||||
}
|
||||
|
||||
__forceinline vector_t (const vector_t& other)
|
||||
{
|
||||
size_active = other.size_active;
|
||||
size_alloced = other.size_alloced;
|
||||
items = alloc.allocate(size_alloced);
|
||||
for (size_t i=0; i<size_active; i++)
|
||||
::new (&items[i]) value_type(other.items[i]);
|
||||
}
|
||||
|
||||
__forceinline vector_t (vector_t&& other)
|
||||
: alloc(std::move(other.alloc))
|
||||
{
|
||||
size_active = other.size_active; other.size_active = 0;
|
||||
size_alloced = other.size_alloced; other.size_alloced = 0;
|
||||
items = other.items; other.items = nullptr;
|
||||
}
|
||||
|
||||
__forceinline vector_t& operator=(const vector_t& other)
|
||||
{
|
||||
resize(other.size_active);
|
||||
for (size_t i=0; i<size_active; i++)
|
||||
items[i] = value_type(other.items[i]);
|
||||
return *this;
|
||||
}
|
||||
|
||||
__forceinline vector_t& operator=(vector_t&& other)
|
||||
{
|
||||
clear();
|
||||
alloc = std::move(other.alloc);
|
||||
size_active = other.size_active; other.size_active = 0;
|
||||
size_alloced = other.size_alloced; other.size_alloced = 0;
|
||||
items = other.items; other.items = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__forceinline allocator& getAlloc() {
|
||||
return alloc;
|
||||
}
|
||||
|
||||
/********************** Iterators ****************************/
|
||||
|
||||
__forceinline iterator begin() { return items; };
|
||||
__forceinline const_iterator begin() const { return items; };
|
||||
|
||||
__forceinline iterator end () { return items+size_active; };
|
||||
__forceinline const_iterator end () const { return items+size_active; };
|
||||
|
||||
|
||||
/********************** Capacity ****************************/
|
||||
|
||||
__forceinline bool empty () const { return size_active == 0; }
|
||||
__forceinline size_t size () const { return size_active; }
|
||||
__forceinline size_t capacity () const { return size_alloced; }
|
||||
|
||||
|
||||
__forceinline void resize(size_t new_size) {
|
||||
internal_resize(new_size,internal_grow_size(new_size));
|
||||
}
|
||||
|
||||
__forceinline void reserve(size_t new_alloced)
|
||||
{
|
||||
/* do nothing if container already large enough */
|
||||
if (new_alloced <= size_alloced)
|
||||
return;
|
||||
|
||||
/* resize exact otherwise */
|
||||
internal_resize(size_active,new_alloced);
|
||||
}
|
||||
|
||||
__forceinline void shrink_to_fit() {
|
||||
internal_resize(size_active,size_active);
|
||||
}
|
||||
|
||||
/******************** Element access **************************/
|
||||
|
||||
__forceinline T& operator[](size_t i) { assert(i < size_active); return items[i]; }
|
||||
__forceinline const T& operator[](size_t i) const { assert(i < size_active); return items[i]; }
|
||||
|
||||
__forceinline T& at(size_t i) { assert(i < size_active); return items[i]; }
|
||||
__forceinline const T& at(size_t i) const { assert(i < size_active); return items[i]; }
|
||||
|
||||
__forceinline T& front() const { assert(size_active > 0); return items[0]; };
|
||||
__forceinline T& back () const { assert(size_active > 0); return items[size_active-1]; };
|
||||
|
||||
__forceinline T* data() { return items; };
|
||||
__forceinline const T* data() const { return items; };
|
||||
|
||||
/* dangerous only use if you know what you're doing */
|
||||
__forceinline void setDataPtr(T* data) { items = data; }
|
||||
|
||||
/******************** Modifiers **************************/
|
||||
|
||||
__forceinline void push_back(const T& nt)
|
||||
{
|
||||
const T v = nt; // need local copy as input reference could point to this vector
|
||||
internal_resize(size_active,internal_grow_size(size_active+1));
|
||||
::new (&items[size_active++]) T(v);
|
||||
}
|
||||
|
||||
__forceinline void pop_back()
|
||||
{
|
||||
assert(!empty());
|
||||
size_active--;
|
||||
items[size_active].~T();
|
||||
}
|
||||
|
||||
__forceinline void clear()
|
||||
{
|
||||
/* destroy elements */
|
||||
for (size_t i=0; i<size_active; i++){
|
||||
items[i].~T();
|
||||
}
|
||||
|
||||
/* free memory */
|
||||
alloc.deallocate(items,size_alloced);
|
||||
items = nullptr;
|
||||
size_active = size_alloced = 0;
|
||||
}
|
||||
|
||||
/******************** Comparisons **************************/
|
||||
|
||||
friend bool operator== (const vector_t& a, const vector_t& b)
|
||||
{
|
||||
if (a.size() != b.size()) return false;
|
||||
for (size_t i=0; i<a.size(); i++)
|
||||
if (a[i] != b[i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
friend bool operator!= (const vector_t& a, const vector_t& b) {
|
||||
return !(a==b);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
__forceinline void internal_resize_init(size_t new_active)
|
||||
{
|
||||
assert(size_active == 0);
|
||||
assert(size_alloced == 0);
|
||||
assert(items == nullptr);
|
||||
if (new_active == 0) return;
|
||||
items = alloc.allocate(new_active);
|
||||
for (size_t i=0; i<new_active; i++) ::new (&items[i]) T();
|
||||
size_active = new_active;
|
||||
size_alloced = new_active;
|
||||
}
|
||||
|
||||
__forceinline void internal_resize(size_t new_active, size_t new_alloced)
|
||||
{
|
||||
assert(new_active <= new_alloced);
|
||||
|
||||
/* destroy elements */
|
||||
if (new_active < size_active)
|
||||
{
|
||||
for (size_t i=new_active; i<size_active; i++){
|
||||
items[i].~T();
|
||||
}
|
||||
size_active = new_active;
|
||||
}
|
||||
|
||||
/* only reallocate if necessary */
|
||||
if (new_alloced == size_alloced) {
|
||||
for (size_t i=size_active; i<new_active; i++) ::new (&items[i]) T;
|
||||
size_active = new_active;
|
||||
return;
|
||||
}
|
||||
|
||||
/* reallocate and copy items */
|
||||
T* old_items = items;
|
||||
items = alloc.allocate(new_alloced);
|
||||
for (size_t i=0; i<size_active; i++) {
|
||||
::new (&items[i]) T(std::move(old_items[i]));
|
||||
old_items[i].~T();
|
||||
}
|
||||
|
||||
for (size_t i=size_active; i<new_active; i++) {
|
||||
::new (&items[i]) T;
|
||||
}
|
||||
|
||||
alloc.deallocate(old_items,size_alloced);
|
||||
size_active = new_active;
|
||||
size_alloced = new_alloced;
|
||||
}
|
||||
|
||||
__forceinline size_t internal_grow_size(size_t new_alloced)
|
||||
{
|
||||
/* do nothing if container already large enough */
|
||||
if (new_alloced <= size_alloced)
|
||||
return size_alloced;
|
||||
|
||||
/* if current size is 0 allocate exact requested size */
|
||||
if (size_alloced == 0)
|
||||
return new_alloced;
|
||||
|
||||
/* resize to next power of 2 otherwise */
|
||||
size_t new_size_alloced = size_alloced;
|
||||
while (new_size_alloced < new_alloced) {
|
||||
new_size_alloced = std::max(size_t(1),2*new_size_alloced);
|
||||
}
|
||||
return new_size_alloced;
|
||||
}
|
||||
|
||||
private:
|
||||
allocator alloc;
|
||||
size_t size_active; // number of valid items
|
||||
size_t size_alloced; // number of items allocated
|
||||
T* items; // data array
|
||||
};
|
||||
|
||||
/*! vector class that performs standard allocations */
|
||||
template<typename T>
|
||||
using vector = vector_t<T,std::allocator<T>>;
|
||||
|
||||
/*! vector class that performs aligned allocations */
|
||||
template<typename T>
|
||||
using avector = vector_t<T,aligned_allocator<T,std::alignment_of<T>::value> >;
|
||||
|
||||
/*! vector class that performs OS allocations */
|
||||
template<typename T>
|
||||
using ovector = vector_t<T,os_allocator<T> >;
|
||||
|
||||
/*! vector class with externally managed data buffer */
|
||||
template<typename T>
|
||||
using evector = vector_t<T,no_allocator<T>>;
|
||||
}
|
Reference in New Issue
Block a user