initial commit, 4.5 stable

2025-09-16 20:46:46 -04:00
commit 9d30169a8d
13378 changed files with 7050105 additions and 0 deletions

thirdparty/embree/common/sys/alloc.cpp vendored Normal file

@@ -0,0 +1,377 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "alloc.h"
#include "intrinsics.h"
#include "sysinfo.h"
#include "mutex.h"
////////////////////////////////////////////////////////////////////////////////
/// All Platforms
////////////////////////////////////////////////////////////////////////////////
namespace embree
{
void* alignedMalloc(size_t size, size_t align)
{
if (size == 0)
return nullptr;
assert((align & (align-1)) == 0);
void* ptr = _mm_malloc(size,align);
if (size != 0 && ptr == nullptr)
abort(); //throw std::bad_alloc();
return ptr;
}
void alignedFree(void* ptr)
{
if (ptr) {
_mm_free(ptr);
}
}
#if defined(EMBREE_SYCL_SUPPORT)
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode)
{
assert(context);
assert(device);
if (size == 0)
return nullptr;
assert((align & (align-1)) == 0);
void* ptr = nullptr;
if (mode == EmbreeUSMMode::DEVICE_READ_ONLY)
ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
else
ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
if (size != 0 && ptr == nullptr)
abort(); //throw std::bad_alloc();
return ptr;
}
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type)
{
assert(context);
assert(device);
if (size == 0)
return nullptr;
assert((align & (align-1)) == 0);
void* ptr = nullptr;
if (type == EmbreeMemoryType::USM_SHARED) {
if (mode == EmbreeUSMMode::DEVICE_READ_ONLY)
ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
else
ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
}
else if (type == EmbreeMemoryType::USM_HOST) {
ptr = sycl::aligned_alloc_host(align,size,*context);
}
else if (type == EmbreeMemoryType::USM_DEVICE) {
ptr = sycl::aligned_alloc_device(align,size,*device,*context);
}
else {
ptr = alignedMalloc(size,align);
}
if (size != 0 && ptr == nullptr)
abort(); //throw std::bad_alloc();
return ptr;
}
void alignedSYCLFree(sycl::context* context, void* ptr)
{
assert(context);
if (ptr) {
sycl::usm::alloc type = sycl::get_pointer_type(ptr, *context);
if (type == sycl::usm::alloc::host || type == sycl::usm::alloc::device || type == sycl::usm::alloc::shared)
sycl::free(ptr,*context);
else {
alignedFree(ptr);
}
}
}
#endif
static bool huge_pages_enabled = false;
static MutexSys os_init_mutex;
__forceinline bool isHugePageCandidate(const size_t bytes)
{
if (!huge_pages_enabled)
return false;
/* use huge pages only when memory overhead is low */
const size_t hbytes = (bytes+PAGE_SIZE_2M-1) & ~size_t(PAGE_SIZE_2M-1);
return 66*(hbytes-bytes) < bytes; // at most 1.5% overhead
}
}
////////////////////////////////////////////////////////////////////////////////
/// Windows Platform
////////////////////////////////////////////////////////////////////////////////
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <malloc.h>
namespace embree
{
bool win_enable_selockmemoryprivilege (bool verbose)
{
HANDLE hToken;
if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY | TOKEN_ADJUST_PRIVILEGES, &hToken)) {
if (verbose) std::cout << "WARNING: OpenProcessToken failed while trying to enable SeLockMemoryPrivilege: " << GetLastError() << std::endl;
return false;
}
TOKEN_PRIVILEGES tp;
tp.PrivilegeCount = 1;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
if (!LookupPrivilegeValueW(nullptr, L"SeLockMemoryPrivilege", &tp.Privileges[0].Luid)) {
if (verbose) std::cout << "WARNING: LookupPrivilegeValue failed while trying to enable SeLockMemoryPrivilege: " << GetLastError() << std::endl;
return false;
}
SetLastError(ERROR_SUCCESS);
if (!AdjustTokenPrivileges(hToken, FALSE, &tp, sizeof(tp), nullptr, 0)) {
if (verbose) std::cout << "WARNING: AdjustTokenPrivileges failed while trying to enable SeLockMemoryPrivilege" << std::endl;
return false;
}
if (GetLastError() == ERROR_NOT_ALL_ASSIGNED) {
if (verbose) std::cout << "WARNING: AdjustTokenPrivileges failed to enable SeLockMemoryPrivilege: Add SeLockMemoryPrivilege for current user and run process in elevated mode (Run as administrator)." << std::endl;
return false;
}
return true;
}
bool os_init(bool hugepages, bool verbose)
{
Lock<MutexSys> lock(os_init_mutex);
if (!hugepages) {
huge_pages_enabled = false;
return true;
}
if (GetLargePageMinimum() != PAGE_SIZE_2M) {
huge_pages_enabled = false;
return false;
}
huge_pages_enabled = true;
return true;
}
void* os_malloc(size_t bytes, bool& hugepages)
{
if (bytes == 0) {
hugepages = false;
return nullptr;
}
/* try direct huge page allocation first */
if (isHugePageCandidate(bytes))
{
int flags = MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES;
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
if (ptr != nullptr) {
hugepages = true;
return ptr;
}
}
/* fall back to 4k pages */
int flags = MEM_COMMIT | MEM_RESERVE;
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
if (ptr == nullptr) abort(); //throw std::bad_alloc();
hugepages = false;
return ptr;
}
size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages)
{
if (hugepages) // decommitting huge pages seems not to work under Windows
return bytesOld;
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1);
bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1);
if (bytesNew >= bytesOld)
return bytesOld;
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
abort(); //throw std::bad_alloc();
return bytesNew;
}
void os_free(void* ptr, size_t bytes, bool hugepages)
{
if (bytes == 0)
return;
if (!VirtualFree(ptr,0,MEM_RELEASE))
abort(); //throw std::bad_alloc();
}
void os_advise(void *ptr, size_t bytes)
{
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Unix Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__UNIX__)
#include <sys/mman.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sstream>
#if defined(__MACOSX__)
#include <mach/vm_statistics.h>
#endif
namespace embree
{
bool os_init(bool hugepages, bool verbose)
{
Lock<MutexSys> lock(os_init_mutex);
if (!hugepages) {
huge_pages_enabled = false;
return true;
}
#if defined(__LINUX__)
int hugepagesize = 0;
std::ifstream file;
file.open("/proc/meminfo",std::ios::in);
if (!file.is_open()) {
if (verbose) std::cout << "WARNING: Could not open /proc/meminfo. Huge page support cannot get enabled!" << std::endl;
huge_pages_enabled = false;
return false;
}
std::string line;
while (getline(file,line))
{
std::stringstream sline(line);
while (!sline.eof() && sline.peek() == ' ') sline.ignore();
std::string tag; getline(sline,tag,' ');
while (!sline.eof() && sline.peek() == ' ') sline.ignore();
std::string val; getline(sline,val,' ');
while (!sline.eof() && sline.peek() == ' ') sline.ignore();
std::string unit; getline(sline,unit,' ');
if (tag == "Hugepagesize:" && unit == "kB") {
hugepagesize = std::stoi(val)*1024;
break;
}
}
if (hugepagesize != PAGE_SIZE_2M)
{
if (verbose) std::cout << "WARNING: Only 2MB huge pages supported. Huge page support cannot get enabled!" << std::endl;
huge_pages_enabled = false;
return false;
}
#endif
huge_pages_enabled = true;
return true;
}
void* os_malloc(size_t bytes, bool& hugepages)
{
if (bytes == 0) {
hugepages = false;
return nullptr;
}
/* try direct huge page allocation first */
if (isHugePageCandidate(bytes))
{
#if defined(__MACOSX__)
void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
if (ptr != MAP_FAILED) {
hugepages = true;
return ptr;
}
#elif defined(MAP_HUGETLB)
void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_HUGETLB, -1, 0);
if (ptr != MAP_FAILED) {
hugepages = true;
return ptr;
}
#endif
}
/* fallback to 4k pages */
void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
if (ptr == MAP_FAILED) abort(); //throw std::bad_alloc();
hugepages = false;
/* advise huge page hint for THP */
os_advise(ptr,bytes);
return ptr;
}
size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages)
{
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1);
bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1);
if (bytesNew >= bytesOld)
return bytesOld;
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
abort(); //throw std::bad_alloc();
return bytesNew;
}
void os_free(void* ptr, size_t bytes, bool hugepages)
{
if (bytes == 0)
return;
/* for hugepages we need to also align the size */
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
bytes = (bytes+pageSize-1) & ~(pageSize-1);
if (munmap(ptr,bytes) == -1)
abort(); //throw std::bad_alloc();
}
/* hint for transparent huge pages (THP) */
void os_advise(void* pptr, size_t bytes)
{
#if defined(MADV_HUGEPAGE)
madvise(pptr,bytes,MADV_HUGEPAGE);
#endif
}
}
#endif
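
A minimal usage sketch for the aligned allocation API above (illustrative only, not part of the vendored file; the function name is hypothetical). For reference, isHugePageCandidate rounds the request up to the next 2 MiB boundary and only reports true when the wasted slack stays below 1/66 (roughly 1.5%) of the requested size.
// --- usage sketch (illustrative, not part of the vendored sources) ---
// Allocates a 64-byte-aligned buffer for 1000 floats and releases it again.
// alignedMalloc aborts on failure, so no null check is needed afterwards.
#include "alloc.h"
void example_aligned_alloc() // hypothetical helper, not an embree function
{
  float* data = (float*) embree::alignedMalloc(1000*sizeof(float), 64);
  for (size_t i = 0; i < 1000; i++) data[i] = 0.0f;
  embree::alignedFree(data);
}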

thirdparty/embree/common/sys/alloc.h vendored Normal file

@@ -0,0 +1,215 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
#include <vector>
#include <set>
namespace embree
{
#define ALIGNED_STRUCT_(align) \
void* operator new(size_t size) { return alignedMalloc(size,align); } \
void operator delete(void* ptr) { alignedFree(ptr); } \
void* operator new[](size_t size) { return alignedMalloc(size,align); } \
void operator delete[](void* ptr) { alignedFree(ptr); }
#define ALIGNED_CLASS_(align) \
public: \
ALIGNED_STRUCT_(align) \
private:
/*! aligned allocation */
void* alignedMalloc(size_t size, size_t align);
void alignedFree(void* ptr);
enum class EmbreeUSMMode {
DEFAULT = 0,
DEVICE_READ_WRITE = 0,
DEVICE_READ_ONLY = 1
};
enum class EmbreeMemoryType {
USM_HOST = 0,
USM_DEVICE = 1,
USM_SHARED = 2,
MALLOC = 3
};
#if defined(EMBREE_SYCL_SUPPORT)
/*! aligned allocation using SYCL USM */
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode);
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type);
void alignedSYCLFree(sycl::context* context, void* ptr);
#endif
/*! allocator that performs aligned allocations */
template<typename T, size_t alignment>
struct aligned_allocator
{
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
__forceinline pointer allocate( size_type n ) {
return (pointer) alignedMalloc(n*sizeof(value_type),alignment);
}
__forceinline void deallocate( pointer p, size_type n ) {
return alignedFree(p);
}
__forceinline void construct( pointer p, const_reference val ) {
new (p) T(val);
}
__forceinline void destroy( pointer p ) {
p->~T();
}
};
/*! allocates pages directly from OS */
bool win_enable_selockmemoryprivilege(bool verbose);
bool os_init(bool hugepages, bool verbose);
void* os_malloc (size_t bytes, bool& hugepages);
size_t os_shrink (void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages);
void os_free (void* ptr, size_t bytes, bool hugepages);
void os_advise (void* ptr, size_t bytes);
/*! allocator that performs OS allocations */
template<typename T>
struct os_allocator
{
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
__forceinline os_allocator ()
: hugepages(false) {}
__forceinline pointer allocate( size_type n ) {
return (pointer) os_malloc(n*sizeof(value_type),hugepages);
}
__forceinline void deallocate( pointer p, size_type n ) {
return os_free(p,n*sizeof(value_type),hugepages);
}
__forceinline void construct( pointer p, const_reference val ) {
new (p) T(val);
}
__forceinline void destroy( pointer p ) {
p->~T();
}
bool hugepages;
};
/*! allocator that never performs allocations */
template<typename T>
struct no_allocator
{
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
__forceinline pointer allocate( size_type n ) {
abort(); //throw std::runtime_error("no allocation supported");
}
__forceinline void deallocate( pointer p, size_type n ) {
}
__forceinline void construct( pointer p, const_reference val ) {
new (p) T(val);
}
__forceinline void destroy( pointer p ) {
p->~T();
}
};
/*! allocator for IDs */
template<typename T, size_t max_id>
struct IDPool
{
typedef T value_type;
IDPool ()
: nextID(0) {}
T allocate()
{
/* return ID from list */
if (!IDs.empty())
{
T id = *IDs.begin();
IDs.erase(IDs.begin());
return id;
}
/* allocate new ID */
else
{
if (size_t(nextID)+1 > max_id)
return -1;
return nextID++;
}
}
/* adds an ID provided by the user */
bool add(T id)
{
if (id > max_id)
return false;
/* check if ID should be in IDs set */
if (id < nextID) {
auto p = IDs.find(id);
if (p == IDs.end()) return false;
IDs.erase(p);
return true;
}
/* otherwise increase ID set */
else
{
for (T i=nextID; i<id; i++) {
IDs.insert(i);
}
nextID = id+1;
return true;
}
}
void deallocate( T id )
{
assert(id < nextID);
MAYBE_UNUSED auto done = IDs.insert(id).second;
assert(done);
}
private:
std::set<T> IDs; //!< stores deallocated IDs to be reused
T nextID; //!< next ID to use when IDs vector is empty
};
}
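
A short usage sketch for the allocator helpers above (illustrative only, not part of the vendored file; the function name is hypothetical).
// --- usage sketch (illustrative, not part of the vendored sources) ---
// aligned_allocator follows the standard allocator interface; IDPool hands
// out small integer IDs and recycles deallocated ones.
#include "alloc.h"
void example_allocators()
{
  embree::aligned_allocator<float,64> alloc;
  float* p = alloc.allocate(16);      // 16 floats, 64-byte aligned
  alloc.construct(p, 1.0f);
  alloc.destroy(p);
  alloc.deallocate(p, 16);

  embree::IDPool<int,255> ids;
  int a = ids.allocate();             // 0
  int b = ids.allocate();             // 1
  ids.deallocate(a);                  // 0 returns to the free set
  int c = ids.allocate();             // reuses 0
  (void)b; (void)c;
}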

thirdparty/embree/common/sys/array.h vendored Normal file

@@ -0,0 +1,222 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
#include "alloc.h"
namespace embree
{
/*! static array with static size */
template<typename T, size_t N>
class array_t
{
public:
/********************** Iterators ****************************/
__forceinline T* begin() const { return items; };
__forceinline T* end () const { return items+N; };
/********************** Capacity ****************************/
__forceinline bool empty () const { return N == 0; }
__forceinline size_t size () const { return N; }
__forceinline size_t max_size () const { return N; }
/******************** Element access **************************/
__forceinline T& operator[](size_t i) { assert(i < N); return items[i]; }
__forceinline const T& operator[](size_t i) const { assert(i < N); return items[i]; }
__forceinline T& at(size_t i) { assert(i < N); return items[i]; }
__forceinline const T& at(size_t i) const { assert(i < N); return items[i]; }
__forceinline T& front() const { assert(N > 0); return items[0]; };
__forceinline T& back () const { assert(N > 0); return items[N-1]; };
__forceinline T* data() { return items; };
__forceinline const T* data() const { return items; };
private:
T items[N];
};
/*! static array with dynamic size */
template<typename T, size_t N>
class darray_t
{
public:
__forceinline darray_t () : M(0) {}
__forceinline darray_t (const T& v) : M(0) {
for (size_t i=0; i<N; i++) items[i] = v;
}
/********************** Iterators ****************************/
__forceinline T* begin() const { return (T*)items; };
__forceinline T* end () const { return (T*)items+M; };
/********************** Capacity ****************************/
__forceinline bool empty () const { return M == 0; }
__forceinline size_t size () const { return M; }
__forceinline size_t capacity () const { return N; }
__forceinline size_t max_size () const { return N; }
void resize(size_t new_size) {
assert(new_size < max_size());
M = new_size;
}
/******************** Modifiers **************************/
__forceinline void push_back(const T& v)
{
assert(M+1 < max_size());
items[M++] = v;
}
__forceinline void pop_back()
{
assert(!empty());
M--;
}
__forceinline void clear() {
M = 0;
}
/******************** Element access **************************/
__forceinline T& operator[](size_t i) { assert(i < M); return items[i]; }
__forceinline const T& operator[](size_t i) const { assert(i < M); return items[i]; }
__forceinline T& at(size_t i) { assert(i < M); return items[i]; }
__forceinline const T& at(size_t i) const { assert(i < M); return items[i]; }
__forceinline T& front() { assert(M > 0); return items[0]; };
__forceinline T& back () { assert(M > 0); return items[M-1]; };
__forceinline T* data() { return items; };
__forceinline const T* data() const { return items; };
private:
size_t M;
T items[N];
};
/*! dynamic sized array that is allocated on the stack */
#define dynamic_large_stack_array(Ty,Name,N,max_stack_bytes) StackArray<Ty,max_stack_bytes> Name(N)
template<typename Ty, size_t max_stack_bytes>
struct __aligned(64) StackArray
{
__forceinline StackArray (const size_t N)
: N(N)
{
if (N*sizeof(Ty) <= max_stack_bytes)
data = &arr[0];
else
data = (Ty*) alignedMalloc(N*sizeof(Ty),64);
}
__forceinline ~StackArray () {
if (data != &arr[0]) alignedFree(data);
}
__forceinline operator Ty* () { return data; }
__forceinline operator const Ty* () const { return data; }
__forceinline Ty& operator[](const int i) { assert(i>=0 && i<N); return data[i]; }
__forceinline const Ty& operator[](const int i) const { assert(i>=0 && i<N); return data[i]; }
__forceinline Ty& operator[](const unsigned i) { assert(i<N); return data[i]; }
__forceinline const Ty& operator[](const unsigned i) const { assert(i<N); return data[i]; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline Ty& operator[](const size_t i) { assert(i<N); return data[i]; }
__forceinline const Ty& operator[](const size_t i) const { assert(i<N); return data[i]; }
#endif
private:
Ty arr[max_stack_bytes/sizeof(Ty)];
Ty* data;
size_t N;
private:
StackArray (const StackArray& other) DELETED; // do not implement
StackArray& operator= (const StackArray& other) DELETED; // do not implement
};
/*! dynamic sized array that is allocated on the stack */
template<typename Ty, size_t max_stack_elements, size_t max_total_elements>
struct __aligned(64) DynamicStackArray
{
__forceinline DynamicStackArray ()
: data(&arr[0]) {}
__forceinline ~DynamicStackArray ()
{
if (!isStackAllocated())
delete[] data;
}
__forceinline bool isStackAllocated() const {
return data == &arr[0];
}
__forceinline size_t size() const
{
if (isStackAllocated()) return max_stack_elements;
else return max_total_elements;
}
__forceinline void resize(size_t M)
{
assert(M <= max_total_elements);
if (likely(M <= max_stack_elements)) return;
if (likely(!isStackAllocated())) return;
data = new Ty[max_total_elements];
for (size_t i=0; i<max_stack_elements; i++)
data[i] = arr[i];
}
__forceinline operator Ty* () { return data; }
__forceinline operator const Ty* () const { return data; }
__forceinline Ty& operator[](const int i) { assert(i>=0 && i<max_total_elements); resize(i+1); return data[i]; }
__forceinline Ty& operator[](const unsigned i) { assert(i<max_total_elements); resize(i+1); return data[i]; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline Ty& operator[](const size_t i) { assert(i<max_total_elements); resize(i+1); return data[i]; }
#endif
__forceinline DynamicStackArray (const DynamicStackArray& other)
: data(&arr[0])
{
for (size_t i=0; i<other.size(); i++)
this->operator[] (i) = other[i];
}
DynamicStackArray& operator= (const DynamicStackArray& other)
{
for (size_t i=0; i<other.size(); i++)
this->operator[] (i) = other[i];
return *this;
}
private:
Ty arr[max_stack_elements];
Ty* data;
};
}
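
A short usage sketch for the array helpers above (illustrative only, not part of the vendored file; the function name is hypothetical and n is assumed to be greater than zero).
// --- usage sketch (illustrative, not part of the vendored sources) ---
// darray_t is a fixed-capacity array with a runtime size; StackArray lives on
// the stack while the request fits the byte budget and spills to the heap otherwise.
#include "array.h"
float example_arrays(unsigned n) // assumes n > 0
{
  using namespace embree;
  darray_t<int,8> d;
  d.push_back(1);
  d.push_back(2);
  int sum = d[0] + d[1];

  dynamic_large_stack_array(float,tmp,n,1024); // expands to StackArray<float,1024> tmp(n)
  for (unsigned i = 0; i < n; i++) tmp[i] = float(i);
  return float(sum) + tmp[n-1];
}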

thirdparty/embree/common/sys/atomic.h vendored Normal file

@@ -0,0 +1,59 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <atomic>
#include "intrinsics.h"
namespace embree
{
/* compiler memory barriers */
#if defined(__INTEL_COMPILER)
//#define __memory_barrier() __memory_barrier()
#elif defined(__GNUC__) || defined(__clang__)
# define __memory_barrier() asm volatile("" ::: "memory")
#elif defined(_MSC_VER)
# define __memory_barrier() _ReadWriteBarrier()
#endif
template <typename T>
struct atomic : public std::atomic<T>
{
atomic () {}
atomic (const T& a)
: std::atomic<T>(a) {}
atomic (const atomic<T>& a) {
this->store(a.load());
}
atomic& operator=(const atomic<T>& other) {
this->store(other.load());
return *this;
}
};
template<typename T>
__forceinline void _atomic_min(std::atomic<T>& aref, const T& bref)
{
const T b = bref;
while (true) {
T a = aref.load();
if (a <= b) break;
if (aref.compare_exchange_strong(a,b)) break;
}
}
template<typename T>
__forceinline void _atomic_max(std::atomic<T>& aref, const T& bref)
{
const T b = bref;
while (true) {
T a = aref.load();
if (a >= b) break;
if (aref.compare_exchange_strong(a,b)) break;
}
}
}
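
The _atomic_min/_atomic_max helpers above are compare-and-swap retry loops. The sketch below (illustrative only, not part of the vendored file; the function name is hypothetical) writes the same pattern out against std::atomic directly.
// --- usage sketch (illustrative, not part of the vendored sources) ---
// Keep trying to lower the stored value until it is already small enough or
// the compare-exchange succeeds; on failure `current` is refreshed automatically.
#include <atomic>
inline void atomic_min_sketch(std::atomic<float>& target, float candidate)
{
  float current = target.load();
  while (candidate < current &&
         !target.compare_exchange_strong(current, candidate)) {
    // compare_exchange_strong reloaded `current`; the loop re-checks it
  }
}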

thirdparty/embree/common/sys/barrier.cpp vendored Normal file

@@ -0,0 +1,289 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "barrier.h"
#include "condition.h"
#include "regression.h"
#include "thread.h"
#if defined (__WIN32__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace embree
{
struct BarrierSysImplementation
{
__forceinline BarrierSysImplementation (size_t N)
: i(0), enterCount(0), exitCount(0), barrierSize(0)
{
events[0] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
events[1] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
init(N);
}
__forceinline ~BarrierSysImplementation ()
{
CloseHandle(events[0]);
CloseHandle(events[1]);
}
__forceinline void init(size_t N)
{
barrierSize = N;
enterCount.store(N);
exitCount.store(N);
}
__forceinline void wait()
{
/* every thread entering the barrier decrements this count */
size_t i0 = i;
size_t cnt0 = enterCount--;
/* all threads except the last one wait in the barrier */
if (cnt0 > 1)
{
if (WaitForSingleObject(events[i0], INFINITE) != WAIT_OBJECT_0)
THROW_RUNTIME_ERROR("WaitForSingleObjects failed");
}
/* the last thread starts all threads waiting at the barrier */
else
{
i = 1-i;
enterCount.store(barrierSize);
if (SetEvent(events[i0]) == 0)
THROW_RUNTIME_ERROR("SetEvent failed");
}
/* every thread leaving the barrier decrements this count */
size_t cnt1 = exitCount--;
/* the last thread that left the barrier resets the event again */
if (cnt1 == 1)
{
exitCount.store(barrierSize);
if (ResetEvent(events[i0]) == 0)
THROW_RUNTIME_ERROR("ResetEvent failed");
}
}
public:
HANDLE events[2];
atomic<size_t> i;
atomic<size_t> enterCount;
atomic<size_t> exitCount;
size_t barrierSize;
};
}
#else
namespace embree
{
struct BarrierSysImplementation
{
__forceinline BarrierSysImplementation (size_t N)
: count(0), barrierSize(0)
{
init(N);
}
__forceinline void init(size_t N)
{
assert(count == 0);
count = 0;
barrierSize = N;
}
__forceinline void wait()
{
mutex.lock();
count++;
if (count == barrierSize) {
count = 0;
cond.notify_all();
mutex.unlock();
return;
}
cond.wait(mutex);
mutex.unlock();
return;
}
public:
MutexSys mutex;
ConditionSys cond;
volatile size_t count;
volatile size_t barrierSize;
};
}
#endif
namespace embree
{
BarrierSys::BarrierSys (size_t N) {
opaque = new BarrierSysImplementation(N);
}
BarrierSys::~BarrierSys () {
delete (BarrierSysImplementation*) opaque;
}
void BarrierSys::init(size_t count) {
((BarrierSysImplementation*) opaque)->init(count);
}
void BarrierSys::wait() {
((BarrierSysImplementation*) opaque)->wait();
}
LinearBarrierActive::LinearBarrierActive (size_t N)
: count0(nullptr), count1(nullptr), mode(0), flag0(0), flag1(0), threadCount(0)
{
if (N == 0) N = getNumberOfLogicalThreads();
init(N);
}
LinearBarrierActive::~LinearBarrierActive()
{
delete[] count0;
delete[] count1;
}
void LinearBarrierActive::init(size_t N)
{
if (threadCount != N) {
threadCount = N;
if (count0) delete[] count0; count0 = new unsigned char[N];
if (count1) delete[] count1; count1 = new unsigned char[N];
}
mode = 0;
flag0 = 0;
flag1 = 0;
for (size_t i=0; i<N; i++) count0[i] = 0;
for (size_t i=0; i<N; i++) count1[i] = 0;
}
void LinearBarrierActive::wait (const size_t threadIndex)
{
if (mode == 0)
{
if (threadIndex == 0)
{
for (size_t i=0; i<threadCount; i++)
count1[i] = 0;
for (size_t i=1; i<threadCount; i++)
{
while (likely(count0[i] == 0))
pause_cpu();
}
mode = 1;
flag1 = 0;
__memory_barrier();
flag0 = 1;
}
else
{
count0[threadIndex] = 1;
{
while (likely(flag0 == 0))
pause_cpu();
}
}
}
else
{
if (threadIndex == 0)
{
for (size_t i=0; i<threadCount; i++)
count0[i] = 0;
for (size_t i=1; i<threadCount; i++)
{
while (likely(count1[i] == 0))
pause_cpu();
}
mode = 0;
flag0 = 0;
__memory_barrier();
flag1 = 1;
}
else
{
count1[threadIndex] = 1;
{
while (likely(flag1 == 0))
pause_cpu();
}
}
}
}
struct barrier_sys_regression_test : public RegressionTest
{
BarrierSys barrier;
std::atomic<size_t> threadID;
std::atomic<size_t> numFailed;
std::vector<size_t> threadResults;
barrier_sys_regression_test()
: RegressionTest("barrier_sys_regression_test"), threadID(0), numFailed(0)
{
registerRegressionTest(this);
}
static void thread_alloc(barrier_sys_regression_test* This)
{
size_t tid = This->threadID++;
for (size_t j=0; j<1000; j++)
{
This->barrier.wait();
This->threadResults[tid] = tid;
This->barrier.wait();
}
}
bool run ()
{
threadID.store(0);
numFailed.store(0);
size_t numThreads = getNumberOfLogicalThreads();
threadResults.resize(numThreads);
barrier.init(numThreads+1);
/* create threads */
std::vector<thread_t> threads;
for (size_t i=0; i<numThreads; i++)
threads.push_back(createThread((thread_func)thread_alloc,this));
/* run test */
for (size_t i=0; i<1000; i++)
{
for (size_t i=0; i<numThreads; i++) threadResults[i] = 0;
barrier.wait();
barrier.wait();
for (size_t i=0; i<numThreads; i++) numFailed += threadResults[i] != i;
}
/* destroy threads */
for (size_t i=0; i<numThreads; i++)
join(threads[i]);
return numFailed == 0;
}
};
barrier_sys_regression_test barrier_sys_regression_test;
}

thirdparty/embree/common/sys/barrier.h vendored Normal file

@@ -0,0 +1,112 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "intrinsics.h"
#include "sysinfo.h"
#include "atomic.h"
namespace embree
{
/*! system barrier using operating system */
class BarrierSys
{
public:
/*! construction / destruction */
BarrierSys (size_t N = 0);
~BarrierSys ();
private:
/*! class is non-copyable */
BarrierSys (const BarrierSys& other) DELETED; // do not implement
BarrierSys& operator= (const BarrierSys& other) DELETED; // do not implement
public:
/*! initializes the barrier with some number of threads */
void init(size_t count);
/*! lets calling thread wait in barrier */
void wait();
private:
void* opaque;
};
/*! fast active barrier using atomic counter */
struct BarrierActive
{
public:
BarrierActive ()
: cntr(0) {}
void reset() {
cntr.store(0);
}
void wait (size_t numThreads)
{
cntr++;
while (cntr.load() != numThreads)
pause_cpu();
}
private:
std::atomic<size_t> cntr;
};
/*! fast active barrier that does not require initialization to some number of threads */
struct BarrierActiveAutoReset
{
public:
BarrierActiveAutoReset ()
: cntr0(0), cntr1(0) {}
void wait (size_t threadCount)
{
cntr0.fetch_add(1);
while (cntr0 != threadCount) pause_cpu();
cntr1.fetch_add(1);
while (cntr1 != threadCount) pause_cpu();
cntr0.fetch_add(-1);
while (cntr0 != 0) pause_cpu();
cntr1.fetch_add(-1);
while (cntr1 != 0) pause_cpu();
}
private:
std::atomic<size_t> cntr0;
std::atomic<size_t> cntr1;
};
class LinearBarrierActive
{
public:
/*! construction and destruction */
LinearBarrierActive (size_t threadCount = 0);
~LinearBarrierActive();
private:
/*! class is non-copyable */
LinearBarrierActive (const LinearBarrierActive& other) DELETED; // do not implement
LinearBarrierActive& operator= (const LinearBarrierActive& other) DELETED; // do not implement
public:
/*! initializes the barrier with some number of threads */
void init(size_t threadCount);
/*! thread with threadIndex waits in the barrier */
void wait (const size_t threadIndex);
private:
volatile unsigned char* count0;
volatile unsigned char* count1;
volatile unsigned int mode;
volatile unsigned int flag0;
volatile unsigned int flag1;
volatile size_t threadCount;
};
}
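
A short usage sketch for BarrierSys (illustrative only, not part of the vendored file; the function name is hypothetical).
// --- usage sketch (illustrative, not part of the vendored sources) ---
// Each call to wait() blocks until the configured number of threads has
// arrived; here two threads rendezvous once before continuing.
#include "barrier.h"
#include <thread>
void example_barrier()
{
  embree::BarrierSys barrier(2);
  std::thread worker([&]() { barrier.wait(); });
  barrier.wait();   // released once the worker has arrived as well
  worker.join();
}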

thirdparty/embree/common/sys/condition.cpp vendored Normal file

@@ -0,0 +1,85 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "condition.h"
#if defined(__WIN32__) && !defined(PTHREADS_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace embree
{
struct ConditionImplementation
{
__forceinline ConditionImplementation () {
InitializeConditionVariable(&cond);
}
__forceinline ~ConditionImplementation () {
}
__forceinline void wait(MutexSys& mutex_in) {
SleepConditionVariableCS(&cond, (LPCRITICAL_SECTION)mutex_in.mutex, INFINITE);
}
__forceinline void notify_all() {
WakeAllConditionVariable(&cond);
}
public:
CONDITION_VARIABLE cond;
};
}
#endif
#if defined(__UNIX__) || defined(PTHREADS_WIN32)
#include <pthread.h>
namespace embree
{
struct ConditionImplementation
{
__forceinline ConditionImplementation () {
if (pthread_cond_init(&cond,nullptr) != 0)
THROW_RUNTIME_ERROR("pthread_cond_init failed");
}
__forceinline ~ConditionImplementation() {
MAYBE_UNUSED bool ok = pthread_cond_destroy(&cond) == 0;
assert(ok);
}
__forceinline void wait(MutexSys& mutex) {
if (pthread_cond_wait(&cond, (pthread_mutex_t*)mutex.mutex) != 0)
THROW_RUNTIME_ERROR("pthread_cond_wait failed");
}
__forceinline void notify_all() {
if (pthread_cond_broadcast(&cond) != 0)
THROW_RUNTIME_ERROR("pthread_cond_broadcast failed");
}
public:
pthread_cond_t cond;
};
}
#endif
namespace embree
{
ConditionSys::ConditionSys () {
cond = new ConditionImplementation;
}
ConditionSys::~ConditionSys() {
delete (ConditionImplementation*) cond;
}
void ConditionSys::wait(MutexSys& mutex) {
((ConditionImplementation*) cond)->wait(mutex);
}
void ConditionSys::notify_all() {
((ConditionImplementation*) cond)->notify_all();
}
}

thirdparty/embree/common/sys/condition.h vendored Normal file

@@ -0,0 +1,31 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "mutex.h"
namespace embree
{
class ConditionSys
{
public:
ConditionSys();
~ConditionSys();
void wait( class MutexSys& mutex );
void notify_all();
template<typename Predicate>
__forceinline void wait( class MutexSys& mutex, const Predicate& pred )
{
while (!pred()) wait(mutex);
}
private:
ConditionSys (const ConditionSys& other) DELETED; // do not implement
ConditionSys& operator= (const ConditionSys& other) DELETED; // do not implement
protected:
void* cond;
};
}
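
A short usage sketch for ConditionSys (illustrative only, not part of the vendored file; the Queue type is hypothetical).
// --- usage sketch (illustrative, not part of the vendored sources) ---
// The predicate overload of wait() re-checks the condition after every wakeup,
// which guards against spurious and stolen wakeups.
#include "condition.h"
struct Queue // hypothetical example type
{
  embree::MutexSys mutex;
  embree::ConditionSys cond;
  bool ready = false;

  void produce() {
    embree::Lock<embree::MutexSys> lock(mutex);
    ready = true;
    cond.notify_all();
  }
  void consume() {
    embree::Lock<embree::MutexSys> lock(mutex);
    cond.wait(mutex, [&]() { return ready; }); // releases the mutex while blocked
  }
};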

thirdparty/embree/common/sys/estring.cpp vendored Normal file

@@ -0,0 +1,42 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "estring.h"
#include <algorithm>
#include <ctype.h>
namespace embree
{
char to_lower(char c) { return char(tolower(int(c))); }
char to_upper(char c) { return char(toupper(int(c))); }
std::string toLowerCase(const std::string& s) { std::string dst(s); std::transform(dst.begin(), dst.end(), dst.begin(), to_lower); return dst; }
std::string toUpperCase(const std::string& s) { std::string dst(s); std::transform(dst.begin(), dst.end(), dst.begin(), to_upper); return dst; }
Vec2f string_to_Vec2f ( std::string str )
{
size_t next = 0;
const float x = std::stof(str,&next); str = str.substr(next+1);
const float y = std::stof(str,&next);
return Vec2f(x,y);
}
Vec3f string_to_Vec3f ( std::string str )
{
size_t next = 0;
const float x = std::stof(str,&next); str = str.substr(next+1);
const float y = std::stof(str,&next); str = str.substr(next+1);
const float z = std::stof(str,&next);
return Vec3f(x,y,z);
}
Vec4f string_to_Vec4f ( std::string str )
{
size_t next = 0;
const float x = std::stof(str,&next); str = str.substr(next+1);
const float y = std::stof(str,&next); str = str.substr(next+1);
const float z = std::stof(str,&next); str = str.substr(next+1);
const float w = std::stof(str,&next);
return Vec4f(x,y,z,w);
}
}

thirdparty/embree/common/sys/estring.h vendored Normal file

@@ -0,0 +1,73 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
#include "../math/vec2.h"
#include "../math/vec3.h"
#include "../math/vec4.h"
namespace embree
{
class IOStreamStateRestorer
{
public:
IOStreamStateRestorer(std::ostream& iostream)
: iostream(iostream), flags(iostream.flags()), precision(iostream.precision()) {
}
~IOStreamStateRestorer() {
iostream.flags(flags);
iostream.precision(precision);
}
private:
std::ostream& iostream;
std::ios::fmtflags flags;
std::streamsize precision;
};
struct IndentOStream : public std::streambuf
{
explicit IndentOStream(std::ostream &ostream, int indent = 2)
: streambuf(ostream.rdbuf())
, start_of_line(true)
, ident_str(indent, ' ')
, stream(&ostream)
{
// set streambuf of ostream to this and save original streambuf
stream->rdbuf(this);
}
virtual ~IndentOStream()
{
if (stream != NULL) {
// restore old streambuf
stream->rdbuf(streambuf);
}
}
protected:
virtual int overflow(int ch) {
if (start_of_line && ch != '\n') {
streambuf->sputn(ident_str.data(), ident_str.size());
}
start_of_line = ch == '\n';
return streambuf->sputc(ch);
}
private:
std::streambuf *streambuf;
bool start_of_line;
std::string ident_str;
std::ostream *stream;
};
std::string toLowerCase(const std::string& s);
std::string toUpperCase(const std::string& s);
Vec2f string_to_Vec2f ( std::string str );
Vec3f string_to_Vec3f ( std::string str );
Vec4f string_to_Vec4f ( std::string str );
}
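
A short usage sketch for the string helpers above (illustrative only, not part of the vendored file; the function name is hypothetical).
// --- usage sketch (illustrative, not part of the vendored sources) ---
// IndentOStream redirects a stream so that every line gets an indent prefix
// until it is destroyed; string_to_Vec3f parses three float components
// separated by a single character such as a space or comma.
#include "estring.h"
#include <iostream>
void example_estring()
{
  {
    embree::IndentOStream indented(std::cout, 4);
    std::cout << "nested line" << std::endl;   // printed with a 4-space indent
  }                                            // destructor restores std::cout
  embree::Vec3f v = embree::string_to_Vec3f("1.0 2.0 3.0");
  std::cout << v.x << " " << v.y << " " << v.z << std::endl;
}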

thirdparty/embree/common/sys/filename.cpp vendored Normal file

@@ -0,0 +1,126 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "filename.h"
#include "sysinfo.h"
namespace embree
{
#ifdef __WIN32__
const char path_sep = '\\';
#else
const char path_sep = '/';
#endif
/*! create an empty filename */
FileName::FileName () {}
/*! create a valid filename from a string */
FileName::FileName (const char* in) {
filename = in;
for (size_t i=0; i<filename.size(); i++)
if (filename[i] == '\\' || filename[i] == '/')
filename[i] = path_sep;
while (!filename.empty() && filename[filename.size()-1] == path_sep)
filename.resize(filename.size()-1);
}
/*! create a valid filename from a string */
FileName::FileName (const std::string& in) {
filename = in;
for (size_t i=0; i<filename.size(); i++)
if (filename[i] == '\\' || filename[i] == '/')
filename[i] = path_sep;
while (!filename.empty() && filename[filename.size()-1] == path_sep)
filename.resize(filename.size()-1);
}
/*! returns path to executable */
FileName FileName::executableFolder() {
return FileName(getExecutableFileName()).path();
}
/*! returns the path */
FileName FileName::path() const {
size_t pos = filename.find_last_of(path_sep);
if (pos == std::string::npos) return FileName();
return filename.substr(0,pos);
}
/*! returns the basename */
std::string FileName::base() const {
size_t pos = filename.find_last_of(path_sep);
if (pos == std::string::npos) return filename;
return filename.substr(pos+1);
}
/*! returns the extension */
std::string FileName::ext() const {
size_t pos = filename.find_last_of('.');
if (pos == std::string::npos) return "";
return filename.substr(pos+1);
}
/*! drops the extension */
FileName FileName::dropExt() const {
size_t pos = filename.find_last_of('.');
if (pos == std::string::npos) return filename;
return filename.substr(0,pos);
}
/*! returns the basename without extension */
std::string FileName::name() const {
size_t start = filename.find_last_of(path_sep);
if (start == std::string::npos) start = 0; else start++;
size_t end = filename.find_last_of('.');
if (end == std::string::npos || end < start) end = filename.size();
return filename.substr(start, end - start);
}
/*! replaces the extension */
FileName FileName::setExt(const std::string& ext) const {
size_t start = filename.find_last_of(path_sep);
if (start == std::string::npos) start = 0; else start++;
size_t end = filename.find_last_of('.');
if (end == std::string::npos || end < start) return FileName(filename+ext);
return FileName(filename.substr(0,end)+ext);
}
/*! adds the extension */
FileName FileName::addExt(const std::string& ext) const {
return FileName(filename+ext);
}
/*! concatenates two filenames to this/other */
FileName FileName::operator +( const FileName& other ) const {
if (filename == "") return FileName(other);
else return FileName(filename + path_sep + other.filename);
}
/*! concatenates two filenames to this/other */
FileName FileName::operator +( const std::string& other ) const {
return operator+(FileName(other));
}
/*! removes the base from a filename (if possible) */
FileName FileName::operator -( const FileName& base ) const {
size_t pos = filename.find_first_of(base);
if (pos == std::string::npos) return *this;
return FileName(filename.substr(pos+1));
}
/*! == operator */
bool operator== (const FileName& a, const FileName& b) {
return a.filename == b.filename;
}
/*! != operator */
bool operator!= (const FileName& a, const FileName& b) {
return a.filename != b.filename;
}
/*! output operator */
std::ostream& operator<<(std::ostream& cout, const FileName& filename) {
return cout << filename.filename;
}
}

thirdparty/embree/common/sys/filename.h vendored Normal file

@@ -0,0 +1,78 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
namespace embree
{
/*! Convenience class for handling file names and paths. */
class FileName
{
public:
/*! create an empty filename */
FileName ();
/*! create a valid filename from a string */
FileName (const char* filename);
/*! create a valid filename from a string */
FileName (const std::string& filename);
/*! returns path to executable */
static FileName executableFolder();
/*! auto convert into a string */
operator std::string() const { return filename; }
/*! returns a string of the filename */
const std::string str() const { return filename; }
/*! returns a c-string of the filename */
const char* c_str() const { return filename.c_str(); }
/*! returns the path of a filename */
FileName path() const;
/*! returns the file of a filename */
std::string base() const;
/*! returns the base of a filename without extension */
std::string name() const;
/*! returns the file extension */
std::string ext() const;
/*! drops the file extension */
FileName dropExt() const;
/*! replaces the file extension */
FileName setExt(const std::string& ext = "") const;
/*! adds file extension */
FileName addExt(const std::string& ext = "") const;
/*! concatenates two filenames to this/other */
FileName operator +( const FileName& other ) const;
/*! concatenates two filenames to this/other */
FileName operator +( const std::string& other ) const;
/*! removes the base from a filename (if possible) */
FileName operator -( const FileName& base ) const;
/*! == operator */
friend bool operator==(const FileName& a, const FileName& b);
/*! != operator */
friend bool operator!=(const FileName& a, const FileName& b);
/*! output operator */
friend std::ostream& operator<<(std::ostream& cout, const FileName& filename);
private:
std::string filename;
};
}
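
A short usage sketch for FileName (illustrative only, not part of the vendored file; the function name and paths are hypothetical).
// --- usage sketch (illustrative, not part of the vendored sources) ---
// FileName normalizes path separators and offers simple path and extension
// manipulation; the separator is '/' on Unix and '\' on Windows.
#include "filename.h"
#include <iostream>
void example_filename()
{
  embree::FileName f = embree::FileName("models") + embree::FileName("bunny.obj");
  std::cout << f.path() << std::endl;     // "models"
  std::cout << f.base() << std::endl;     // "bunny.obj"
  std::cout << f.ext()  << std::endl;     // "obj"
  embree::FileName g = f.setExt(".ply");  // "models/bunny.ply" on Unix
  (void)g;
}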

thirdparty/embree/common/sys/intrinsics.h vendored Normal file

@@ -0,0 +1,566 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
#if defined(__WIN32__)
#include <intrin.h>
#endif
#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#else
#include <immintrin.h>
#if defined(__EMSCRIPTEN__)
#include "../simd/wasm/emulation.h"
#endif
#endif
#if defined(__BMI__) && defined(__GNUC__) && !defined(__INTEL_COMPILER)
#if !defined(_tzcnt_u32)
#define _tzcnt_u32 __tzcnt_u32
#endif
#if !defined(_tzcnt_u64)
#define _tzcnt_u64 __tzcnt_u64
#endif
#endif
#if defined(__aarch64__)
#if !defined(_lzcnt_u32)
#define _lzcnt_u32 __builtin_clz
#endif
#else
#if defined(__LZCNT__)
#if !defined(_lzcnt_u32)
#define _lzcnt_u32 __lzcnt32
#endif
#if !defined(_lzcnt_u64)
#define _lzcnt_u64 __lzcnt64
#endif
#endif
#endif
#if defined(__WIN32__)
# if !defined(NOMINMAX)
# define NOMINMAX
# endif
# include <windows.h>
#endif
/* normally defined in pmmintrin.h, but we always need this */
#if !defined(_MM_SET_DENORMALS_ZERO_MODE)
#define _MM_DENORMALS_ZERO_ON (0x0040)
#define _MM_DENORMALS_ZERO_OFF (0x0000)
#define _MM_DENORMALS_ZERO_MASK (0x0040)
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
#endif
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// Windows Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__WIN32__) && !defined(__INTEL_LLVM_COMPILER)
__forceinline size_t read_tsc()
{
LARGE_INTEGER li;
QueryPerformanceCounter(&li);
return (size_t)li.QuadPart;
}
__forceinline int bsf(int v) {
#if defined(__AVX2__) && !defined(__aarch64__)
return _tzcnt_u32(v);
#else
unsigned long r = 0; _BitScanForward(&r,v); return r;
#endif
}
__forceinline unsigned bsf(unsigned v) {
#if defined(__AVX2__) && !defined(__aarch64__)
return _tzcnt_u32(v);
#else
unsigned long r = 0; _BitScanForward(&r,v); return r;
#endif
}
#if defined(__X86_64__) || defined (__aarch64__)
__forceinline size_t bsf(size_t v) {
#if defined(__AVX2__)
return _tzcnt_u64(v);
#else
unsigned long r = 0; _BitScanForward64(&r,v); return r;
#endif
}
#endif
__forceinline int bscf(int& v)
{
int i = bsf(v);
v &= v-1;
return i;
}
__forceinline unsigned bscf(unsigned& v)
{
unsigned i = bsf(v);
v &= v-1;
return i;
}
#if defined(__X86_64__) || defined (__aarch64__)
__forceinline size_t bscf(size_t& v)
{
size_t i = bsf(v);
v &= v-1;
return i;
}
#endif
__forceinline int bsr(int v) {
#if defined(__AVX2__) && !defined(__aarch64__)
return 31 - _lzcnt_u32(v);
#else
unsigned long r = 0; _BitScanReverse(&r,v); return r;
#endif
}
__forceinline unsigned bsr(unsigned v) {
#if defined(__AVX2__) && !defined(__aarch64__)
return 31 - _lzcnt_u32(v);
#else
unsigned long r = 0; _BitScanReverse(&r,v); return r;
#endif
}
#if defined(__X86_64__) || defined (__aarch64__)
__forceinline size_t bsr(size_t v) {
#if defined(__AVX2__)
return 63 -_lzcnt_u64(v);
#else
unsigned long r = 0; _BitScanReverse64(&r, v); return r;
#endif
}
#endif
__forceinline int lzcnt(const int x)
{
#if defined(__AVX2__) && !defined(__aarch64__)
return _lzcnt_u32(x);
#else
if (unlikely(x == 0)) return 32;
return 31 - bsr(x);
#endif
}
__forceinline int btc(int v, int i) {
long r = v; _bittestandcomplement(&r,i); return r;
}
__forceinline int bts(int v, int i) {
long r = v; _bittestandset(&r,i); return r;
}
__forceinline int btr(int v, int i) {
long r = v; _bittestandreset(&r,i); return r;
}
#if defined(__X86_64__)
__forceinline size_t btc(size_t v, size_t i) {
size_t r = v; _bittestandcomplement64((__int64*)&r,i); return r;
}
__forceinline size_t bts(size_t v, size_t i) {
__int64 r = v; _bittestandset64(&r,i); return r;
}
__forceinline size_t btr(size_t v, size_t i) {
__int64 r = v; _bittestandreset64(&r,i); return r;
}
#endif
__forceinline int32_t atomic_cmpxchg(volatile int32_t* p, const int32_t c, const int32_t v) {
return _InterlockedCompareExchange((volatile long*)p,v,c);
}
////////////////////////////////////////////////////////////////////////////////
/// Unix Platform
////////////////////////////////////////////////////////////////////////////////
#else
__forceinline uint64_t read_tsc() {
#if defined(__X86_ASM__)
uint32_t high,low;
asm volatile ("rdtsc" : "=d"(high), "=a"(low));
return (((uint64_t)high) << 32) + (uint64_t)low;
#else
/* Not supported yet, meaning measuring traversal cost per pixel does not work. */
return 0;
#endif
}
__forceinline int bsf(int v) {
#if defined(__ARM_NEON)
return __builtin_ctz(v);
#else
#if defined(__AVX2__)
return _tzcnt_u32(v);
#elif defined(__X86_ASM__)
int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
#else
return __builtin_ctz(v);
#endif
#endif
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
__forceinline unsigned int bsf(unsigned v) {
return sycl::ctz(v);
}
#else
#if defined(__64BIT__)
__forceinline unsigned bsf(unsigned v)
{
#if defined(__ARM_NEON)
return __builtin_ctz(v);
#else
#if defined(__AVX2__)
return _tzcnt_u32(v);
#elif defined(__X86_ASM__)
unsigned r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
#else
return __builtin_ctz(v);
#endif
#endif
}
#endif
#endif
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
__forceinline size_t bsf(size_t v) {
return sycl::ctz(v);
}
#else
__forceinline size_t bsf(size_t v) {
#if defined(__AVX2__) && !defined(__aarch64__)
#if defined(__X86_64__)
return _tzcnt_u64(v);
#else
return _tzcnt_u32(v);
#endif
#elif defined(__X86_ASM__)
size_t r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
#else
return __builtin_ctzl(v);
#endif
}
#endif
__forceinline int bscf(int& v)
{
int i = bsf(v);
v &= v-1;
return i;
}
#if defined(__64BIT__)
__forceinline unsigned int bscf(unsigned int& v)
{
unsigned int i = bsf(v);
v &= v-1;
return i;
}
#endif
__forceinline size_t bscf(size_t& v)
{
size_t i = bsf(v);
v &= v-1;
return i;
}
__forceinline int bsr(int v) {
#if defined(__AVX2__) && !defined(__aarch64__)
return 31 - _lzcnt_u32(v);
#elif defined(__X86_ASM__)
int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
#else
return __builtin_clz(v) ^ 31;
#endif
}
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline unsigned bsr(unsigned v) {
#if defined(__AVX2__)
return 31 - _lzcnt_u32(v);
#elif defined(__X86_ASM__)
unsigned r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
#else
return __builtin_clz(v) ^ 31;
#endif
}
#endif
__forceinline size_t bsr(size_t v) {
#if defined(__AVX2__) && !defined(__aarch64__)
#if defined(__X86_64__)
return 63 - _lzcnt_u64(v);
#else
return 31 - _lzcnt_u32(v);
#endif
#elif defined(__X86_ASM__)
size_t r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
#else
return (sizeof(v) * 8 - 1) - __builtin_clzl(v);
#endif
}
__forceinline int lzcnt(const int x)
{
#if defined(__AVX2__) && !defined(__aarch64__)
return _lzcnt_u32(x);
#else
if (unlikely(x == 0)) return 32;
return 31 - bsr(x);
#endif
}
__forceinline size_t blsr(size_t v) {
#if defined(__AVX2__) && !defined(__aarch64__)
#if defined(__INTEL_COMPILER)
return _blsr_u64(v);
#else
#if defined(__X86_64__)
return __blsr_u64(v);
#else
return __blsr_u32(v);
#endif
#endif
#else
return v & (v-1);
#endif
}
__forceinline int btc(int v, int i) {
#if defined(__X86_ASM__)
int r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r;
#else
return (v ^ (1 << i));
#endif
}
__forceinline int bts(int v, int i) {
#if defined(__X86_ASM__)
int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
#else
return (v | (1 << i));
#endif
}
__forceinline int btr(int v, int i) {
#if defined(__X86_ASM__)
int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
#else
return (v & ~(1 << i));
#endif
}
__forceinline size_t btc(size_t v, size_t i) {
#if defined(__X86_ASM__)
size_t r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r;
#else
return (v ^ (1 << i));
#endif
}
__forceinline size_t bts(size_t v, size_t i) {
#if defined(__X86_ASM__)
size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
#else
return (v | (1 << i));
#endif
}
__forceinline size_t btr(size_t v, size_t i) {
#if defined(__X86_ASM__)
size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
#else
return (v & ~(1 << i));
#endif
}
__forceinline int32_t atomic_cmpxchg(int32_t volatile* value, int32_t comparand, const int32_t input) {
return __sync_val_compare_and_swap(value, comparand, input);
}
#endif
#if !defined(__WIN32__)
#if defined(__i386__) && defined(__PIC__)
__forceinline void __cpuid(int out[4], int op)
{
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
"cpuid\n\t"
"xchg{l}\t{%%}ebx, %1\n\t"
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
: "0"(op));
}
__forceinline void __cpuid_count(int out[4], int op1, int op2)
{
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
"cpuid\n\t"
"xchg{l}\t{%%}ebx, %1\n\t"
: "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3])
: "0" (op1), "2" (op2));
}
#elif defined(__X86_ASM__)
__forceinline void __cpuid(int out[4], int op) {
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
}
__forceinline void __cpuid_count(int out[4], int op1, int op2) {
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2));
}
#endif
#endif
////////////////////////////////////////////////////////////////////////////////
/// All Platforms
////////////////////////////////////////////////////////////////////////////////
#if defined(__clang__) || defined(__GNUC__)
#if !defined(_mm_undefined_ps)
__forceinline __m128 _mm_undefined_ps() { return _mm_setzero_ps(); }
#endif
#if !defined(_mm_undefined_si128)
__forceinline __m128i _mm_undefined_si128() { return _mm_setzero_si128(); }
#endif
#if !defined(_mm256_undefined_ps) && defined(__AVX__)
__forceinline __m256 _mm256_undefined_ps() { return _mm256_setzero_ps(); }
#endif
#if !defined(_mm256_undefined_si256) && defined(__AVX__)
__forceinline __m256i _mm256_undefined_si256() { return _mm256_setzero_si256(); }
#endif
#if !defined(_mm512_undefined_ps) && defined(__AVX512F__)
__forceinline __m512 _mm512_undefined_ps() { return _mm512_setzero_ps(); }
#endif
#if !defined(_mm512_undefined_epi32) && defined(__AVX512F__)
__forceinline __m512i _mm512_undefined_epi32() { return _mm512_setzero_si512(); }
#endif
#endif
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
__forceinline unsigned int popcnt(unsigned int in) {
return sycl::popcount(in);
}
#else
#if defined(__SSE4_2__) || defined(__ARM_NEON)
__forceinline int popcnt(int in) {
return _mm_popcnt_u32(in);
}
__forceinline unsigned popcnt(unsigned in) {
return _mm_popcnt_u32(in);
}
#if defined(__64BIT__)
__forceinline size_t popcnt(size_t in) {
return _mm_popcnt_u64(in);
}
#endif
#endif
#endif
#if defined(__X86_ASM__)
__forceinline uint64_t rdtsc()
{
int dummy[4];
__cpuid(dummy,0);
uint64_t clock = read_tsc();
__cpuid(dummy,0);
return clock;
}
#endif
__forceinline void pause_cpu(const size_t N = 8)
{
for (size_t i=0; i<N; i++)
_mm_pause();
}
/* prefetches */
__forceinline void prefetchL1 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T0); }
__forceinline void prefetchL2 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T1); }
__forceinline void prefetchL3 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T2); }
__forceinline void prefetchNTA(const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_NTA); }
__forceinline void prefetchEX (const void* ptr) {
#if defined(__INTEL_COMPILER)
_mm_prefetch((const char*)ptr,_MM_HINT_ET0);
#else
_mm_prefetch((const char*)ptr,_MM_HINT_T0);
#endif
}
__forceinline void prefetchL1EX(const void* ptr) {
prefetchEX(ptr);
}
__forceinline void prefetchL2EX(const void* ptr) {
prefetchEX(ptr);
}
#if defined(__AVX2__) && !defined(__aarch64__)
__forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); }
__forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); }
#if defined(__X86_64__)
__forceinline size_t pext(size_t a, size_t b) { return _pext_u64(a, b); }
__forceinline size_t pdep(size_t a, size_t b) { return _pdep_u64(a, b); }
#endif
#endif
#if defined(__AVX512F__)
#if defined(__INTEL_COMPILER)
__forceinline float mm512_cvtss_f32(__m512 v) {
return _mm512_cvtss_f32(v);
}
__forceinline int mm512_mask2int(__mmask16 k1) {
return _mm512_mask2int(k1);
}
__forceinline __mmask16 mm512_int2mask(int mask) {
return _mm512_int2mask(mask);
}
#else
__forceinline float mm512_cvtss_f32(__m512 v) { // FIXME: _mm512_cvtss_f32 neither supported by clang v4.0.0 nor GCC 6.3
return _mm_cvtss_f32(_mm512_castps512_ps128(v));
}
__forceinline int mm512_mask2int(__mmask16 k1) { // FIXME: _mm512_mask2int not yet supported by GCC 6.3
return (int)k1;
}
__forceinline __mmask16 mm512_int2mask(int mask) { // FIXME: _mm512_int2mask not yet supported by GCC 6.3
return (__mmask16)mask;
}
#endif
#endif
}
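
A short usage sketch for the bit-scan helpers above (illustrative only, not part of the vendored file; the function name is hypothetical).
// --- usage sketch (illustrative, not part of the vendored sources) ---
// bsf returns the index of the lowest set bit; bscf additionally clears that
// bit, which makes iterating over a bit mask a two-line loop.
#include "intrinsics.h"
#include <iostream>
void example_bits()
{
  int mask = 0x16;                     // bits 1, 2 and 4 set
  while (mask) {
    int bit = embree::bscf(mask);      // yields 1, then 2, then 4
    std::cout << bit << std::endl;
  }
}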

thirdparty/embree/common/sys/library.cpp vendored Normal file

@@ -0,0 +1,83 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "library.h"
#include "sysinfo.h"
#include "filename.h"
////////////////////////////////////////////////////////////////////////////////
/// Windows Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__WIN32__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace embree
{
/* opens a shared library */
lib_t openLibrary(const std::string& file)
{
std::string fullName = file+".dll";
FileName executable = getExecutableFileName();
HANDLE handle = LoadLibrary((executable.path() + fullName).c_str());
return lib_t(handle);
}
/* returns address of a symbol from the library */
void* getSymbol(lib_t lib, const std::string& sym) {
return (void*)GetProcAddress(HMODULE(lib),sym.c_str());
}
/* closes the shared library */
void closeLibrary(lib_t lib) {
FreeLibrary(HMODULE(lib));
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Unix Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__UNIX__)
#include <dlfcn.h>
namespace embree
{
/* opens a shared library */
lib_t openLibrary(const std::string& file)
{
#if defined(__MACOSX__)
std::string fullName = "lib"+file+".dylib";
#else
std::string fullName = "lib"+file+".so";
#endif
void* lib = dlopen(fullName.c_str(), RTLD_NOW);
if (lib) return lib_t(lib);
FileName executable = getExecutableFileName();
lib = dlopen((executable.path() + fullName).c_str(),RTLD_NOW);
if (lib == nullptr) {
const char* error = dlerror();
if (error) {
THROW_RUNTIME_ERROR(error);
} else {
THROW_RUNTIME_ERROR("could not load library "+executable.str());
}
}
return lib_t(lib);
}
/* returns address of a symbol from the library */
void* getSymbol(lib_t lib, const std::string& sym) {
return dlsym(lib,sym.c_str());
}
/* closes the shared library */
void closeLibrary(lib_t lib) {
dlclose(lib);
}
}
#endif

thirdparty/embree/common/sys/library.h vendored Normal file

@@ -0,0 +1,21 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
namespace embree
{
/*! type for shared library */
typedef struct opaque_lib_t* lib_t;
/*! loads a shared library */
lib_t openLibrary(const std::string& file);
/*! returns address of a symbol from the library */
void* getSymbol(lib_t lib, const std::string& sym);
/*! unloads a shared library */
void closeLibrary(lib_t lib);
}
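
A short usage sketch for the shared-library wrappers above (illustrative only, not part of the vendored file); the library name "example_plugin" and the symbol "exampleEntryPoint" are hypothetical.
// --- usage sketch (illustrative, not part of the vendored sources) ---
// openLibrary receives the bare library name; the platform prefix and
// extension ("lib<name>.so", "lib<name>.dylib" or "<name>.dll") are added internally.
#include "library.h"
typedef void (*entry_fn)();
void example_library()
{
  embree::lib_t lib = embree::openLibrary("example_plugin");               // hypothetical name
  if (!lib) return;                                                        // LoadLibrary may return null on Windows
  entry_fn entry = (entry_fn) embree::getSymbol(lib, "exampleEntryPoint"); // hypothetical symbol
  if (entry) entry();
  embree::closeLibrary(lib);
}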

thirdparty/embree/common/sys/mutex.cpp vendored Normal file

@@ -0,0 +1,58 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "mutex.h"
#include "regression.h"
#if defined(__WIN32__) && !defined(PTHREADS_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace embree
{
MutexSys::MutexSys() { mutex = new CRITICAL_SECTION; InitializeCriticalSection((CRITICAL_SECTION*)mutex); }
MutexSys::~MutexSys() { DeleteCriticalSection((CRITICAL_SECTION*)mutex); delete (CRITICAL_SECTION*)mutex; }
void MutexSys::lock() { EnterCriticalSection((CRITICAL_SECTION*)mutex); }
bool MutexSys::try_lock() { return TryEnterCriticalSection((CRITICAL_SECTION*)mutex) != 0; }
void MutexSys::unlock() { LeaveCriticalSection((CRITICAL_SECTION*)mutex); }
}
#endif
#if defined(__UNIX__) || defined(PTHREADS_WIN32)
#include <pthread.h>
namespace embree
{
/*! system mutex using pthreads */
MutexSys::MutexSys()
{
mutex = new pthread_mutex_t;
if (pthread_mutex_init((pthread_mutex_t*)mutex, nullptr) != 0)
THROW_RUNTIME_ERROR("pthread_mutex_init failed");
}
MutexSys::~MutexSys()
{
MAYBE_UNUSED bool ok = pthread_mutex_destroy((pthread_mutex_t*)mutex) == 0;
assert(ok);
delete (pthread_mutex_t*)mutex;
mutex = nullptr;
}
void MutexSys::lock()
{
if (pthread_mutex_lock((pthread_mutex_t*)mutex) != 0)
THROW_RUNTIME_ERROR("pthread_mutex_lock failed");
}
bool MutexSys::try_lock() {
return pthread_mutex_trylock((pthread_mutex_t*)mutex) == 0;
}
void MutexSys::unlock()
{
if (pthread_mutex_unlock((pthread_mutex_t*)mutex) != 0)
THROW_RUNTIME_ERROR("pthread_mutex_unlock failed");
}
};
#endif

thirdparty/embree/common/sys/mutex.h vendored Normal file

@@ -0,0 +1,104 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
#include "intrinsics.h"
#include "atomic.h"
#define CPU_CACHELINE_SIZE 64
namespace embree
{
/*! system mutex */
class MutexSys {
friend struct ConditionImplementation;
public:
MutexSys();
~MutexSys();
private:
MutexSys (const MutexSys& other) DELETED; // do not implement
MutexSys& operator= (const MutexSys& other) DELETED; // do not implement
public:
void lock();
bool try_lock();
void unlock();
protected:
void* mutex;
};
/*! spinning mutex */
class SpinLock
{
public:
SpinLock ()
: flag(false) {}
__forceinline bool isLocked() {
return flag.load();
}
__forceinline void lock()
{
while (true)
{
while (flag.load())
{
_mm_pause();
_mm_pause();
}
bool expected = false;
if (flag.compare_exchange_strong(expected,true,std::memory_order_acquire))
break;
}
}
__forceinline bool try_lock()
{
bool expected = false;
if (flag.load() != expected) {
return false;
}
return flag.compare_exchange_strong(expected,true,std::memory_order_acquire);
}
__forceinline void unlock() {
flag.store(false,std::memory_order_release);
}
__forceinline void wait_until_unlocked()
{
while(flag.load())
{
_mm_pause();
_mm_pause();
}
}
public:
atomic<bool> flag;
};
class PaddedSpinLock : public SpinLock
{
private:
MAYBE_UNUSED char padding[CPU_CACHELINE_SIZE - sizeof(SpinLock)];
};
/*! safe mutex lock and unlock helper */
template<typename Mutex> class Lock {
public:
Lock (Mutex& mutex) : mutex(mutex), locked(true) { mutex.lock(); }
Lock (Mutex& mutex, bool locked) : mutex(mutex), locked(locked) {}
~Lock() { if (locked) mutex.unlock(); }
__forceinline void lock() { assert(!locked); locked = true; mutex.lock(); }
__forceinline bool isLocked() const { return locked; }
protected:
Mutex& mutex;
bool locked;
};
}
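A short sketch of how the helpers above are typically combined; the counter and function names are made up for illustration.
// Hypothetical use of MutexSys with the RAII Lock helper, and of SpinLock for short critical sections.
#include "mutex.h"

namespace example {
  embree::MutexSys g_mutex;
  embree::PaddedSpinLock g_spinlock;
  int g_counter = 0;

  inline void increment_locked()
  {
    embree::Lock<embree::MutexSys> guard(g_mutex);  // locks here, unlocks when guard is destroyed
    g_counter++;
  }

  inline bool increment_if_free()
  {
    if (!g_spinlock.try_lock())   // non-blocking attempt
      return false;
    g_counter++;
    g_spinlock.unlock();
    return true;
  }
}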

616
thirdparty/embree/common/sys/platform.h vendored Normal file
View File

@@ -0,0 +1,616 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <cstddef>
#include <cassert>
#include <cstdlib>
#include <cstdio>
#include <memory>
#include <stdexcept>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <cstring>
#include <stdint.h>
#include <functional>
#include <mutex>
#if defined(EMBREE_SYCL_SUPPORT)
#define __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#pragma clang diagnostic ignored "-W#pragma-messages"
#include <sycl/sycl.hpp>
#pragma clang diagnostic pop
#include "sycl.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
#define CONSTANT __attribute__((opencl_constant))
#else
#define CONSTANT
#endif
#endif
////////////////////////////////////////////////////////////////////////////////
/// detect platform
////////////////////////////////////////////////////////////////////////////////
/* detect 32 or 64 Intel platform */
#if defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
#define __X86_64__
#define __X86_ASM__
#elif defined(__i386__) || defined(_M_IX86)
#define __X86_ASM__
#endif
/* detect 64 bit platform */
#if defined(__X86_64__) || defined(__aarch64__)
#define __64BIT__
#endif
/* detect Linux platform */
#if defined(linux) || defined(__linux__) || defined(__LINUX__)
# if !defined(__LINUX__)
# define __LINUX__
# endif
# if !defined(__UNIX__)
# define __UNIX__
# endif
#endif
/* detect FreeBSD platform */
#if defined(__FreeBSD__) || defined(__FREEBSD__)
# if !defined(__FREEBSD__)
# define __FREEBSD__
# endif
# if !defined(__UNIX__)
# define __UNIX__
# endif
#endif
/* detect Windows 95/98/NT/2000/XP/Vista/7/8/10 platform */
#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)) && !defined(__CYGWIN__)
# if !defined(__WIN32__)
# define __WIN32__
# endif
#endif
/* detect Cygwin platform */
#if defined(__CYGWIN__)
# if !defined(__UNIX__)
# define __UNIX__
# endif
#endif
/* detect MAC OS X platform */
#if defined(__APPLE__) || defined(MACOSX) || defined(__MACOSX__)
# if !defined(__MACOSX__)
# define __MACOSX__
# endif
# if !defined(__UNIX__)
# define __UNIX__
# endif
#endif
/* try to detect other Unix systems */
#if defined(__unix__) || defined (unix) || defined(__unix) || defined(_unix)
# if !defined(__UNIX__)
# define __UNIX__
# endif
#endif
////////////////////////////////////////////////////////////////////////////////
/// Macros
////////////////////////////////////////////////////////////////////////////////
#ifdef __WIN32__
# if defined(EMBREE_STATIC_LIB)
# define dll_export
# define dll_import
# else
# define dll_export __declspec(dllexport)
# define dll_import __declspec(dllimport)
# endif
#else
# define dll_export __attribute__ ((visibility ("default")))
# define dll_import
#endif
#if defined(__WIN32__) && !defined(__MINGW32__)
#if !defined(__noinline)
#define __noinline __declspec(noinline)
#endif
//#define __forceinline __forceinline
//#define __restrict __restrict
#if defined(__INTEL_COMPILER)
#define __restrict__ __restrict
#else
#define __restrict__ //__restrict // causes issues with MSVC
#endif
#if !defined(__thread) && !defined(__INTEL_LLVM_COMPILER)
#define __thread __declspec(thread)
#endif
#if !defined(__aligned)
#define __aligned(...) __declspec(align(__VA_ARGS__))
#endif
//#define __FUNCTION__ __FUNCTION__
#define debugbreak() __debugbreak()
#else
#if !defined(__noinline)
#define __noinline __attribute__((noinline))
#endif
#if !defined(__forceinline)
#define __forceinline inline __attribute__((always_inline))
#endif
//#define __restrict __restrict
//#define __thread __thread
#if !defined(__aligned)
#define __aligned(...) __attribute__((aligned(__VA_ARGS__)))
#endif
#if !defined(__FUNCTION__)
#define __FUNCTION__ __PRETTY_FUNCTION__
#endif
#define debugbreak() asm ("int $3")
#endif
#if defined(__clang__) || defined(__GNUC__)
#define MAYBE_UNUSED __attribute__((unused))
#else
#define MAYBE_UNUSED
#endif
#if !defined(_unused)
#define _unused(x) ((void)(x))
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1900) // before VS2015 deleted functions are not supported properly
#define DELETED
#else
#define DELETED = delete
#endif
#if !defined(likely)
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) || defined(__SYCL_DEVICE_ONLY__)
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#else
#define likely(expr) __builtin_expect((bool)(expr),true )
#define unlikely(expr) __builtin_expect((bool)(expr),false)
#endif
#endif
////////////////////////////////////////////////////////////////////////////////
/// Error handling and debugging
////////////////////////////////////////////////////////////////////////////////
/* debug printing macros */
#define STRING(x) #x
#define TOSTRING(x) STRING(x)
#define PING embree_cout_uniform << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl
#define PRINT(x) embree_cout << STRING(x) << " = " << (x) << embree_endl
#define PRINT2(x,y) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
#define PRINT3(x,y,z) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
#define PRINT4(x,y,z,w) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
#define UPRINT(x) embree_cout_uniform << STRING(x) << " = " << (x) << embree_endl
#define UPRINT2(x,y) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
#define UPRINT3(x,y,z) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
#define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
#if defined(DEBUG) // only report file and line in debug mode
#define THROW_RUNTIME_ERROR(str) \
printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
//throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#else
#define THROW_RUNTIME_ERROR(str) \
abort(); //throw std::runtime_error(str);
#endif
#define FATAL(x) THROW_RUNTIME_ERROR(x)
#define WARNING(x) { std::cerr << "Warning: " << x << embree_endl << std::flush; }
#define NOT_IMPLEMENTED FATAL(std::string(__FUNCTION__) + " not implemented")
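For illustration, a small self-contained sketch of the debug printing macros above; the values are made up, and on the host embree_cout resolves to std::cout.
// Hypothetical use of the PRINT macros; PRINT(x) streams "x = <value>".
#include "platform.h"

inline void debug_print_example()
{
  int width = 640, height = 480;
  PRINT(width);            // prints: width = 640
  PRINT2(width, height);   // prints: width = 640, height = 480
}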
////////////////////////////////////////////////////////////////////////////////
/// Basic types
////////////////////////////////////////////////////////////////////////////////
/* default floating-point type */
namespace embree {
typedef float real;
}
/* windows does not have ssize_t */
#if defined(__WIN32__)
#if defined(__64BIT__)
typedef int64_t ssize_t;
#else
typedef int32_t ssize_t;
#endif
#endif
////////////////////////////////////////////////////////////////////////////////
/// Basic utility functions
////////////////////////////////////////////////////////////////////////////////
__forceinline std::string toString(long long value) {
return std::to_string(value);
}
////////////////////////////////////////////////////////////////////////////////
/// Disable some compiler warnings
////////////////////////////////////////////////////////////////////////////////
#if defined(__INTEL_COMPILER)
//#pragma warning(disable:265 ) // floating-point operation result is out of range
//#pragma warning(disable:383 ) // value copied to temporary, reference to temporary used
//#pragma warning(disable:869 ) // parameter was never referenced
//#pragma warning(disable:981 ) // operands are evaluated in unspecified order
//#pragma warning(disable:1418) // external function definition with no prior declaration
//#pragma warning(disable:1419) // external declaration in primary source file
//#pragma warning(disable:1572) // floating-point equality and inequality comparisons are unreliable
//#pragma warning(disable:94 ) // the size of an array must be greater than zero
//#pragma warning(disable:1599) // declaration hides parameter
//#pragma warning(disable:424 ) // extra ";" ignored
#pragma warning(disable:2196) // routine is both "inline" and "noinline"
//#pragma warning(disable:177 ) // label was declared but never referenced
//#pragma warning(disable:114 ) // function was referenced but not defined
//#pragma warning(disable:819 ) // template nesting depth does not match the previous declaration of function
#pragma warning(disable:15335) // was not vectorized: vectorization possible but seems inefficient
#endif
#if defined(_MSC_VER)
//#pragma warning(disable:4200) // nonstandard extension used : zero-sized array in struct/union
#pragma warning(disable:4800) // forcing value to bool 'true' or 'false' (performance warning)
//#pragma warning(disable:4267) // '=' : conversion from 'size_t' to 'unsigned long', possible loss of data
#pragma warning(disable:4244) // 'argument' : conversion from 'ssize_t' to 'unsigned int', possible loss of data
#pragma warning(disable:4267) // conversion from 'size_t' to 'const int', possible loss of data
//#pragma warning(disable:4355) // 'this' : used in base member initializer list
//#pragma warning(disable:391 ) // '<=' : signed / unsigned mismatch
//#pragma warning(disable:4018) // '<' : signed / unsigned mismatch
//#pragma warning(disable:4305) // 'initializing' : truncation from 'double' to 'float'
//#pragma warning(disable:4068) // unknown pragma
//#pragma warning(disable:4146) // unary minus operator applied to unsigned type, result still unsigned
//#pragma warning(disable:4838) // conversion from 'unsigned int' to 'const int' requires a narrowing conversion)
//#pragma warning(disable:4227) // anachronism used : qualifiers on reference are ignored
#pragma warning(disable:4503) // decorated name length exceeded, name was truncated
#pragma warning(disable:4180) // qualifier applied to function type has no meaning; ignored
#pragma warning(disable:4258) // definition from the for loop is ignored; the definition from the enclosing scope is used
# if _MSC_VER < 1910 // prior to Visual studio 2017 (V141)
# pragma warning(disable:4101) // warning C4101: 'x': unreferenced local variable // a compiler bug issues wrong warnings
# pragma warning(disable:4789) // buffer '' of size 8 bytes will be overrun; 32 bytes will be written starting at offset 0
# endif
#endif
#if defined(__clang__) && !defined(__INTEL_COMPILER)
//#pragma clang diagnostic ignored "-Wunknown-pragmas"
//#pragma clang diagnostic ignored "-Wunused-variable"
//#pragma clang diagnostic ignored "-Wreorder"
//#pragma clang diagnostic ignored "-Wmicrosoft"
//#pragma clang diagnostic ignored "-Wunused-private-field"
//#pragma clang diagnostic ignored "-Wunused-local-typedef"
//#pragma clang diagnostic ignored "-Wunused-function"
//#pragma clang diagnostic ignored "-Wnarrowing"
//#pragma clang diagnostic ignored "-Wc++11-narrowing"
//#pragma clang diagnostic ignored "-Wdeprecated-register"
//#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wpragmas"
//#pragma GCC diagnostic ignored "-Wnarrowing"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
//#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
//#pragma GCC diagnostic ignored "-Warray-bounds"
#pragma GCC diagnostic ignored "-Wattributes"
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wparentheses"
#endif
#if defined(__clang__) && defined(__WIN32__)
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wmicrosoft-cast"
#pragma clang diagnostic ignored "-Wmicrosoft-enum-value"
#pragma clang diagnostic ignored "-Wmicrosoft-include"
#pragma clang diagnostic ignored "-Wunused-function"
#pragma clang diagnostic ignored "-Wunknown-pragmas"
#endif
/* disabling deprecated warning, please use only where use of deprecated Embree API functions is desired */
#if defined(__WIN32__) && defined(__INTEL_COMPILER)
#define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 1478)) // warning: function was declared deprecated
#define ENABLE_DEPRECATED_WARNING __pragma(warning (enable: 1478)) // warning: function was declared deprecated
#elif defined(__INTEL_COMPILER)
#define DISABLE_DEPRECATED_WARNING _Pragma("warning (disable: 1478)") // warning: function was declared deprecated
#define ENABLE_DEPRECATED_WARNING _Pragma("warning (enable : 1478)") // warning: function was declared deprecated
#elif defined(__clang__)
#define DISABLE_DEPRECATED_WARNING _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") // warning: xxx is deprecated
#define ENABLE_DEPRECATED_WARNING _Pragma("clang diagnostic warning \"-Wdeprecated-declarations\"") // warning: xxx is deprecated
#elif defined(__GNUC__)
#define DISABLE_DEPRECATED_WARNING _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") // warning: xxx is deprecated
#define ENABLE_DEPRECATED_WARNING _Pragma("GCC diagnostic warning \"-Wdeprecated-declarations\"") // warning: xxx is deprecated
#elif defined(_MSC_VER)
#define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 4996)) // warning: function was declared deprecated
#define ENABLE_DEPRECATED_WARNING __pragma(warning (enable : 4996)) // warning: function was declared deprecated
#endif
////////////////////////////////////////////////////////////////////////////////
/// SYCL specific
////////////////////////////////////////////////////////////////////////////////
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
#define sycl_printf0(format, ...) { \
static const CONSTANT char fmt[] = format; \
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
}
#define sycl_printf0_(format) { \
static const CONSTANT char fmt[] = format; \
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
sycl::ext::oneapi::experimental::printf(fmt); \
}
#else
#define sycl_printf0(format, ...) { \
static const CONSTANT char fmt[] = format; \
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
}
#define sycl_printf0_(format) { \
static const CONSTANT char fmt[] = format; \
sycl::ext::oneapi::experimental::printf(fmt); \
}
#endif
#define sycl_printf(format, ...) { \
static const CONSTANT char fmt[] = format; \
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
}
#define sycl_printf_(format) { \
static const CONSTANT char fmt[] = format; \
sycl::ext::oneapi::experimental::printf(fmt); \
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
namespace embree
{
struct sycl_ostream_ {
sycl_ostream_ (bool uniform) : uniform(uniform) {}
bool uniform = false;
};
struct sycl_endl_ {};
#define embree_ostream embree::sycl_ostream_
#define embree_cout embree::sycl_ostream_(false)
#define embree_cout_uniform embree::sycl_ostream_(true)
#define embree_endl embree::sycl_endl_()
inline sycl_ostream_ operator <<(sycl_ostream_ cout, int i)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%i",i);
}
else
sycl_printf("%i ",i);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, unsigned int i)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%u",i);
} else
sycl_printf("%u ",i);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, float f)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%f",f);
} else
sycl_printf("%f ",f);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, double d)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%f",d);
} else
sycl_printf("%f ",d);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, uint64_t l)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%lu",l);
} else
sycl_printf("%lu ",l);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, long l)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%l",l);
} else
sycl_printf("%l ",l);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, void* p)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%p",p);
} else
sycl_printf("%p ",p);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, const char* c)
{
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%s",c);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, sycl_endl_)
{
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf_("\n");
return cout;
}
}
#else
#define embree_ostream std::ostream&
#define embree_cout std::cout
#define embree_cout_uniform std::cout
#define embree_endl std::endl
#endif
#if defined(EMBREE_SYCL_SUPPORT)
/* printing out sycl vector types */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float4& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float3& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float2& v) {
return out << "(" << v.x() << "," << v.y() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int4& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int3& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int2& v) {
return out << "(" << v.x() << "," << v.y() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint4& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint3& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint2& v) {
return out << "(" << v.x() << "," << v.y() << ")";
}
#endif
inline void tab(std::ostream& cout, int n) {
for (int i=0; i<n; i++) cout << " ";
}
inline std::string tab(int depth) {
return std::string(2*depth,' ');
}
////////////////////////////////////////////////////////////////////////////////
/// Some macros for static profiling
////////////////////////////////////////////////////////////////////////////////
#if defined (__GNUC__)
#define IACA_SSC_MARK( MARK_ID ) \
__asm__ __volatile__ ( \
"\n\t movl $"#MARK_ID", %%ebx" \
"\n\t .byte 0x64, 0x67, 0x90" \
: : : "memory" );
#define IACA_UD_BYTES __asm__ __volatile__ ("\n\t .byte 0x0F, 0x0B");
#else
#define IACA_UD_BYTES {__asm _emit 0x0F \
__asm _emit 0x0B}
#define IACA_SSC_MARK(x) {__asm mov ebx, x\
__asm _emit 0x64 \
__asm _emit 0x67 \
__asm _emit 0x90 }
#define IACA_VC64_START __writegsbyte(111, 111);
#define IACA_VC64_END __writegsbyte(222, 222);
#endif
#define IACA_START {IACA_UD_BYTES \
IACA_SSC_MARK(111)}
#define IACA_END {IACA_SSC_MARK(222) \
IACA_UD_BYTES}
namespace embree
{
template<typename Closure>
struct OnScopeExitHelper
{
OnScopeExitHelper (const Closure f) : active(true), f(f) {}
~OnScopeExitHelper() { if (active) f(); }
void deactivate() { active = false; }
bool active;
const Closure f;
};
template <typename Closure>
OnScopeExitHelper<Closure> OnScopeExit(const Closure f) {
return OnScopeExitHelper<Closure>(f);
}
#define STRING_JOIN2(arg1, arg2) DO_STRING_JOIN2(arg1, arg2)
#define DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
#define ON_SCOPE_EXIT(code) \
auto STRING_JOIN2(on_scope_exit_, __LINE__) = OnScopeExit([&](){code;})
template<typename Ty>
std::unique_ptr<Ty> make_unique(Ty* ptr) {
return std::unique_ptr<Ty>(ptr);
}
}
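A hedged sketch of the scope-exit helper defined above; the file-reading routine is hypothetical.
// Hypothetical use of ON_SCOPE_EXIT to run cleanup on every return path.
#include "platform.h"

using embree::OnScopeExit;   // ON_SCOPE_EXIT expands to an unqualified call to OnScopeExit

inline bool read_first_byte(const char* path, char& out)
{
  FILE* f = fopen(path, "rb");
  if (!f) return false;
  ON_SCOPE_EXIT(fclose(f));  // fclose(f) runs when this scope is left, on success and failure alike
  const int c = fgetc(f);
  if (c == EOF) return false;
  out = (char)c;
  return true;
}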

122
thirdparty/embree/common/sys/ref.h vendored Normal file
View File

@@ -0,0 +1,122 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "atomic.h"
namespace embree
{
struct NullTy {
};
extern MAYBE_UNUSED NullTy null;
class RefCount
{
public:
RefCount(int val = 0) : refCounter(val) {}
virtual ~RefCount() {};
virtual RefCount* refInc() { refCounter.fetch_add(1); return this; }
virtual void refDec() { if (refCounter.fetch_add(-1) == 1) delete this; }
private:
std::atomic<size_t> refCounter;
};
////////////////////////////////////////////////////////////////////////////////
/// Reference to single object
////////////////////////////////////////////////////////////////////////////////
template<typename Type>
class Ref
{
public:
Type* ptr;
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Ref() : ptr(nullptr) {}
__forceinline Ref(NullTy) : ptr(nullptr) {}
__forceinline Ref(const Ref& input) : ptr(input.ptr) { if (ptr) ptr->refInc(); }
__forceinline Ref(Ref&& input) : ptr(input.ptr) { input.ptr = nullptr; }
__forceinline Ref(Type* const input) : ptr(input)
{
if (ptr)
ptr->refInc();
}
__forceinline ~Ref()
{
if (ptr)
ptr->refDec();
}
__forceinline Ref& operator =(const Ref& input)
{
if (input.ptr)
input.ptr->refInc();
if (ptr)
ptr->refDec();
ptr = input.ptr;
return *this;
}
__forceinline Ref& operator =(Ref&& input)
{
if (ptr)
ptr->refDec();
ptr = input.ptr;
input.ptr = nullptr;
return *this;
}
__forceinline Ref& operator =(Type* const input)
{
if (input)
input->refInc();
if (ptr)
ptr->refDec();
ptr = input;
return *this;
}
__forceinline Ref& operator =(NullTy)
{
if (ptr)
ptr->refDec();
ptr = nullptr;
return *this;
}
__forceinline operator bool() const { return ptr != nullptr; }
__forceinline const Type& operator *() const { return *ptr; }
__forceinline Type& operator *() { return *ptr; }
__forceinline const Type* operator ->() const { return ptr; }
__forceinline Type* operator ->() { return ptr; }
template<typename TypeOut>
__forceinline Ref<TypeOut> cast() { return Ref<TypeOut>(static_cast<TypeOut*>(ptr)); }
template<typename TypeOut>
__forceinline const Ref<TypeOut> cast() const { return Ref<TypeOut>(static_cast<TypeOut*>(ptr)); }
template<typename TypeOut>
__forceinline Ref<TypeOut> dynamicCast() { return Ref<TypeOut>(dynamic_cast<TypeOut*>(ptr)); }
template<typename TypeOut>
__forceinline const Ref<TypeOut> dynamicCast() const { return Ref<TypeOut>(dynamic_cast<TypeOut*>(ptr)); }
};
template<typename Type> __forceinline bool operator < (const Ref<Type>& a, const Ref<Type>& b) { return a.ptr < b.ptr; }
template<typename Type> __forceinline bool operator ==(const Ref<Type>& a, NullTy ) { return a.ptr == nullptr; }
template<typename Type> __forceinline bool operator ==(NullTy , const Ref<Type>& b) { return nullptr == b.ptr; }
template<typename Type> __forceinline bool operator ==(const Ref<Type>& a, const Ref<Type>& b) { return a.ptr == b.ptr; }
template<typename Type> __forceinline bool operator !=(const Ref<Type>& a, NullTy ) { return a.ptr != nullptr; }
template<typename Type> __forceinline bool operator !=(NullTy , const Ref<Type>& b) { return nullptr != b.ptr; }
template<typename Type> __forceinline bool operator !=(const Ref<Type>& a, const Ref<Type>& b) { return a.ptr != b.ptr; }
}
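A minimal sketch of Ref<> paired with a RefCount-derived type; the Node class is invented for illustration.
// Hypothetical reference-counted object managed through Ref<>.
#include "ref.h"

namespace example {
  struct Node : public embree::RefCount {
    int value = 0;
  };

  inline void use_refs()
  {
    embree::Ref<Node> a = new Node();  // takes a reference (refInc)
    a->value = 42;
    embree::Ref<Node> b = a;           // shared ownership, reference count is now 2
    a = embree::null;                  // releases one reference
  }                                    // b is destroyed here, the count drops to 0 and Node is deleted
}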

30
thirdparty/embree/common/sys/regression.cpp vendored Normal file
View File

@@ -0,0 +1,30 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "regression.h"
namespace embree
{
/* registerRegressionTest is invoked from static initializers, thus
* we cannot have the regression_tests variable as a global static
* variable, due to issues with static variable initialization
* order. */
std::vector<RegressionTest*>& get_regression_tests()
{
static std::vector<RegressionTest*> regression_tests;
return regression_tests;
}
void registerRegressionTest(RegressionTest* test)
{
get_regression_tests().push_back(test);
}
RegressionTest* getRegressionTest(size_t index)
{
if (index >= get_regression_tests().size())
return nullptr;
return get_regression_tests()[index];
}
}

25
thirdparty/embree/common/sys/regression.h vendored Normal file
View File

@@ -0,0 +1,25 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
#include <vector>
namespace embree
{
/*! virtual interface for all regression tests */
struct RegressionTest
{
RegressionTest (std::string name) : name(name) {}
virtual bool run() = 0;
std::string name;
};
/*! registers a regression test */
void registerRegressionTest(RegressionTest* test);
/*! returns the regression test with the given index, or nullptr if the index is out of range */
RegressionTest* getRegressionTest(size_t index);
}
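A hedged sketch of how a test would plug into this registry; the class name and the check are invented.
// Hypothetical regression test that registers itself during static initialization.
#include "regression.h"

namespace example {
  struct my_regression_test : public embree::RegressionTest
  {
    my_regression_test() : RegressionTest("my_test") {
      embree::registerRegressionTest(this);
    }
    bool run() override {
      return 1 + 1 == 2;   // a real test would exercise some Embree component here
    }
  };
  static my_regression_test my_test_instance;  // constructed before main(), adding the test to the registry
}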

302
thirdparty/embree/common/sys/sycl.h vendored Normal file
View File

@@ -0,0 +1,302 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
using sycl::float16;
using sycl::float8;
using sycl::float4;
using sycl::float3;
using sycl::float2;
using sycl::int16;
using sycl::int8;
using sycl::int4;
using sycl::int3;
using sycl::int2;
using sycl::uint16;
using sycl::uint8;
using sycl::uint4;
using sycl::uint3;
using sycl::uint2;
using sycl::uchar16;
using sycl::uchar8;
using sycl::uchar4;
using sycl::uchar3;
using sycl::uchar2;
using sycl::ushort16;
using sycl::ushort8;
using sycl::ushort4;
using sycl::ushort3;
using sycl::ushort2;
#ifdef __SYCL_DEVICE_ONLY__
#define GLOBAL __attribute__((opencl_global))
#define LOCAL __attribute__((opencl_local))
SYCL_EXTERNAL extern int work_group_reduce_add(int x);
SYCL_EXTERNAL extern float work_group_reduce_min(float x);
SYCL_EXTERNAL extern float work_group_reduce_max(float x);
SYCL_EXTERNAL extern float atomic_min(volatile GLOBAL float *p, float val);
SYCL_EXTERNAL extern float atomic_min(volatile LOCAL float *p, float val);
SYCL_EXTERNAL extern float atomic_max(volatile GLOBAL float *p, float val);
SYCL_EXTERNAL extern float atomic_max(volatile LOCAL float *p, float val);
SYCL_EXTERNAL extern "C" unsigned int intel_sub_group_ballot(bool valid);
SYCL_EXTERNAL extern "C" void __builtin_IB_assume_uniform(void *p);
// Load message caching control
enum LSC_LDCC {
LSC_LDCC_DEFAULT,
LSC_LDCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
LSC_LDCC_L1UC_L3C, // Override to L1 uncached and L3 cached
LSC_LDCC_L1C_L3UC, // Override to L1 cached and L3 uncached
LSC_LDCC_L1C_L3C, // Override to L1 cached and L3 cached
LSC_LDCC_L1S_L3UC, // Override to L1 streaming load and L3 uncached
LSC_LDCC_L1S_L3C, // Override to L1 streaming load and L3 cached
LSC_LDCC_L1IAR_L3C, // Override to L1 invalidate-after-read, and L3 cached
};
// Store message caching control (also used for atomics)
enum LSC_STCC {
LSC_STCC_DEFAULT,
LSC_STCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
LSC_STCC_L1UC_L3WB, // Override to L1 uncached and L3 written back
LSC_STCC_L1WT_L3UC, // Override to L1 written through and L3 uncached
LSC_STCC_L1WT_L3WB, // Override to L1 written through and L3 written back
LSC_STCC_L1S_L3UC, // Override to L1 streaming and L3 uncached
LSC_STCC_L1S_L3WB, // Override to L1 streaming and L3 written back
LSC_STCC_L1WB_L3WB, // Override to L1 written back and L3 written back
};
///////////////////////////////////////////////////////////////////////
// LSC Loads
///////////////////////////////////////////////////////////////////////
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uchar_to_uint (const GLOBAL uint8_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D8U32
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_ushort_to_uint(const GLOBAL uint16_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D16U32
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uint (const GLOBAL uint32_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V1
SYCL_EXTERNAL /* extern "C" */ sycl::uint2 __builtin_IB_lsc_load_global_uint2 (const GLOBAL sycl::uint2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V2
SYCL_EXTERNAL /* extern "C" */ sycl::uint3 __builtin_IB_lsc_load_global_uint3 (const GLOBAL sycl::uint3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V3
SYCL_EXTERNAL /* extern "C" */ sycl::uint4 __builtin_IB_lsc_load_global_uint4 (const GLOBAL sycl::uint4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V4
SYCL_EXTERNAL /* extern "C" */ sycl::uint8 __builtin_IB_lsc_load_global_uint8 (const GLOBAL sycl::uint8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V8
SYCL_EXTERNAL /* extern "C" */ uint64_t __builtin_IB_lsc_load_global_ulong (const GLOBAL uint64_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V1
SYCL_EXTERNAL /* extern "C" */ sycl::ulong2 __builtin_IB_lsc_load_global_ulong2 (const GLOBAL sycl::ulong2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V2
SYCL_EXTERNAL /* extern "C" */ sycl::ulong3 __builtin_IB_lsc_load_global_ulong3 (const GLOBAL sycl::ulong3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V3
SYCL_EXTERNAL /* extern "C" */ sycl::ulong4 __builtin_IB_lsc_load_global_ulong4 (const GLOBAL sycl::ulong4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V4
SYCL_EXTERNAL /* extern "C" */ sycl::ulong8 __builtin_IB_lsc_load_global_ulong8 (const GLOBAL sycl::ulong8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V8
// global address space
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uchar_from_uint (GLOBAL uint8_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D8U32
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ushort_from_uint(GLOBAL uint16_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D16U32
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint (GLOBAL uint32_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D32V1
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint2 (GLOBAL sycl::uint2 *base, int immElemOff, sycl::uint2 val, enum LSC_STCC cacheOpt); //D32V2
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint3 (GLOBAL sycl::uint3 *base, int immElemOff, sycl::uint3 val, enum LSC_STCC cacheOpt); //D32V3
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint4 (GLOBAL sycl::uint4 *base, int immElemOff, sycl::uint4 val, enum LSC_STCC cacheOpt); //D32V4
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint8 (GLOBAL sycl::uint8 *base, int immElemOff, sycl::uint8 val, enum LSC_STCC cacheOpt); //D32V8
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong (GLOBAL uint64_t *base, int immElemOff, uint64_t val, enum LSC_STCC cacheOpt); //D64V1
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong2 (GLOBAL sycl::ulong2 *base, int immElemOff, sycl::ulong2 val, enum LSC_STCC cacheOpt); //D64V2
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong3 (GLOBAL sycl::ulong3 *base, int immElemOff, sycl::ulong3 val, enum LSC_STCC cacheOpt); //D64V3
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong4 (GLOBAL sycl::ulong4 *base, int immElemOff, sycl::ulong4 val, enum LSC_STCC cacheOpt); //D64V4
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong8 (GLOBAL sycl::ulong8 *base, int immElemOff, sycl::ulong8 val, enum LSC_STCC cacheOpt); //D64V8
///////////////////////////////////////////////////////////////////////
// prefetching
///////////////////////////////////////////////////////////////////////
//
// LSC Pre-Fetch Load functions with CacheControls
// global address space
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uchar (const GLOBAL uint8_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D8U32
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ushort(const GLOBAL uint16_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D16U32
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint (const GLOBAL uint32_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V1
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint2 (const GLOBAL sycl::uint2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V2
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint3 (const GLOBAL sycl::uint3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V3
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint4 (const GLOBAL sycl::uint4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V4
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint8 (const GLOBAL sycl::uint8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V8
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong (const GLOBAL uint64_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V1
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong2(const GLOBAL sycl::ulong2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V2
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong3(const GLOBAL sycl::ulong3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V3
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong4(const GLOBAL sycl::ulong4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V4
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong8(const GLOBAL sycl::ulong8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V8
#else
#define GLOBAL
#define LOCAL
/* dummy functions for host */
inline int work_group_reduce_add(int x) { return x; }
inline float work_group_reduce_min(float x) { return x; }
inline float work_group_reduce_max(float x) { return x; }
inline float atomic_min(volatile float *p, float val) { return val; };
inline float atomic_max(volatile float *p, float val) { return val; };
inline uint32_t intel_sub_group_ballot(bool valid) { return 0; }
#endif
/* creates a temporary that is enforced to be uniform */
#define SYCL_UNIFORM_VAR(Ty,tmp,k) \
Ty tmp##_data; \
Ty* p##tmp##_data = (Ty*) sub_group_broadcast((uint64_t)&tmp##_data,k); \
Ty& tmp = *p##tmp##_data;
#if !defined(__forceinline)
#define __forceinline inline __attribute__((always_inline))
#endif
#if __SYCL_COMPILER_VERSION < 20210801
#define all_of_group all_of
#define any_of_group any_of
#define none_of_group none_of
#define group_broadcast broadcast
#define reduce_over_group reduce
#define exclusive_scan_over_group exclusive_scan
#define inclusive_scan_over_group inclusive_scan
#endif
namespace embree
{
template<typename T>
__forceinline T cselect(const bool mask, const T &a, const T &b)
{
return sycl::select(b,a,(int)mask);
}
template<typename T, typename M>
__forceinline T cselect(const M &mask, const T &a, const T &b)
{
return sycl::select(b,a,mask);
}
#define XSTR(x) STR(x)
#define STR(x) #x
__forceinline const sycl::sub_group this_sub_group() {
#if __LIBSYCL_MAJOR_VERSION >= 8
return sycl::ext::oneapi::this_work_item::get_sub_group();
#else
return sycl::ext::oneapi::experimental::this_sub_group();
#endif
}
__forceinline const uint32_t get_sub_group_local_id() {
return this_sub_group().get_local_id()[0];
}
__forceinline const uint32_t get_sub_group_size() {
return this_sub_group().get_max_local_range().size();
}
__forceinline const uint32_t get_sub_group_id() {
return this_sub_group().get_group_id()[0];
}
__forceinline const uint32_t get_num_sub_groups() {
return this_sub_group().get_group_range().size();
}
__forceinline uint32_t sub_group_ballot(bool pred) {
return intel_sub_group_ballot(pred);
}
__forceinline bool sub_group_all_of(bool pred) {
return sycl::all_of_group(this_sub_group(),pred);
}
__forceinline bool sub_group_any_of(bool pred) {
return sycl::any_of_group(this_sub_group(),pred);
}
__forceinline bool sub_group_none_of(bool pred) {
return sycl::none_of_group(this_sub_group(),pred);
}
template <typename T> __forceinline T sub_group_broadcast(T x, sycl::id<1> local_id) {
return sycl::group_broadcast<sycl::sub_group>(this_sub_group(),x,local_id);
}
template <typename T> __forceinline T sub_group_make_uniform(T x) {
return sub_group_broadcast(x,sycl::ctz(intel_sub_group_ballot(true)));
}
__forceinline void assume_uniform_array(void* ptr) {
#ifdef __SYCL_DEVICE_ONLY__
__builtin_IB_assume_uniform(ptr);
#endif
}
template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, BinaryOperation binary_op) {
return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,binary_op);
}
template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, T init, BinaryOperation binary_op) {
return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,init,binary_op);
}
template <typename T> __forceinline T sub_group_reduce_min(T x, T init) {
return sub_group_reduce(x, init, sycl::ext::oneapi::minimum<T>());
}
template <typename T> __forceinline T sub_group_reduce_min(T x) {
return sub_group_reduce(x, sycl::ext::oneapi::minimum<T>());
}
template <typename T> __forceinline T sub_group_reduce_max(T x) {
return sub_group_reduce(x, sycl::ext::oneapi::maximum<T>());
}
template <typename T> __forceinline T sub_group_reduce_add(T x) {
return sub_group_reduce(x, sycl::ext::oneapi::plus<T>());
}
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, BinaryOperation binary_op) {
return sycl::exclusive_scan_over_group(this_sub_group(),x,binary_op);
}
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan_min(T x) {
return sub_group_exclusive_scan(x,sycl::ext::oneapi::minimum<T>());
}
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, T init, BinaryOperation binary_op) {
return sycl::exclusive_scan_over_group(this_sub_group(),x,init,binary_op);
}
template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op) {
return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op);
}
template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op, T init) {
return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op,init);
}
template <typename T> __forceinline T sub_group_load(const void* src) {
return this_sub_group().load(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)src));
}
template <typename T> __forceinline void sub_group_store(void* dst, const T& x) {
this_sub_group().store(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)dst),x);
}
}
#if __SYCL_COMPILER_VERSION < 20210801
#undef all_of_group
#undef any_of_group
#undef none_of_group
#undef group_broadcast
#undef reduce_over_group
#undef exclusive_scan_over_group
#undef inclusive_scan_over_group
#endif

727
thirdparty/embree/common/sys/sysinfo.cpp vendored Normal file
View File

@@ -0,0 +1,727 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#if defined(__INTEL_LLVM_COMPILER)
// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wignored-attributes"
#endif
#include "sysinfo.h"
#include "intrinsics.h"
#include "estring.h"
#include "ref.h"
#if defined(__FREEBSD__)
#include <sys/cpuset.h>
#include <pthread_np.h>
typedef cpuset_t cpu_set_t;
#endif
////////////////////////////////////////////////////////////////////////////////
/// All Platforms
////////////////////////////////////////////////////////////////////////////////
namespace embree
{
NullTy null;
std::string getPlatformName()
{
#if defined(__ANDROID__) && !defined(__64BIT__)
return "Android (32bit)";
#elif defined(__ANDROID__) && defined(__64BIT__)
return "Android (64bit)";
#elif defined(__LINUX__) && !defined(__64BIT__)
return "Linux (32bit)";
#elif defined(__LINUX__) && defined(__64BIT__)
return "Linux (64bit)";
#elif defined(__FREEBSD__) && !defined(__64BIT__)
return "FreeBSD (32bit)";
#elif defined(__FREEBSD__) && defined(__64BIT__)
return "FreeBSD (64bit)";
#elif defined(__CYGWIN__) && !defined(__64BIT__)
return "Cygwin (32bit)";
#elif defined(__CYGWIN__) && defined(__64BIT__)
return "Cygwin (64bit)";
#elif defined(__WIN32__) && !defined(__64BIT__)
return "Windows (32bit)";
#elif defined(__WIN32__) && defined(__64BIT__)
return "Windows (64bit)";
#elif defined(__MACOSX__) && !defined(__64BIT__)
return "Mac OS X (32bit)";
#elif defined(__MACOSX__) && defined(__64BIT__)
return "Mac OS X (64bit)";
#elif defined(__UNIX__) && !defined(__64BIT__)
return "Unix (32bit)";
#elif defined(__UNIX__) && defined(__64BIT__)
return "Unix (64bit)";
#else
return "Unknown";
#endif
}
std::string getCompilerName()
{
#if defined(__INTEL_COMPILER)
int icc_major = __INTEL_COMPILER / 100 % 100;
int icc_minor = __INTEL_COMPILER % 100;
std::string version = "Intel Compiler ";
version += toString(icc_major);
version += "." + toString(icc_minor);
#if defined(__INTEL_COMPILER_UPDATE)
version += "." + toString(__INTEL_COMPILER_UPDATE);
#endif
return version;
#elif defined(__clang__)
return "CLANG " __clang_version__;
#elif defined (__GNUC__)
return "GCC " __VERSION__;
#elif defined(_MSC_VER)
std::string version = toString(_MSC_FULL_VER);
version.insert(4,".");
version.insert(9,".");
version.insert(2,".");
return "Visual C++ Compiler " + version;
#else
return "Unknown Compiler";
#endif
}
std::string getCPUVendor()
{
#if defined(__X86_ASM__)
int cpuinfo[4];
__cpuid (cpuinfo, 0);
int name[4];
name[0] = cpuinfo[1];
name[1] = cpuinfo[3];
name[2] = cpuinfo[2];
name[3] = 0;
return (char*)name;
#elif defined(__ARM_NEON)
return "ARM";
#else
return "Unknown";
#endif
}
CPU getCPUModel()
{
#if defined(__X86_ASM__)
if (getCPUVendor() != "GenuineIntel")
return CPU::UNKNOWN;
int out[4];
__cpuid(out, 0);
if (out[0] < 1) return CPU::UNKNOWN;
__cpuid(out, 1);
/* please see CPUID documentation for these formulas */
uint32_t family_ID = (out[0] >> 8) & 0x0F;
uint32_t extended_family_ID = (out[0] >> 20) & 0xFF;
uint32_t model_ID = (out[0] >> 4) & 0x0F;
uint32_t extended_model_ID = (out[0] >> 16) & 0x0F;
uint32_t DisplayFamily = family_ID;
if (family_ID == 0x0F)
DisplayFamily += extended_family_ID;
uint32_t DisplayModel = model_ID;
if (family_ID == 0x06 || family_ID == 0x0F)
DisplayModel += extended_model_ID << 4;
uint32_t DisplayFamily_DisplayModel = (DisplayFamily << 8) + (DisplayModel << 0);
// Data from Intel® 64 and IA-32 Architectures, Volume 4, Chapter 2, Table 2-1 (CPUID Signature Values of DisplayFamily_DisplayModel)
if (DisplayFamily_DisplayModel == 0x067D) return CPU::CORE_ICE_LAKE;
if (DisplayFamily_DisplayModel == 0x067E) return CPU::CORE_ICE_LAKE;
if (DisplayFamily_DisplayModel == 0x068C) return CPU::CORE_TIGER_LAKE;
if (DisplayFamily_DisplayModel == 0x06A5) return CPU::CORE_COMET_LAKE;
if (DisplayFamily_DisplayModel == 0x06A6) return CPU::CORE_COMET_LAKE;
if (DisplayFamily_DisplayModel == 0x0666) return CPU::CORE_CANNON_LAKE;
if (DisplayFamily_DisplayModel == 0x068E) return CPU::CORE_KABY_LAKE;
if (DisplayFamily_DisplayModel == 0x069E) return CPU::CORE_KABY_LAKE;
if (DisplayFamily_DisplayModel == 0x066A) return CPU::XEON_ICE_LAKE;
if (DisplayFamily_DisplayModel == 0x066C) return CPU::XEON_ICE_LAKE;
if (DisplayFamily_DisplayModel == 0x0655) return CPU::XEON_SKY_LAKE;
if (DisplayFamily_DisplayModel == 0x064E) return CPU::CORE_SKY_LAKE;
if (DisplayFamily_DisplayModel == 0x065E) return CPU::CORE_SKY_LAKE;
if (DisplayFamily_DisplayModel == 0x0656) return CPU::XEON_BROADWELL;
if (DisplayFamily_DisplayModel == 0x064F) return CPU::XEON_BROADWELL;
if (DisplayFamily_DisplayModel == 0x0647) return CPU::CORE_BROADWELL;
if (DisplayFamily_DisplayModel == 0x063D) return CPU::CORE_BROADWELL;
if (DisplayFamily_DisplayModel == 0x063F) return CPU::XEON_HASWELL;
if (DisplayFamily_DisplayModel == 0x063C) return CPU::CORE_HASWELL;
if (DisplayFamily_DisplayModel == 0x0645) return CPU::CORE_HASWELL;
if (DisplayFamily_DisplayModel == 0x0646) return CPU::CORE_HASWELL;
if (DisplayFamily_DisplayModel == 0x063E) return CPU::XEON_IVY_BRIDGE;
if (DisplayFamily_DisplayModel == 0x063A) return CPU::CORE_IVY_BRIDGE;
if (DisplayFamily_DisplayModel == 0x062D) return CPU::SANDY_BRIDGE;
if (DisplayFamily_DisplayModel == 0x062F) return CPU::SANDY_BRIDGE;
if (DisplayFamily_DisplayModel == 0x062A) return CPU::SANDY_BRIDGE;
if (DisplayFamily_DisplayModel == 0x062E) return CPU::NEHALEM;
if (DisplayFamily_DisplayModel == 0x0625) return CPU::NEHALEM;
if (DisplayFamily_DisplayModel == 0x062C) return CPU::NEHALEM;
if (DisplayFamily_DisplayModel == 0x061E) return CPU::NEHALEM;
if (DisplayFamily_DisplayModel == 0x061F) return CPU::NEHALEM;
if (DisplayFamily_DisplayModel == 0x061A) return CPU::NEHALEM;
if (DisplayFamily_DisplayModel == 0x061D) return CPU::NEHALEM;
if (DisplayFamily_DisplayModel == 0x0617) return CPU::CORE2;
if (DisplayFamily_DisplayModel == 0x060F) return CPU::CORE2;
if (DisplayFamily_DisplayModel == 0x060E) return CPU::CORE1;
if (DisplayFamily_DisplayModel == 0x0685) return CPU::XEON_PHI_KNIGHTS_MILL;
if (DisplayFamily_DisplayModel == 0x0657) return CPU::XEON_PHI_KNIGHTS_LANDING;
#elif defined(__ARM_NEON)
return CPU::ARM;
#endif
return CPU::UNKNOWN;
}
std::string stringOfCPUModel(CPU model)
{
switch (model) {
case CPU::XEON_ICE_LAKE : return "Xeon Ice Lake";
case CPU::CORE_ICE_LAKE : return "Core Ice Lake";
case CPU::CORE_TIGER_LAKE : return "Core Tiger Lake";
case CPU::CORE_COMET_LAKE : return "Core Comet Lake";
case CPU::CORE_CANNON_LAKE : return "Core Cannon Lake";
case CPU::CORE_KABY_LAKE : return "Core Kaby Lake";
case CPU::XEON_SKY_LAKE : return "Xeon Sky Lake";
case CPU::CORE_SKY_LAKE : return "Core Sky Lake";
case CPU::XEON_PHI_KNIGHTS_MILL : return "Xeon Phi Knights Mill";
case CPU::XEON_PHI_KNIGHTS_LANDING: return "Xeon Phi Knights Landing";
case CPU::XEON_BROADWELL : return "Xeon Broadwell";
case CPU::CORE_BROADWELL : return "Core Broadwell";
case CPU::XEON_HASWELL : return "Xeon Haswell";
case CPU::CORE_HASWELL : return "Core Haswell";
case CPU::XEON_IVY_BRIDGE : return "Xeon Ivy Bridge";
case CPU::CORE_IVY_BRIDGE : return "Core Ivy Bridge";
case CPU::SANDY_BRIDGE : return "Sandy Bridge";
case CPU::NEHALEM : return "Nehalem";
case CPU::CORE2 : return "Core2";
case CPU::CORE1 : return "Core";
case CPU::ARM : return "ARM";
case CPU::UNKNOWN : return "Unknown CPU";
}
return "Unknown CPU (error)";
}
#if defined(__X86_ASM__)
/* constants to access destination registers of CPUID instruction */
static const int EAX = 0;
static const int EBX = 1;
static const int ECX = 2;
static const int EDX = 3;
/* cpuid[eax=1].ecx */
static const int CPU_FEATURE_BIT_SSE3 = 1 << 0;
static const int CPU_FEATURE_BIT_SSSE3 = 1 << 9;
static const int CPU_FEATURE_BIT_FMA3 = 1 << 12;
static const int CPU_FEATURE_BIT_SSE4_1 = 1 << 19;
static const int CPU_FEATURE_BIT_SSE4_2 = 1 << 20;
//static const int CPU_FEATURE_BIT_MOVBE = 1 << 22;
static const int CPU_FEATURE_BIT_POPCNT = 1 << 23;
//static const int CPU_FEATURE_BIT_XSAVE = 1 << 26;
static const int CPU_FEATURE_BIT_OXSAVE = 1 << 27;
static const int CPU_FEATURE_BIT_AVX = 1 << 28;
static const int CPU_FEATURE_BIT_F16C = 1 << 29;
static const int CPU_FEATURE_BIT_RDRAND = 1 << 30;
/* cpuid[eax=1].edx */
static const int CPU_FEATURE_BIT_SSE = 1 << 25;
static const int CPU_FEATURE_BIT_SSE2 = 1 << 26;
/* cpuid[eax=0x80000001].ecx */
static const int CPU_FEATURE_BIT_LZCNT = 1 << 5;
/* cpuid[eax=7,ecx=0].ebx */
static const int CPU_FEATURE_BIT_BMI1 = 1 << 3;
static const int CPU_FEATURE_BIT_AVX2 = 1 << 5;
static const int CPU_FEATURE_BIT_BMI2 = 1 << 8;
static const int CPU_FEATURE_BIT_AVX512F = 1 << 16; // AVX512F (foundation)
static const int CPU_FEATURE_BIT_AVX512DQ = 1 << 17; // AVX512DQ (doubleword and quadword instructions)
static const int CPU_FEATURE_BIT_AVX512PF = 1 << 26; // AVX512PF (prefetch gather/scatter instructions)
static const int CPU_FEATURE_BIT_AVX512ER = 1 << 27; // AVX512ER (exponential and reciprocal instructions)
static const int CPU_FEATURE_BIT_AVX512CD = 1 << 28; // AVX512CD (conflict detection instructions)
static const int CPU_FEATURE_BIT_AVX512BW = 1 << 30; // AVX512BW (byte and word instructions)
static const int CPU_FEATURE_BIT_AVX512VL = 1 << 31; // AVX512VL (vector length extensions)
static const int CPU_FEATURE_BIT_AVX512IFMA = 1 << 21; // AVX512IFMA (integer fused multiple-add instructions)
/* cpuid[eax=7,ecx=0].ecx */
static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1; // AVX512VBMI (vector bit manipulation instructions)
#endif
#if defined(__X86_ASM__)
__noinline int64_t get_xcr0()
{
#if defined (__WIN32__) && !defined (__MINGW32__) && defined(_XCR_XFEATURE_ENABLED_MASK)
int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32
xcr0 = _xgetbv(0);
return xcr0;
#else
int xcr0 = 0;
__asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
return xcr0;
#endif
}
#endif
int getCPUFeatures()
{
#if defined(__X86_ASM__)
/* cache CPU features access */
static int cpu_features = 0;
if (cpu_features)
return cpu_features;
/* get number of CPUID leaves */
int cpuid_leaf0[4];
__cpuid(cpuid_leaf0, 0x00000000);
unsigned nIds = cpuid_leaf0[EAX];
/* get number of extended CPUID leaves */
int cpuid_leafe[4];
__cpuid(cpuid_leafe, 0x80000000);
unsigned nExIds = cpuid_leafe[EAX];
/* get CPUID leaves for EAX = 1,7, and 0x80000001 */
int cpuid_leaf_1[4] = { 0,0,0,0 };
int cpuid_leaf_7[4] = { 0,0,0,0 };
int cpuid_leaf_e1[4] = { 0,0,0,0 };
if (nIds >= 1) __cpuid (cpuid_leaf_1,0x00000001);
#if _WIN32
#if _MSC_VER && (_MSC_FULL_VER < 160040219)
#elif defined(_MSC_VER)
if (nIds >= 7) __cpuidex(cpuid_leaf_7,0x00000007,0);
#endif
#else
if (nIds >= 7) __cpuid_count(cpuid_leaf_7,0x00000007,0);
#endif
if (nExIds >= 0x80000001) __cpuid(cpuid_leaf_e1,0x80000001);
/* detect if OS saves XMM, YMM, and ZMM states */
bool xmm_enabled = true;
bool ymm_enabled = false;
bool zmm_enabled = false;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_OXSAVE) {
int64_t xcr0 = get_xcr0();
xmm_enabled = ((xcr0 & 0x02) == 0x02); /* checks if xmm are enabled in XCR0 */
ymm_enabled = xmm_enabled && ((xcr0 & 0x04) == 0x04); /* checks if ymm state are enabled in XCR0 */
zmm_enabled = ymm_enabled && ((xcr0 & 0xE0) == 0xE0); /* checks if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled in XCR0 */
}
if (xmm_enabled) cpu_features |= CPU_FEATURE_XMM_ENABLED;
if (ymm_enabled) cpu_features |= CPU_FEATURE_YMM_ENABLED;
if (zmm_enabled) cpu_features |= CPU_FEATURE_ZMM_ENABLED;
if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE ) cpu_features |= CPU_FEATURE_SSE;
if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE2 ) cpu_features |= CPU_FEATURE_SSE2;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE3 ) cpu_features |= CPU_FEATURE_SSE3;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSSE3 ) cpu_features |= CPU_FEATURE_SSSE3;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_1) cpu_features |= CPU_FEATURE_SSE41;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_2) cpu_features |= CPU_FEATURE_SSE42;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_POPCNT) cpu_features |= CPU_FEATURE_POPCNT;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_AVX ) cpu_features |= CPU_FEATURE_AVX;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_F16C ) cpu_features |= CPU_FEATURE_F16C;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_RDRAND) cpu_features |= CPU_FEATURE_RDRAND;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX2 ) cpu_features |= CPU_FEATURE_AVX2;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_FMA3 ) cpu_features |= CPU_FEATURE_FMA3;
if (cpuid_leaf_e1[ECX] & CPU_FEATURE_BIT_LZCNT) cpu_features |= CPU_FEATURE_LZCNT;
if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI1 ) cpu_features |= CPU_FEATURE_BMI1;
if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI2 ) cpu_features |= CPU_FEATURE_BMI2;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512F ) cpu_features |= CPU_FEATURE_AVX512F;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512DQ ) cpu_features |= CPU_FEATURE_AVX512DQ;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512PF ) cpu_features |= CPU_FEATURE_AVX512PF;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER ) cpu_features |= CPU_FEATURE_AVX512ER;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512CD ) cpu_features |= CPU_FEATURE_AVX512CD;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512BW ) cpu_features |= CPU_FEATURE_AVX512BW;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) cpu_features |= CPU_FEATURE_AVX512IFMA;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL ) cpu_features |= CPU_FEATURE_AVX512VL;
if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI;
#if defined(__MACOSX__)
if ( (cpu_features & CPU_FEATURE_AVX512F)
|| (cpu_features & CPU_FEATURE_AVX512DQ)
|| (cpu_features & CPU_FEATURE_AVX512CD)
|| (cpu_features & CPU_FEATURE_AVX512BW)
|| (cpu_features & CPU_FEATURE_AVX512VL) )
{
// on macOS AVX512 will be enabled automatically by the kernel when the first AVX512 instruction is called
// see https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176
// therefore we ignore the state of XCR0
cpu_features |= CPU_FEATURE_ZMM_ENABLED;
}
#endif
return cpu_features;
#elif defined(__ARM_NEON) || defined(__EMSCRIPTEN__)
int cpu_features = CPU_FEATURE_NEON|CPU_FEATURE_SSE|CPU_FEATURE_SSE2;
cpu_features |= CPU_FEATURE_SSE3|CPU_FEATURE_SSSE3|CPU_FEATURE_SSE42;
cpu_features |= CPU_FEATURE_XMM_ENABLED;
cpu_features |= CPU_FEATURE_YMM_ENABLED;
cpu_features |= CPU_FEATURE_SSE41 | CPU_FEATURE_RDRAND | CPU_FEATURE_F16C;
cpu_features |= CPU_FEATURE_POPCNT;
cpu_features |= CPU_FEATURE_AVX;
cpu_features |= CPU_FEATURE_AVX2;
cpu_features |= CPU_FEATURE_FMA3;
cpu_features |= CPU_FEATURE_LZCNT;
cpu_features |= CPU_FEATURE_BMI1;
cpu_features |= CPU_FEATURE_BMI2;
cpu_features |= CPU_FEATURE_NEON_2X;
return cpu_features;
#else
/* Unknown CPU. */
return 0;
#endif
}
std::string stringOfCPUFeatures(int features)
{
std::string str;
if (features & CPU_FEATURE_XMM_ENABLED) str += "XMM ";
if (features & CPU_FEATURE_YMM_ENABLED) str += "YMM ";
if (features & CPU_FEATURE_ZMM_ENABLED) str += "ZMM ";
if (features & CPU_FEATURE_SSE ) str += "SSE ";
if (features & CPU_FEATURE_SSE2 ) str += "SSE2 ";
if (features & CPU_FEATURE_SSE3 ) str += "SSE3 ";
if (features & CPU_FEATURE_SSSE3 ) str += "SSSE3 ";
if (features & CPU_FEATURE_SSE41 ) str += "SSE4.1 ";
if (features & CPU_FEATURE_SSE42 ) str += "SSE4.2 ";
if (features & CPU_FEATURE_POPCNT) str += "POPCNT ";
if (features & CPU_FEATURE_AVX ) str += "AVX ";
if (features & CPU_FEATURE_F16C ) str += "F16C ";
if (features & CPU_FEATURE_RDRAND) str += "RDRAND ";
if (features & CPU_FEATURE_AVX2 ) str += "AVX2 ";
if (features & CPU_FEATURE_FMA3 ) str += "FMA3 ";
if (features & CPU_FEATURE_LZCNT ) str += "LZCNT ";
if (features & CPU_FEATURE_BMI1 ) str += "BMI1 ";
if (features & CPU_FEATURE_BMI2 ) str += "BMI2 ";
if (features & CPU_FEATURE_AVX512F) str += "AVX512F ";
if (features & CPU_FEATURE_AVX512DQ) str += "AVX512DQ ";
if (features & CPU_FEATURE_AVX512PF) str += "AVX512PF ";
if (features & CPU_FEATURE_AVX512ER) str += "AVX512ER ";
if (features & CPU_FEATURE_AVX512CD) str += "AVX512CD ";
if (features & CPU_FEATURE_AVX512BW) str += "AVX512BW ";
if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL ";
if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA ";
if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI ";
if (features & CPU_FEATURE_NEON) str += "NEON ";
if (features & CPU_FEATURE_NEON_2X) str += "2xNEON ";
return str;
}
std::string stringOfISA (int isa)
{
if (isa == SSE) return "SSE";
if (isa == SSE2) return "SSE2";
if (isa == SSE3) return "SSE3";
if (isa == SSSE3) return "SSSE3";
if (isa == SSE41) return "SSE4.1";
if (isa == SSE42) return "SSE4.2";
if (isa == AVX) return "AVX";
if (isa == AVX2) return "AVX2";
if (isa == AVX512) return "AVX512";
if (isa == NEON) return "NEON";
if (isa == NEON_2X) return "2xNEON";
return "UNKNOWN";
}
bool hasISA(int features, int isa) {
return (features & isa) == isa;
}
std::string supportedTargetList (int features)
{
std::string v;
if (hasISA(features,SSE)) v += "SSE ";
if (hasISA(features,SSE2)) v += "SSE2 ";
if (hasISA(features,SSE3)) v += "SSE3 ";
if (hasISA(features,SSSE3)) v += "SSSE3 ";
if (hasISA(features,SSE41)) v += "SSE4.1 ";
if (hasISA(features,SSE42)) v += "SSE4.2 ";
if (hasISA(features,AVX)) v += "AVX ";
if (hasISA(features,AVXI)) v += "AVXI ";
if (hasISA(features,AVX2)) v += "AVX2 ";
if (hasISA(features,AVX512)) v += "AVX512 ";
if (hasISA(features,NEON)) v += "NEON ";
if (hasISA(features,NEON_2X)) v += "2xNEON ";
return v;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Windows Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__WIN32__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <psapi.h>
namespace embree
{
std::string getExecutableFileName() {
char filename[1024];
if (!GetModuleFileName(nullptr, filename, sizeof(filename)))
return std::string();
return std::string(filename);
}
unsigned int getNumberOfLogicalThreads()
{
static int nThreads = -1;
if (nThreads != -1) return nThreads;
typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
HMODULE hlib = LoadLibrary("Kernel32");
GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc) GetProcAddress(hlib, "GetActiveProcessorCount");
if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount)
{
int groups = pGetActiveProcessorGroupCount();
int totalProcessors = 0;
for (int i = 0; i < groups; i++)
totalProcessors += pGetActiveProcessorCount(i);
nThreads = totalProcessors;
}
else
{
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
nThreads = sysinfo.dwNumberOfProcessors;
}
assert(nThreads);
return nThreads;
}
int getTerminalWidth()
{
HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE);
if (handle == INVALID_HANDLE_VALUE) return 80;
CONSOLE_SCREEN_BUFFER_INFO info;
memset(&info,0,sizeof(info));
GetConsoleScreenBufferInfo(handle, &info);
return info.dwSize.X;
}
double getSeconds()
{
LARGE_INTEGER freq, val;
QueryPerformanceFrequency(&freq);
QueryPerformanceCounter(&val);
return (double)val.QuadPart / (double)freq.QuadPart;
}
void sleepSeconds(double t) {
Sleep(DWORD(1000.0*t));
}
size_t getVirtualMemoryBytes()
{
PROCESS_MEMORY_COUNTERS info;
GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
return (size_t)info.QuotaPeakPagedPoolUsage;
}
size_t getResidentMemoryBytes()
{
PROCESS_MEMORY_COUNTERS info;
GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
return (size_t)info.WorkingSetSize;
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Linux Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__LINUX__)
#include <stdio.h>
#include <unistd.h>
namespace embree
{
std::string getExecutableFileName()
{
std::string pid = "/proc/" + toString(getpid()) + "/exe";
char buf[4096];
memset(buf,0,sizeof(buf));
if (readlink(pid.c_str(), buf, sizeof(buf)-1) == -1)
return std::string();
return std::string(buf);
}
size_t getVirtualMemoryBytes()
{
size_t virt, resident, shared;
std::ifstream buffer("/proc/self/statm");
buffer >> virt >> resident >> shared;
return virt*sysconf(_SC_PAGE_SIZE);
}
size_t getResidentMemoryBytes()
{
size_t virt, resident, shared;
std::ifstream buffer("/proc/self/statm");
buffer >> virt >> resident >> shared;
return resident*sysconf(_SC_PAGE_SIZE);
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// FreeBSD Platform
////////////////////////////////////////////////////////////////////////////////
#if defined (__FreeBSD__)
#include <sys/sysctl.h>
namespace embree
{
std::string getExecutableFileName()
{
const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
char buf[4096];
memset(buf,0,sizeof(buf));
size_t len = sizeof(buf)-1;
if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1)
return std::string();
return std::string(buf);
}
size_t getVirtualMemoryBytes() {
return 0;
}
size_t getResidentMemoryBytes() {
return 0;
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Mac OS X Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__MACOSX__)
#include <mach-o/dyld.h>
namespace embree
{
std::string getExecutableFileName()
{
char buf[4096];
uint32_t size = sizeof(buf);
if (_NSGetExecutablePath(buf, &size) != 0)
return std::string();
return std::string(buf);
}
size_t getVirtualMemoryBytes() {
return 0;
}
size_t getResidentMemoryBytes() {
return 0;
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Unix Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__UNIX__)
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <pthread.h>
#if defined(__EMSCRIPTEN__)
#include <emscripten.h>
extern "C" {
extern int godot_js_os_hw_concurrency_get();
}
#endif
namespace embree
{
unsigned int getNumberOfLogicalThreads()
{
static int nThreads = -1;
if (nThreads != -1) return nThreads;
#if defined(__MACOSX__) || defined(__ANDROID__)
nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
assert(nThreads);
#elif defined(__EMSCRIPTEN__)
nThreads = godot_js_os_hw_concurrency_get();
#if 0
// WebAssembly supports pthreads, but not pthread_getaffinity_np. Get the number of logical
// threads from the browser or Node.js using JavaScript.
nThreads = MAIN_THREAD_EM_ASM_INT({
const isBrowser = typeof window !== 'undefined';
const isNode = typeof process !== 'undefined' && process.versions != null &&
process.versions.node != null;
if (isBrowser) {
// Return 1 if the browser does not expose hardwareConcurrency.
return window.navigator.hardwareConcurrency || 1;
} else if (isNode) {
return require('os').cpus().length;
} else {
return 1;
}
});
#endif
#else
cpu_set_t set;
if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
nThreads = CPU_COUNT(&set);
#endif
assert(nThreads);
return nThreads;
}
int getTerminalWidth()
{
struct winsize info;
if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &info) < 0) return 80;
return info.ws_col;
}
double getSeconds() {
struct timeval tp; gettimeofday(&tp,nullptr);
return double(tp.tv_sec) + double(tp.tv_usec)/1E6;
}
void sleepSeconds(double t) {
usleep(1000000.0*t);
}
}
#endif
#if defined(__INTEL_LLVM_COMPILER)
#pragma clang diagnostic pop
#endif
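// Minimal usage sketch for the platform helpers implemented above. Illustrative
// only: main() below is not part of Embree; it assumes sysinfo.h is on the
// include path and this translation unit is linked in.
#include "sysinfo.h"
#include <iostream>
int main()
{
  std::cout << "logical threads: " << embree::getNumberOfLogicalThreads() << std::endl;
  std::cout << "terminal width:  " << embree::getTerminalWidth() << " columns" << std::endl;
  const double t0 = embree::getSeconds();
  embree::sleepSeconds(0.25);
  std::cout << "slept for:       " << (embree::getSeconds() - t0) << " s" << std::endl;
  std::cout << "virtual memory:  " << embree::getVirtualMemoryBytes() << " bytes" << std::endl;
  std::cout << "resident memory: " << embree::getResidentMemoryBytes() << " bytes" << std::endl;
  return 0;
}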

187
thirdparty/embree/common/sys/sysinfo.h vendored Normal file
View File

@@ -0,0 +1,187 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#define CACHELINE_SIZE 64
#if !defined(PAGE_SIZE)
#define PAGE_SIZE 4096
#endif
#define PAGE_SIZE_2M (2*1024*1024)
#define PAGE_SIZE_4K (4*1024)
#include "platform.h"
/* define isa namespace and ISA bitvector */
#if defined (__AVX512VL__)
# define isa avx512
# define ISA AVX512
# define ISA_STR "AVX512"
#elif defined (__AVX2__)
# define isa avx2
# define ISA AVX2
# define ISA_STR "AVX2"
#elif defined(__AVXI__)
# define isa avxi
# define ISA AVXI
# define ISA_STR "AVXI"
#elif defined(__AVX__)
# define isa avx
# define ISA AVX
# define ISA_STR "AVX"
#elif defined (__SSE4_2__)
# define isa sse42
# define ISA SSE42
# define ISA_STR "SSE4.2"
//#elif defined (__SSE4_1__) // we demote this to SSE2, MacOSX code compiles with SSE41 by default with XCode 11
//# define isa sse41
//# define ISA SSE41
//# define ISA_STR "SSE4.1"
//#elif defined(__SSSE3__) // we demote this to SSE2, MacOSX code compiles with SSSE3 by default with ICC
//# define isa ssse3
//# define ISA SSSE3
//# define ISA_STR "SSSE3"
//#elif defined(__SSE3__) // we demote this to SSE2, MacOSX code compiles with SSE3 by default with clang
//# define isa sse3
//# define ISA SSE3
//# define ISA_STR "SSE3"
#elif defined(__SSE2__) || defined(__SSE3__) || defined(__SSSE3__)
# define isa sse2
# define ISA SSE2
# define ISA_STR "SSE2"
#elif defined(__SSE__)
# define isa sse
# define ISA SSE
# define ISA_STR "SSE"
#elif defined(__ARM_NEON)
// NOTE(LTE): Use sse2 for `isa` for compatibility at the moment.
#define isa sse2
#define ISA NEON
#define ISA_STR "NEON"
#else
#error Unknown ISA
#endif
namespace embree
{
enum class CPU
{
XEON_ICE_LAKE,
CORE_ICE_LAKE,
CORE_TIGER_LAKE,
CORE_COMET_LAKE,
CORE_CANNON_LAKE,
CORE_KABY_LAKE,
XEON_SKY_LAKE,
CORE_SKY_LAKE,
XEON_PHI_KNIGHTS_MILL,
XEON_PHI_KNIGHTS_LANDING,
XEON_BROADWELL,
CORE_BROADWELL,
XEON_HASWELL,
CORE_HASWELL,
XEON_IVY_BRIDGE,
CORE_IVY_BRIDGE,
SANDY_BRIDGE,
NEHALEM,
CORE2,
CORE1,
ARM,
UNKNOWN,
};
/*! get the full path to the running executable */
std::string getExecutableFileName();
/*! return platform name */
std::string getPlatformName();
/*! get the full name of the compiler */
std::string getCompilerName();
/*! return the vendor string of the CPU */
std::string getCPUVendor();
/*! get microprocessor model */
CPU getCPUModel();
/*! converts CPU model into string */
std::string stringOfCPUModel(CPU model);
/*! CPU features */
static const int CPU_FEATURE_SSE = 1 << 0;
static const int CPU_FEATURE_SSE2 = 1 << 1;
static const int CPU_FEATURE_SSE3 = 1 << 2;
static const int CPU_FEATURE_SSSE3 = 1 << 3;
static const int CPU_FEATURE_SSE41 = 1 << 4;
static const int CPU_FEATURE_SSE42 = 1 << 5;
static const int CPU_FEATURE_POPCNT = 1 << 6;
static const int CPU_FEATURE_AVX = 1 << 7;
static const int CPU_FEATURE_F16C = 1 << 8;
static const int CPU_FEATURE_RDRAND = 1 << 9;
static const int CPU_FEATURE_AVX2 = 1 << 10;
static const int CPU_FEATURE_FMA3 = 1 << 11;
static const int CPU_FEATURE_LZCNT = 1 << 12;
static const int CPU_FEATURE_BMI1 = 1 << 13;
static const int CPU_FEATURE_BMI2 = 1 << 14;
static const int CPU_FEATURE_AVX512F = 1 << 16;
static const int CPU_FEATURE_AVX512DQ = 1 << 17;
static const int CPU_FEATURE_AVX512PF = 1 << 18;
static const int CPU_FEATURE_AVX512ER = 1 << 19;
static const int CPU_FEATURE_AVX512CD = 1 << 20;
static const int CPU_FEATURE_AVX512BW = 1 << 21;
static const int CPU_FEATURE_AVX512VL = 1 << 22;
static const int CPU_FEATURE_AVX512IFMA = 1 << 23;
static const int CPU_FEATURE_AVX512VBMI = 1 << 24;
static const int CPU_FEATURE_XMM_ENABLED = 1 << 25;
static const int CPU_FEATURE_YMM_ENABLED = 1 << 26;
static const int CPU_FEATURE_ZMM_ENABLED = 1 << 27;
static const int CPU_FEATURE_NEON = 1 << 28;
static const int CPU_FEATURE_NEON_2X = 1 << 29;
/*! get CPU features */
int getCPUFeatures();
/*! convert CPU features into a string */
std::string stringOfCPUFeatures(int features);
/*! creates a string listing all ISA targets supported by the given feature set */
std::string supportedTargetList (int features);
/*! ISAs */
static const int SSE = CPU_FEATURE_SSE | CPU_FEATURE_XMM_ENABLED;
static const int SSE2 = SSE | CPU_FEATURE_SSE2;
static const int SSE3 = SSE2 | CPU_FEATURE_SSE3;
static const int SSSE3 = SSE3 | CPU_FEATURE_SSSE3;
static const int SSE41 = SSSE3 | CPU_FEATURE_SSE41;
static const int SSE42 = SSE41 | CPU_FEATURE_SSE42 | CPU_FEATURE_POPCNT;
static const int AVX = SSE42 | CPU_FEATURE_AVX | CPU_FEATURE_YMM_ENABLED;
static const int AVXI = AVX | CPU_FEATURE_F16C;
static const int AVX2 = AVXI | CPU_FEATURE_AVX2 | CPU_FEATURE_FMA3 | CPU_FEATURE_BMI1 | CPU_FEATURE_BMI2 | CPU_FEATURE_LZCNT;
static const int AVX512 = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED;
static const int NEON = CPU_FEATURE_NEON | CPU_FEATURE_SSE | CPU_FEATURE_SSE2;
static const int NEON_2X = CPU_FEATURE_NEON_2X | AVX2;
/*! converts ISA bitvector into a string */
std::string stringOfISA(int features);
/*! return the number of logical threads of the system */
unsigned int getNumberOfLogicalThreads();
/*! returns the width of the terminal window in characters */
int getTerminalWidth();
/*! returns performance counter in seconds */
double getSeconds();
/*! sleeps the specified number of seconds */
void sleepSeconds(double t);
/*! returns virtual address space occupied by process */
size_t getVirtualMemoryBytes();
/*! returns resident memory required by process */
size_t getResidentMemoryBytes();
}
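// Usage sketch for the feature/ISA interface declared above (illustrative only;
// exampleReportISA() is not part of Embree and can be called from any main()).
// An ISA counts as supported when every bit of its composite mask is present in
// the feature word, including the XMM/YMM/ZMM "enabled" bits contributed by the
// operating system; that is exactly the subset test hasISA() performs in
// sysinfo.cpp.
#include <iostream>
inline void exampleReportISA()
{
  const int features = embree::getCPUFeatures();
  std::cout << "features: " << embree::stringOfCPUFeatures(features) << std::endl;
  std::cout << "targets:  " << embree::supportedTargetList(features) << std::endl;
  if ((features & embree::AVX2) == embree::AVX2)
    std::cout << embree::stringOfISA(embree::AVX2) << " kernels can be selected" << std::endl;
  else
    std::cout << "AVX2 not fully supported on this machine" << std::endl;
}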

485
thirdparty/embree/common/sys/thread.cpp vendored Normal file
View File

@@ -0,0 +1,485 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "thread.h"
#include "sysinfo.h"
#include "estring.h"
#include <iostream>
#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#else
#include <xmmintrin.h>
#if defined(__EMSCRIPTEN__)
#include "../simd/wasm/emulation.h"
#endif
#endif
#if defined(PTHREADS_WIN32)
#pragma comment (lib, "pthreadVC.lib")
#endif
////////////////////////////////////////////////////////////////////////////////
/// Windows Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__WIN32__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace embree
{
/*! set the affinity of a given thread */
void setAffinity(HANDLE thread, ssize_t affinity)
{
typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
HMODULE hlib = LoadLibrary("Kernel32");
GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
{
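/* translate the flat affinity index into a (processor group, processor number)
   pair by walking the active processor groups in order */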
int groups = pGetActiveProcessorGroupCount();
int totalProcessors = 0, group = 0, number = 0;
for (int i = 0; i<groups; i++) {
int processors = pGetActiveProcessorCount(i);
if (totalProcessors + processors > affinity) {
group = i;
number = (int)affinity - totalProcessors;
break;
}
totalProcessors += processors;
}
GROUP_AFFINITY groupAffinity;
groupAffinity.Group = (WORD)group;
groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
groupAffinity.Reserved[0] = 0;
groupAffinity.Reserved[1] = 0;
groupAffinity.Reserved[2] = 0;
if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
WARNING("SetThreadGroupAffinity failed"); // deliberately only a warning
PROCESSOR_NUMBER processorNumber;
processorNumber.Group = group;
processorNumber.Number = number;
processorNumber.Reserved = 0;
if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
WARNING("SetThreadIdealProcessorEx failed"); // deliberately only a warning
}
else
{
if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
WARNING("SetThreadAffinityMask failed"); // deliberately only a warning
if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
WARNING("SetThreadIdealProcessor failed"); // deliberately only a warning
}
}
/*! set affinity of the calling thread */
void setAffinity(ssize_t affinity) {
setAffinity(GetCurrentThread(), affinity);
}
struct ThreadStartupData
{
public:
ThreadStartupData (thread_func f, void* arg)
: f(f), arg(arg) {}
public:
thread_func f;
void* arg;
};
DWORD WINAPI threadStartup(LPVOID ptr)
{
ThreadStartupData* parg = (ThreadStartupData*) ptr;
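/* enable flush-to-zero and denormals-are-zero in MXCSR so worker threads avoid
   the performance penalty of handling denormal floats */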
_mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
parg->f(parg->arg);
delete parg;
return 0;
}
#if !defined(PTHREADS_WIN32)
/*! creates a hardware thread running on specific core */
thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
{
HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);
if (thread == nullptr) FATAL("CreateThread failed");
if (threadID >= 0) setAffinity(thread, threadID);
return thread_t(thread);
}
/*! the thread calling this function gets yielded */
void yield() {
SwitchToThread();
}
/*! waits until the given thread has terminated */
void join(thread_t tid) {
WaitForSingleObject(HANDLE(tid), INFINITE);
CloseHandle(HANDLE(tid));
}
/*! destroy a hardware thread by its handle */
void destroyThread(thread_t tid) {
TerminateThread(HANDLE(tid),0);
CloseHandle(HANDLE(tid));
}
/*! creates thread local storage */
tls_t createTls() {
return tls_t(size_t(TlsAlloc()));
}
/*! set the thread local storage pointer */
void setTls(tls_t tls, void* const ptr) {
TlsSetValue(DWORD(size_t(tls)), ptr);
}
/*! return the thread local storage pointer */
void* getTls(tls_t tls) {
return TlsGetValue(DWORD(size_t(tls)));
}
/*! destroys thread local storage identifier */
void destroyTls(tls_t tls) {
TlsFree(DWORD(size_t(tls)));
}
#endif
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Linux Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__LINUX__) && !defined(__ANDROID__)
#include <fstream>
#include <sstream>
#include <algorithm>
namespace embree
{
static MutexSys mutex;
static std::vector<size_t> threadIDs;
/* changes thread ID mapping such that we first fill up all threads on one core */
size_t mapThreadID(size_t threadID)
{
Lock<MutexSys> lock(mutex);
if (threadIDs.size() == 0)
{
/* parse thread/CPU topology */
for (size_t cpuID=0;;cpuID++)
{
std::fstream fs;
std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list");
fs.open (cpu.c_str(), std::fstream::in);
if (fs.fail()) break;
int i;
while (fs >> i)
{
if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; }))
threadIDs.push_back(i);
if (fs.peek() == ',')
fs.ignore();
}
fs.close();
}
#if 0
for (size_t i=0;i<threadIDs.size();i++)
std::cout << i << " -> " << threadIDs[i] << std::endl;
#endif
/* verify the mapping and do not use it if the mapping has errors */
for (size_t i=0;i<threadIDs.size();i++) {
for (size_t j=0;j<threadIDs.size();j++) {
if (i != j && threadIDs[i] == threadIDs[j]) {
threadIDs.clear();
}
}
}
}
/* re-map threadIDs if mapping is available */
size_t ID = threadID;
if (threadID < threadIDs.size())
ID = threadIDs[threadID];
/* find correct thread to affinitize to */
cpu_set_t set;
CPU_ZERO(&set);
if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
{
for (int i=0, j=0; i<CPU_SETSIZE; i++)
{
if (!CPU_ISSET(i,&set)) continue;
if (j == ID) {
ID = i;
break;
}
j++;
}
}
return ID;
}
/*! set affinity of the calling thread */
void setAffinity(ssize_t affinity)
{
cpu_set_t cset;
CPU_ZERO(&cset);
//size_t threadID = mapThreadID(affinity); // this is not working properly in LXC containers when some processors are disabled
size_t threadID = affinity;
CPU_SET(threadID, &cset);
pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Android Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__ANDROID__)
namespace embree
{
/*! set affinity of the calling thread */
void setAffinity(ssize_t affinity)
{
cpu_set_t cset;
CPU_ZERO(&cset);
CPU_SET(affinity, &cset);
sched_setaffinity(0, sizeof(cset), &cset);
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// FreeBSD Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__FreeBSD__)
#include <pthread_np.h>
namespace embree
{
/*! set affinity of the calling thread */
void setAffinity(ssize_t affinity)
{
cpuset_t cset;
CPU_ZERO(&cset);
CPU_SET(affinity, &cset);
pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// WebAssembly Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__EMSCRIPTEN__)
namespace embree
{
/*! set affinity of the calling thread */
void setAffinity(ssize_t affinity)
{
// Setting thread affinity is not supported in WASM.
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// MacOSX Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__MACOSX__)
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
#include <mach/mach_init.h>
namespace embree
{
/*! set affinity of the calling thread */
void setAffinity(ssize_t affinity)
{
#if !defined(__ARM_NEON) // affinity seems not supported on M1 chip
thread_affinity_policy ap;
ap.affinity_tag = affinity;
if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
WARNING("setting thread affinity failed"); // deliberately only a warning
#endif
}
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Unix Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__UNIX__) || defined(PTHREADS_WIN32)
#include <pthread.h>
#include <sched.h>
#if defined(__USE_NUMA__)
#include <numa.h>
#endif
namespace embree
{
struct ThreadStartupData
{
public:
ThreadStartupData (thread_func f, void* arg, int affinity)
: f(f), arg(arg), affinity(affinity) {}
public:
thread_func f;
void* arg;
ssize_t affinity;
};
static void* threadStartup(ThreadStartupData* parg)
{
_mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
/*! Mac OS X does not support setting affinity at thread creation time */
#if defined(__MACOSX__)
if (parg->affinity >= 0)
setAffinity(parg->affinity);
#endif
parg->f(parg->arg);
delete parg;
return nullptr;
}
/*! creates a hardware thread running on specific core */
thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
{
/* set stack size */
pthread_attr_t attr;
pthread_attr_init(&attr);
if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size);
/* create thread */
pthread_t* tid = new pthread_t;
if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) {
pthread_attr_destroy(&attr);
delete tid;
FATAL("pthread_create failed");
}
pthread_attr_destroy(&attr);
/* set affinity */
#if defined(__LINUX__) && !defined(__ANDROID__)
if (threadID >= 0) {
cpu_set_t cset;
CPU_ZERO(&cset);
threadID = mapThreadID(threadID);
CPU_SET(threadID, &cset);
pthread_setaffinity_np(*tid, sizeof(cset), &cset);
}
#elif defined(__FreeBSD__)
if (threadID >= 0) {
cpuset_t cset;
CPU_ZERO(&cset);
CPU_SET(threadID, &cset);
pthread_setaffinity_np(*tid, sizeof(cset), &cset);
}
#elif defined(__ANDROID__)
if (threadID >= 0) {
cpu_set_t cset;
CPU_ZERO(&cset);
CPU_SET(threadID, &cset);
sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset);
}
#endif
return thread_t(tid);
}
/*! the thread calling this function gets yielded */
void yield() {
sched_yield();
}
/*! waits until the given thread has terminated */
void join(thread_t tid) {
if (pthread_join(*(pthread_t*)tid, nullptr) != 0)
FATAL("pthread_join failed");
delete (pthread_t*)tid;
}
/*! destroy a hardware thread by its handle */
void destroyThread(thread_t tid) {
#if defined(__ANDROID__)
FATAL("Can't destroy threads on Android."); // pthread_cancel not implemented.
#else
pthread_cancel(*(pthread_t*)tid);
delete (pthread_t*)tid;
#endif
}
/*! creates thread local storage */
tls_t createTls()
{
pthread_key_t* key = new pthread_key_t;
if (pthread_key_create(key,nullptr) != 0) {
delete key;
FATAL("pthread_key_create failed");
}
return tls_t(key);
}
/*! return the thread local storage pointer */
void* getTls(tls_t tls)
{
assert(tls);
return pthread_getspecific(*(pthread_key_t*)tls);
}
/*! set the thread local storage pointer */
void setTls(tls_t tls, void* const ptr)
{
assert(tls);
if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0)
FATAL("pthread_setspecific failed");
}
/*! destroys thread local storage identifier */
void destroyTls(tls_t tls)
{
assert(tls);
if (pthread_key_delete(*(pthread_key_t*)tls) != 0)
FATAL("pthread_key_delete failed");
delete (pthread_key_t*)tls;
}
}
#endif

49
thirdparty/embree/common/sys/thread.h vendored Normal file
View File

@@ -0,0 +1,49 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
#include "mutex.h"
#include "alloc.h"
#include "vector.h"
#include <vector>
namespace embree
{
/*! type for thread */
typedef struct opaque_thread_t* thread_t;
/*! signature of thread start function */
typedef void (*thread_func)(void*);
/*! creates a hardware thread running on specific logical thread */
thread_t createThread(thread_func f, void* arg, size_t stack_size = 0, ssize_t threadID = -1);
/*! set affinity of the calling thread */
void setAffinity(ssize_t affinity);
/*! the thread calling this function gets yielded */
void yield();
/*! waits until the given thread has terminated */
void join(thread_t tid);
/*! destroy handle of a thread */
void destroyThread(thread_t tid);
/*! type for handle to thread local storage */
typedef struct opaque_tls_t* tls_t;
/*! creates thread local storage */
tls_t createTls();
/*! set the thread local storage pointer */
void setTls(tls_t tls, void* const ptr);
/*! return the thread local storage pointer */
void* getTls(tls_t tls);
/*! destroys thread local storage identifier */
void destroyTls(tls_t tls);
}
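// Usage sketch for the thread and TLS interface above (illustrative only;
// exampleThreadUsage() and its local worker function are not part of Embree).
#include <iostream>
inline void exampleThreadUsage()
{
  struct Local {
    static void worker(void* arg) {
      int* value = static_cast<int*>(arg);
      std::cout << "worker received " << *value << std::endl;
    }
  };
  int payload = 42;
  // stack_size = 0 keeps the platform default stack; threadID = 0 pins the
  // worker to the first logical thread (pass -1 to leave it unpinned).
  embree::thread_t tid = embree::createThread(Local::worker, &payload, 0, 0);
  embree::join(tid);
  // thread local storage follows the same create/set/get/destroy pattern
  embree::tls_t slot = embree::createTls();
  embree::setTls(slot, &payload);
  std::cout << "tls holds " << *static_cast<int*>(embree::getTls(slot)) << std::endl;
  embree::destroyTls(slot);
}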

266
thirdparty/embree/common/sys/vector.h vendored Normal file
View File

@@ -0,0 +1,266 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "alloc.h"
#include <algorithm>
namespace embree
{
class Device;
template<typename T, typename allocator>
class vector_t
{
public:
typedef T value_type;
typedef T* iterator;
typedef const T* const_iterator;
__forceinline vector_t ()
: size_active(0), size_alloced(0), items(nullptr) {}
__forceinline explicit vector_t (size_t sz)
: size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); }
template<typename M>
__forceinline explicit vector_t (M alloc, size_t sz)
: alloc(alloc), size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); }
__forceinline vector_t (Device* alloc)
: vector_t(alloc,0) {}
__forceinline vector_t(void* data, size_t bytes)
: size_active(0), size_alloced(bytes/sizeof(T)), items((T*)data) {}
__forceinline ~vector_t() {
clear();
}
__forceinline vector_t (const vector_t& other)
{
size_active = other.size_active;
size_alloced = other.size_alloced;
items = alloc.allocate(size_alloced);
for (size_t i=0; i<size_active; i++)
::new (&items[i]) value_type(other.items[i]);
}
__forceinline vector_t (vector_t&& other)
: alloc(std::move(other.alloc))
{
size_active = other.size_active; other.size_active = 0;
size_alloced = other.size_alloced; other.size_alloced = 0;
items = other.items; other.items = nullptr;
}
__forceinline vector_t& operator=(const vector_t& other)
{
resize(other.size_active);
for (size_t i=0; i<size_active; i++)
items[i] = value_type(other.items[i]);
return *this;
}
__forceinline vector_t& operator=(vector_t&& other)
{
clear();
alloc = std::move(other.alloc);
size_active = other.size_active; other.size_active = 0;
size_alloced = other.size_alloced; other.size_alloced = 0;
items = other.items; other.items = nullptr;
return *this;
}
__forceinline allocator& getAlloc() {
return alloc;
}
/********************** Iterators ****************************/
__forceinline iterator begin() { return items; };
__forceinline const_iterator begin() const { return items; };
__forceinline iterator end () { return items+size_active; };
__forceinline const_iterator end () const { return items+size_active; };
/********************** Capacity ****************************/
__forceinline bool empty () const { return size_active == 0; }
__forceinline size_t size () const { return size_active; }
__forceinline size_t capacity () const { return size_alloced; }
__forceinline void resize(size_t new_size) {
internal_resize(new_size,internal_grow_size(new_size));
}
__forceinline void reserve(size_t new_alloced)
{
/* do nothing if container already large enough */
if (new_alloced <= size_alloced)
return;
/* resize exact otherwise */
internal_resize(size_active,new_alloced);
}
__forceinline void shrink_to_fit() {
internal_resize(size_active,size_active);
}
/******************** Element access **************************/
__forceinline T& operator[](size_t i) { assert(i < size_active); return items[i]; }
__forceinline const T& operator[](size_t i) const { assert(i < size_active); return items[i]; }
__forceinline T& at(size_t i) { assert(i < size_active); return items[i]; }
__forceinline const T& at(size_t i) const { assert(i < size_active); return items[i]; }
__forceinline T& front() const { assert(size_active > 0); return items[0]; };
__forceinline T& back () const { assert(size_active > 0); return items[size_active-1]; };
__forceinline T* data() { return items; };
__forceinline const T* data() const { return items; };
/* dangerous: only use if you know what you're doing */
__forceinline void setDataPtr(T* data) { items = data; }
/******************** Modifiers **************************/
__forceinline void push_back(const T& nt)
{
const T v = nt; // need local copy as input reference could point to this vector
internal_resize(size_active,internal_grow_size(size_active+1));
::new (&items[size_active++]) T(v);
}
__forceinline void pop_back()
{
assert(!empty());
size_active--;
items[size_active].~T();
}
__forceinline void clear()
{
/* destroy elements */
for (size_t i=0; i<size_active; i++){
items[i].~T();
}
/* free memory */
alloc.deallocate(items,size_alloced);
items = nullptr;
size_active = size_alloced = 0;
}
/******************** Comparisons **************************/
friend bool operator== (const vector_t& a, const vector_t& b)
{
if (a.size() != b.size()) return false;
for (size_t i=0; i<a.size(); i++)
if (a[i] != b[i])
return false;
return true;
}
friend bool operator!= (const vector_t& a, const vector_t& b) {
return !(a==b);
}
private:
__forceinline void internal_resize_init(size_t new_active)
{
assert(size_active == 0);
assert(size_alloced == 0);
assert(items == nullptr);
if (new_active == 0) return;
items = alloc.allocate(new_active);
for (size_t i=0; i<new_active; i++) ::new (&items[i]) T();
size_active = new_active;
size_alloced = new_active;
}
__forceinline void internal_resize(size_t new_active, size_t new_alloced)
{
assert(new_active <= new_alloced);
/* destroy elements */
if (new_active < size_active)
{
for (size_t i=new_active; i<size_active; i++){
items[i].~T();
}
size_active = new_active;
}
/* only reallocate if necessary */
if (new_alloced == size_alloced) {
for (size_t i=size_active; i<new_active; i++) ::new (&items[i]) T;
size_active = new_active;
return;
}
/* reallocate and copy items */
T* old_items = items;
items = alloc.allocate(new_alloced);
for (size_t i=0; i<size_active; i++) {
::new (&items[i]) T(std::move(old_items[i]));
old_items[i].~T();
}
for (size_t i=size_active; i<new_active; i++) {
::new (&items[i]) T;
}
alloc.deallocate(old_items,size_alloced);
size_active = new_active;
size_alloced = new_alloced;
}
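/* growth policy used by resize() and push_back(): the very first allocation is
   sized exactly, afterwards the capacity doubles until it covers the request.
   E.g. growing an 8 element vector to 9 elements jumps straight to a capacity
   of 16, which keeps repeated push_back() calls amortized O(1). */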
__forceinline size_t internal_grow_size(size_t new_alloced)
{
/* do nothing if container already large enough */
if (new_alloced <= size_alloced)
return size_alloced;
/* if current size is 0 allocate exact requested size */
if (size_alloced == 0)
return new_alloced;
/* resize to next power of 2 otherwise */
size_t new_size_alloced = size_alloced;
while (new_size_alloced < new_alloced) {
new_size_alloced = std::max(size_t(1),2*new_size_alloced);
}
return new_size_alloced;
}
private:
allocator alloc;
size_t size_active; // number of valid items
size_t size_alloced; // number of items allocated
T* items; // data array
};
/*! vector class that performs standard allocations */
template<typename T>
using vector = vector_t<T,std::allocator<T>>;
/*! vector class that performs aligned allocations */
template<typename T>
using avector = vector_t<T,aligned_allocator<T,std::alignment_of<T>::value> >;
/*! vector class that performs OS allocations */
template<typename T>
using ovector = vector_t<T,os_allocator<T> >;
/*! vector class with externally managed data buffer */
template<typename T>
using evector = vector_t<T,no_allocator<T>>;
}
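// Usage sketch for vector_t and its allocator aliases (illustrative only;
// exampleVectorUsage() is not part of Embree). vector<T> uses std::allocator,
// while avector<T> aligns storage to alignof(T), which matters for SIMD types
// that require 16/32/64-byte alignment.
#include <iostream>
inline void exampleVectorUsage()
{
  embree::vector<int> v;
  for (int i = 0; i < 8; i++)
    v.push_back(i * i);   // first allocation is exact, afterwards capacity doubles
  v.resize(4);            // destroys the trailing elements but keeps the capacity
  std::cout << "size " << v.size() << ", capacity " << v.capacity() << std::endl;
  for (int x : v)         // begin()/end() make range-based for loops work
    std::cout << x << " ";
  std::cout << std::endl;
}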